Switched to tabs. Added missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -5,89 +5,89 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape an actor profile page into a profile object.
 *
 * Biography fields are collected from every link whose href starts with
 * `/babes`: the first `[key]` found in the decoded href names the field and
 * the text following the first `=` is its value.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} actorName - Name stored on the returned profile as `name`.
 * @returns {Object} Profile with `name`, `eyes`, `hair`, `ethnicity`, `bust`
 *   and `social`, plus `birthdate`, `birthPlace`, `waist`, `hip`, `height`,
 *   `weight` and `avatar` when the page provides them.
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// Measurements are read as "<number>,<rest>"; only the leading number is used.
	// NOTE(review): presumably metric first, imperial second - confirm against live pages.
	const leadingNumber = measurement => Number(String(measurement).split(',')[0]);

	const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), el => decodeURI(el.href)).reduce((acc, item) => {
		const keyMatch = item.match(/\[\w+\]/);

		if (!keyMatch) {
			return acc;
		}

		const key = keyMatch[0].slice(1, -1);
		const [, value] = item.split('=');

		// both hip and waist link to 'waist', assume biggest value is hip
		if (key === 'waist' && acc.waist) {
			// compare as numbers; comparing the raw strings would rank '100' below '61'
			if (leadingNumber(acc.waist) > leadingNumber(value)) {
				acc.hip = acc.waist;
				acc.waist = value;
			} else {
				acc.hip = value;
			}

			return acc;
		}

		acc[key] = value;

		return acc;
	}, {});

	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();

	// only join the parts that are actually present, so a missing part never renders as 'undefined'
	const birthPlaceParts = [bio.placeOfBirth, bio.country].filter(Boolean);

	if (birthPlaceParts.length > 0) profile.birthPlace = birthPlaceParts.join(', ');

	profile.eyes = bio.eyeColor;
	profile.hair = bio.hairColor;
	profile.ethnicity = bio.ethnicity;

	profile.bust = bio.bra;
	if (bio.waist) profile.waist = leadingNumber(bio.waist);
	if (bio.hip) profile.hip = leadingNumber(bio.hip);

	if (bio.height) profile.height = leadingNumber(bio.height);
	if (bio.weight) profile.weight = leadingNumber(bio.weight);

	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);

	// the image element may be missing entirely; placeholder images are not stored as avatar
	const avatarEl = document.querySelector('.profile-image-large img');

	if (avatarEl && !avatarEl.src.includes('placeholder')) {
		profile.avatar = { src: avatarEl.src, copyright: null };
	}

	return profile;
}
|
||||
|
||||
/**
 * Pick the first result link from a search results page.
 *
 * @param {string} html - Markup of the search results page.
 * @returns {string|null} The href of the first `a.image-link` element, or
 *   `null` when there is no such element or its href is empty.
 */
function scrapeSearch(html) {
	const { document } = new JSDOM(html).window;
	const firstResult = document.querySelector('a.image-link');

	return firstResult?.href || null;
}
|
||||
|
||||
/**
 * Fetch and scrape the profile of an actor.
 *
 * The profile page is first requested directly, using a slug made from the
 * lowercased name with whitespace runs replaced by dashes. When that request
 * does not return status 200, the site search is queried and the profile of
 * the first search result is requested instead.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null>} The scraped profile, or `null` when the
 *   search yields no result or the found profile page does not return 200.
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	// encode the name, otherwise spaces, '&' or '#' in it would corrupt the query string
	const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${encodeURIComponent(actorName)}`);
	const actorPath = scrapeSearch(searchRes.body.toString());

	if (!actorPath) {
		return null;
	}

	const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	return scrapeProfile(actorRes.body.toString(), actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user