forked from DebaucheryLibrarian/traxxx
Resolving actor birth and residence place before storage. Layout improvements.
This commit is contained in:
130
src/actors.js
130
src/actors.js
@@ -6,6 +6,7 @@ const knex = require('./knex');
|
||||
const argv = require('./argv');
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
const whereOr = require('./utils/where-or');
|
||||
const resolvePlace = require('./utils/resolve-place');
|
||||
const { createActorMediaDirectory, storeAvatars } = require('./media');
|
||||
|
||||
async function curateActor(actor) {
|
||||
@@ -18,27 +19,15 @@ async function curateActor(actor) {
|
||||
.where({ domain: 'actors', target_id: actor.id }),
|
||||
]);
|
||||
|
||||
return {
|
||||
const curatedActor = {
|
||||
id: actor.id,
|
||||
gender: actor.gender,
|
||||
name: actor.name,
|
||||
description: actor.description,
|
||||
birthdate: actor.birthdate && new Date(actor.birthdate),
|
||||
country: actor.country_alpha2,
|
||||
residencePlace: actor.residence_place,
|
||||
residenceCountry: actor.residence_country_alpha2
|
||||
? {
|
||||
alpha2: actor.residence_country_alpha2,
|
||||
name: actor.residence_country_name,
|
||||
}
|
||||
: null,
|
||||
birthPlace: actor.birth_place,
|
||||
birthCountry: actor.birth_country_alpha2
|
||||
? {
|
||||
alpha2: actor.birth_country_alpha2,
|
||||
name: actor.birth_country_name,
|
||||
}
|
||||
: null,
|
||||
origin: (actor.birth_city || actor.birth_state || actor.birth_country_alpha2) ? {} : null,
|
||||
residence: (actor.residence_city || actor.residence_state || actor.residence_country_alpha2) ? {} : null,
|
||||
ethnicity: actor.ethnicity,
|
||||
height: actor.height,
|
||||
weight: actor.weight,
|
||||
@@ -50,9 +39,35 @@ async function curateActor(actor) {
|
||||
slug: actor.slug,
|
||||
avatar: photos.find(photo => photo.role === 'avatar'),
|
||||
photos: photos.filter(photo => photo.role === 'photo'),
|
||||
hasTattoos: actor.has_tattoos,
|
||||
hasPiercings: actor.has_piercings,
|
||||
tattoos: actor.tattoos,
|
||||
piercings: actor.piercings,
|
||||
social,
|
||||
scrapedAt: actor.scraped_at,
|
||||
};
|
||||
|
||||
if (actor.birth_city) curatedActor.origin.city = actor.birth_city;
|
||||
if (actor.birth_state) curatedActor.origin.state = actor.birth_state;
|
||||
|
||||
if (actor.birth_country_alpha2) {
|
||||
curatedActor.origin.country = {
|
||||
alpha2: actor.birth_country_alpha2,
|
||||
name: actor.birth_country_name,
|
||||
};
|
||||
}
|
||||
|
||||
if (actor.residence_city) curatedActor.residence.city = actor.residence_city;
|
||||
if (actor.residence_state) curatedActor.residence.state = actor.residence_state;
|
||||
|
||||
if (actor.residence_country_alpha2) {
|
||||
curatedActor.residence.country = {
|
||||
alpha2: actor.residence_country_alpha2,
|
||||
name: actor.residence_country_name,
|
||||
};
|
||||
}
|
||||
|
||||
return curatedActor;
|
||||
}
|
||||
|
||||
function curateActors(releases) {
|
||||
@@ -70,10 +85,6 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
|
||||
description: actor.description,
|
||||
gender: actor.gender,
|
||||
ethnicity: actor.ethnicity,
|
||||
birth_country_alpha2: actor.birthCountry,
|
||||
residence_country_alpha2: actor.residenceCountry,
|
||||
birth_place: actor.birthPlace,
|
||||
residence_place: actor.residencePlace,
|
||||
bust: actor.bust,
|
||||
waist: actor.waist,
|
||||
hip: actor.hip,
|
||||
@@ -92,6 +103,18 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
|
||||
curatedActor.id = actor.id;
|
||||
}
|
||||
|
||||
if (actor.birthPlace) {
|
||||
curatedActor.birth_city = actor.birthPlace.city;
|
||||
curatedActor.birth_state = actor.birthPlace.state;
|
||||
curatedActor.birth_country_alpha2 = actor.birthPlace.country;
|
||||
}
|
||||
|
||||
if (actor.residencePlace) {
|
||||
curatedActor.residence_city = actor.residencePlace.city;
|
||||
curatedActor.residence_state = actor.residencePlace.state;
|
||||
curatedActor.residence_country_alpha2 = actor.residencePlace.country;
|
||||
}
|
||||
|
||||
if (scraped) {
|
||||
curatedActor.scraped_at = new Date();
|
||||
curatedActor.scrape_success = scrapeSuccess;
|
||||
@@ -102,7 +125,7 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
|
||||
|
||||
function curateSocialEntry(url, actorId) {
|
||||
const { hostname, origin, pathname } = new URL(url);
|
||||
const platform = ['facebook', 'twitter', 'instagram', 'tumblr', 'snapchat', 'amazon', 'youtube'].find(platformName => hostname.match(platformName));
|
||||
const platform = ['facebook', 'twitter', 'instagram', 'tumblr', 'snapchat', 'amazon', 'youtube', 'fancentro'].find(platformName => hostname.match(platformName));
|
||||
|
||||
return {
|
||||
url: `${origin}${pathname}`,
|
||||
@@ -184,8 +207,8 @@ async function updateActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
return actorEntry;
|
||||
}
|
||||
|
||||
function mergeProfiles(profiles, actor) {
|
||||
return profiles.reduce((prevProfile, profile) => {
|
||||
async function mergeProfiles(profiles, actor) {
|
||||
const mergedProfile = profiles.reduce((prevProfile, profile) => {
|
||||
if (profile === null) {
|
||||
return prevProfile;
|
||||
}
|
||||
@@ -196,21 +219,19 @@ function mergeProfiles(profiles, actor) {
|
||||
description: prevProfile.description || profile.description,
|
||||
gender: prevProfile.gender || profile.gender,
|
||||
birthdate: Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
|
||||
birthCountry: prevProfile.birthCountry || profile.birthCountry,
|
||||
residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
|
||||
birthPlace: prevProfile.birthPlace || profile.birthPlace,
|
||||
residencePlace: prevProfile.residencePlace || profile.residencePlace,
|
||||
ethnicity: prevProfile.ethnicity || profile.ethnicity,
|
||||
bust: prevProfile.bust || profile.bust,
|
||||
waist: prevProfile.waist || profile.waist,
|
||||
hip: prevProfile.hip || profile.hip,
|
||||
naturalBoobs: prevProfile.naturalBoobs || profile.naturalBoobs,
|
||||
naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,
|
||||
height: prevProfile.height || profile.height,
|
||||
weight: prevProfile.weight || profile.weight,
|
||||
hair: prevProfile.hair || profile.hair,
|
||||
eyes: prevProfile.eyes || profile.eyes,
|
||||
hasPiercings: prevProfile.hasPiercings || profile.hasPiercings,
|
||||
hasTattoos: prevProfile.hasTattoos || profile.hasTattoos,
|
||||
hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings,
|
||||
hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos,
|
||||
piercings: prevProfile.piercings || profile.piercings,
|
||||
tattoos: prevProfile.tattoos || profile.tattoos,
|
||||
social: prevProfile.social.concat(profile.social || []),
|
||||
@@ -220,6 +241,16 @@ function mergeProfiles(profiles, actor) {
|
||||
social: [],
|
||||
avatars: [],
|
||||
});
|
||||
|
||||
const [birthPlace, residencePlace] = await Promise.all([
|
||||
resolvePlace(mergedProfile.birthPlace),
|
||||
resolvePlace(mergedProfile.residencePlace),
|
||||
]);
|
||||
|
||||
mergedProfile.birthPlace = birthPlace;
|
||||
mergedProfile.residencePlace = residencePlace;
|
||||
|
||||
return mergedProfile;
|
||||
}
|
||||
|
||||
async function scrapeActors(actorNames) {
|
||||
@@ -228,35 +259,44 @@ async function scrapeActors(actorNames) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
|
||||
const profiles = await Promise.all(
|
||||
Object.values(scrapers.actors)
|
||||
.map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)),
|
||||
);
|
||||
const profiles = await Promise.map(Object.entries(scrapers.actors), async ([scraperSlug, scraper]) => {
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName);
|
||||
|
||||
const profile = mergeProfiles(profiles, actorEntry);
|
||||
return {
|
||||
scraper: scraperSlug,
|
||||
...profile,
|
||||
};
|
||||
});
|
||||
|
||||
const profile = await mergeProfiles(profiles, actorEntry);
|
||||
|
||||
if (profile === null) {
|
||||
console.log(`Could not find profile for actor '${actorName}'`);
|
||||
await updateActor(profile, true, false);
|
||||
|
||||
if (argv.save) {
|
||||
await updateActor(profile, true, false);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (actorEntry && profile) {
|
||||
await createActorMediaDirectory(profile, actorEntry);
|
||||
if (argv.save) {
|
||||
if (actorEntry && profile) {
|
||||
await createActorMediaDirectory(profile, actorEntry);
|
||||
|
||||
await Promise.all([
|
||||
updateActor(profile, true, true),
|
||||
storeAvatars(profile, actorEntry),
|
||||
]);
|
||||
await Promise.all([
|
||||
updateActor(profile, true, true),
|
||||
storeAvatars(profile, actorEntry),
|
||||
]);
|
||||
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
const newActorEntry = await storeActor(profile, true, true);
|
||||
|
||||
await createActorMediaDirectory(profile, newActorEntry);
|
||||
await storeAvatars(profile, newActorEntry);
|
||||
}
|
||||
|
||||
const newActorEntry = await storeActor(profile, true, true);
|
||||
|
||||
await createActorMediaDirectory(profile, newActorEntry);
|
||||
await storeAvatars(profile, newActorEntry);
|
||||
} catch (error) {
|
||||
console.warn(actorName, error);
|
||||
}
|
||||
|
||||
21
src/media.js
21
src/media.js
@@ -188,10 +188,10 @@ async function storeAvatars(profile, actor) {
|
||||
console.log(`Storing ${profile.avatars.length} avatars for '${profile.name}'`);
|
||||
|
||||
const files = await Promise.map(profile.avatars, async (avatarUrl, index) => {
|
||||
const { pathname } = new URL(avatarUrl);
|
||||
const mimetype = mime.getType(pathname);
|
||||
|
||||
try {
|
||||
const { pathname } = new URL(avatarUrl);
|
||||
const mimetype = mime.getType(pathname);
|
||||
|
||||
const res = await bhttp.get(avatarUrl);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
@@ -220,7 +220,7 @@ async function storeAvatars(profile, actor) {
|
||||
|
||||
throw new Error(`Response ${res.statusCode} not OK`);
|
||||
} catch (error) {
|
||||
console.warn(`Failed to store avatar ${index + 1} for '${profile.name}'`);
|
||||
console.warn(`Failed to store avatar ${index + 1} for '${profile.name}': ${avatarUrl}`);
|
||||
|
||||
return null;
|
||||
}
|
||||
@@ -228,17 +228,12 @@ async function storeAvatars(profile, actor) {
|
||||
concurrency: 2,
|
||||
});
|
||||
|
||||
const avatars = files.filter(file => file);
|
||||
|
||||
const existingAvatars = await knex('media')
|
||||
.whereIn('hash', files.map(file => file.hash));
|
||||
|
||||
const newAvatars = files.filter((file) => {
|
||||
if (!file) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return !existingAvatars.some(avatar => file.hash === avatar.hash);
|
||||
});
|
||||
.whereIn('hash', avatars.map(file => file.hash));
|
||||
|
||||
const newAvatars = avatars.filter(file => !existingAvatars.some(avatar => file.hash === avatar.hash));
|
||||
const hasAvatar = existingAvatars.some(avatar => avatar.role === 'avatar');
|
||||
|
||||
await knex('media')
|
||||
|
||||
@@ -157,10 +157,8 @@ function scrapeProfile(html, url, actorName) {
|
||||
if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]);
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
|
||||
if (bio['Body Art']) {
|
||||
profile.hasTattoo = !!bio['Body Art'].match('Tattoo');
|
||||
profile.hasPiercing = !!bio['Body Art'].match('Piercing');
|
||||
}
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
|
||||
|
||||
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
|
||||
@@ -5,8 +5,6 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
async function scrapeProfileFrontpage(html, url, name) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('.dashboard-bio-list');
|
||||
@@ -18,55 +16,47 @@ async function scrapeProfileFrontpage(html, url, name) {
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
||||
|
||||
const profile = {
|
||||
name,
|
||||
gender: 'female',
|
||||
};
|
||||
|
||||
const birthdateString = bio['Date of Birth:'];
|
||||
const birthdate = birthdateString && birthdateString !== 'Unknown (Add)'
|
||||
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
|
||||
: null;
|
||||
|
||||
const measurementsString = bio['Measurements:'];
|
||||
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
const naturalBoobs = bio['Fake Boobs:'] === 'No';
|
||||
|
||||
const residenceCountryName = bio['Country of Origin:'];
|
||||
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
|
||||
const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
|
||||
const birthPlace = bio['Place of Birth:'];
|
||||
const birthCityString = bio['Place of Birth:'];
|
||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
||||
|
||||
const hair = bio['Hair Color:'].toLowerCase();
|
||||
const eyes = bio['Eye Color:'].toLowerCase();
|
||||
const birthCountryString = bio['Country of Origin:'];
|
||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
||||
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
const piercings = hasPiercings && piercingsString;
|
||||
|
||||
const tattoosString = bio['Tattoos:'];
|
||||
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
const tattoos = hasTattoos && tattoosString;
|
||||
|
||||
const social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);
|
||||
if (birthdateString && birthdateString !== 'Unknown (add)') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
|
||||
if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';
|
||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
||||
|
||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
||||
|
||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
|
||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
||||
|
||||
profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);
|
||||
|
||||
return {
|
||||
bio: {
|
||||
name,
|
||||
gender: 'female',
|
||||
birthdate,
|
||||
residenceCountry,
|
||||
birthPlace,
|
||||
naturalBoobs,
|
||||
bust,
|
||||
waist,
|
||||
hip,
|
||||
hair,
|
||||
eyes,
|
||||
piercings,
|
||||
tattoos,
|
||||
social,
|
||||
},
|
||||
profile,
|
||||
url: bioUrl,
|
||||
};
|
||||
}
|
||||
|
||||
async function scrapeProfileBio(html, frontpageBio, url, name) {
|
||||
async function scrapeProfileBio(html, frontpageProfile, url, name) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('#biographyTable');
|
||||
|
||||
@@ -75,58 +65,46 @@ async function scrapeProfileBio(html, frontpageBio, url, name) {
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
||||
|
||||
const birthdateString = bio['Date of Birth:'];
|
||||
const birthdate = birthdateString && birthdateString !== 'Unknown'
|
||||
? moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate()
|
||||
: null;
|
||||
|
||||
const measurementsString = bio['Measurements:'];
|
||||
const [bust, waist, hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
const boobsNatural = bio['Fake boobs:'] === 'No';
|
||||
const ethnicity = bio['Ethnicity:'];
|
||||
|
||||
const residenceCountryName = bio['Country of Origin:'];
|
||||
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
|
||||
const residenceCountry = countryEntry ? countryEntry.alpha2 : null;
|
||||
const birthPlace = bio['Place of Birth:'];
|
||||
|
||||
const hair = bio['Hair Color:'].toLowerCase();
|
||||
const eyes = bio['Eye Color:'].toLowerCase();
|
||||
const height = Number(bio['Height:'].match(/\d+/)[0]);
|
||||
const weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
||||
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const hasPiercings = !!(piercingsString !== undefined && piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
const piercings = hasPiercings && piercingsString;
|
||||
|
||||
const tattoosString = bio['Tattoos:'];
|
||||
const hasTattoos = !!(tattoosString !== undefined && tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
const tattoos = hasTattoos && tattoosString;
|
||||
|
||||
const social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
|
||||
|
||||
return {
|
||||
...frontpageBio,
|
||||
const profile = {
|
||||
...frontpageProfile,
|
||||
name,
|
||||
gender: 'female',
|
||||
birthdate,
|
||||
residenceCountry,
|
||||
birthPlace,
|
||||
ethnicity,
|
||||
naturalBoobs: boobsNatural,
|
||||
bust,
|
||||
waist,
|
||||
hip,
|
||||
height,
|
||||
weight,
|
||||
hair,
|
||||
eyes,
|
||||
hasPiercings,
|
||||
hasTattoos,
|
||||
piercings,
|
||||
tattoos,
|
||||
social,
|
||||
};
|
||||
|
||||
const birthdateString = bio['Date of Birth:'];
|
||||
const measurementsString = bio['Measurements:'];
|
||||
|
||||
const birthCityString = bio['Place of Birth:'];
|
||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
||||
|
||||
const birthCountryString = bio['Country of Origin:'];
|
||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
||||
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const tattoosString = bio['Tattoos:'];
|
||||
|
||||
if (birthdateString && birthdateString !== 'Unknown') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
|
||||
if (bio['Fake boobs']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
|
||||
profile.ethnicity = bio['Ethnicity:'];
|
||||
|
||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
||||
|
||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
||||
profile.height = Number(bio['Height:'].match(/\d+/)[0]);
|
||||
profile.weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
||||
|
||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
|
||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
||||
|
||||
profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
@@ -148,10 +126,10 @@ async function fetchProfile(actorName) {
|
||||
const resFallback = await bhttp.get(fallbackUrl);
|
||||
|
||||
if (resFallback.statusCode === 200) {
|
||||
const { url, bio } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
|
||||
const { url, profile } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
|
||||
const resBio = await bhttp.get(url);
|
||||
|
||||
return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
|
||||
return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -224,13 +224,15 @@ function scrapeProfile(html, url, actorName) {
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
|
||||
|
||||
if (avatarEl) {
|
||||
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
|
||||
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
|
||||
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
|
||||
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7).trim();
|
||||
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7).trim();
|
||||
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6).trim();
|
||||
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6).trim();
|
||||
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6).trim();
|
||||
|
||||
profile.avatar = src3 || src2 || src1 || src0 || src;
|
||||
const avatar = src3 || src2 || src1 || src0 || src;
|
||||
|
||||
if (avatar) profile.avatar = avatar;
|
||||
}
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
|
||||
|
||||
@@ -70,34 +70,6 @@ function scrapeLatest(html, site) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Scrape an actor profile from a model page.
 *
 * Reads the "Key: value" rows of the `.model--description` table, looks the
 * nationality up in the `countries` table to obtain an alpha-2 country code,
 * and picks up the age and the avatar image when they are present.
 *
 * @param {string} html - Raw HTML of the model page.
 * @param {string} _url - Page URL (unused).
 * @param {string} actorName - Name to store on the resulting profile.
 * @returns {Promise<Object>} Profile with `name` and, when available,
 *   `birthCountry`, `age` and `avatar`.
 */
async function scrapeProfile(html, _url, actorName) {
  const dom = new JSDOM(html);
  const { document } = dom.window;

  const profile = { name: actorName };

  // Collect table rows as key/value pairs; a row that does not split into
  // exactly two parts on ':' (e.g. a free-text About section) is ignored.
  const bio = new Map();

  for (const rowEl of Array.from(document.querySelectorAll('.model--description tr'))) {
    const parts = rowEl.textContent.replace(/\n/g, '').split(':');

    if (parts.length === 2) {
      const [label, text] = parts;

      bio.set(label.trim(), text.trim());
    }
  }

  const nationality = bio.get('Nationality');

  if (nationality) {
    const country = await knex('countries').where({ name: nationality }).first();

    if (country) {
      profile.birthCountry = country.alpha2;
    }
  }

  const age = bio.get('Age');

  if (age) {
    profile.age = age;
  }

  // Only absolute (http...) avatar sources are considered
  const avatarImg = document.querySelector('.model--avatar img[src^="http"]');

  if (avatarImg) {
    profile.avatar = avatarImg.src;
  }

  return profile;
}
|
||||
|
||||
async function scrapeScene(html, url, site, useGallery) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const playerObject = $('script:contains("new VideoPlayer")').html();
|
||||
@@ -158,6 +130,28 @@ async function scrapeScene(html, url, site, useGallery) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Scrape an actor profile from a model page.
 *
 * Reads the "Key: value" rows of the `.model--description` table and copies
 * the nationality (as an unresolved birth place string), the age and the
 * avatar image URL onto the profile.
 *
 * @param {string} html - Raw HTML of the model page.
 * @param {string} _url - Page URL (unused).
 * @param {string} actorName - Name to store on the resulting profile.
 * @returns {Promise<Object>} Profile with `name`, `birthPlace` (undefined when
 *   the page lists no nationality) and, when available, `age` and `avatar`.
 */
async function scrapeProfile(html, _url, actorName) {
  const dom = new JSDOM(html);
  const { document } = dom.window;

  // Collect table rows as key/value pairs; a row that does not split into
  // exactly two parts on ':' (e.g. a free-text About section) is ignored.
  const bio = new Map();

  document.querySelectorAll('.model--description tr').forEach((rowEl) => {
    const segments = rowEl.textContent.replace(/\n/g, '').split(':');

    if (segments.length === 2) {
      bio.set(segments[0].trim(), segments[1].trim());
    }
  });

  const profile = {
    name: actorName,
    // Stored as scraped; always assigned, even when no nationality was found
    birthPlace: bio.get('Nationality'),
  };

  const age = bio.get('Age');

  if (age) {
    profile.age = age;
  }

  // Only absolute (http...) avatar sources are considered
  const avatarImg = document.querySelector('.model--avatar img[src^="http"]');

  if (avatarImg) {
    profile.avatar = avatarImg.src;
  }

  return profile;
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/new-videos/${page}`);
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
const ethnicityMap = {
|
||||
White: 'Caucasian',
|
||||
};
|
||||
@@ -14,10 +12,6 @@ const hairMap = {
|
||||
Brunette: 'brown',
|
||||
};
|
||||
|
||||
const countryMap = {
|
||||
'United States of America': 'United States',
|
||||
};
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
@@ -28,9 +22,7 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]');
|
||||
const birthPlaceString = bio['Birth Place'] || bio.Birthplace;
|
||||
const residencePlaceString = bio['City and Country'];
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
|
||||
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
|
||||
|
||||
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
|
||||
@@ -38,35 +30,20 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
|
||||
if (descriptionString) profile.description = descriptionString.textContent;
|
||||
|
||||
if (bio.Birthday) bio.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
|
||||
if (bio.Born) bio.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
if (bio.Birthday) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
|
||||
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
|
||||
|
||||
if (birthPlaceString) {
|
||||
const birthPlaceSegments = birthPlaceString.split(',');
|
||||
const birthCountryName = birthPlaceSegments.slice(-1)[0].trim();
|
||||
const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first();
|
||||
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
|
||||
profile.residencePlace = bio['City and Country'];
|
||||
|
||||
profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim();
|
||||
profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null;
|
||||
}
|
||||
|
||||
if (residencePlaceString) {
|
||||
const residencePlaceSegments = residencePlaceString.split(',');
|
||||
const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim();
|
||||
const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first();
|
||||
|
||||
profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim();
|
||||
profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
|
||||
}
|
||||
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-').map(measurement => parseInt(measurement, 10) || null);
|
||||
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
|
||||
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
|
||||
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.hasTattoos === 'Yes';
|
||||
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
|
||||
|
||||
if (avatarEl) profile.avatar = avatarEl.src;
|
||||
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
|
||||
28
src/utils/resolve-place.js
Normal file
28
src/utils/resolve-place.js
Normal file
@@ -0,0 +1,28 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
/**
 * Resolve a free-form place description to a structured place by querying the
 * Nominatim (OpenStreetMap) search endpoint and reading the address details of
 * the first result.
 *
 * @param {?string} query - Free-form place text, e.g. as scraped from a profile.
 * @returns {Promise<?Object>} `null` when the query is empty or blank, when
 *   nothing was found, or when the response is not a result list; otherwise an
 *   object holding whichever of `city`, `state`, `country` (upper-cased country
 *   code) and `continent` the first result provides.
 */
async function resolvePlace(query) {
  const searchTerm = typeof query === 'string' ? query.trim() : query;

  // Nothing to look up; also avoids a pointless request for blank strings
  if (!searchTerm) {
    return null;
  }

  // The query goes into the `q` parameter and is component-encoded so that
  // characters such as '/', '?', '#' and '&' in scraped text cannot alter the URL.
  const res = await bhttp.get(`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(searchTerm)}&format=json&accept-language=en&addressdetails=1`);

  // Anything other than a result list (e.g. an error payload or an undecoded
  // body) is treated as "not found" instead of failing on destructuring.
  const [item] = Array.isArray(res.body) ? res.body : [];

  if (item && item.address) {
    const rawPlace = item.address;
    const place = {};

    // NOTE(review): only `city` is read here; if the service reports smaller
    // settlements under other address keys they are ignored — confirm intent.
    if (rawPlace.city) place.city = rawPlace.city;
    if (rawPlace.state) place.state = rawPlace.state;
    if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
    if (rawPlace.continent) place.continent = rawPlace.continent;

    return place;
  }

  return null;
}
|
||||
|
||||
module.exports = resolvePlace;
|
||||
Reference in New Issue
Block a user