'use strict'; const Promise = require('bluebird'); const UrlPattern = require('url-pattern'); const knex = require('./knex'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const whereOr = require('./utils/where-or'); const resolvePlace = require('./utils/resolve-place'); const { createActorMediaDirectory, storeAvatars } = require('./media'); async function curateActor(actor) { const [aliases, photos, social] = await Promise.all([ knex('actors').where({ alias_for: actor.id }), knex('media') .where({ domain: 'actors', target_id: actor.id }) .orderBy('index'), knex('social') .where({ domain: 'actors', target_id: actor.id }) .orderBy('platform', 'desc'), ]); const curatedActor = { id: actor.id, gender: actor.gender, name: actor.name, description: actor.description, birthdate: actor.birthdate && new Date(actor.birthdate), country: actor.country_alpha2, origin: (actor.birth_city || actor.birth_state || actor.birth_country_alpha2) ? {} : null, residence: (actor.residence_city || actor.residence_state || actor.residence_country_alpha2) ? {} : null, ethnicity: actor.ethnicity, height: actor.height, weight: actor.weight, bust: actor.bust, waist: actor.waist, hip: actor.hip, naturalBoobs: actor.natural_boobs, aliases: aliases.map(({ name }) => name), slug: actor.slug, avatar: photos.find(photo => photo.role === 'avatar'), photos: photos.filter(photo => photo.role === 'photo'), hasTattoos: actor.has_tattoos, hasPiercings: actor.has_piercings, tattoos: actor.tattoos, piercings: actor.piercings, social, scrapedAt: actor.scraped_at, }; if (actor.birth_city) curatedActor.origin.city = actor.birth_city; if (actor.birth_state) curatedActor.origin.state = actor.birth_state; if (actor.birth_country_alpha2) { curatedActor.origin.country = { alpha2: actor.birth_country_alpha2, name: actor.birth_country_name, alias: actor.birth_country_alias, }; } if (actor.residence_city) curatedActor.residence.city = actor.residence_city; if (actor.residence_state) curatedActor.residence.state = actor.residence_state; if (actor.residence_country_alpha2) { curatedActor.residence.country = { alpha2: actor.residence_country_alpha2, name: actor.residence_country_name, alias: actor.residence_country_alias, }; } return curatedActor; } function curateActors(releases) { return Promise.all(releases.map(async release => curateActor(release))); } function curateActorEntry(actor, scraped, scrapeSuccess) { const curatedActor = { name: actor.name .split(' ') .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`) .join(' '), slug: actor.name.toLowerCase().replace(/\s+/g, '-'), birthdate: actor.birthdate, description: actor.description, gender: actor.gender, ethnicity: actor.ethnicity, bust: actor.bust, waist: actor.waist, hip: actor.hip, natural_boobs: actor.naturalBoobs, height: actor.height, weight: actor.weight, hair: actor.hair, eyes: actor.eyes, has_tattoos: actor.hasTattoos, has_piercings: actor.hasPiercings, tattoos: actor.tattoos, piercings: actor.piercings, }; if (actor.id) { curatedActor.id = actor.id; } if (actor.birthPlace) { curatedActor.birth_city = actor.birthPlace.city; curatedActor.birth_state = actor.birthPlace.state; curatedActor.birth_country_alpha2 = actor.birthPlace.country; } if (actor.residencePlace) { curatedActor.residence_city = actor.residencePlace.city; curatedActor.residence_state = actor.residencePlace.state; curatedActor.residence_country_alpha2 = actor.residencePlace.country; } if (scraped) { curatedActor.scraped_at = new Date(); curatedActor.scrape_success = scrapeSuccess; } return curatedActor; } function curateSocialEntry(url, actorId) { const platforms = [ // links supplied by PH often look like domain.com/domain.com/username { label: 'twitter', pattern: 'http(s)\\://(*)twitter.com/:username(/)(?*)', format: username => `https://www.twitter.com/${username}`, }, { label: 'youtube', pattern: 'http(s)\\://(*)youtube.com/channel/:username(?*)', format: username => `https://www.youtube.com/channel/${username}`, }, { label: 'instagram', pattern: 'http(s)\\://(*)instagram.com/:username(/)(?*)', format: username => `https://www.instagram.com/${username}`, }, { label: 'snapchat', pattern: 'http(s)\\://(*)snapchat.com/add/:username(/)(?*)', format: username => `https://www.snapchat.com/add/${username}`, }, { label: 'tumblr', pattern: 'http(s)\\://:username.tumblr.com(*)', format: username => `https://${username}.tumblr.com`, }, { label: 'onlyfans', pattern: 'http(s)\\://(*)onlyfans.com/:username(/)(?*)', format: username => `https://www.onlyfans.com/${username}`, }, { label: 'fancentro', pattern: 'http(s)\\://(*)fancentro.com/:username(/)(?*)', format: username => `https://www.fancentro.com/${username}`, }, { label: 'modelhub', pattern: 'http(s)\\://(*)modelhub.com/:username(/)(?*)', format: username => `https://www.modelhub.com/${username}`, }, ]; const match = platforms.reduce((acc, platform) => { if (acc) return acc; const patternMatch = new UrlPattern(platform.pattern).match(url); if (patternMatch) { return { platform: platform.label, original: url, username: patternMatch.username, url: platform.format ? platform.format(patternMatch.username) : url, }; } return null; }, null) || { url }; return { url: match.url, platform: match.platform, domain: 'actors', target_id: actorId, }; } async function curateSocialEntries(urls, actorId) { if (!urls) { return []; } const existingSocialLinks = await knex('social').where({ domain: 'actors', target_id: actorId, }); return urls.reduce((acc, url) => { const socialEntry = curateSocialEntry(url, actorId); if (acc.some(entry => socialEntry.url.toLowerCase() === entry.url.toLowerCase()) || existingSocialLinks.some(entry => socialEntry.url.toLowerCase() === entry.url.toLowerCase())) { // prevent duplicates return acc; } return [...acc, socialEntry]; }, []); } async function fetchActors(queryObject, limit = 100) { const releases = await knex('actors') .select( 'actors.*', 'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias', 'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', 'residence_countries.alias as residence_country_alias', ) .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2') .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2') .orderBy(['actors.name', 'actors.gender']) .where(builder => whereOr(queryObject, 'actors', builder)) .limit(limit); return curateActors(releases); } async function storeSocialLinks(urls, actorId) { const curatedSocialEntries = await curateSocialEntries(urls, actorId); await knex('social').insert(curatedSocialEntries); } async function storeActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .insert(curatedActor) .returning('*'); await storeSocialLinks(actor.social, actorEntry.id); console.log(`Added new entry for actor '${actor.name}'`); return actorEntry; } async function updateActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .where({ id: actor.id }) .update(curatedActor) .returning('*'); await storeSocialLinks(actor.social, actor.id); console.log(`Updated entry for actor '${actor.name}'`); return actorEntry; } async function mergeProfiles(profiles, actor) { const mergedProfile = profiles.reduce((prevProfile, profile) => { if (profile === null) { return prevProfile; } return { id: actor ? actor.id : null, name: actor ? actor.name : (prevProfile.name || profile.name), description: prevProfile.description || profile.description, gender: prevProfile.gender || profile.gender, birthdate: Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate, birthPlace: prevProfile.birthPlace || profile.birthPlace, residencePlace: prevProfile.residencePlace || profile.residencePlace, ethnicity: prevProfile.ethnicity || profile.ethnicity, bust: prevProfile.bust || profile.bust, waist: prevProfile.waist || profile.waist, hip: prevProfile.hip || profile.hip, naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs, height: prevProfile.height || profile.height, weight: prevProfile.weight || profile.weight, hair: prevProfile.hair || profile.hair, eyes: prevProfile.eyes || profile.eyes, hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings, hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos, piercings: prevProfile.piercings || profile.piercings, tattoos: prevProfile.tattoos || profile.tattoos, social: prevProfile.social.concat(profile.social || []), avatars: prevProfile.avatars.concat(profile.avatar || []), }; }, { social: [], avatars: [], }); const [birthPlace, residencePlace] = await Promise.all([ resolvePlace(mergedProfile.birthPlace), resolvePlace(mergedProfile.residencePlace), ]); mergedProfile.birthPlace = birthPlace; mergedProfile.residencePlace = residencePlace; return mergedProfile; } async function scrapeActors(actorNames) { await Promise.map(actorNames || argv.actors, async (actorName) => { try { const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); const actorEntry = await knex('actors').where({ slug: actorSlug }).first(); const sources = argv.sources ? argv.sources.map(source => [source, scrapers.actors[source]]) : Object.entries(scrapers.actors); const profiles = await Promise.map(sources, async ([scraperSlug, scraper]) => { const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName); return { scraper: scraperSlug, ...profile, }; }); const profile = await mergeProfiles(profiles, actorEntry); if (profile === null) { console.log(`Could not find profile for actor '${actorName}'`); if (argv.save) { await updateActor(profile, true, false); } return; } if (argv.save) { if (actorEntry && profile) { await createActorMediaDirectory(profile, actorEntry); await Promise.all([ updateActor(profile, true, true), storeAvatars(profile, actorEntry), ]); return; } const newActorEntry = await storeActor(profile, true, true); await createActorMediaDirectory(profile, newActorEntry); await storeAvatars(profile, newActorEntry); } } catch (error) { console.warn(actorName, error); } }, { concurrency: 3, }); } async function scrapeBasicActors() { const basicActors = await knex('actors').where('scraped_at', null); return scrapeActors(basicActors.map(actor => actor.name)); } async function associateActors(mappedActors, releases) { const [existingActorEntries, existingAssociationEntries] = await Promise.all([ knex('actors').whereIn('name', Object.keys(mappedActors)), knex('actors_associated').whereIn('release_id', releases.map(release => release.id)), ]); const associations = await Promise.map(Object.entries(mappedActors), async ([actorName, releaseIds]) => { const actorEntry = existingActorEntries.find(actor => actor.name === actorName) || await storeActor({ name: actorName }); return releaseIds .map(releaseId => ({ release_id: releaseId, actor_id: actorEntry.id, })) .filter(association => !existingAssociationEntries // remove associations already in database .some(associationEntry => associationEntry.actor_id === association.actor_id && associationEntry.release_id === association.release_id)); }); await Promise.all([ knex('actors_associated').insert(associations.flat()), scrapeBasicActors(), ]); } module.exports = { associateActors, fetchActors, scrapeActors, scrapeBasicActors, };