'use strict'; const Promise = require('bluebird'); const knex = require('./knex'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const whereOr = require('./utils/where-or'); const { createActorMediaDirectory, storeAvatars } = require('./media'); async function curateActor(actor) { const [aliases, avatars, social] = await Promise.all([ knex('actors').where({ alias_for: actor.id }), knex('media') .where({ domain: 'actors', target_id: actor.id }) .orderBy('index'), knex('social') .where({ domain: 'actors', target_id: actor.id }), ]); return { id: actor.id, gender: actor.gender, name: actor.name, description: actor.description, birthdate: actor.birthdate && new Date(actor.birthdate), country: actor.country_alpha2, residencePlace: actor.residence_place, residenceCountry: actor.residence_country_alpha2 ? { alpha2: actor.residence_country_alpha2, name: actor.residence_country_name, } : null, birthPlace: actor.birth_place, birthCountry: actor.birth_country_alpha2 ? { alpha2: actor.birth_country_alpha2, name: actor.birth_country_name, } : null, ethnicity: actor.ethnicity, height: actor.height, bust: actor.bust, waist: actor.waist, hip: actor.hip, naturalBoobs: actor.natural_boobs, aliases: aliases.map(({ name }) => name), slug: actor.slug, avatars, social, }; } function curateActors(releases) { return Promise.all(releases.map(async release => curateActor(release))); } function curateActorEntry(actor, scraped, scrapeSuccess) { const curatedActor = { name: actor.name .split(' ') .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`) .join(' '), slug: actor.name.toLowerCase().replace(/\s+/g, '-'), birthdate: actor.birthdate, description: actor.description, gender: actor.gender, ethnicity: actor.ethnicity, birth_country_alpha2: actor.birthCountry, residence_country_alpha2: actor.residenceCountry, birth_place: actor.birthPlace, residence_place: actor.residencePlace, bust: actor.bust, waist: actor.waist, hip: actor.hip, natural_boobs: actor.naturalBoobs, height: actor.height, weight: actor.weight, hair: actor.hair, eyes: actor.eyes, has_tattoos: actor.hasTattoos, has_piercings: actor.hasPiercings, tattoos: actor.tattoos, piercings: actor.piercings, }; if (actor.id) { curatedActor.id = actor.id; } if (scraped) { curatedActor.scraped_at = new Date(); curatedActor.scrape_success = scrapeSuccess; } return curatedActor; } function curateSocialEntry(url, actor) { const { hostname, origin, pathname } = new URL(url); const platform = ['twitter', 'instagram', 'snapchat', 'modelhub', 'youtube'].find(platformName => hostname.match(platformName)); return { url: `${origin}${pathname}`, platform, domain: 'actors', target_id: actor.id, }; } function curateSocialEntries(urls, actor) { if (!urls) { return []; } return urls.reduce((acc, url) => { const socialEntry = curateSocialEntry(url, actor); if (acc.some(entry => socialEntry.url === entry.url)) { // prevent duplicates return acc; } return [...acc, socialEntry]; }, []); } async function fetchActors(queryObject) { const releases = await knex('actors') .select( 'actors.*', 'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', ) .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2') .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2') .where(builder => whereOr(queryObject, 'actors', builder)) .limit(100); return curateActors(releases); } async function storeActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .insert(curatedActor) .returning('*'); await knex('social').insert(curateSocialEntries(actor.social, actor)); console.log(`Added new entry for actor '${actor.name}'`); return actorEntry; } async function updateActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .where({ id: actor.id }) .update(curatedActor) .returning('*'); await knex('social').insert(curateSocialEntries(actor.social, actor)); console.log(`Updated entry for actor '${actor.name}'`); return actorEntry; } function mergeProfiles(profiles, actor) { return profiles.reduce((prevProfile, profile) => { if (profile === null) { return prevProfile; } return { id: actor ? actor.id : null, name: actor ? actor.name : profile.name, description: prevProfile.description || profile.description, gender: prevProfile.gender || profile.gender, birthdate: Number.isNaN(prevProfile.birthdate) ? profile.birthdate : prevProfile.birthdate, birthCountry: prevProfile.birthCountry || profile.birthCountry, residenceCountry: prevProfile.residenceCountry || profile.residenceCountry, birthPlace: prevProfile.birthPlace || profile.birthPlace, residencePlace: prevProfile.residencePlace || profile.residencePlace, ethnicity: prevProfile.ethnicity || profile.ethnicity, bust: prevProfile.bust || profile.bust, waist: prevProfile.waist || profile.waist, hip: prevProfile.hip || profile.hip, naturalBoobs: prevProfile.naturalBoobs || profile.naturalBoobs, height: prevProfile.height || profile.height, weight: prevProfile.weight || profile.weight, hair: prevProfile.hair || profile.hair, eyes: prevProfile.eyes || profile.eyes, hasPiercings: prevProfile.hasPiercings || profile.hasPiercings, hasTattoos: prevProfile.hasTattoos || profile.hasTattoos, piercings: prevProfile.piercings || profile.piercings, tattoos: prevProfile.tattoos || profile.tattoos, social: prevProfile.social.concat(profile.social || []), avatars: prevProfile.avatars.concat(profile.avatar || []), }; }, { social: [], avatars: [], }); } async function scrapeActors(actorNames) { await Promise.map(actorNames || argv.actors, async (actorName) => { try { const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); const actorEntry = await knex('actors').where({ slug: actorSlug }).first(); const profiles = await Promise.all( Object.values(scrapers.actors) .map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)), ); const profile = mergeProfiles(profiles, actorEntry); if (profile === null) { console.log(`Could not find profile for actor '${actorName}'`); await updateActor(profile, true, false); return; } if (actorEntry && profile) { await createActorMediaDirectory(profile, actorEntry); await Promise.all([ updateActor(profile, true, true), storeAvatars(profile, actorEntry), ]); return; } const newActorEntry = await storeActor(profile, true, true); await createActorMediaDirectory(profile, newActorEntry); await storeAvatars(profile, newActorEntry); } catch (error) { console.warn(actorName, error); } }, { concurrency: 3, }); } async function scrapeBasicActors() { const basicActors = await knex('actors').where('scraped_at', null); return scrapeActors(basicActors.map(actor => actor.name)); } async function associateActors(release, releaseId) { const actorEntries = await knex('actors').whereIn('name', release.actors); const newActors = release.actors .map(actorName => actorName.trim()) .filter(actorName => !actorEntries.some(actor => actor.name === actorName)); const [newActorEntries, associatedActors] = await Promise.all([ Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))), knex('actors_associated').where('release_id', releaseId), ]); const newlyAssociatedActors = actorEntries .concat(newActorEntries) .filter(actorEntry => !associatedActors.some(actor => actorEntry.id === actor.id)) .map(actor => ({ release_id: releaseId, actor_id: actor.id, })); await knex('actors_associated') .insert(newlyAssociatedActors); } module.exports = { associateActors, fetchActors, scrapeActors, scrapeBasicActors, };