'use strict'; const Promise = require('bluebird'); const knex = require('./knex'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const whereOr = require('./utils/where-or'); const resolvePlace = require('./utils/resolve-place'); const { createActorMediaDirectory, storeAvatars } = require('./media'); async function curateActor(actor) { const [aliases, photos, social] = await Promise.all([ knex('actors').where({ alias_for: actor.id }), knex('media') .where({ domain: 'actors', target_id: actor.id }) .orderBy('index'), knex('social') .where({ domain: 'actors', target_id: actor.id }), ]); const curatedActor = { id: actor.id, gender: actor.gender, name: actor.name, description: actor.description, birthdate: actor.birthdate && new Date(actor.birthdate), country: actor.country_alpha2, origin: (actor.birth_city || actor.birth_state || actor.birth_country_alpha2) ? {} : null, residence: (actor.residence_city || actor.residence_state || actor.residence_country_alpha2) ? {} : null, ethnicity: actor.ethnicity, height: actor.height, weight: actor.weight, bust: actor.bust, waist: actor.waist, hip: actor.hip, naturalBoobs: actor.natural_boobs, aliases: aliases.map(({ name }) => name), slug: actor.slug, avatar: photos.find(photo => photo.role === 'avatar'), photos: photos.filter(photo => photo.role === 'photo'), hasTattoos: actor.has_tattoos, hasPiercings: actor.has_piercings, tattoos: actor.tattoos, piercings: actor.piercings, social, scrapedAt: actor.scraped_at, }; if (actor.birth_city) curatedActor.origin.city = actor.birth_city; if (actor.birth_state) curatedActor.origin.state = actor.birth_state; if (actor.birth_country_alpha2) { curatedActor.origin.country = { alpha2: actor.birth_country_alpha2, name: actor.birth_country_name, }; } if (actor.residence_city) curatedActor.residence.city = actor.residence_city; if (actor.residence_state) curatedActor.residence.state = actor.residence_state; if (actor.residence_country_alpha2) { curatedActor.residence.country = { alpha2: actor.residence_country_alpha2, name: actor.residence_country_name, }; } return curatedActor; } function curateActors(releases) { return Promise.all(releases.map(async release => curateActor(release))); } function curateActorEntry(actor, scraped, scrapeSuccess) { const curatedActor = { name: actor.name .split(' ') .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`) .join(' '), slug: actor.name.toLowerCase().replace(/\s+/g, '-'), birthdate: actor.birthdate, description: actor.description, gender: actor.gender, ethnicity: actor.ethnicity, bust: actor.bust, waist: actor.waist, hip: actor.hip, natural_boobs: actor.naturalBoobs, height: actor.height, weight: actor.weight, hair: actor.hair, eyes: actor.eyes, has_tattoos: actor.hasTattoos, has_piercings: actor.hasPiercings, tattoos: actor.tattoos, piercings: actor.piercings, }; if (actor.id) { curatedActor.id = actor.id; } if (actor.birthPlace) { curatedActor.birth_city = actor.birthPlace.city; curatedActor.birth_state = actor.birthPlace.state; curatedActor.birth_country_alpha2 = actor.birthPlace.country; } if (actor.residencePlace) { curatedActor.residence_city = actor.residencePlace.city; curatedActor.residence_state = actor.residencePlace.state; curatedActor.residence_country_alpha2 = actor.residencePlace.country; } if (scraped) { curatedActor.scraped_at = new Date(); curatedActor.scrape_success = scrapeSuccess; } return curatedActor; } function curateSocialEntry(url, actorId) { const { hostname, origin, pathname } = new URL(url); const platform = ['facebook', 'twitter', 'instagram', 'tumblr', 'snapchat', 'amazon', 'youtube', 'fancentro'].find(platformName => hostname.match(platformName)); return { url: `${origin}${pathname}`, platform, domain: 'actors', target_id: actorId, }; } async function curateSocialEntries(urls, actorId) { if (!urls) { return []; } const existingSocialLinks = await knex('social').where({ domain: 'actors', target_id: actorId, }); return urls.reduce((acc, url) => { const socialEntry = curateSocialEntry(url, actorId); if (acc.some(entry => socialEntry.url === entry.url) || existingSocialLinks.some(entry => socialEntry.url === entry.url)) { // prevent duplicates return acc; } return [...acc, socialEntry]; }, []); } async function fetchActors(queryObject) { const releases = await knex('actors') .select( 'actors.*', 'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name', ) .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2') .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2') .where(builder => whereOr(queryObject, 'actors', builder)) .limit(100); return curateActors(releases); } async function storeSocialLinks(urls, actorId) { const curatedSocialEntries = await curateSocialEntries(urls, actorId); await knex('social').insert(curatedSocialEntries); } async function storeActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .insert(curatedActor) .returning('*'); await storeSocialLinks(actor.social, actorEntry.id); console.log(`Added new entry for actor '${actor.name}'`); return actorEntry; } async function updateActor(actor, scraped = false, scrapeSuccess = false) { const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const [actorEntry] = await knex('actors') .where({ id: actor.id }) .update(curatedActor) .returning('*'); await storeSocialLinks(actor.social, actor.id); console.log(`Updated entry for actor '${actor.name}'`); return actorEntry; } async function mergeProfiles(profiles, actor) { const mergedProfile = profiles.reduce((prevProfile, profile) => { if (profile === null) { return prevProfile; } return { id: actor ? actor.id : null, name: actor ? actor.name : profile.name, description: prevProfile.description || profile.description, gender: prevProfile.gender || profile.gender, birthdate: Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate, birthPlace: prevProfile.birthPlace || profile.birthPlace, residencePlace: prevProfile.residencePlace || profile.residencePlace, ethnicity: prevProfile.ethnicity || profile.ethnicity, bust: prevProfile.bust || profile.bust, waist: prevProfile.waist || profile.waist, hip: prevProfile.hip || profile.hip, naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs, height: prevProfile.height || profile.height, weight: prevProfile.weight || profile.weight, hair: prevProfile.hair || profile.hair, eyes: prevProfile.eyes || profile.eyes, hasPiercings: prevProfile.hasPiercings === undefined ? profile.hasPiercings : prevProfile.hasPiercings, hasTattoos: prevProfile.hasTattoos === undefined ? profile.hasTattoos : prevProfile.hasTattoos, piercings: prevProfile.piercings || profile.piercings, tattoos: prevProfile.tattoos || profile.tattoos, social: prevProfile.social.concat(profile.social || []), avatars: prevProfile.avatars.concat(profile.avatar || []), }; }, { social: [], avatars: [], }); const [birthPlace, residencePlace] = await Promise.all([ resolvePlace(mergedProfile.birthPlace), resolvePlace(mergedProfile.residencePlace), ]); mergedProfile.birthPlace = birthPlace; mergedProfile.residencePlace = residencePlace; return mergedProfile; } async function scrapeActors(actorNames) { await Promise.map(actorNames || argv.actors, async (actorName) => { try { const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); const actorEntry = await knex('actors').where({ slug: actorSlug }).first(); const profiles = await Promise.map(Object.entries(scrapers.actors), async ([scraperSlug, scraper]) => { const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName); return { scraper: scraperSlug, ...profile, }; }); const profile = await mergeProfiles(profiles, actorEntry); if (profile === null) { console.log(`Could not find profile for actor '${actorName}'`); if (argv.save) { await updateActor(profile, true, false); } return; } if (argv.save) { if (actorEntry && profile) { await createActorMediaDirectory(profile, actorEntry); await Promise.all([ updateActor(profile, true, true), storeAvatars(profile, actorEntry), ]); return; } const newActorEntry = await storeActor(profile, true, true); await createActorMediaDirectory(profile, newActorEntry); await storeAvatars(profile, newActorEntry); } } catch (error) { console.warn(actorName, error); } }, { concurrency: 3, }); } async function scrapeBasicActors() { const basicActors = await knex('actors').where('scraped_at', null); return scrapeActors(basicActors.map(actor => actor.name)); } async function associateActors(release, releaseId) { const actorEntries = await knex('actors').whereIn('name', release.actors); const newActors = release.actors .map(actorName => actorName.trim()) .filter(actorName => !actorEntries.some(actor => actor.name === actorName)); const [newActorEntries, associatedActors] = await Promise.all([ Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))), knex('actors_associated').where('release_id', releaseId), ]); const newlyAssociatedActors = actorEntries .concat(newActorEntries) .filter(actorEntry => !associatedActors.some(actor => actorEntry.id === actor.id)) .map(actor => ({ release_id: releaseId, actor_id: actor.id, })); await knex('actors_associated') .insert(newlyAssociatedActors); } module.exports = { associateActors, fetchActors, scrapeActors, scrapeBasicActors, };