// traxxx/src/actors.js (JavaScript, 260 lines, 8.6 KiB per the repository viewer this file was copied from)

'use strict';

// Bluebird deliberately shadows the native Promise in this module:
// scrapeActors relies on Promise.map with a `concurrency` option.
const Promise = require('bluebird');

const knex = require('./knex');
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or');
const { createActorMediaDirectory, storeAvatars } = require('./media');
/**
 * Convert an `actors` row (as selected by fetchActors, including the joined
 * country columns) into the camelCase actor object returned to callers.
 *
 * Two extra queries are issued in parallel: rows in `actors` whose
 * `alias_for` equals this actor's id, and rows in `media` with domain
 * 'actors' and `target_id` equal to this actor's id, ordered by `index`.
 *
 * @param {Object} actor - Actor row; snake_case columns are read from it.
 * @returns {Promise<Object>} Curated actor with `aliases` (array of names)
 *   and `avatars` (the media rows, unmodified).
 */
async function curateActor(actor) {
  const [aliases, avatars] = await Promise.all([
    knex('actors').where({ alias_for: actor.id }),
    knex('media')
      .where({ domain: 'actors', target_id: actor.id })
      .orderBy('index'),
  ]);

  return {
    id: actor.id,
    gender: actor.gender,
    name: actor.name,
    description: actor.description,
    // A falsy birthdate is passed through as-is instead of becoming an invalid Date.
    birthdate: actor.birthdate && new Date(actor.birthdate),
    country: actor.country_alpha2,
    residencePlace: actor.residence_place,
    residenceCountry: actor.residence_country_alpha2
      ? {
        alpha2: actor.residence_country_alpha2,
        name: actor.residence_country_name,
      }
      : null,
    birthPlace: actor.birth_place,
    birthCountry: actor.birth_country_alpha2
      ? {
        alpha2: actor.birth_country_alpha2,
        name: actor.birth_country_name,
      }
      : null,
    ethnicity: actor.ethnicity,
    height: actor.height,
    bust: actor.bust,
    waist: actor.waist,
    hip: actor.hip,
    naturalBoobs: actor.natural_boobs,
    aliases: aliases.map(({ name }) => name),
    slug: actor.slug,
    avatars,
  };
}
/**
 * Curate a list of actor rows concurrently.
 *
 * @param {Object[]} releases - Actor rows (the parameter name is historical).
 * @returns {Promise<Object[]>} Curated actors, in the same order as the input.
 */
function curateActors(releases) {
  const curations = releases.map(async (actorRow) => {
    const curated = await curateActor(actorRow);
    return curated;
  });

  return Promise.all(curations);
}
/**
 * Build the snake_case row written to the `actors` table from a camelCase
 * actor/profile object.
 *
 * @param {Object} actor - Actor or merged profile; `name` is required.
 * @param {boolean} [scraped] - When truthy, `scraped_at` is set to the
 *   current time and `scrape_success` is recorded.
 * @param {boolean} [scrapeSuccess] - Value stored in `scrape_success`.
 * @returns {Object} Row for insert/update. `id` is only present when the
 *   input carries a truthy `id`.
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
  const curatedActor = {
    // Upper-case the first character of every space-separated segment.
    name: actor.name
      .split(' ')
      .map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
      .join(' '),
    // Lower-cased name with each whitespace run collapsed into one dash.
    slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
    birthdate: actor.birthdate,
    description: actor.description,
    gender: actor.gender,
    ethnicity: actor.ethnicity,
    birth_country_alpha2: actor.birthCountry,
    residence_country_alpha2: actor.residenceCountry,
    birth_place: actor.birthPlace,
    residence_place: actor.residencePlace,
    bust: actor.bust,
    waist: actor.waist,
    hip: actor.hip,
    natural_boobs: actor.naturalBoobs,
    height: actor.height,
    weight: actor.weight,
    hair: actor.hair,
    eyes: actor.eyes,
    has_tattoos: actor.hasTattoos,
    has_piercings: actor.hasPiercings,
    tattoos: actor.tattoos,
    piercings: actor.piercings,
  };

  if (actor.id) {
    curatedActor.id = actor.id;
  }

  if (scraped) {
    curatedActor.scraped_at = new Date();
    curatedActor.scrape_success = scrapeSuccess;
  }

  return curatedActor;
}
/**
 * Fetch up to 100 actors matching `queryObject` and return them curated.
 *
 * The `countries` table is left-joined twice (birth and residence) so the
 * country name is available alongside each alpha2 code.
 *
 * @param {Object} queryObject - Filter handed to the `whereOr` helper together
 *   with the table name 'actors' and the knex builder.
 * @returns {Promise<Object[]>} Curated actors (see curateActor).
 */
async function fetchActors(queryObject) {
  const releases = await knex('actors')
    .select(
      'actors.*',
      'birth_countries.alpha2 as birth_country_alpha2',
      'birth_countries.name as birth_country_name',
      'residence_countries.alpha2 as residence_country_alpha2',
      'residence_countries.name as residence_country_name',
    )
    .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
    .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
    .where(builder => whereOr(queryObject, 'actors', builder))
    .limit(100);

  return curateActors(releases);
}
/**
 * Insert a new row into `actors` for the given actor/profile.
 *
 * @param {Object} actor - Actor or merged profile; `name` is required.
 * @param {boolean} [scraped=false] - Forwarded to curateActorEntry.
 * @param {boolean} [scrapeSuccess=false] - Forwarded to curateActorEntry.
 * @returns {Promise<Object|null>} The first returned row, or null when the
 *   insert returned no rows (an error is logged in that case).
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
  const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

  const actorEntries = await knex('actors')
    .insert(curatedActor)
    .returning('*');

  if (actorEntries.length) {
    const actorEntry = actorEntries[0];

    console.log(`Added new entry for actor '${actor.name}'`);

    return actorEntry;
  }

  console.error(`Unable to save profile for '${actor.name}'`);

  return null;
}
/**
 * Update the `actors` row whose id equals `actor.id`.
 *
 * @param {Object} actor - Actor or merged profile; `id` and `name` are read.
 * @param {boolean} [scraped=false] - Forwarded to curateActorEntry.
 * @param {boolean} [scrapeSuccess=false] - Forwarded to curateActorEntry.
 * @returns {Promise<Object|undefined>} The first row returned by the update;
 *   undefined when the update returned no rows.
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
  const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);

  const actorEntries = await knex('actors')
    .where({ id: actor.id })
    .update(curatedActor)
    .returning('*');

  console.log(`Updated entry for actor '${actor.name}'`);

  return actorEntries[0];
}
/**
 * Fold scraped profiles into a single profile, starting from the existing
 * actor row when there is one.
 *
 * Scalar fields use first-truthy-wins in array order (the existing actor's
 * values come first), so a falsy earlier value such as `false` or `0` is
 * overridden by a later truthy one. `social` and `avatars` are accumulated
 * across all profiles; each profile contributes its `social` array and its
 * single `avatar`.
 *
 * @param {Array<Object|null|undefined>} profiles - Scraper results; missing
 *   profiles (null/undefined) are skipped.
 * @param {Object|null|undefined} actor - Existing actor row, if any. Its `id`
 *   and `name` take precedence over anything scraped.
 * @returns {Object} Merged profile. Never null: when every profile is
 *   missing, the result is the actor's own fields plus empty `social` and
 *   `avatars` arrays.
 */
function mergeProfiles(profiles, actor) {
  // A birthdate only counts when it is present and parses to a real point in
  // time. The previous `Number.isNaN(value)` check was false for `undefined`
  // and for Date objects, so a missing birthdate was never filled in.
  const hasValidDate = value => Boolean(value) && !Number.isNaN(new Date(value).getTime());

  return profiles.reduce((prevProfile, profile) => {
    if (!profile) {
      return prevProfile;
    }

    return {
      id: actor ? actor.id : null,
      name: actor ? actor.name : profile.name,
      description: prevProfile.description || profile.description,
      gender: prevProfile.gender || profile.gender,
      birthdate: hasValidDate(prevProfile.birthdate) ? prevProfile.birthdate : profile.birthdate,
      birthCountry: prevProfile.birthCountry || profile.birthCountry,
      residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
      birthPlace: prevProfile.birthPlace || profile.birthPlace,
      residencePlace: prevProfile.residencePlace || profile.residencePlace,
      ethnicity: prevProfile.ethnicity || profile.ethnicity,
      bust: prevProfile.bust || profile.bust,
      waist: prevProfile.waist || profile.waist,
      hip: prevProfile.hip || profile.hip,
      naturalBoobs: prevProfile.naturalBoobs || profile.naturalBoobs,
      height: prevProfile.height || profile.height,
      weight: prevProfile.weight || profile.weight,
      hair: prevProfile.hair || profile.hair,
      eyes: prevProfile.eyes || profile.eyes,
      hasPiercings: prevProfile.hasPiercings || profile.hasPiercings,
      hasTattoos: prevProfile.hasTattoos || profile.hasTattoos,
      piercings: prevProfile.piercings || profile.piercings,
      tattoos: prevProfile.tattoos || profile.tattoos,
      social: prevProfile.social.concat(profile.social || []),
      avatars: prevProfile.avatars.concat(profile.avatar || []),
    };
  }, {
    social: [],
    avatars: [],
    ...actor,
  });
}
/**
 * Scrape profiles for the given actors from every scraper in
 * `scrapers.actors`, merge them, and persist the result.
 *
 * Per actor name:
 * - the existing `actors` row is looked up by slug;
 * - when no scraper yields a profile, an existing row is marked as scraped
 *   without success and nothing else is written;
 * - otherwise an existing row is updated, or a new row is inserted, and the
 *   media helpers are invoked for the merged profile.
 *
 * Names are processed through Bluebird's Promise.map with a concurrency of 3.
 *
 * @param {string[]} [actorNames] - Names to scrape; falls back to `argv.actors`.
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
  await Promise.map(actorNames || argv.actors, async (actorName) => {
    const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
    const actorEntry = await knex('actors').where({ slug: actorSlug }).first();

    const scrapedProfiles = await Promise.all(
      Object.values(scrapers.actors)
        .map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)),
    );

    // Scrapers signal "not found" with a missing profile. mergeProfiles never
    // returns null, so the failure case has to be detected here, before merging.
    const profiles = scrapedProfiles.filter(Boolean);

    if (profiles.length === 0) {
      console.log(`Could not find profile for actor '${actorName}'`);

      if (actorEntry) {
        await updateActor(actorEntry, true, false);
      }

      return;
    }

    const profile = mergeProfiles(profiles, actorEntry);

    if (actorEntry) {
      await createActorMediaDirectory(profile, actorEntry);

      await Promise.all([
        updateActor(profile, true, true),
        storeAvatars(profile, actorEntry),
      ]);

      return;
    }

    const newActorEntry = await storeActor(profile, true, true);

    // storeActor returns null (and logs) when the insert yields no row.
    if (!newActorEntry) {
      return;
    }

    await createActorMediaDirectory(profile, newActorEntry);
    await storeAvatars(profile, newActorEntry);
  }, {
    concurrency: 3,
  });
}
/**
 * Scrape every actor whose `scraped_at` column is null.
 *
 * @returns {Promise<*>} Whatever scrapeActors resolves to for those names.
 */
async function scrapeBasicActors() {
  const unscrapedRows = await knex('actors').where('scraped_at', null);
  const unscrapedNames = unscrapedRows.map(({ name }) => name);

  return scrapeActors(unscrapedNames);
}
/**
 * Link a release to its actors, creating `actors` rows for unknown names.
 *
 * Names are trimmed, emptied entries dropped and duplicates removed before
 * anything is looked up, so the lookup and the "is this actor new?" check
 * operate on the same strings. Actors already linked to the release (matched
 * on `actors_associated.actor_id`) are not linked again.
 *
 * @param {Object} release - Release whose `actors` is an array of names.
 * @param {number|string} releaseId - Value written to `release_id`.
 * @returns {Promise<void>}
 */
async function associateActors(release, releaseId) {
  const actorNames = [...new Set(
    (release.actors || [])
      .map(actorName => actorName.trim())
      .filter(Boolean),
  )];

  const actorEntries = await knex('actors').whereIn('name', actorNames);

  const knownNames = new Set(actorEntries.map(actor => actor.name));
  const newActors = actorNames.filter(actorName => !knownNames.has(actorName));

  const [newActorEntries, associatedActors] = await Promise.all([
    Promise.all(newActors.map(actorName => storeActor({ name: actorName }))),
    knex('actors_associated').where('release_id', releaseId),
  ]);

  // Association rows reference the actor through `actor_id` (the column
  // written below), not through their own `id`.
  const associatedActorIds = new Set(associatedActors.map(association => association.actor_id));

  const newlyAssociatedActors = actorEntries
    .concat(newActorEntries)
    // storeActor resolves to null when an insert fails; skip those.
    .filter(actorEntry => actorEntry && !associatedActorIds.has(actorEntry.id))
    .map(actor => ({
      release_id: releaseId,
      actor_id: actor.id,
    }));

  if (newlyAssociatedActors.length === 0) {
    return;
  }

  await knex('actors_associated')
    .insert(newlyAssociatedActors);
}
module.exports = {
associateActors,
fetchActors,
2019-11-17 02:56:45 +00:00
scrapeActors,
scrapeBasicActors,
};