traxxx/src/actors.js

156 lines
4.8 KiB
JavaScript
Raw Normal View History

'use strict';
2019-11-17 02:56:45 +00:00
const Promise = require('bluebird');
const knex = require('./knex');
2019-11-17 02:56:45 +00:00
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or');
/**
 * Convert a raw `actors` row into the camelCased actor object returned by this module.
 *
 * Runs one additional query to collect the names of every `actors` row whose
 * `alias_for` column points at this actor.
 *
 * @param {Object} actor - database row; the `*_country_alpha2` / `*_country_name`
 *   columns are the ones fetchActors selects through its country joins
 * @returns {Promise<Object>} curated actor
 */
async function curateActor(actor) {
  const aliases = await knex('actors')
    .where({ alias_for: actor.id });

  // A country is only exposed when the row carries its alpha2 code; otherwise null.
  const curateCountry = (alpha2, name) => (alpha2 ? { alpha2, name } : null);

  return {
    id: actor.id,
    name: actor.name,
    description: actor.description,
    // Falsy birthdates (null/undefined) are passed through untouched instead of becoming an invalid Date.
    birthdate: actor.birthdate && new Date(actor.birthdate),
    country: actor.country_alpha2,
    residencePlace: actor.residence_place,
    residenceCountry: curateCountry(actor.residence_country_alpha2, actor.residence_country_name),
    birthPlace: actor.birth_place,
    birthCountry: curateCountry(actor.birth_country_alpha2, actor.birth_country_name),
    ethnicity: actor.ethnicity,
    height: actor.height,
    // NOTE(review): key is `boobSize` (singular) next to `boobsNatural`; kept as-is because callers may depend on it.
    boobSize: actor.boobs_size,
    boobsNatural: actor.boobs_natural,
    aliases: aliases.map(({ name }) => name),
    slug: actor.slug,
  };
}
/**
 * Curate a list of actor rows concurrently.
 *
 * @param {Object[]} releases - actor rows (the parameter name is historical; each item is handed to curateActor)
 * @returns {Promise<Object[]>} curated actors, in input order
 */
function curateActors(releases) {
  const pendingActors = releases.map(async actorEntry => curateActor(actorEntry));

  return Promise.all(pendingActors);
}
2019-11-17 02:56:45 +00:00
/**
 * Map a scraped (camelCased) actor profile onto the snake_cased column layout
 * that storeActor and updateActor write to the `actors` table.
 *
 * @param {Object} actor - scraped profile; `name` must be a string
 * @returns {Object} row object for insert/update
 */
function curateScrapedActor(actor) {
  const {
    id,
    name,
    birthdate,
    description,
    gender,
    ethnicity,
    birthCountry,
    residenceCountry,
    birthPlace,
    residencePlace,
    active,
    boobs,
    height,
    hair,
    eyes,
    tattoos,
    piercings,
  } = actor;

  return {
    id,
    name,
    // Slug is the lowercased name with every whitespace run collapsed into a single dash.
    slug: name.toLowerCase().replace(/\s+/g, '-'),
    birthdate,
    description,
    gender,
    ethnicity,
    birth_country_alpha2: birthCountry,
    residence_country_alpha2: residenceCountry,
    birth_place: birthPlace,
    residence_place: residencePlace,
    active,
    // `&&` rather than optional chaining: a falsy `boobs` value is passed through unchanged.
    boobs_size: boobs && boobs.size,
    boobs_natural: boobs && boobs.natural,
    height,
    hair,
    eyes,
    tattoos,
    piercings,
  };
}
/**
 * Fetch up to 100 actors together with their birth and residence country details,
 * and return them in curated form.
 *
 * @param {Object} queryObject - filter handed to the `whereOr` helper for the `actors` table
 *   (presumably column/value pairs combined with OR; see ./utils/where-or to confirm)
 * @returns {Promise<Object[]>} curated actors
 */
async function fetchActors(queryObject) {
  const actorEntries = await knex('actors')
    .select(
      'actors.*',
      'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name',
      'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name',
    )
    // The countries table is joined twice under different aliases; left joins keep actors without a country.
    .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
    .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
    .where(builder => whereOr(queryObject, 'actors', builder))
    .limit(100);

  return curateActors(actorEntries);
}
2019-11-17 02:56:45 +00:00
/**
 * Insert a scraped actor profile as a new row in the `actors` table.
 *
 * @param {Object} actor - scraped profile; converted with curateScrapedActor before insertion
 * @returns {Promise<Object|null>} the first returned row, or null when the insert returned no rows
 */
async function storeActor(actor) {
  const row = curateScrapedActor(actor);
  const insertedRows = await knex('actors')
    .insert(row)
    .returning('*');

  // Guard clause: nothing came back from the insert, report and bail out.
  if (!insertedRows.length) {
    console.error(`Unable to save profile for '${actor.name}'`);
    return null;
  }

  console.log(`Added new entry for actor '${actor.name}'`);
  return insertedRows[0];
}
/**
 * Overwrite an existing `actors` row with a freshly scraped profile.
 *
 * @param {Object} actorEntry - existing row; only its `id` is used to select the row
 * @param {Object} actor - scraped profile; converted with curateScrapedActor before the update
 * @returns {Promise<Object|undefined>} the first row returned by the update
 */
async function updateActor(actorEntry, actor) {
  const changes = curateScrapedActor(actor);
  const { id } = actorEntry;

  const updatedRows = await knex('actors')
    .where({ id })
    .update(changes)
    .returning('*');

  console.log(`Updated entry for actor '${actor.name}'`);

  return updatedRows[0];
}
/**
 * Scrape profiles for the given actors and persist them, at most five actors at a time.
 *
 * For each name, every scraper in `scrapers.actors` is queried. The first scraper that
 * yields a profile wins: an existing database entry is updated with it, otherwise a new
 * entry is stored. Actors for which no scraper returns a profile are skipped with a log
 * message instead of failing the whole batch.
 *
 * @param {string[]} [actorNames] - names to scrape; falls back to `argv.actors` when omitted
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
  // Fall back to an empty list so a missing argument and a missing CLI option are a no-op, not a rejection.
  await Promise.map(actorNames || argv.actors || [], async (actorName) => {
    const [actorEntry] = await fetchActors({ name: actorName });
    const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorName)));

    // A scraper may come back empty-handed; use the first one that actually produced a profile.
    const profile = profiles.find(Boolean);

    if (!profile) {
      console.error(`No profile found for actor '${actorName}'`);
      return null;
    }

    if (actorEntry) {
      return updateActor(actorEntry, profile);
    }

    return storeActor(profile);
  }, {
    concurrency: 5,
  });
}
/**
 * Make sure every actor named on a release exists in the database, link them all to the
 * release, and kick off profile scraping for the newly created actors.
 *
 * Names are trimmed and de-duplicated before the lookup, so the database query and the
 * "already known" comparison operate on the same values and no actor is inserted twice.
 *
 * @param {Object} release - scraped release; `release.actors` is a list of actor names
 * @param {Object} releaseEntry - stored release row; its `id` is used for the association rows
 * @returns {Promise<void>}
 */
async function storeActors(release, releaseEntry) {
  const actorNames = [...new Set((release.actors || [])
    .map(actorName => actorName.trim())
    .filter(Boolean))];

  if (actorNames.length === 0) {
    return;
  }

  const actorEntries = await knex('actors').whereIn('name', actorNames);
  const knownNames = new Set(actorEntries.map(actor => actor.name));
  const newActorNames = actorNames.filter(actorName => !knownNames.has(actorName));

  // storeActor resolves to null when the insert returned nothing; drop those so `actor.id` below is safe.
  const storedEntries = await Promise.all(newActorNames.map(actorName => storeActor({ name: actorName })));
  const newActorEntries = storedEntries.filter(Boolean);

  const associations = actorEntries.concat(newActorEntries).map(actor => ({
    release_id: releaseEntry.id,
    actor_id: actor.id,
  }));

  if (associations.length > 0) {
    await knex('actors_associated').insert(associations, '*');
  }

  if (newActorEntries.length > 0) {
    // Not awaited, as in the original (presumably so scraping runs in the background);
    // the rejection handler keeps a scraper failure from surfacing as an unhandled rejection.
    scrapeActors(newActorEntries.map(actor => actor.name)).catch((error) => {
      console.error(`Failed to scrape profiles for new actors: ${error.message}`);
    });
  }
}
module.exports = {
fetchActors,
2019-11-17 02:56:45 +00:00
scrapeActors,
storeActors,
};