2019-11-10 03:20:22 +00:00
|
|
|
'use strict';
|
|
|
|
|
2019-11-17 02:56:45 +00:00
|
|
|
const Promise = require('bluebird');
|
2019-11-20 03:53:36 +00:00
|
|
|
|
2019-11-10 03:20:22 +00:00
|
|
|
const knex = require('./knex');
|
2019-11-17 02:56:45 +00:00
|
|
|
const argv = require('./argv');
|
|
|
|
const scrapers = require('./scrapers/scrapers');
|
2019-11-13 02:14:24 +00:00
|
|
|
const whereOr = require('./utils/where-or');
|
2019-11-20 03:53:36 +00:00
|
|
|
const { createActorMediaDirectory, storeAvatars } = require('./media');
|
2019-11-10 03:20:22 +00:00
|
|
|
|
|
|
|
/**
 * Turn a raw `actors` row into the camelCased actor object handed to callers.
 *
 * Looks up two related sets of rows for the actor: `actors` rows whose
 * `alias_for` equals the actor's id, and `media` rows with domain `actors`
 * whose `target_id` equals the actor's id.
 *
 * @param {Object} actor - row from `actors`, optionally carrying the joined
 *   `*_country_alpha2` / `*_country_name` columns
 * @returns {Promise<Object>} curated actor including alias names and avatar rows
 */
async function curateActor(actor) {
  const aliasQuery = knex('actors').where({ alias_for: actor.id });
  const avatarQuery = knex('media').where({ domain: 'actors', target_id: actor.id });

  const [aliases, avatars] = await Promise.all([aliasQuery, avatarQuery]);

  // A country is only exposed as an object when its alpha2 code is present.
  const toCountry = (alpha2, name) => (alpha2 ? { alpha2, name } : null);

  return {
    id: actor.id,
    gender: actor.gender,
    name: actor.name,
    description: actor.description,
    // Falsy birthdates are passed through untouched instead of becoming an invalid Date.
    birthdate: actor.birthdate ? new Date(actor.birthdate) : actor.birthdate,
    country: actor.country_alpha2,
    residencePlace: actor.residence_place,
    residenceCountry: toCountry(actor.residence_country_alpha2, actor.residence_country_name),
    birthPlace: actor.birth_place,
    birthCountry: toCountry(actor.birth_country_alpha2, actor.birth_country_name),
    ethnicity: actor.ethnicity,
    height: actor.height,
    boobSize: actor.boobs_size,
    boobsNatural: actor.boobs_natural,
    aliases: aliases.map(alias => alias.name),
    slug: actor.slug,
    avatars,
  };
}
|
|
|
|
|
|
|
|
/**
 * Curate a list of raw actor rows.
 *
 * @param {Object[]} releases - raw `actors` rows (the parameter name is historical)
 * @returns {Promise<Object[]>} curated actors, in the same order as the input
 */
function curateActors(releases) {
  const pendingActors = releases.map(actorRow => curateActor(actorRow));

  return Promise.all(pendingActors);
}
|
|
|
|
|
2019-11-19 03:36:15 +00:00
|
|
|
/**
 * Build the snake_cased object written to the `actors` table.
 *
 * The name gets the first character of every space-separated segment
 * upper-cased; the slug is the lower-cased name with whitespace runs replaced
 * by a dash.
 *
 * @param {Object} actor - camelCased actor/profile; `name` is required
 * @param {boolean} scraped - when truthy, `scraped_at` and `scrape_success` are added
 * @param {boolean} scrapeSuccess - value stored as `scrape_success`
 * @returns {Object} entry for insertion or update
 */
function curateActorEntry(actor, scraped, scrapeSuccess) {
  const capitalize = word => word.charAt(0).toUpperCase() + word.slice(1);
  const { boobs } = actor;

  const entry = {
    id: actor.id,
    name: actor.name.split(' ').map(word => capitalize(word)).join(' '),
    slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
    birthdate: actor.birthdate,
    description: actor.description,
    gender: actor.gender,
    ethnicity: actor.ethnicity,
    birth_country_alpha2: actor.birthCountry,
    residence_country_alpha2: actor.residenceCountry,
    birth_place: actor.birthPlace,
    residence_place: actor.residencePlace,
    // A missing `boobs` object is passed through as-is (undefined/null).
    boobs_size: boobs ? boobs.size : boobs,
    boobs_natural: boobs ? boobs.natural : boobs,
    height: actor.height,
    weight: actor.weight,
    hair: actor.hair,
    eyes: actor.eyes,
    tattoos: actor.tattoos,
    piercings: actor.piercings,
  };

  if (!scraped) {
    return entry;
  }

  return Object.assign({}, entry, {
    scraped_at: new Date(),
    scrape_success: scrapeSuccess,
  });
}
|
|
|
|
|
2019-11-13 02:14:24 +00:00
|
|
|
/**
 * Fetch and curate up to 100 actors matching the given query.
 *
 * Each actor row is joined with the `countries` table twice, once for the
 * birth country and once for the residence country.
 *
 * @param {Object} queryObject - conditions handed to `whereOr` for the `actors` table
 * @returns {Promise<Object[]>} curated actors
 */
async function fetchActors(queryObject) {
  const selectedColumns = [
    'actors.*',
    'birth_countries.alpha2 as birth_country_alpha2',
    'birth_countries.name as birth_country_name',
    'residence_countries.alpha2 as residence_country_alpha2',
    'residence_countries.name as residence_country_name',
  ];

  const actorRows = await knex('actors')
    .select(...selectedColumns)
    .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
    .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
    .where(builder => whereOr(queryObject, 'actors', builder))
    .limit(100);

  return curateActors(actorRows);
}
|
|
|
|
|
2019-11-19 03:36:15 +00:00
|
|
|
/**
 * Insert a new actor into the `actors` table.
 *
 * @param {Object} actor - camelCased actor/profile; `name` is required
 * @param {boolean} [scraped=false] - whether the data comes from a scrape
 * @param {boolean} [scrapeSuccess=false] - whether that scrape found a profile
 * @returns {Promise<Object|null>} the inserted row, or null when the insert returned no rows
 */
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
  const entry = curateActorEntry(actor, scraped, scrapeSuccess);

  const insertedRows = await knex('actors')
    .insert(entry)
    .returning('*');

  if (!insertedRows.length) {
    console.error(`Unable to save profile for '${actor.name}'`);

    return null;
  }

  console.log(`Added new entry for actor '${actor.name}'`);

  return insertedRows[0];
}
|
|
|
|
|
2019-11-20 03:53:36 +00:00
|
|
|
/**
 * Update the `actors` row whose id matches `actor.id`.
 *
 * @param {Object} actor - camelCased actor/profile; `id` and `name` are required
 * @param {boolean} [scraped=false] - whether the data comes from a scrape
 * @param {boolean} [scrapeSuccess=false] - whether that scrape found a profile
 * @returns {Promise<Object|undefined>} first row returned by the update
 */
async function updateActor(actor, scraped = false, scrapeSuccess = false) {
  const changes = curateActorEntry(actor, scraped, scrapeSuccess);

  const updateQuery = knex('actors')
    .where({ id: actor.id })
    .update(changes)
    .returning('*');

  const updatedRows = await updateQuery;

  console.log(`Updated entry for actor '${actor.name}'`);

  return updatedRows[0];
}
|
|
|
|
|
2019-11-20 03:53:36 +00:00
|
|
|
/**
 * Merge the profiles returned by the individual scrapers into one profile.
 *
 * Profiles are folded left to right and, per field, the first truthy value
 * wins. `social` and `avatars` are accumulated across all profiles instead.
 * Null and undefined profiles (scrapers that found nothing) are skipped; when
 * every profile is skipped the seed object is returned unchanged.
 *
 * @param {Array<Object|null|undefined>} profiles - scraper results
 * @param {Object} [actor] - existing actor entry, if any; supplies `id` and
 *   `name` and seeds the merge. May be omitted for actors not stored yet, in
 *   which case the name is taken from the first profile that has one.
 * @returns {Object} merged profile
 */
function mergeProfiles(profiles, actor) {
  // A not-yet-stored actor has no entry; fall back to an empty seed rather
  // than dereferencing undefined.
  const base = actor || {};

  // Unlike `||`, keeps a known `false` (e.g. boobs.natural) from being overwritten.
  const firstKnown = (previous, next) => (previous === undefined || previous === null ? next : previous);

  return profiles.reduce((prevProfile, profile) => {
    if (profile === null || profile === undefined) {
      return prevProfile;
    }

    const prevBoobs = prevProfile.boobs || {};

    // NOTE(review): only the fields listed below survive a merge; anything
    // else on the seed or on a profile (e.g. description) is dropped, as before.
    return {
      id: base.id,
      name: base.name || prevProfile.name || profile.name,
      gender: prevProfile.gender || profile.gender,
      birthdate: prevProfile.birthdate || profile.birthdate,
      residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
      birthPlace: prevProfile.birthPlace || profile.birthPlace,
      ethnicity: prevProfile.ethnicity || profile.ethnicity,
      // A profile without boobs data must not wipe what earlier profiles supplied.
      boobs: profile.boobs
        ? {
          size: prevBoobs.size || profile.boobs.size,
          natural: firstKnown(prevBoobs.natural, profile.boobs.natural),
        }
        : prevBoobs,
      height: prevProfile.height || profile.height,
      weight: prevProfile.weight || profile.weight,
      hair: prevProfile.hair || profile.hair,
      eyes: prevProfile.eyes || profile.eyes,
      piercings: prevProfile.piercings || profile.piercings,
      tattoos: prevProfile.tattoos || profile.tattoos,
      // The seed may carry nullish `social`/`avatars`; treat those as empty lists.
      social: (prevProfile.social || []).concat(profile.social || []),
      avatars: (prevProfile.avatars || []).concat(profile.avatar || []),
    };
  }, {
    boobs: {},
    social: [],
    avatars: [],
    ...base,
  });
}
|
|
|
|
|
2019-11-17 02:56:45 +00:00
|
|
|
/**
 * Scrape profiles for the given actors and persist the results.
 *
 * For every name the actor is looked up by slug, every scraper in
 * `scrapers.actors` is asked for a profile, and the results are merged. The
 * outcome is one of:
 * - no scraper found anything: an existing entry is marked as scraped
 *   without success; unknown actors are left alone;
 * - existing entry: the media directory is created, then the entry is
 *   updated and the avatars stored;
 * - no entry yet: the actor is stored, then the media directory is created
 *   and the avatars stored.
 *
 * @param {string[]} [actorNames] - names to scrape; defaults to `argv.actors`
 * @returns {Promise<void>}
 */
async function scrapeActors(actorNames) {
  await Promise.map(actorNames || argv.actors, async (actorName) => {
    const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

    const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
    const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)));

    // Scrapers signal "nothing found" with a nullish result; only merge real profiles.
    const foundProfiles = profiles.filter(scrapedProfile => scrapedProfile !== null && scrapedProfile !== undefined);

    // Unknown actors have no entry yet, so seed the merge with the requested name.
    const profile = foundProfiles.length > 0
      ? mergeProfiles(foundProfiles, actorEntry || { name: actorName })
      : null;

    if (!profile) {
      console.log(`Could not find profile for actor '${actorName}'`);

      // Record the failed attempt on the existing entry; there is no profile
      // to write, so only the identifying fields are passed along.
      if (actorEntry) {
        await updateActor({ id: actorEntry.id, name: actorEntry.name }, true, false);
      }

      return;
    }

    if (actorEntry) {
      await createActorMediaDirectory(profile, actorEntry);

      await Promise.all([
        updateActor(profile, true, true),
        storeAvatars(profile, actorEntry),
      ]);

      return;
    }

    const newActorEntry = await storeActor(profile, true, true);

    // storeActor yields null when the insert returned no row; without an
    // entry there is nothing to attach media to.
    if (!newActorEntry) {
      return;
    }

    await createActorMediaDirectory(profile, newActorEntry);
    await storeAvatars(profile, newActorEntry);
  }, {
    concurrency: 1,
  });
}
|
|
|
|
|
2019-11-19 03:36:15 +00:00
|
|
|
/**
 * Scrape every actor whose `scraped_at` column is null.
 *
 * @returns {Promise<void>} resolves once `scrapeActors` has handled all of them
 */
async function scrapeBasicActors() {
  const unscrapedActors = await knex('actors').where('scraped_at', null);
  const unscrapedNames = unscrapedActors.map(({ name }) => name);

  return scrapeActors(unscrapedNames);
}
|
|
|
|
|
|
|
|
/**
 * Link the actors named on a release to that release.
 *
 * Names are trimmed and de-duplicated first. Actors not yet present in the
 * `actors` table are created through `storeActor`. An `actors_associated` row
 * is then inserted for every actor that is not already linked to the release.
 *
 * @param {Object} release - release whose `actors` property lists actor names
 * @param {number|string} releaseId - id of the stored release
 * @returns {Promise<void>}
 */
async function associateActors(release, releaseId) {
  // Normalize once so the lookup and the creation work on the same names,
  // and a name listed twice does not create two actors.
  const actorNames = [...new Set(
    (release.actors || [])
      .map(actorName => actorName.trim())
      .filter(Boolean),
  )];

  if (actorNames.length === 0) {
    return;
  }

  const actorEntries = await knex('actors').whereIn('name', actorNames);
  const knownNames = new Set(actorEntries.map(actor => actor.name));

  const newActors = actorNames.filter(actorName => !knownNames.has(actorName));

  const [newActorEntries, associatedActors] = await Promise.all([
    Promise.all(newActors.map(actorName => storeActor({ name: actorName }))),
    knex('actors_associated').where('release_id', releaseId),
  ]);

  // Association rows reference the actor through `actor_id` (the column
  // written below), not through their own `id`.
  const associatedActorIds = new Set(associatedActors.map(association => association.actor_id));

  const newlyAssociatedActors = actorEntries
    .concat(newActorEntries)
    // storeActor yields null when the insert returned no row; skip those.
    .filter(actorEntry => actorEntry && !associatedActorIds.has(actorEntry.id))
    .map(actor => ({
      release_id: releaseId,
      actor_id: actor.id,
    }));

  if (newlyAssociatedActors.length === 0) {
    return;
  }

  await knex('actors_associated')
    .insert(newlyAssociatedActors);
}
|
|
|
|
|
2019-11-10 03:20:22 +00:00
|
|
|
// Public API of this module.
module.exports = { associateActors, fetchActors, scrapeActors, scrapeBasicActors };
|