Scraping actor profiles from FreeOnes.
This commit is contained in:
123
src/actors.js
123
src/actors.js
@@ -1,6 +1,9 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
const knex = require('./knex');
|
||||
const argv = require('./argv');
|
||||
const scrapers = require('./scrapers/scrapers');
|
||||
const whereOr = require('./utils/where-or');
|
||||
|
||||
async function curateActor(actor) {
|
||||
@@ -13,7 +16,20 @@ async function curateActor(actor) {
|
||||
description: actor.description,
|
||||
birthdate: actor.birthdate && new Date(actor.birthdate),
|
||||
country: actor.country_alpha2,
|
||||
city: actor.city,
|
||||
residencePlace: actor.residence_place,
|
||||
residenceCountry: actor.residence_country_alpha2
|
||||
? {
|
||||
alpha2: actor.residence_country_alpha2,
|
||||
name: actor.residence_country_name,
|
||||
}
|
||||
: null,
|
||||
birthPlace: actor.birth_place,
|
||||
birthCountry: actor.birth_country_alpha2
|
||||
? {
|
||||
alpha2: actor.birth_country_alpha2,
|
||||
name: actor.birth_country_name,
|
||||
}
|
||||
: null,
|
||||
ethnicity: actor.ethnicity,
|
||||
height: actor.height,
|
||||
boobSize: actor.boobs_size,
|
||||
@@ -27,32 +43,113 @@ function curateActors(releases) {
|
||||
return Promise.all(releases.map(async release => curateActor(release)));
|
||||
}
|
||||
|
||||
/**
 * Convert a scraped actor profile (camelCase fields) into a row object using
 * the snake_case column names that are written to the `actors` table.
 *
 * @param {Object} actor - Scraped profile. `name` must be a string; all other
 *   fields are optional and are copied through as-is (missing ones become `undefined`).
 * @returns {Object} Row object ready to be passed to an insert or update.
 */
function curateScrapedActor(actor) {
    return {
        id: actor.id,
        name: actor.name,
        // Lower-cased name with every run of whitespace collapsed to a single dash.
        slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
        birthdate: actor.birthdate,
        description: actor.description,
        gender: actor.gender,
        ethnicity: actor.ethnicity,
        birth_country_alpha2: actor.birthCountry,
        residence_country_alpha2: actor.residenceCountry,
        birth_place: actor.birthPlace,
        residence_place: actor.residencePlace,
        active: actor.active,
        // `boobs` is an optional nested object; pass its falsy value through when absent.
        boobs_size: actor.boobs && actor.boobs.size,
        boobs_natural: actor.boobs && actor.boobs.natural,
        height: actor.height,
        hair: actor.hair,
        eyes: actor.eyes,
        tattoos: actor.tattoos,
        piercings: actor.piercings,
    };
}
|
||||
|
||||
/**
 * Fetch at most 100 actors from the database, each joined with the name and
 * alpha2 code of its birth and residence country, and curate the rows.
 *
 * @param {Object} queryObject - Filter handed to `whereOr` together with the
 *   `actors` table name (presumably key/value pairs that are OR-ed together —
 *   confirm against ./utils/where-or).
 * @returns {Promise<Object[]>} The matching rows after passing through `curateActors`.
 */
async function fetchActors(queryObject) {
    const actorRows = await knex('actors')
        .select(
            'actors.*',
            'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name',
            'residence_countries.alpha2 as residence_country_alpha2', 'residence_countries.name as residence_country_name',
        )
        // The countries table is joined twice, once per country reference on the actor.
        .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
        .leftJoin('countries as residence_countries', 'actors.residence_country_alpha2', 'residence_countries.alpha2')
        .where(builder => whereOr(queryObject, 'actors', builder))
        .limit(100);

    return curateActors(actorRows);
}
|
||||
|
||||
/**
 * Insert a scraped actor profile as a new row in the `actors` table.
 *
 * @param {Object} actor - Scraped profile; curated with `curateScrapedActor` before insertion.
 * @returns {Promise<Object|null>} The inserted row, or `null` when the insert returned no rows.
 */
async function storeActor(actor) {
    const curatedActor = curateScrapedActor(actor);

    const actorEntries = await knex('actors')
        .insert(curatedActor)
        .returning('*');

    if (actorEntries.length === 0) {
        console.error(`Unable to save profile for '${actor.name}'`);

        return null;
    }

    console.log(`Added new entry for actor '${actor.name}'`);

    return actorEntries[0];
}
|
||||
|
||||
/**
 * Overwrite an existing `actors` row with a freshly scraped profile.
 *
 * @param {Object} actorEntry - Existing actor; only its `id` is used to select the row.
 * @param {Object} actor - Scraped profile; curated with `curateScrapedActor` before the update.
 * @returns {Promise<Object|undefined>} The updated row, or `undefined` when no row matched the id.
 */
async function updateActor(actorEntry, actor) {
    const curatedActor = curateScrapedActor(actor);

    const actorEntries = await knex('actors')
        .where({ id: actorEntry.id })
        .update(curatedActor)
        .returning('*');

    // Only report success when a row actually came back from the update.
    if (actorEntries.length > 0) {
        console.log(`Updated entry for actor '${actor.name}'`);
    } else {
        console.error(`Unable to update profile for '${actor.name}'`);
    }

    return actorEntries[0];
}
|
||||
|
||||
/**
 * Scrape a profile for each actor name and persist it: update the existing
 * database entry when one is found by name, otherwise store a new one.
 * At most five actors are processed concurrently.
 *
 * @param {string[]} [actorNames] - Names to scrape; falls back to `argv.actors`,
 *   and to an empty list when neither is provided.
 * @returns {Promise<void>} Resolves once every actor has been processed.
 */
async function scrapeActors(actorNames) {
    await Promise.map(actorNames || argv.actors || [], async (actorName) => {
        const [actorEntry] = await fetchActors({ name: actorName });
        const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorName)));

        // Use the first scraper that produced a profile; without one there is nothing to persist.
        const profile = profiles.find(Boolean);

        if (!profile) {
            console.error(`Unable to find profile for '${actorName}'`);

            return null;
        }

        if (actorEntry) {
            return updateActor(actorEntry, profile);
        }

        return storeActor(profile);
    }, {
        concurrency: 5,
    });
}
|
||||
|
||||
/**
 * Associate the actors named on a release with the stored release entry,
 * creating a name-only `actors` row for every actor not yet in the database
 * and then kicking off a background profile scrape for those new actors.
 *
 * @param {Object} release - Scraped release; `release.actors` is a list of actor names.
 * @param {Object} releaseEntry - Stored release; its `id` is used as `release_id`.
 * @returns {Promise<void>} Resolves once the associations are inserted; the
 *   profile scrape is not awaited.
 */
async function storeActors(release, releaseEntry) {
    // Trim before both the lookup and the comparison so they agree, and drop blanks and duplicates.
    const actorNames = [...new Set((release.actors || [])
        .map(actorName => actorName.trim())
        .filter(Boolean))];

    if (actorNames.length === 0) {
        return;
    }

    const actorEntries = await knex('actors').whereIn('name', actorNames);
    const knownNames = new Set(actorEntries.map(actor => actor.name));
    const newActorNames = actorNames.filter(actorName => !knownNames.has(actorName));

    // storeActor resolves to null when an insert yields no row; skip those instead of crashing on `.id`.
    const storedEntries = await Promise.all(newActorNames.map(actorName => storeActor({ name: actorName })));
    const newActorEntries = storedEntries.filter(Boolean);

    const associations = actorEntries.concat(newActorEntries).map(actor => ({
        release_id: releaseEntry.id,
        actor_id: actor.id,
    }));

    if (associations.length > 0) {
        await knex('actors_associated').insert(associations, '*');
    }

    // Deliberately not awaited so storing the release is not held up by scraping;
    // the rejection handler keeps a scrape failure from becoming an unhandled rejection.
    scrapeActors(newActorEntries.map(actor => actor.name))
        .catch(error => console.error(`Unable to scrape new actors: ${error.message}`));
}
|
||||
|
||||
module.exports = {
|
||||
fetchActors,
|
||||
scrapeActors,
|
||||
storeActors,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user