Improved scraping and association behavior.
This commit is contained in:
@@ -12,6 +12,7 @@ async function curateActor(actor) {
|
||||
|
||||
return {
|
||||
id: actor.id,
|
||||
gender: actor.gender,
|
||||
name: actor.name,
|
||||
description: actor.description,
|
||||
birthdate: actor.birthdate && new Date(actor.birthdate),
|
||||
@@ -43,10 +44,13 @@ function curateActors(releases) {
|
||||
return Promise.all(releases.map(async release => curateActor(release)));
|
||||
}
|
||||
|
||||
function curateScrapedActor(actor) {
|
||||
return {
|
||||
function curateActorEntry(actor, scraped, scrapeSuccess) {
|
||||
const curatedActor = {
|
||||
id: actor.id,
|
||||
name: actor.name,
|
||||
name: actor.name
|
||||
.split(' ')
|
||||
.map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
|
||||
.join(' '),
|
||||
slug: actor.name.toLowerCase().replace(/\s+/g, '-'),
|
||||
birthdate: actor.birthdate,
|
||||
description: actor.description,
|
||||
@@ -65,6 +69,16 @@ function curateScrapedActor(actor) {
|
||||
tattoos: actor.tattoos,
|
||||
piercings: actor.piercings,
|
||||
};
|
||||
|
||||
if (scraped) {
|
||||
return {
|
||||
...curatedActor,
|
||||
scraped_at: new Date(),
|
||||
scrape_success: scrapeSuccess,
|
||||
};
|
||||
}
|
||||
|
||||
return curatedActor;
|
||||
}
|
||||
|
||||
async function fetchActors(queryObject) {
|
||||
@@ -82,8 +96,8 @@ async function fetchActors(queryObject) {
|
||||
return curateActors(releases);
|
||||
}
|
||||
|
||||
async function storeActor(actor) {
|
||||
const curatedActor = curateScrapedActor(actor);
|
||||
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
|
||||
|
||||
const actorEntries = await knex('actors')
|
||||
.insert(curatedActor)
|
||||
@@ -102,8 +116,8 @@ async function storeActor(actor) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function updateActor(actorEntry, actor) {
|
||||
const curatedActor = curateScrapedActor(actor);
|
||||
async function updateActor(actorEntry, actor, scraped = false, scrapeSuccess = false) {
|
||||
const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
|
||||
|
||||
const actorEntries = await knex('actors')
|
||||
.where({ id: actorEntry.id })
|
||||
@@ -117,39 +131,59 @@ async function updateActor(actorEntry, actor) {
|
||||
|
||||
async function scrapeActors(actorNames) {
|
||||
await Promise.map(actorNames || argv.actors, async (actorName) => {
|
||||
const [actorEntry] = await fetchActors({ name: actorName });
|
||||
const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorName)));
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
if (actorEntry) {
|
||||
return updateActor(actorEntry, profiles[0]);
|
||||
const [actorEntry] = await fetchActors({ slug: actorSlug });
|
||||
const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorEntry ? actorEntry.name : actorName)));
|
||||
|
||||
if (profiles[0] === null) {
|
||||
console.log(`Could not find profile for actor '${actorName}'`);
|
||||
return updateActor(actorEntry, actorEntry, true, false);
|
||||
}
|
||||
|
||||
return storeActor(profiles[0]);
|
||||
if (actorEntry && profiles[0]) {
|
||||
return updateActor(actorEntry, profiles[0], true, true);
|
||||
}
|
||||
|
||||
return storeActor(profiles[0], true, true);
|
||||
}, {
|
||||
concurrency: 5,
|
||||
concurrency: 1,
|
||||
});
|
||||
}
|
||||
|
||||
async function storeActors(release, releaseEntry) {
|
||||
async function scrapeBasicActors() {
|
||||
const basicActors = await knex('actors').where('scraped_at', null);
|
||||
|
||||
return scrapeActors(basicActors.map(actor => actor.name));
|
||||
}
|
||||
|
||||
async function associateActors(release, releaseId) {
|
||||
const actorEntries = await knex('actors').whereIn('name', release.actors);
|
||||
|
||||
const newActors = release.actors
|
||||
.map(actorName => actorName.trim())
|
||||
.filter(actorName => !actorEntries.some(actor => actor.name === actorName));
|
||||
|
||||
const newActorEntries = await Promise.all(newActors.map(async actorName => storeActor({ name: actorName })));
|
||||
const [newActorEntries, associatedActors] = await Promise.all([
|
||||
Promise.all(newActors.map(async actorName => storeActor({ name: actorName }))),
|
||||
knex('actors_associated').where('release_id', releaseId),
|
||||
]);
|
||||
|
||||
const newlyAssociatedActors = actorEntries
|
||||
.concat(newActorEntries)
|
||||
.filter(actorEntry => !associatedActors.some(actor => actorEntry.id === actor.id))
|
||||
.map(actor => ({
|
||||
release_id: releaseId,
|
||||
actor_id: actor.id,
|
||||
}));
|
||||
|
||||
await knex('actors_associated')
|
||||
.insert(actorEntries.concat(newActorEntries).map(actor => ({
|
||||
release_id: releaseEntry.id,
|
||||
actor_id: actor.id,
|
||||
})), '*');
|
||||
|
||||
scrapeActors(newActorEntries.map(actor => actor.name));
|
||||
.insert(newlyAssociatedActors);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
associateActors,
|
||||
fetchActors,
|
||||
scrapeActors,
|
||||
storeActors,
|
||||
scrapeBasicActors,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user