Improved and documented actor profile scraping.

This commit is contained in:
DebaucheryLibrarian
2020-08-12 20:51:08 +02:00
parent 5cabeed19d
commit 7413d7db25
5 changed files with 64 additions and 32 deletions

View File

@@ -582,9 +582,31 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
return profiles.filter(Boolean);
}
async function scrapeActors(actorNames) {
/**
 * Resolve the list of actor names whose profiles should be scraped.
 *
 * A non-empty list is handed back as-is (same array instance). For an empty
 * list, the names are looked up in the database: every actor for which no
 * `actors_profiles` row exists with `updated_at` at or before the cutoff.
 * The cutoff is `argv.actorsUpdate` when set, otherwise the current time.
 *
 * NOTE(review): with the default cutoff this presumably selects actors that
 * have no profile at all; confirm the `<=` direction is what is intended
 * when an explicit `argv.actorsUpdate` date is supplied.
 *
 * @param {string[]} actorNames - Names requested by the caller; may be empty.
 * @returns {Promise<string[]>} The names to scrape.
 */
async function getActorNames(actorNames) {
	const hasExplicitNames = actorNames.length > 0;

	if (!hasExplicitNames) {
		// Nothing was requested explicitly, so fall back to the database.
		const cutoff = argv.actorsUpdate || new Date();

		const { rows } = await knex.raw(`
SELECT actors.name
FROM actors
WHERE NOT EXISTS (
SELECT *
FROM actors_profiles
WHERE actors_profiles.actor_id = actors.id
AND actors_profiles.updated_at <= (?)
)
`, [cutoff]);

		return rows.map(({ name }) => name);
	}

	return actorNames;
}
async function scrapeActors(argNames) {
const actorNames = await getActorNames(argNames);
const baseActors = toBaseActors(actorNames);
logger.info(`Scraping profiles for ${actorNames.length} actors`);
const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
const entitySlugs = sources.flat();
@@ -596,11 +618,7 @@ async function scrapeActors(actorNames) {
.orderBy('entities.type'),
knex('actors')
.select(['id', 'name', 'slug', 'entry_id'])
.modify((queryBuilder) => {
if (actorNames.length > 0) {
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
}
})
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
.whereNull('alias_for'),
]);