Improved and documented actor profile scraping.
This commit is contained in:
@@ -582,9 +582,31 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
return profiles.filter(Boolean);
|
||||
}
|
||||
|
||||
async function scrapeActors(actorNames) {
|
||||
/**
 * Resolve the list of actor names to scrape profiles for.
 *
 * When the caller supplies a non-empty list, that list is returned untouched.
 * Otherwise the names are read from the database: every row in `actors` for
 * which no `actors_profiles` row exists with `updated_at` at or before the
 * cut-off (`argv.actorsUpdate`, defaulting to the current time).
 *
 * @param {string[]} [actorNames] - Explicitly requested actor names; may be
 *   empty or omitted to fall back to the database lookup.
 * @returns {Promise<string[]>} Actor names to scrape.
 */
async function getActorNames(actorNames) {
  // Tolerate a missing argument so a bare call falls through to the lookup
  // instead of throwing on `.length`.
  if (actorNames && actorNames.length > 0) {
    return actorNames;
  }

  // NOTE(review): with an explicit `argv.actorsUpdate`, actors that have any
  // profile updated at or before that moment are excluded — confirm this is
  // the intended direction of the comparison.
  const cutoff = argv.actorsUpdate || new Date();

  const actorsWithoutProfiles = await knex.raw(`
    SELECT actors.name
    FROM actors
    WHERE NOT EXISTS (
      SELECT *
      FROM actors_profiles
      WHERE actors_profiles.actor_id = actors.id
      AND actors_profiles.updated_at <= (?)
    )
  `, [cutoff]);

  return actorsWithoutProfiles.rows.map(actor => actor.name);
}
|
||||
|
||||
async function scrapeActors(argNames) {
|
||||
const actorNames = await getActorNames(argNames);
|
||||
const baseActors = toBaseActors(actorNames);
|
||||
|
||||
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
||||
|
||||
const sources = argv.sources || config.profiles || Object.keys(scrapers.actors);
|
||||
const entitySlugs = sources.flat();
|
||||
|
||||
@@ -596,11 +618,7 @@ async function scrapeActors(actorNames) {
|
||||
.orderBy('entities.type'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.modify((queryBuilder) => {
|
||||
if (actorNames.length > 0) {
|
||||
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
|
||||
}
|
||||
})
|
||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
||||
.whereNull('alias_for'),
|
||||
]);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user