batch update actors

SamPulsar1 2021-02-17 11:11:32 +10:30
parent 4b30398983
commit 86bc376b41
2 changed files with 147 additions and 114 deletions

View File

@@ -322,6 +322,8 @@ function curateProfileEntry(profile) {
     avatar_media_id: profile.avatarMediaId || null,
   };
 
+  if (profile.update) curatedProfileEntry.updated_at = new Date().toDateString();
+
   return curatedProfileEntry;
 }
@@ -733,8 +735,9 @@ async function getActorNames(actorNames) {
       SELECT *
       FROM actors_profiles
       WHERE actors_profiles.actor_id = actors.id
-        AND actors_profiles.updated_at <= (?)
-    )
+        AND actors_profiles.updated_at >= (?)
+    ) AND alias_for IS NULL
+    ORDER BY actors.name
   `, [argv.actorsUpdate || new Date()]);
 
   return actorsWithoutProfiles.rows.map(actor => actor.name);
@@ -750,9 +753,27 @@ async function storeProfiles(profiles) {
 
 async function scrapeActors(argNames) {
   const actorNames = await getActorNames(argNames);
+  const profiles = [];
+  const batchSize = argv.actorsBatch;
+
+  logger.info(`Scraping profiles for ${actorNames.length} actors`);
+
+  if (batchSize > 0) {
+    for (let i = 0; i < actorNames.length; i += batchSize) {
+      logger.info(`Scraping profiles ${((i / actorNames.length) * 100).toFixed(2)}%`);
+      profiles.push.apply(profiles, await scrapeActorsBatch(actorNames.slice(i, i + batchSize)));
+    }
+  } else {
+    profiles.push.apply(profiles, await scrapeActorsBatch(actorNames));
+  }
+
+  return profiles;
+}
+
+async function scrapeActorsBatch(actorNames) {
   const baseActors = toBaseActors(actorNames);
-  logger.info(`Scraping profiles for ${actorNames.length} actors`);
+  logger.info(`Actors: ${actorNames.join(', ')}`);
 
   const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
   const entitySlugs = sources.flat();
@@ -760,7 +781,7 @@ async function scrapeActors(argNames) {
   const [entitiesBySlug, existingActorEntries] = await Promise.all([
     fetchEntitiesBySlug(entitySlugs, 'desc'),
     knex('actors')
-      .select(['id', 'name', 'slug', 'entry_id'])
+      .select(['id', 'name', 'slug', 'entry_id', 'gender'])
       .whereIn('slug', baseActors.map(baseActor => baseActor.slug))
      .whereNull('alias_for'),
   ]);
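The core of this change is a simple chunk-and-await loop. Below is a minimal standalone sketch of the same pattern, for reference only — it is not the committed code: scrapeBatch stands in for scrapeActorsBatch, console.log for the logger, and spread syntax replaces push.apply.

    // Scrape profiles in fixed-size batches; a non-positive batch size
    // (the 0 default) scrapes everything in one pass, as in the diff above.
    async function scrapeInBatches(actorNames, batchSize, scrapeBatch) {
      if (!(batchSize > 0)) {
        return scrapeBatch(actorNames);
      }

      const profiles = [];

      for (let i = 0; i < actorNames.length; i += batchSize) {
        // log progress as the percentage of names handed off so far
        console.log(`Scraping profiles ${((i / actorNames.length) * 100).toFixed(2)}%`);
        profiles.push(...await scrapeBatch(actorNames.slice(i, i + batchSize)));
      }

      return profiles;
    }

Batches are awaited sequentially, so at most one batch of scrapes is in flight at a time — that is what bounds the load.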

View File

@@ -23,6 +23,13 @@ function interpretAfter(after) {
     .toDate();
 }
 
+function interpretActorAfter(after) {
+  if (!after) {
+    return new Date();
+  }
+  return interpretAfter(after);
+}
+
 const { argv } = yargs
   .command('npm start')
   .option('server', {
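The effect of the new coercion: omitting --actors-update now yields the current date instead of undefined. A small demonstration, with interpretAfter stubbed out (the real one, defined just above, ends in .toDate(); a plain Date parse is enough to show the flow):

    // stand-in for the real interpretAfter defined earlier in this file
    const interpretAfterStub = after => new Date(after);

    function interpretActorAfter(after) {
      if (!after) {
        return new Date(); // no flag given: fall back to "now"
      }
      return interpretAfterStub(after);
    }

    console.log(interpretActorAfter(undefined));    // current date
    console.log(interpretActorAfter('2021-01-01')); // parsed date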
@@ -69,6 +76,11 @@ const { argv } = yargs
     default: false,
     alias: 'actor-scenes',
   })
+  .option('actors-batch', {
+    describe: 'Batch size to scrape actors; if not set, all are scraped in one pass',
+    type: 'number',
+    default: config?.actors?.batchSize === null ? 0 : config?.actors?.batchSize,
+  })
   .option('actor-sources', {
     describe: 'Use these scrapers for actor data',
     type: 'array',
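The default chain means the batch size can also come from config. A hypothetical config fragment matching the config?.actors?.batchSize lookup above (the value 50 is illustrative); a per-run override would be e.g. `npm start -- --actors-batch 50`:

    // config fragment (assumed shape; only batchSize is read by this option)
    module.exports = {
      actors: {
        batchSize: 50, // scrape profiles for 50 actors at a time
      },
    };

Note the null check in the default: an explicitly null batchSize becomes 0, while an absent one leaves the option undefined; either way, `batchSize > 0` is false and scrapeActors takes the single-pass branch.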
@@ -307,6 +319,6 @@ const { argv } = yargs
     alias: ['delete-movie', 'remove-movies'],
   })
   .coerce('after', interpretAfter)
-  .coerce('actors-update', interpretAfter);
+  .coerce('actors-update', interpretActorAfter);
 
 module.exports = argv;