batch update actors

This commit is contained in:
SamPulsar1 2021-02-17 11:11:32 +10:30
parent 4b30398983
commit 86bc376b41
2 changed files with 147 additions and 114 deletions

View File

@ -322,6 +322,8 @@ function curateProfileEntry(profile) {
avatar_media_id: profile.avatarMediaId || null,
};
if (profile.update) curatedProfileEntry.updated_at = new Date().toDateString();
return curatedProfileEntry;
}
@ -733,8 +735,9 @@ async function getActorNames(actorNames) {
SELECT *
FROM actors_profiles
WHERE actors_profiles.actor_id = actors.id
AND actors_profiles.updated_at <= (?)
)
AND actors_profiles.updated_at >= (?)
) AND alias_for IS NULL
ORDER BY actors.name
`, [argv.actorsUpdate || new Date()]);
return actorsWithoutProfiles.rows.map(actor => actor.name);
@ -750,9 +753,27 @@ async function storeProfiles(profiles) {
/**
 * Scrape profiles for the requested actors, optionally in batches.
 *
 * The names resolved by getActorNames are handed to scrapeActorsBatch either
 * in consecutive slices of `argv.actorsBatch` names, or all at once when that
 * value is not a positive number. Batches are awaited one after another.
 *
 * @param {*} argNames - passed through unchanged to getActorNames
 * @returns {Promise<Array>} the profiles of every batch, in batch order
 */
async function scrapeActors(argNames) {
  const actorNames = await getActorNames(argNames);
  const batchSize = argv.actorsBatch;
  const profiles = [];

  // Append element by element. `push.apply(profiles, batch)` (or a spread)
  // turns every element into a call argument and throws a RangeError once a
  // batch holds more elements than the engine's argument limit.
  const collect = (batch) => {
    for (const profile of batch ?? []) {
      profiles.push(profile);
    }
  };

  logger.info(`Scraping profiles for ${actorNames.length} actors`);

  // Covers 0, negative, undefined and NaN batch sizes: scrape in a single pass.
  if (!(batchSize > 0)) {
    collect(await scrapeActorsBatch(actorNames));
    return profiles;
  }

  for (let offset = 0; offset < actorNames.length; offset += batchSize) {
    // Progress is reported before each batch starts, so the first entry is 0.00%.
    logger.info(`Scraping profiles ${((offset / actorNames.length) * 100).toFixed(2)}%`);
    collect(await scrapeActorsBatch(actorNames.slice(offset, offset + batchSize)));
  }

  return profiles;
}
async function scrapeActorsBatch(actorNames) {
const baseActors = toBaseActors(actorNames);
logger.info(`Scraping profiles for ${actorNames.length} actors`);
logger.info(`Actors: ${actorNames.join(', ')}`);
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
const entitySlugs = sources.flat();
@ -760,7 +781,7 @@ async function scrapeActors(argNames) {
const [entitiesBySlug, existingActorEntries] = await Promise.all([
fetchEntitiesBySlug(entitySlugs, 'desc'),
knex('actors')
.select(['id', 'name', 'slug', 'entry_id'])
.select(['id', 'name', 'slug', 'entry_id', 'gender'])
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
.whereNull('alias_for'),
]);

View File

@ -23,6 +23,13 @@ function interpretAfter(after) {
.toDate();
}
/**
 * Resolve the cut-off value for actor updates.
 *
 * A falsy argument (nothing supplied) yields the current date and time;
 * anything else is delegated to interpretAfter and its result returned as-is.
 *
 * @param {*} after - raw value to interpret, may be absent
 * @returns {*} a new Date for falsy input, otherwise whatever interpretAfter returns
 */
function interpretActorAfter(after) {
  return after ? interpretAfter(after) : new Date();
}
const { argv } = yargs
.command('npm start')
.option('server', {
@ -69,6 +76,11 @@ const { argv } = yargs
default: false,
alias: 'actor-scenes',
})
.option('actors-batch', {
describe: 'Bath size to scrape actors, if not seet then all are scraped in on pass',
type: 'number',
default: config?.actors?.batchSize === null ? 0 : config?.actors?.batchSize,
})
.option('actor-sources', {
describe: 'Use these scrapers for actor data',
type: 'array',
@ -307,6 +319,6 @@ const { argv } = yargs
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
})
.coerce('after', interpretAfter)
.coerce('actors-update', interpretAfter);
.coerce('actors-update', interpretActorAfter);
module.exports = argv;