batch update actors
This commit is contained in:
parent
4b30398983
commit
86bc376b41
|
@ -322,6 +322,8 @@ function curateProfileEntry(profile) {
|
|||
avatar_media_id: profile.avatarMediaId || null,
|
||||
};
|
||||
|
||||
if (profile.update) curatedProfileEntry.updated_at = new Date().toDateString();
|
||||
|
||||
return curatedProfileEntry;
|
||||
}
|
||||
|
||||
|
@ -733,8 +735,9 @@ async function getActorNames(actorNames) {
|
|||
SELECT *
|
||||
FROM actors_profiles
|
||||
WHERE actors_profiles.actor_id = actors.id
|
||||
AND actors_profiles.updated_at <= (?)
|
||||
)
|
||||
AND actors_profiles.updated_at >= (?)
|
||||
) AND alias_for IS NULL
|
||||
ORDER BY actors.name
|
||||
`, [argv.actorsUpdate || new Date()]);
|
||||
|
||||
return actorsWithoutProfiles.rows.map(actor => actor.name);
|
||||
|
@ -750,9 +753,27 @@ async function storeProfiles(profiles) {
|
|||
|
||||
/**
 * Scrapes profiles for the requested actors, optionally splitting the work
 * into fixed-size batches (controlled by the --actors-batch CLI option).
 *
 * @param {string[]} argNames - Actor names from the CLI; resolved to the
 *   final list of names by getActorNames.
 * @returns {Promise<Object[]>} Flat array of scraped profile entries.
 */
async function scrapeActors(argNames) {
	const actorNames = await getActorNames(argNames);
	const batchSize = argv.actorsBatch;

	logger.info(`Scraping profiles for ${actorNames.length} actors`);

	// A batch size of 0 (or unset/NaN) means scrape everything in one pass;
	// NaN > 0 is false, so a malformed value also falls through to this path,
	// matching the original `if (batchSize > 0)` branching.
	if (!(batchSize > 0)) {
		return scrapeActorsBatch(actorNames);
	}

	const profiles = [];

	for (let i = 0; i < actorNames.length; i += batchSize) {
		// Progress is reported at the start of each batch, so it never prints 100%
		logger.info(`Scraping profiles ${((i / actorNames.length) * 100).toFixed(2)}%`);
		profiles.push(...await scrapeActorsBatch(actorNames.slice(i, i + batchSize)));
	}

	return profiles;
}
|
||||
|
||||
async function scrapeActorsBatch(actorNames) {
|
||||
const baseActors = toBaseActors(actorNames);
|
||||
|
||||
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
||||
logger.info(`Actors: ${actorNames.join(', ')}`);
|
||||
|
||||
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
||||
const entitySlugs = sources.flat();
|
||||
|
@ -760,7 +781,7 @@ async function scrapeActors(argNames) {
|
|||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.select(['id', 'name', 'slug', 'entry_id', 'gender'])
|
||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
||||
.whereNull('alias_for'),
|
||||
]);
|
||||
|
|
14
src/argv.js
14
src/argv.js
|
@ -23,6 +23,13 @@ function interpretAfter(after) {
|
|||
.toDate();
|
||||
}
|
||||
|
||||
// Variant of interpretAfter used for the actors-update option: a missing
// value defaults to the current time rather than being parsed.
function interpretActorAfter(after) {
	return after ? interpretAfter(after) : new Date();
}
|
||||
|
||||
const { argv } = yargs
|
||||
.command('npm start')
|
||||
.option('server', {
|
||||
|
@ -69,6 +76,11 @@ const { argv } = yargs
|
|||
default: false,
|
||||
alias: 'actor-scenes',
|
||||
})
|
||||
.option('actors-batch', {
|
||||
describe: 'Batch size to scrape actors; if not set then all are scraped in one pass',
|
||||
type: 'number',
|
||||
default: config?.actors?.batchSize === null ? 0 : config?.actors?.batchSize,
|
||||
})
|
||||
.option('actor-sources', {
|
||||
describe: 'Use these scrapers for actor data',
|
||||
type: 'array',
|
||||
|
@ -307,6 +319,6 @@ const { argv } = yargs
|
|||
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
||||
})
|
||||
.coerce('after', interpretAfter)
|
||||
.coerce('actors-update', interpretAfter);
|
||||
.coerce('actors-update', interpretActorAfter);
|
||||
|
||||
module.exports = argv;
|
||||
|
|
Loading…
Reference in New Issue