batch update actors
This commit is contained in:
parent
4b30398983
commit
86bc376b41
|
@ -322,6 +322,8 @@ function curateProfileEntry(profile) {
|
||||||
avatar_media_id: profile.avatarMediaId || null,
|
avatar_media_id: profile.avatarMediaId || null,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (profile.update) curatedProfileEntry.updated_at = new Date().toDateString();
|
||||||
|
|
||||||
return curatedProfileEntry;
|
return curatedProfileEntry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -733,8 +735,9 @@ async function getActorNames(actorNames) {
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM actors_profiles
|
FROM actors_profiles
|
||||||
WHERE actors_profiles.actor_id = actors.id
|
WHERE actors_profiles.actor_id = actors.id
|
||||||
AND actors_profiles.updated_at <= (?)
|
AND actors_profiles.updated_at >= (?)
|
||||||
)
|
) AND alias_for IS NULL
|
||||||
|
ORDER BY actors.name
|
||||||
`, [argv.actorsUpdate || new Date()]);
|
`, [argv.actorsUpdate || new Date()]);
|
||||||
|
|
||||||
return actorsWithoutProfiles.rows.map(actor => actor.name);
|
return actorsWithoutProfiles.rows.map(actor => actor.name);
|
||||||
|
@ -750,9 +753,27 @@ async function storeProfiles(profiles) {
|
||||||
|
|
||||||
/**
 * Scrape profiles for the given actors, optionally in fixed-size batches.
 *
 * The names to scrape are resolved through getActorNames(). When argv.actorsBatch
 * is a positive number, the names are scraped slice by slice, with a progress line
 * logged before each slice; otherwise all names are handed to scrapeActorsBatch()
 * in a single pass.
 *
 * @param {string[]} argNames - actor names forwarded to getActorNames()
 * @returns {Promise<Array>} profiles returned by every batch, in batch order
 */
async function scrapeActors(argNames) {
	const actorNames = await getActorNames(argNames);
	const profiles = [];

	// Coerce to a whole number so that a string-valued setting cannot turn the
	// loop increment into string concatenation; NaN or undefined fall through
	// to the single-pass branch because the comparison below is false for them.
	const batchSize = Math.floor(Number(argv.actorsBatch));

	// Append element by element: spreading a whole batch into push() via apply
	// passes every profile as a call argument and can exceed the engine's
	// argument limit on very large result sets.
	const collect = (batchProfiles) => {
		for (const profile of batchProfiles ?? []) {
			profiles.push(profile);
		}
	};

	logger.info(`Scraping profiles for ${actorNames.length} actors`);

	if (batchSize > 0) {
		for (let offset = 0; offset < actorNames.length; offset += batchSize) {
			logger.info(`Scraping profiles ${((offset / actorNames.length) * 100).toFixed(2)}%`);
			collect(await scrapeActorsBatch(actorNames.slice(offset, offset + batchSize)));
		}
	} else {
		collect(await scrapeActorsBatch(actorNames));
	}

	return profiles;
}
|
||||||
|
|
||||||
|
async function scrapeActorsBatch(actorNames) {
|
||||||
const baseActors = toBaseActors(actorNames);
|
const baseActors = toBaseActors(actorNames);
|
||||||
|
|
||||||
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
logger.info(`Actors: ${actorNames.join(', ')}`);
|
||||||
|
|
||||||
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
||||||
const entitySlugs = sources.flat();
|
const entitySlugs = sources.flat();
|
||||||
|
@ -760,7 +781,7 @@ async function scrapeActors(argNames) {
|
||||||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||||
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
||||||
knex('actors')
|
knex('actors')
|
||||||
.select(['id', 'name', 'slug', 'entry_id'])
|
.select(['id', 'name', 'slug', 'entry_id', 'gender'])
|
||||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
||||||
.whereNull('alias_for'),
|
.whereNull('alias_for'),
|
||||||
]);
|
]);
|
||||||
|
|
14
src/argv.js
14
src/argv.js
|
@ -23,6 +23,13 @@ function interpretAfter(after) {
|
||||||
.toDate();
|
.toDate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Coerce the value of the actors-update option into a date.
 *
 * A falsy value yields the current date and time; any other value is
 * delegated to interpretAfter().
 *
 * @param {*} after - raw option value
 * @returns {Date|*} the current date, or whatever interpretAfter() returns
 */
function interpretActorAfter(after) {
	return after ? interpretAfter(after) : new Date();
}
|
||||||
|
|
||||||
const { argv } = yargs
|
const { argv } = yargs
|
||||||
.command('npm start')
|
.command('npm start')
|
||||||
.option('server', {
|
.option('server', {
|
||||||
|
@ -69,6 +76,11 @@ const { argv } = yargs
|
||||||
default: false,
|
default: false,
|
||||||
alias: 'actor-scenes',
|
alias: 'actor-scenes',
|
||||||
})
|
})
|
||||||
|
.option('actors-batch', {
|
||||||
|
describe: 'Batch size to scrape actors, if not set then all are scraped in one pass',
|
||||||
|
type: 'number',
|
||||||
|
default: config?.actors?.batchSize === null ? 0 : config?.actors?.batchSize,
|
||||||
|
})
|
||||||
.option('actor-sources', {
|
.option('actor-sources', {
|
||||||
describe: 'Use these scrapers for actor data',
|
describe: 'Use these scrapers for actor data',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
|
@ -307,6 +319,6 @@ const { argv } = yargs
|
||||||
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
||||||
})
|
})
|
||||||
.coerce('after', interpretAfter)
|
.coerce('after', interpretAfter)
|
||||||
.coerce('actors-update', interpretAfter);
|
.coerce('actors-update', interpretActorAfter);
|
||||||
|
|
||||||
module.exports = argv;
|
module.exports = argv;
|
||||||
|
|
Loading…
Reference in New Issue