Changed qu's HTML element detection. Profile scrapers now receive the base actor instead of actorName.
This commit is contained in:
@@ -114,12 +114,15 @@ function getAverage(items) {
|
||||
|
||||
function toBaseActors(actorsOrNames, release) {
|
||||
return actorsOrNames.map((actorOrName) => {
|
||||
const name = capitalize(actorOrName.name || actorOrName);
|
||||
const [baseName, entryId] = (actorOrName.name || actorOrName).split(':');
|
||||
|
||||
const name = capitalize(baseName);
|
||||
const slug = slugify(name);
|
||||
|
||||
const baseActor = {
|
||||
name,
|
||||
slug,
|
||||
entryId: entryId || null,
|
||||
entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
|
||||
};
|
||||
|
||||
@@ -213,6 +216,7 @@ function curateActorEntry(baseActor, batchId) {
|
||||
name: baseActor.name,
|
||||
slug: baseActor.slug,
|
||||
entity_id: null,
|
||||
entry_id: baseActor.entry_id,
|
||||
batch_id: batchId,
|
||||
};
|
||||
}
|
||||
@@ -538,7 +542,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
|
||||
logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
|
||||
|
||||
const profile = await scraper.fetchProfile(actor.name, context, include);
|
||||
const profile = await scraper.fetchProfile(actor, context, include);
|
||||
|
||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
||||
@@ -587,7 +591,7 @@ async function scrapeActors(actorNames) {
|
||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.orderBy('entities.type'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug'])
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.modify((queryBuilder) => {
|
||||
if (actorNames.length > 0) {
|
||||
queryBuilder.whereIn('slug', baseActors.map(baseActor => baseActor.slug));
|
||||
@@ -598,12 +602,22 @@ async function scrapeActors(actorNames) {
|
||||
|
||||
const entitiesBySlug = entities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});
|
||||
|
||||
const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
|
||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
|
||||
const existingActorEntriesBySlugAndEntryId = existingActorEntries.reduce((acc, actorEntry) => ({
|
||||
...acc,
|
||||
[actorEntry.slug]: {
|
||||
...acc[actorEntry.slug],
|
||||
[actorEntry.entryId || null]: actorEntry,
|
||||
},
|
||||
}), {});
|
||||
|
||||
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug][baseActor.entryId]);
|
||||
|
||||
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
|
||||
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);
|
||||
const newActorEntries = batchId && await knex('actors').insert(curatedActorEntries).returning(['id', 'name', 'slug']);
|
||||
|
||||
const newActorEntries = batchId && await knex('actors')
|
||||
.insert(curatedActorEntries)
|
||||
.returning(['id', 'name', 'slug', 'entry_id']);
|
||||
|
||||
const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user