Refactored Aziani scraper. Improved actor profile update logic.
This commit is contained in:
@@ -168,7 +168,7 @@ function toBaseActors(actorsOrNames, release) {
|
||||
|
||||
// using top level parent widens the scope too much, e.g. different Gamma sites may not use the same actor database
|
||||
// const entity = getRecursiveParent(release?.entity);
|
||||
const entity = (release?.entity?.indepdendent && release?.entity)
|
||||
const entity = (release?.entity?.independent && release?.entity)
|
||||
|| release?.entity?.parent
|
||||
|| release?.entity
|
||||
|| null;
|
||||
@@ -308,7 +308,7 @@ function curateProfileEntry(profile) {
|
||||
}
|
||||
|
||||
const curatedProfileEntry = {
|
||||
...(profile.update !== false && { id: profile.update }),
|
||||
...(typeof profile.update === 'number' && { id: profile.update }),
|
||||
actor_id: profile.actorId,
|
||||
entity_id: profile.entity?.id || null,
|
||||
date_of_birth: profile.dateOfBirth,
|
||||
@@ -552,21 +552,34 @@ async function upsertProfiles(profiles) {
|
||||
const newProfileEntries = profiles.filter((profile) => !profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
|
||||
const updatingProfileEntries = profiles.filter((profile) => profile.update).map((profile) => curateProfileEntry(profile)).filter(Boolean);
|
||||
|
||||
const newProfiles = await insertProfiles(newProfileEntries);
|
||||
const newProfiles = newProfileEntries.length > 0
|
||||
? await insertProfiles(newProfileEntries)
|
||||
: [];
|
||||
|
||||
if (argv.force && updatingProfileEntries.length > 0) {
|
||||
const transaction = await knex.transaction();
|
||||
const queries = updatingProfileEntries.map((profileEntry) => knex('actors_profiles')
|
||||
.where('id', profileEntry.id)
|
||||
const queries = updatingProfileEntries.map(async (profileEntry) => knex('actors_profiles')
|
||||
.modify((builder) => {
|
||||
if (profileEntry.id) {
|
||||
builder.where('id', profileEntry.id);
|
||||
} else {
|
||||
builder
|
||||
.where('actor_id', profileEntry.actor_id)
|
||||
.where('entity_id', profileEntry.entity_id);
|
||||
}
|
||||
})
|
||||
.update(profileEntry)
|
||||
.returning(['id', 'actor_id'])
|
||||
.transacting(transaction));
|
||||
|
||||
await Promise.all(queries)
|
||||
.then(transaction.commit)
|
||||
.catch(transaction.rollback);
|
||||
.catch((error) => {
|
||||
logger.error(error.message);
|
||||
return transaction.rollback();
|
||||
});
|
||||
|
||||
logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`);
|
||||
logger.info(`Updated ${updatingProfileEntries.length} actor profiles`);
|
||||
}
|
||||
|
||||
if (profiles.length > 0) {
|
||||
@@ -586,10 +599,12 @@ async function upsertProfiles(profiles) {
|
||||
media_id: profile.avatarMediaId,
|
||||
}));
|
||||
|
||||
await knex('actors_avatars')
|
||||
.insert(avatars)
|
||||
.onConflict()
|
||||
.ignore();
|
||||
if (avatars.length > 0) {
|
||||
await knex('actors_avatars')
|
||||
.insert(avatars)
|
||||
.onConflict()
|
||||
.ignore();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -599,7 +614,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
const profiles = Promise.map(validSources, async (source) => {
|
||||
try {
|
||||
// config may group sources to try until success
|
||||
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
|
||||
return [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
|
||||
try {
|
||||
const entity = entitiesBySlug[scraperSlug] || null;
|
||||
|
||||
@@ -846,6 +861,13 @@ async function getOrCreateActors(baseActors, batchId) {
|
||||
OR actors.entry_id = base_actors.entry_id)
|
||||
`);
|
||||
|
||||
const actorIds = existingActors.map((actor) => actor.id);
|
||||
const entityIds = Array.from(new Set(baseActors.map((actor) => actor.entity?.id).filter(Boolean)));
|
||||
|
||||
const existingProfiles = await knex('actors_profiles')
|
||||
.whereIn('actor_id', actorIds)
|
||||
.whereIn('entity_id', entityIds);
|
||||
|
||||
// const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
|
||||
const existingActorSlugs = existingActors.reduce((acc, actor) => ({
|
||||
...acc,
|
||||
@@ -863,7 +885,7 @@ async function getOrCreateActors(baseActors, batchId) {
|
||||
|
||||
const newActors = await bulkInsert('actors', curatedActorEntries);
|
||||
|
||||
const newActorIdsByEntityIdEntryIdAndSlug = newActors.reduce((acc, actor) => ({
|
||||
const actorIdsByEntityIdEntryIdAndSlug = [...existingActors, ...newActors].reduce((acc, actor) => ({
|
||||
...acc,
|
||||
[actor.entity_id]: {
|
||||
...acc[actor.entity_id],
|
||||
@@ -874,13 +896,26 @@ async function getOrCreateActors(baseActors, batchId) {
|
||||
},
|
||||
}), {});
|
||||
|
||||
const profileIdsByActorIdAndEntityId = existingProfiles.reduce((acc, profile) => ({
|
||||
...acc,
|
||||
[profile.actor_id]: {
|
||||
...acc[profile.actor_id],
|
||||
[profile.entity_id]: profile.id,
|
||||
},
|
||||
}), {});
|
||||
|
||||
const newActorProfiles = await Promise.all(baseActors
|
||||
.filter((actor) => actor.hasProfile)
|
||||
.map((actor) => ({
|
||||
...actor,
|
||||
actorId: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug],
|
||||
}))
|
||||
.filter((actor) => !!actor.id)
|
||||
.map((actor) => {
|
||||
const actorId = actorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || actorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug];
|
||||
|
||||
return {
|
||||
...actor,
|
||||
actorId,
|
||||
update: profileIdsByActorIdAndEntityId[actorId]?.[actor.entity?.id],
|
||||
};
|
||||
})
|
||||
.filter((actor) => !!actor.actorId)
|
||||
.map((actor) => curateProfile(actor)));
|
||||
|
||||
await storeProfiles(newActorProfiles);
|
||||
|
||||
Reference in New Issue
Block a user