Pass a context object (containing the site, network and scraper slug) to all profile scrapers, instead of passing the scraper slug and a combined 'site or network' entry as separate arguments.

This commit is contained in:
2020-05-18 03:22:03 +02:00
parent 8733fdc657
commit 885aa4f627
22 changed files with 161 additions and 79 deletions

View File

@@ -137,11 +137,10 @@ async function curateProfile(profile) {
name: profile.name,
avatar: profile.avatar,
scraper: profile.scraper,
site: profile.site,
network: profile.network,
};
curatedProfile.site = profile.site.isNetwork ? null : profile.site;
curatedProfile.network = profile.site.isNetwork ? profile.site : null;
curatedProfile.description = profile.description?.trim() || null;
curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available
curatedProfile.ethnicity = profile.ethnicity?.trim() || null;
@@ -288,7 +287,7 @@ async function interpolateProfiles(actors) {
profile.tattoos = getLongest(valuesByProperty.tattoos);
profile.piercings = getLongest(valuesByProperty.piercings);
profile.avatar_media_id = avatars.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0].id;
profile.avatar_media_id = avatars.sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null;
return profile;
});
@@ -368,21 +367,25 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
try {
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
const scraper = scrapers[scraperSlug];
const siteOrNetwork = networksBySlug[scraperSlug] || sitesBySlug[scraperSlug];
const context = {
site: sitesBySlug[scraperSlug] || null,
network: networksBySlug[scraperSlug] || null,
scraper: scraperSlug,
};
if (!scraper?.fetchProfile) {
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
throw new Error(`No profile profile scraper available for ${scraperSlug}`);
}
if (!siteOrNetwork) {
if (!context.site && !context.network) {
logger.warn(`No site or network found for ${scraperSlug}`);
throw new Error(`No site or network found for ${scraperSlug}`);
}
logger.verbose(`Searching profile for '${actor.name}' on '${scraperSlug}'`);
const profile = await scraper.fetchProfile(actor.name, scraperSlug, siteOrNetwork, include);
const profile = await scraper.fetchProfile(actor.name, context, include);
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
logger.verbose(`Profile for '${actor.name}' not available on ${scraperSlug}, scraper returned ${profile}`);
@@ -392,8 +395,7 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
return {
...actor,
...profile,
scraper: scraperSlug,
site: siteOrNetwork,
...context,
};
}), Promise.reject(new Error()));
} catch (error) {
@@ -424,7 +426,8 @@ async function scrapeActors(actorNames) {
]);
const existingActorEntriesBySlug = existingActorEntries.reduce((acc, actorEntry) => ({ ...acc, [actorEntry.slug]: actorEntry }), {});
const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: { ...network, isNetwork: true } }), {});
const networksBySlug = networks.reduce((acc, network) => ({ ...acc, [network.slug]: network }), {});
const sitesBySlug = sites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlug[baseActor.slug]);
@@ -456,6 +459,8 @@ async function scrapeActors(actorNames) {
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
}
return profiles;
}
async function getOrCreateActors(baseActors, batchId) {