Split up profile scrape runner. Fixed wrong search document date key. Added search update CLI.

This commit is contained in:
2020-03-10 23:46:55 +01:00
parent 791a6c1c72
commit d97b1ab894
5 changed files with 58 additions and 47 deletions

View File

@@ -376,6 +376,48 @@ async function mergeProfiles(profiles, actor) {
return mergedProfile;
}
async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
return Promise.map(sources, async (source) => {
// const [scraperSlug, scraper] = source;
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
try {
return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
if (!scraper) {
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
}
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
const site = sitesBySlug[scraperSlug] || null;
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
if (profile) {
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
return {
...profile,
name: actorName,
scraper: scraperSlug,
site,
};
}
logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
}), Promise.reject(new Error()));
} catch (error) {
if (error.warn !== false) {
logger.warn(`Error in scraper ${source}: ${error.message}`);
// logger.error(error.stack);
}
}
return null;
});
}
async function scrapeActors(actorNames) {
return Promise.map(actorNames || argv.actors, async (actorName) => {
try {
@@ -399,46 +441,7 @@ async function scrapeActors(actorNames) {
const sites = await curateSites(siteEntries, true);
const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
const profiles = await Promise.map(finalSources, async (source) => {
// const [scraperSlug, scraper] = source;
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
try {
return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
if (!scraper) {
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
}
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
const site = sitesBySlug[scraperSlug] || null;
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
if (profile) {
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
return {
...profile,
name: actorName,
scraper: scraperSlug,
site,
};
}
logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
}), Promise.reject(new Error()));
} catch (error) {
if (error.warn !== false) {
logger.warn(`Error in scraper ${source}: ${error.message}`);
// logger.error(error.stack);
}
}
return null;
});
const profiles = await scrapeProfiles(sources, sitesBySlug, actorName, actorEntry);
const profile = await mergeProfiles(profiles, actorEntry);
if (profile === null) {