Split up profile scrape runner. Fixed wrong search document date key. Added search update CLI.
This commit is contained in:
@@ -376,6 +376,48 @@ async function mergeProfiles(profiles, actor) {
|
||||
return mergedProfile;
|
||||
}
|
||||
|
||||
/**
 * Scrape an actor profile from every given source.
 *
 * Each entry of `sources` is either a single scraper slug or a list of slugs.
 * A list acts as a fallback chain: its scrapers are tried in order and the
 * first one that yields a profile wins.
 *
 * @param {Array<string|string[]>} sources - Scraper slugs, or fallback lists of slugs.
 * @param {string} actorName - Name used in log output and stored on the resulting profile.
 * @param {?Object} actorEntry - Optional actor record; when present its `name` is passed to the scraper instead of `actorName`.
 * @param {Object} sitesBySlug - Site/network objects keyed by slug; the match (or `null`) is handed to the scraper and attached to the profile.
 * @returns {Promise<Array<?Object>>} One item per source: the scraped profile extended with `name`, `scraper` and `site`, or `null` when no scraper of that source produced a profile.
 */
async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
  return Promise.map(sources, async (source) => {
    // A source may be a single slug or a list of fallback slugs.
    const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));

    // Only the failure of the last scraper attempted is reported; the blank
    // seed error covers a source that lists no scrapers at all.
    let lastError = new Error();

    for (const { scraper, scraperSlug } of profileScrapers) {
      if (!scraper) {
        logger.warn(`No profile scraper available for ${scraperSlug}`);
        lastError = new Error(`No profile scraper available for ${scraperSlug}`);

        continue;
      }

      try {
        logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);

        const site = sitesBySlug[scraperSlug] || null;
        const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);

        if (profile) {
          logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);

          return {
            ...profile,
            name: actorName,
            scraper: scraperSlug,
            site,
          };
        }

        logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);

        // A missing profile is an expected outcome, so it is flagged to stay out of the warnings.
        lastError = Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
      } catch (error) {
        // Remember the failure and fall through to the next scraper in the chain.
        lastError = error;
      }
    }

    if (lastError.warn !== false) {
      logger.warn(`Error in scraper ${source}: ${lastError.message}`);
    }

    return null;
  });
}
|
||||
|
||||
async function scrapeActors(actorNames) {
|
||||
return Promise.map(actorNames || argv.actors, async (actorName) => {
|
||||
try {
|
||||
@@ -399,46 +441,7 @@ async function scrapeActors(actorNames) {
|
||||
const sites = await curateSites(siteEntries, true);
|
||||
const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
|
||||
|
||||
const profiles = await Promise.map(finalSources, async (source) => {
|
||||
// const [scraperSlug, scraper] = source;
|
||||
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
|
||||
|
||||
try {
|
||||
return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
|
||||
if (!scraper) {
|
||||
logger.warn(`No profile profile scraper available for ${scraperSlug}`);
|
||||
throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
|
||||
}
|
||||
|
||||
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
|
||||
|
||||
const site = sitesBySlug[scraperSlug] || null;
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
|
||||
|
||||
if (profile) {
|
||||
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
|
||||
|
||||
return {
|
||||
...profile,
|
||||
name: actorName,
|
||||
scraper: scraperSlug,
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
|
||||
throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
|
||||
}), Promise.reject(new Error()));
|
||||
} catch (error) {
|
||||
if (error.warn !== false) {
|
||||
logger.warn(`Error in scraper ${source}: ${error.message}`);
|
||||
// logger.error(error.stack);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
|
||||
const profiles = await scrapeProfiles(sources, sitesBySlug, actorName, actorEntry);
|
||||
const profile = await mergeProfiles(profiles, actorEntry);
|
||||
|
||||
if (profile === null) {
|
||||
|
||||
Reference in New Issue
Block a user