Refactoring to use entities over sites and networks.

This commit is contained in:
ThePendulum 2020-06-17 04:07:24 +02:00
parent 1907ce1e54
commit f0a89df6ab
7 changed files with 93 additions and 97 deletions
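The gist of the change: the separate sites and networks tables give way to a single entities table, where a network is simply the parent row of its sites, a type column tells the two apart (the queries below use type 1 for networks and type 2 for sites), and releases, actors and profiles reference entity_id instead of site_id/network_id. A minimal sketch of the resulting lookup pattern, assuming a knexfile and a hypothetical helper name that are not part of this commit:

// Hypothetical helper illustrating the entity self-join this commit introduces:
// a site row joined to its network through parent_id on the same entities table.
const knex = require('knex')(require('./knexfile')); // knexfile path is an assumption

async function fetchSiteWithNetwork(siteSlug) {
  return knex('entities')
    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
    .select(
      'entities.*',
      'parents.name as network_name',
      'parents.slug as network_slug',
      'parents.url as network_url',
    )
    .where('entities.type', 2) // 2 = site; networks are fetched with type 1
    .where('entities.slug', siteSlug)
    .first();
}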

View File

@@ -1,11 +1,10 @@
 const siteFragment = `
-  site {
+  site: entity {
     id
     name
    slug
    url
-    independent
-    network {
+    network: parent {
      id
      name
      slug
@@ -20,7 +19,6 @@ const sitesFragment = `
    name
    slug
    url
-    independent
    network {
      id
      name
@@ -49,7 +47,7 @@ const actorFields = `
      lazy
    }
  }
-  network {
+  network: entity {
    id
    name
    slug

View File

@@ -326,9 +326,9 @@ exports.up = knex => Promise.resolve()
     table.text('real_name');

-    table.integer('network_id', 12)
+    table.integer('entity_id', 12)
       .references('id')
-      .inTable('networks');
+      .inTable('entities');

     table.integer('alias_for', 12)
       .references('id')
@@ -393,15 +393,11 @@ exports.up = knex => Promise.resolve()
       .references('id')
       .inTable('actors');

-    table.integer('network_id', 12)
+    table.integer('entity_id', 12)
       .references('id')
-      .inTable('networks');
+      .inTable('entities');

-    table.integer('site_id', 12)
-      .references('id')
-      .inTable('sites');
-
-    table.unique(['actor_id', 'network_id', 'site_id']);
+    table.unique(['actor_id', 'entity_id']);

     table.integer('priority', 4)
       .defaultTo(1);
@@ -680,13 +676,10 @@ exports.up = knex => Promise.resolve()
   .then(() => knex.schema.createTable('releases', (table) => {
     table.increments('id', 16);

-    table.integer('site_id', 12)
+    table.integer('entity_id', 12)
       .references('id')
-      .inTable('sites');
+      .inTable('entities')
+      .notNullable();

-    table.integer('network_id', 12)
-      .references('id')
-      .inTable('networks');
-
     table.integer('studio_id', 12)
       .references('id')
@@ -697,7 +690,7 @@ exports.up = knex => Promise.resolve()
     table.text('shoot_id');
     table.text('entry_id');

-    table.unique(['site_id', 'network_id', 'entry_id', 'type']);
+    table.unique(['entity_id', 'entry_id', 'type']);

     table.text('url', 1000);
     table.text('title');
@@ -856,15 +849,6 @@ exports.up = knex => Promise.resolve()
   .then(() => { // eslint-disable-line arrow-body-style
     // allow vim fold
     return knex.raw(`
-      ALTER TABLE releases
-        ADD CONSTRAINT ensure_site_or_network CHECK (site_id IS NOT NULL OR network_id IS NOT NULL);
-
-      ALTER TABLE releases_search
-        ADD COLUMN document tsvector;
-
-      CREATE UNIQUE INDEX unique_actor_slugs_network ON actors (slug, network_id);
-      CREATE UNIQUE INDEX unique_actor_slugs ON actors (slug, (network_id IS NULL));
-
       CREATE TEXT SEARCH DICTIONARY traxxx_dict (
         TEMPLATE = pg_catalog.simple,
         stopwords = traxxx
@@ -874,6 +858,12 @@ exports.up = knex => Promise.resolve()
         COPY = english
       );

+      ALTER TABLE releases_search
+        ADD COLUMN document tsvector;
+
+      CREATE UNIQUE INDEX unique_actor_slugs_network ON actors (slug, entity_id);
+      CREATE UNIQUE INDEX unique_actor_slugs ON actors (slug, (entity_id IS NULL));
+
       ALTER TEXT SEARCH CONFIGURATION traxxx
         ALTER MAPPING FOR word, numword, hword, numhword, hword_part, hword_numpart, asciiword, asciihword, hword_asciipart WITH traxxx_dict, simple, english_stem;
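The new foreign keys above point at an entities table whose createTable call falls outside the visible hunks; a hedged sketch of what it presumably contains, with columns and types inferred only from what this commit references elsewhere (id, parent_id, type, name, slug, alias, url, description, parameters):

// Sketch only: the real entities table is created elsewhere in this migration,
// so every column and type here is an inference, not the committed definition.
const createEntitiesSketch = knex => knex.schema.createTable('entities', (table) => {
  table.increments('id', 12);

  table.integer('parent_id', 12)
    .references('id')
    .inTable('entities');

  table.integer('type', 4); // 1 = network, 2 = site, per the queries elsewhere in this commit

  table.text('name');
  table.text('slug');
  table.text('alias');
  table.text('url', 1000);
  table.text('description');
  table.json('parameters');
});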

View File

@@ -144,7 +144,7 @@ function curateActor(actor, withDetails = false) {
     name: actor.name,
     slug: actor.slug,
     gender: actor.gender,
-    networkId: actor.network_id,
+    networkId: actor.entity_id,
     aliasFor: actor.alias_for,
     dateOfBirth: actor.date_of_birth,
     birthCountry: actor.birth_country_alpha2,
@@ -211,7 +211,7 @@ function curateActorEntry(baseActor, batchId) {
   return {
     name: baseActor.name,
     slug: baseActor.slug,
-    network_id: null,
+    entity_id: null,
     batch_id: batchId,
   };
 }
@@ -225,7 +225,7 @@ function curateProfileEntry(profile) {
     ...(profile.update !== false && { id: profile.update }),
     actor_id: profile.id,
     site_id: profile.site?.id || null,
-    network_id: profile.network?.id || null,
+    entity_id: profile.network?.id || null,
     date_of_birth: profile.dateOfBirth,
     date_of_death: profile.dateOfDeath,
     gender: profile.gender,
@@ -577,14 +577,17 @@ async function scrapeActors(actorNames) {
   const siteSlugs = sources.flat();

   const [networks, sites, existingActorEntries] = await Promise.all([
-    knex('networks').whereIn('slug', siteSlugs),
-    knex('sites')
+    knex('entities')
+      .where('type', 2)
+      .whereIn('slug', siteSlugs),
+    knex('entities')
       .select(
-        'sites.*',
-        'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
+        'entities.*',
+        'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.description as network_description', 'parents.parameters as network_parameters',
       )
-      .whereIn('sites.slug', siteSlugs)
-      .leftJoin('networks', 'sites.network_id', 'networks.id'),
+      .where('type', 2)
+      .whereIn('entities.slug', siteSlugs)
+      .leftJoin('entities as parents', 'parents.id', 'entities.parent_id'),
     knex('actors')
       .select(['id', 'name', 'slug'])
       .modify((queryBuilder) => {
@@ -612,8 +615,8 @@ async function scrapeActors(actorNames) {
         ...acc,
         [profile.actor_id]: {
           ...acc[profile.actor_id],
-          [profile.network_id]: {
-            ...acc[profile.network_id],
+          [profile.entity_id]: {
+            ...acc[profile.entity_id],
             [profile.site_id]: profile,
           },
         },
@@ -644,17 +647,19 @@ async function scrapeActors(actorNames) {
 }

 async function getOrCreateActors(baseActors, batchId) {
+  console.log(baseActors);
+
   const existingActors = await knex('actors')
-    .select('id', 'alias_for', 'name', 'slug', 'network_id')
+    .select('id', 'alias_for', 'name', 'slug', 'entity_id')
     .whereIn('slug', baseActors.map(baseActor => baseActor.slug))
-    .whereNull('network_id')
-    .orWhereIn(['slug', 'network_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));
+    .whereNull('entity_id')
+    .orWhereIn(['slug', 'entity_id'], baseActors.map(baseActor => [baseActor.slug, baseActor.network.id]));

   // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug));
   const existingActorSlugs = existingActors.reduce((acc, actor) => ({
     ...acc,
-    [actor.network_id]: {
-      ...acc[actor.network_id],
+    [actor.entity_id]: {
+      ...acc[actor.entity_id],
       [actor.slug]: true,
     },
   }), {});
@@ -662,7 +667,7 @@ async function getOrCreateActors(baseActors, batchId) {
   const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.network.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]);
   const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);

-  const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'network_id']);
+  const newActors = await knex('actors').insert(curatedActorEntries, ['id', 'alias_for', 'name', 'slug', 'entity_id']);

   if (Array.isArray(newActors)) {
     return newActors.concat(existingActors);
@@ -732,7 +737,7 @@ async function fetchActor(actorId) {
       queryBuilder.where('actors.id', actorId);
     })
     .leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
-    .leftJoin('networks', 'networks.id', 'actors.network_id')
+    .leftJoin('networks', 'networks.id', 'actors.entity_id')
     .leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
     .leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
     .leftJoin('media', 'media.id', 'actors.avatar_media_id')

View File

@@ -34,12 +34,15 @@ async function findSites(baseReleases) {
       .filter(Boolean),
   ));

-  const siteEntries = await knex('sites')
-    .leftJoin('networks', 'networks.id', 'sites.network_id')
-    .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
-    .whereIn('sites.slug', siteSlugs);
+  const siteEntries = await knex('entities')
+    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
+    .select('entities.*', 'parents.id as network_id', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
+    .where('entities.type', 2)
+    .whereIn('entities.slug', siteSlugs);

-  const networkEntries = await knex('networks').whereIn('slug', siteSlugs);
+  const networkEntries = await knex('entities')
+    .where('type', 1)
+    .whereIn('slug', siteSlugs);

   const sites = await curateSites(siteEntries, true, false);
   const networks = await curateNetworks(networkEntries, true, false, false);

View File

@@ -17,7 +17,10 @@ function curateEntity(entity, includeParameters = false) {
     type: entity.type,
     parameters: includeParameters ? entity.parameters : null,
     parent: entity.parent,
-    children: (entity.children || []).map(child => curateEntity(child)),
+    children: (entity.children || []).map(child => curateEntity({
+      ...child,
+      parent: entity,
+    })),
   };

   return curatedEntity;
@@ -28,7 +31,8 @@ async function curateEntities(entities, includeParameters) {
 }

 async function fetchSitesFromArgv() {
-  const rawEntities = await knex.raw(`
+  const rawNetworks = await knex.raw(`
+    /* networks from argument with sites as children */
     WITH RECURSIVE temp AS (
       SELECT
         id, parent_id, name, slug, type, url, description, parameters
@@ -57,8 +61,10 @@ async function fetchSitesFromArgv() {
     GROUP BY
       temp.parent_id, entities.id, entities.name, parents.id
     UNION ALL
+    /* sites from argument as the child of network with parent */
     SELECT
-      entities.*, row_to_json(parents) as parent, json_build_array(row_to_json(children))
+      entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
     FROM
       entities AS children
     LEFT JOIN
@@ -68,15 +74,13 @@ async function fetchSitesFromArgv() {
     WHERE
       children.slug = ANY(?) AND children.type = 2
     GROUP BY
-      entities.id, parents.id, children.id;
+      entities.id, parents.id;
   `, [argv.networks || [], argv.sites || []]);

-  const curatedEntities = await curateEntities(rawEntities.rows, true);
-  logger.info(`Found ${curatedEntities.length} entities in database`);
+  const curatedNetworks = await curateEntities(rawNetworks.rows, true);
+  logger.info(`Found ${curatedNetworks.length} networks in database`);

-  console.log(rawEntities.rows);
-
-  return curatedEntities;
+  return curatedNetworks;
 }

 async function fetchSitesFromConfig() {

View File

@@ -20,8 +20,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
   const curatedRelease = {
     title: release.title,
     entry_id: release.entryId || null,
-    site_id: release.site?.id,
-    network_id: release.site ? null : release.network?.id, // prefer site ID if available
+    entity_id: release.site?.id,
     shoot_id: release.shootId || null,
     studio_id: release.studio?.id || null,
     url: release.url,
@@ -49,10 +48,10 @@ function curateReleaseEntry(release, batchId, existingRelease) {
 async function attachChannelSites(releases) {
   const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork || release.site.slug !== release.channel));

-  const channelSites = await knex('sites')
-    .leftJoin('networks', 'networks.id', 'sites.network_id')
-    .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.parameters as network_parameters', 'networks.description as network_description')
-    .whereIn('sites.slug', releasesWithoutSite.map(release => release.channel));
+  const channelSites = await knex('entities')
+    .leftJoin('entities AS parents', 'parents.id', 'entities.parent_id')
+    .select('entities.*', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
+    .whereIn('entities.slug', releasesWithoutSite.map(release => release.channel));

   const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
@@ -71,7 +70,6 @@ async function attachChannelSites(releases) {
       return release;
     }
-
     if (release.site && release.site.isNetwork) {
       return {
         ...release,
@@ -114,8 +112,8 @@ async function attachStudios(releases) {
 function attachReleaseIds(releases, storedReleases) {
   const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
-    if (!acc[release.site_id]) acc[release.site_id] = {};
-    acc[release.site_id][release.entry_id] = release.id;
+    if (!acc[release.entity_id]) acc[release.entity_id] = {};
+    acc[release.entity_id][release.entry_id] = release.id;

     return acc;
   }, {});
@@ -152,11 +150,11 @@ async function filterDuplicateReleases(releases) {
   const internalUniqueReleases = filterInternalDuplicateReleases(releases);

   const duplicateReleaseEntries = await knex('releases')
-    .whereIn(['entry_id', 'site_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));
+    .whereIn(['entry_id', 'entity_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id]));

   const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
-    if (!acc[release.site_id]) acc[release.site_id] = {};
-    acc[release.site_id][release.entry_id] = true;
+    if (!acc[release.entity_id]) acc[release.entity_id] = {};
+    acc[release.entity_id][release.entry_id] = true;

     return acc;
   }, {});
@@ -180,13 +178,13 @@ async function updateReleasesSearch(releaseIds) {
       TO_TSVECTOR(
         'traxxx',
         COALESCE(releases.title, '') || ' ' ||
-        networks.name || ' ' ||
-        networks.slug || ' ' ||
-        networks.url || ' ' ||
-        sites.name || ' ' ||
-        sites.slug || ' ' ||
-        COALESCE(sites.url, '') || ' ' ||
-        COALESCE(sites.alias, '') || ' ' ||
+        parents.name || ' ' ||
+        parents.slug || ' ' ||
+        parents.url || ' ' ||
+        entities.name || ' ' ||
+        entities.slug || ' ' ||
+        COALESCE(entities.url, '') || ' ' ||
+        COALESCE(entities.alias, '') || ' ' ||
         COALESCE(releases.shoot_id, '') || ' ' ||
         COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
         STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
@@ -194,15 +192,15 @@ async function updateReleasesSearch(releaseIds) {
         STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
       ) as document
     FROM releases
-    LEFT JOIN sites ON releases.site_id = sites.id
-    LEFT JOIN networks ON sites.network_id = networks.id
+    LEFT JOIN entities ON releases.entity_id = entities.id
+    LEFT JOIN entities AS parents ON parents.id = entities.parent_id
     LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
     LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
     LEFT JOIN actors ON local_actors.actor_id = actors.id
     LEFT JOIN tags ON local_tags.tag_id = tags.id AND tags.priority >= 7
     LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for AND tags_aliases.secondary = true
     ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
-    GROUP BY releases.id, sites.name, sites.slug, sites.alias, sites.url, networks.name, networks.slug, networks.url;
+    GROUP BY releases.id, entities.name, entities.slug, entities.alias, entities.url, parents.name, parents.slug, parents.url;
   `, releaseIds && [releaseIds]);

   if (documents.rows?.length > 0) {

View File

@@ -30,14 +30,14 @@ async function filterUniqueReleases(latestReleases, accReleases) {
     .map(release => [release.site.id, release.entryId]);

   const duplicateReleases = await knex('releases')
-    .whereIn(['site_id', 'entry_id'], latestReleaseIdentifiers);
+    .whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers);

   // add entry IDs of accumulated releases to prevent an infinite scrape loop
   // when one page contains the same release as the previous
   const duplicateReleasesSiteIdAndEntryIds = duplicateReleases
     .concat(accReleases)
     .reduce((acc, release) => {
-      const siteId = release.site_id || release.site.id;
+      const siteId = release.entity_id || release.site.id;
       const entryId = release.entry_id || release.entryId;

       if (!acc[siteId]) acc[siteId] = {};
@@ -85,7 +85,7 @@ async function scrapeReleases(scraper, site, preData, upcoming = false) {
   if (!Array.isArray(latestReleases)) {
     // scraper is unable to fetch the releases and returned a HTTP code or null
-    logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${site.name}' (${site.network.name})`);
+    logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${site.name}' (${site.parent?.name})`);
     return accReleases;
   }
@@ -102,7 +102,7 @@ async function scrapeReleases(scraper, site, preData, upcoming = false) {
   const pageAccReleases = accReleases.concat(uniqueReleases);

-  logger.verbose(`Scraped '${site.name}' (${site.network.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);
+  logger.verbose(`Scraped '${site.name}' (${site.parent?.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);

   if (needNextPage(uniqueReleases, pageAccReleases)) {
     return scrapePage(page + 1, pageAccReleases);
@@ -135,7 +135,7 @@ async function scrapeLatestReleases(scraper, site, preData) {
   try {
     return await scrapeReleases(scraper, site, preData, false);
   } catch (error) {
-    logger.warn(`Failed to scrape latest updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
+    logger.warn(`Failed to scrape latest updates for '${site.slug}' (${site.parent?.slug}): ${error.message}`);
   }

   return [];
@@ -149,7 +149,7 @@ async function scrapeUpcomingReleases(scraper, site, preData) {
   try {
     return await scrapeReleases(scraper, site, preData, true);
   } catch (error) {
-    logger.warn(`Failed to scrape upcoming updates for '${site.slug}' (${site.network.slug}): ${error.message}`);
+    logger.warn(`Failed to scrape upcoming updates for '${site.slug}' (${site.parent?.slug}): ${error.message}`);
   }

   return [];
@@ -165,18 +165,18 @@ async function scrapeSiteReleases(scraper, site, preData) {
       : [],
   ]);

-  logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${site.name}' (${site.network.name})`);
+  logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${site.name}' (${site.parent.name})`);

   return [...latestReleases, ...upcomingReleases];
 }

 async function scrapeSite(site, accSiteReleases) {
   const scraper = scrapers.releases[site.slug]
-    || scrapers.releases[site.network.slug]
-    || scrapers.releases[site.network.parent?.slug];
+    || scrapers.releases[site.parent?.slug]
+    || scrapers.releases[site.parent?.parent?.slug];

   if (!scraper) {
-    logger.warn(`No scraper found for '${site.name}' (${site.network.name})`);
+    logger.warn(`No scraper found for '${site.name}' (${site.parent.name})`);
     return [];
   }
@@ -211,7 +211,7 @@ async function scrapeNetworkSequential(network) {
 async function scrapeNetworkParallel(network) {
   return Promise.map(
-    network.sites,
+    network.children,
     async site => scrapeSite(site, network),
     { concurrency: 3 },
   );
@@ -222,8 +222,6 @@ async function fetchUpdates() {
     ? await fetchSitesFromArgv()
     : await fetchSitesFromConfig();

-  // console.log('included', includedNetworks);
-
   const scrapedNetworks = await Promise.map(
     includedNetworks,
     async network => (network.parameters?.sequential
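For reference, the scraper lookup in scrapeSite above now walks the entity tree instead of site.network: the site's own slug first, then its parent network, then that network's parent. A standalone sketch of the same fallback, with a placeholder scrapers map that is not part of the codebase:

// Placeholder scrapers map; the real scraper modules live elsewhere in the project.
const scrapers = {
  releases: {
    examplenetwork: { fetchLatest: async () => [] }, // assumed stub
  },
};

// Mirrors the fallback chain in scrapeSite: site slug -> parent slug -> grandparent slug.
function resolveReleaseScraper(site) {
  return scrapers.releases[site.slug]
    || scrapers.releases[site.parent?.slug]
    || scrapers.releases[site.parent?.parent?.slug]
    || null;
}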