Refactored various modules for entities. Updated and refactored Kink scraper.
@@ -24,7 +24,7 @@ async function init() {
   const actors = argv.actors && await scrapeActors(argv.actors);
   const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
 
-  const updateBaseScenes = (argv.scrape || argv.sites || argv.networks) && await fetchUpdates();
+  const updateBaseScenes = (argv.scrape || argv.channels || argv.networks) && await fetchUpdates();
 
   const deepScenes = argv.deep
     ? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
src/argv.js
@@ -11,23 +11,18 @@ const { argv } = yargs
     alias: 'web',
   })
   .option('scrape', {
-    describe: 'Scrape sites and networks defined in configuration',
+    describe: 'Scrape channels and networks defined in configuration',
     type: 'boolean',
   })
   .option('networks', {
-    describe: 'Networks to scrape (overrides configuration)',
+    describe: 'Network to scrape all channels from (overrides configuration)',
     type: 'array',
     alias: 'network',
   })
-  .option('sites', {
-    describe: 'Sites to scrape (overrides configuration)',
+  .option('channels', {
+    describe: 'Channel to scrape (overrides configuration)',
     type: 'array',
-    alias: 'site',
-  })
-  .option('entities', {
-    describe: 'Networks or sites to scrape (overrides configuration)',
-    type: 'array',
-    alias: 'entity',
+    alias: 'channel',
   })
   .option('actors', {
     describe: 'Scrape actors by name or slug',
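For reference, the renamed options might be combined like this on the command line (the entry point is omitted here; only the option names and aliases come from src/argv.js above, and the channel slug is a placeholder):

    --scrape                    # scrape the channels and networks defined in configuration
    --networks kink             # scrape every channel of a network (alias: --network)
    --channels examplechannel   # scrape individual channels (alias: --channel)
    --actors "example actor"    # scrape actors by name or slug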
@@ -20,7 +20,7 @@ function curateEntity(entity, includeParameters = false) {
     children: (entity.children || []).map(child => curateEntity({
       ...child,
       parent: entity,
-    })),
+    }, includeParameters)),
   };
 
   return curatedEntity;
@@ -30,39 +30,40 @@ async function curateEntities(entities, includeParameters) {
   return Promise.all(entities.map(async entity => curateEntity(entity, includeParameters)));
 }
 
-async function fetchSitesFromArgv() {
+async function fetchChannelsFromArgv() {
   const rawNetworks = await knex.raw(`
-    /* networks from argument with sites as children */
-    WITH RECURSIVE temp AS (
+    /* networks from argument with channels as children */
+    WITH RECURSIVE children AS (
       SELECT
         id, parent_id, name, slug, type, url, description, parameters
       FROM
         entities
       WHERE
-        slug = ANY(?) AND entities.type = 1
+        slug = ANY(?) AND entities.type = 'network'
       UNION ALL
       SELECT
         entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters
       FROM
         entities
       INNER JOIN
-        temp ON temp.id = entities.parent_id
+        children ON children.id = entities.parent_id
     )
     SELECT
-      entities.*, row_to_json(parents) as parent, json_agg(temp) as children
+      entities.*, row_to_json(parents) as parent, json_agg(children) as children
     FROM
-      temp
+      children
     LEFT JOIN
-      entities ON entities.id = temp.parent_id
+      entities ON entities.id = children.parent_id
     LEFT JOIN
       entities AS parents ON parents.id = entities.parent_id
     WHERE
-      temp.type = 2
+      children.type = 'channel'
     GROUP BY
-      temp.parent_id, entities.id, entities.name, parents.id
+      children.parent_id, entities.id, entities.name, parents.id
 
     UNION ALL
 
-    /* sites from argument as the child of network with parent */
+    /* channels from argument as the child of network with parent */
     SELECT
       entities.*, row_to_json(parents) as parent, json_agg(row_to_json(children))
     FROM
@@ -72,10 +73,10 @@ async function fetchSitesFromArgv() {
     LEFT JOIN
       entities AS parents ON parents.id = entities.parent_id
     WHERE
-      children.slug = ANY(?) AND children.type = 2
+      children.slug = ANY(?) AND children.type = 'channel'
     GROUP BY
       entities.id, parents.id;
-  `, [argv.networks || [], argv.sites || []]);
+  `, [argv.networks || [], argv.channels || []]);
 
   const curatedNetworks = await curateEntities(rawNetworks.rows, true);
   logger.info(`Found ${curatedNetworks.length} networks in database`);
@@ -83,10 +84,10 @@ async function fetchSitesFromArgv() {
   return curatedNetworks;
 }
 
-async function fetchSitesFromConfig() {
+async function fetchChannelsFromConfig() {
   const rawSites = await knex('entities')
-    .select('entities.*')
-    .leftJoin('entities as entities_parents', 'entities_parents.id', 'entities.id')
+    .select(knex.raw('entities.*, row_to_json(parents) as parent'))
+    .leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
     .where((builder) => {
       if (config.include) {
         builder.whereIn('entities.slug', config.include);
@@ -103,14 +104,14 @@ async function fetchSitesFromConfig() {
 }
 
 async function fetchIncludedEntities() {
-  if (argv.networks || argv.sites) {
-    return fetchSitesFromArgv();
+  if (argv.networks || argv.channels) {
+    return fetchChannelsFromArgv();
   }
 
-  return fetchSitesFromConfig();
+  return fetchChannelsFromConfig();
 }
 
-async function fetchSites(queryObject) {
+async function fetchChannels(queryObject) {
   const sites = await knex('sites')
     .where(builder => whereOr(queryObject, 'sites', builder))
     .select(
@@ -123,7 +124,7 @@ async function fetchSites(queryObject) {
   return curateEntities(sites);
 }
 
-async function fetchSitesFromReleases() {
+async function fetchChannelsFromReleases() {
   const sites = await knex('releases')
     .select('site_id', '')
     .leftJoin('sites', 'sites.id', 'releases.site_id')
@@ -137,8 +138,8 @@ module.exports = {
   curateEntity,
   curateEntities,
   fetchIncludedEntities,
-  fetchSites,
-  fetchSitesFromConfig,
-  fetchSitesFromArgv,
-  fetchSitesFromReleases,
+  fetchChannels,
+  fetchChannelsFromConfig,
+  fetchChannelsFromArgv,
+  fetchChannelsFromReleases,
 };
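Based on the recursive query and curateEntity above, a curated network entity should come out roughly in this shape (a sketch with illustrative values only; the real rows come from the entities table):

    // illustrative sketch, not actual data
    const network = {
      id: 1,
      parent_id: null,
      name: 'Kink',
      slug: 'kink',
      type: 'network',
      parent: null,
      children: [
        // curateEntity() attaches the parent entity to every child
        { id: 2, name: 'Example Channel', slug: 'example-channel', type: 'channel', parent: { id: 1, slug: 'kink' } },
      ],
    };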
@@ -1,117 +1,97 @@
 'use strict';
 
-const bhttp = require('bhttp');
-const cheerio = require('cheerio');
-const moment = require('moment');
+const { get, getAll } = require('../utils/qu');
 
-function scrapeLatest(html, site) {
-  const $ = cheerio.load(html, { normalizeWhitespace: true });
-  const sceneElements = $('.shoot-list .shoot').toArray();
+function scrapeLatest(scenes) {
+  return scenes.map(({ qu }) => {
+    const release = {};
 
-  return sceneElements.map((element) => {
-    const sceneLinkElement = $(element).find('.shoot-thumb-title a');
-    const href = sceneLinkElement.attr('href');
-    const url = `https://kink.com${href}`;
-    const shootId = href.split('/')[2];
-    const title = sceneLinkElement.text().trim();
+    const href = qu.url('.shoot-thumb-title a');
+    release.url = `https://kink.com${href}`;
 
-    const poster = $(element).find('.adimage').attr('src');
-    const photos = $(element).find('.rollover .roll-image').map((photoIndex, photoElement) => $(photoElement).attr('data-imagesrc')).toArray();
+    release.shootId = href.split('/').slice(-1)[0];
+    release.entryId = release.shootId;
 
-    const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
-    const actors = $(element).find('.shoot-thumb-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
-    const stars = $(element).find('.average-rating').attr('data-rating') / 10;
+    release.title = qu.q('.shoot-thumb-title a', true);
+    release.date = qu.date('.date', 'MMM DD, YYYY');
 
-    const timestamp = $(element).find('.video span').text();
-    const timestampComponents = timestamp.split(':'); // fix mixed hh:mm:ss and mm:ss format
-    const duration = moment.duration(timestampComponents.length > 2 ? timestamp : `0:${timestamp}`).asSeconds();
+    release.actors = qu.all('.shoot-thumb-models a', true);
+    release.stars = qu.q('.average-rating', 'data-rating') / 10;
 
-    return {
-      url,
-      shootId,
-      entryId: shootId,
-      title,
-      actors,
-      date,
-      photos,
-      poster,
-      rating: {
-        stars,
-      },
-      duration,
-      site,
-    };
+    release.poster = qu.img('.adimage');
+    release.photos = qu.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
+      photo.replace('410/', '830/'),
+      photo,
+    ]);
+
+    release.duration = qu.dur('.video span');
+
+    return release;
   });
 }
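The rewritten scrapeLatest no longer loads HTML with cheerio; it receives pre-queried items and reads them through the internal ../utils/qu helpers. Their signatures are not documented in this diff, so the following summary is inferred from usage only:

    // inferred from the calls above, not from qu's own documentation
    // getAll(url, itemSelector) -> { ok, status, items: [{ qu }, ...] }   (see fetchLatest below)
    // get(url)                  -> { ok, status, item: { qu } }           (see fetchScene below)
    // qu.q(selector, true)      -> trimmed text content
    // qu.q(selector, 'attr')    -> attribute value
    // qu.all(selector, true)    -> array of text contents
    // qu.url / qu.img / qu.imgs / qu.date / qu.dur / qu.poster -> convenience getters for hrefs, images, dates, durations and posters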
 
-async function scrapeScene(html, url, shootId, ratingRes, site) {
-  const $ = cheerio.load(html, { normalizeWhitespace: true });
+async function scrapeScene({ qu }, url) {
+  const release = { url };
 
-  // const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
-  const title = $('h1.shoot-title span.favorite-button').attr('data-title');
-  const actorsRaw = $('.shoot-info p.starring');
+  release.shootId = new URL(url).pathname.split('/')[2];
+  release.entryId = release.shootId;
 
-  const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
-  const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
-  const trailerPoster = $('.player video#kink-player').attr('poster');
+  release.title = qu.q('.shoot-title span.favorite-button', 'data-title');
+  release.description = qu.q('.description-text', true);
 
-  const date = moment.utc($(actorsRaw)
-    .prev()
-    .text()
-    .trim()
-    .replace('Date: ', ''),
-  'MMMM DD, YYYY')
-    .toDate();
+  release.date = qu.date('.shoot-date', 'MMMM DD, YYYY');
+  release.actors = qu.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
+  release.director = qu.q('.director-name', true);
 
-  const actors = $(actorsRaw).find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
-  const description = $('.shoot-info .description').text().trim();
+  release.photos = qu.imgs('.gallery .thumb img', 'data-image-file');
+  release.poster = qu.poster();
 
-  const { average: stars } = ratingRes.body;
+  release.tags = qu.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
 
-  const siteName = $('.shoot-logo a').attr('href').split('/')[2];
-  const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
+  const trailer = qu.q('.player span[data-type="trailer-src"]', 'data-url');
 
-  const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
-  const channel = siteSlug;
-
-  return {
-    url,
-    shootId,
-    entryId: shootId,
-    title,
-    date,
-    actors,
-    description,
-    photos,
-    poster: trailerPoster,
-    trailer: {
-      src: trailerVideo,
-    },
-    rating: {
-      stars,
-    },
-    tags,
-    site,
-    channel,
-  };
+  release.trailer = [
+    {
+      src: trailer.replace('480p', '1080p'),
+      quality: 1080,
+    },
+    {
+      src: trailer.replace('480p', '720p'),
+      quality: 720,
+    },
+    {
+      src: trailer,
+      quality: 480,
+    },
+    {
+      src: trailer.replace('480p', '360p'),
+      quality: 360,
+    },
+  ];
+
+  release.channel = qu.url('.shoot-logo a').split('/').slice(-1)[0];
+
+  return release;
 }
 
 async function fetchLatest(site, page = 1) {
-  const res = await bhttp.get(`${site.url}/latest/page/${page}`);
+  const res = await getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
 
-  return scrapeLatest(res.body.toString(), site);
+  if (res.ok) {
+    return scrapeLatest(res.items, site);
+  }
+
+  return res.status;
 }
 
 async function fetchScene(url, site) {
-  const shootId = new URL(url).pathname.split('/')[2];
+  const res = await get(url);
 
-  const [res, ratingRes] = await Promise.all([
-    bhttp.get(url),
-    bhttp.get(`https://kink.com/api/ratings/${shootId}`),
-  ]);
+  if (res.ok) {
+    return scrapeScene(res.item, url, site);
+  }
 
-  return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
+  return res.status;
 }
 
 module.exports = {
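Note the changed failure mode: fetchLatest and fetchScene now return the HTTP status code instead of a scraped result when the request is not OK. A minimal, hypothetical caller that mirrors the Array.isArray check used by the update logic below:

    // hypothetical caller; the real check lives in scrapeReleases() below
    const result = await fetchLatest(site, 1); // `site` being the channel entity the scraper is invoked with
    if (!Array.isArray(result)) {
      console.warn(`Kink scraper returned HTTP ${result}`); // e.g. 404 or 503
    }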
@@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const include = require('./utils/argv-include')(argv);
 const scrapers = require('./scrapers/scrapers');
-const { fetchSitesFromArgv, fetchSitesFromConfig } = require('./entities');
+const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
 
 const afterDate = (() => {
   if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
@@ -27,7 +27,7 @@ const afterDate = (() => {
 
 async function filterUniqueReleases(latestReleases, accReleases) {
   const latestReleaseIdentifiers = latestReleases
-    .map(release => [release.site.id, release.entryId]);
+    .map(release => [release.entity.id, release.entryId]);
 
   const duplicateReleases = await knex('releases')
     .whereIn(['entity_id', 'entry_id'], latestReleaseIdentifiers);
@@ -37,17 +37,17 @@ async function filterUniqueReleases(latestReleases, accReleases) {
   const duplicateReleasesSiteIdAndEntryIds = duplicateReleases
     .concat(accReleases)
     .reduce((acc, release) => {
-      const siteId = release.entity_id || release.site.id;
+      const entityId = release.entity_id || release.entity.id;
       const entryId = release.entry_id || release.entryId;
 
-      if (!acc[siteId]) acc[siteId] = {};
-      acc[siteId][entryId] = true;
+      if (!acc[entityId]) acc[entityId] = {};
+      acc[entityId][entryId] = true;
 
       return acc;
     }, {});
 
   const uniqueReleases = latestReleases
-    .filter(release => !duplicateReleasesSiteIdAndEntryIds[release.site.id]?.[release.entryId]);
+    .filter(release => !duplicateReleasesSiteIdAndEntryIds[release.entity.id]?.[release.entryId]);
 
   return uniqueReleases;
 }
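The reduce above builds a nested lookup keyed first by entity ID and then by entry ID, so the later filter can test membership with optional chaining. Roughly (illustrative values):

    // shape of duplicateReleasesSiteIdAndEntryIds after the reduce: { [entityId]: { [entryId]: true } }
    const seen = { 17: { 'kink-12345': true } };
    const isDuplicate = Boolean(seen[17]?.['kink-12345']); // same test as the .filter() above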
@@ -77,15 +77,15 @@ function needNextPage(uniqueReleases, pageAccReleases) {
   return pageAccReleases.length <= argv.nullDateLimit;
 }
 
-async function scrapeReleases(scraper, site, preData, upcoming = false) {
+async function scrapeReleases(scraper, entity, preData, upcoming = false) {
   const scrapePage = async (page = 1, accReleases = []) => {
     const latestReleases = upcoming
-      ? await scraper.fetchUpcoming(site, page, preData, include)
-      : await scraper.fetchLatest(site, page, preData, include);
+      ? await scraper.fetchUpcoming(entity, page, preData, include)
+      : await scraper.fetchLatest(entity, page, preData, include);
 
     if (!Array.isArray(latestReleases)) {
       // scraper is unable to fetch the releases and returned a HTTP code or null
-      logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${site.name}' (${site.parent?.name})`);
+      logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);
       return accReleases;
     }
 
@@ -94,15 +94,15 @@ async function scrapeReleases(scraper, site, preData, upcoming = false) {
       return accReleases;
     }
 
-    const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site: release.site || site })); // attach site release is assigned to when stored
+    const latestReleasesWithEntity = latestReleases.map(release => ({ ...release, entity })); // attach entity the release is assigned to when stored
 
     const uniqueReleases = argv.redownload
-      ? latestReleasesWithSite
-      : await filterUniqueReleases(latestReleasesWithSite, accReleases);
+      ? latestReleasesWithEntity
+      : await filterUniqueReleases(latestReleasesWithEntity, accReleases);
 
     const pageAccReleases = accReleases.concat(uniqueReleases);
 
-    logger.verbose(`Scraped '${site.name}' (${site.parent?.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);
+    logger.verbose(`Scraped '${entity.name}' (${entity.parent?.name}) ${upcoming ? 'upcoming' : 'latest'} page ${page}, found ${uniqueReleases.length} unique updates`);
 
     if (needNextPage(uniqueReleases, pageAccReleases)) {
       return scrapePage(page + 1, pageAccReleases);
@@ -127,70 +127,70 @@ async function scrapeReleases(scraper, site, preData, upcoming = false) {
     return releases.slice(0, argv.nullDateLimit);
 }
 
-async function scrapeLatestReleases(scraper, site, preData) {
+async function scrapeLatestReleases(scraper, entity, preData) {
   if (!scraper.fetchLatest) {
     return [];
   }
 
   try {
-    return await scrapeReleases(scraper, site, preData, false);
+    return await scrapeReleases(scraper, entity, preData, false);
   } catch (error) {
-    logger.warn(`Failed to scrape latest updates for '${site.slug}' (${site.parent?.slug}): ${error.message}`);
+    logger.warn(`Failed to scrape latest updates for '${entity.slug}' (${entity.parent?.slug}): ${error.message}`);
   }
 
   return [];
 }
 
-async function scrapeUpcomingReleases(scraper, site, preData) {
+async function scrapeUpcomingReleases(scraper, entity, preData) {
   if (!scraper.fetchUpcoming) {
     return [];
   }
 
   try {
-    return await scrapeReleases(scraper, site, preData, true);
+    return await scrapeReleases(scraper, entity, preData, true);
   } catch (error) {
-    logger.warn(`Failed to scrape upcoming updates for '${site.slug}' (${site.parent?.slug}): ${error.message}`);
+    logger.warn(`Failed to scrape upcoming updates for '${entity.slug}' (${entity.parent?.slug}): ${error.message}`);
   }
 
   return [];
 }
 
-async function scrapeSiteReleases(scraper, site, preData) {
+async function scrapeChannelReleases(scraper, channelEntity, preData) {
   const [latestReleases, upcomingReleases] = await Promise.all([
     argv.latest
-      ? scrapeLatestReleases(scraper, site, preData)
+      ? scrapeLatestReleases(scraper, channelEntity, preData)
       : [],
     argv.upcoming
-      ? scrapeUpcomingReleases(scraper, site, preData)
+      ? scrapeUpcomingReleases(scraper, channelEntity, preData)
       : [],
   ]);
 
-  logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${site.name}' (${site.parent.name})`);
+  logger.info(`Fetching ${latestReleases.length} latest and ${upcomingReleases.length} upcoming updates for '${channelEntity.name}' (${channelEntity.parent.name})`);
 
   return [...latestReleases, ...upcomingReleases];
 }
 
-async function scrapeSite(site, accSiteReleases) {
-  const scraper = scrapers.releases[site.slug]
-    || scrapers.releases[site.parent?.slug]
-    || scrapers.releases[site.parent?.parent?.slug];
+async function scrapeChannel(channelEntity, accNetworkReleases) {
+  const scraper = scrapers.releases[channelEntity.slug]
+    || scrapers.releases[channelEntity.parent?.slug]
+    || scrapers.releases[channelEntity.parent?.parent?.slug];
 
   if (!scraper) {
-    logger.warn(`No scraper found for '${site.name}' (${site.parent.name})`);
+    logger.warn(`No scraper found for '${channelEntity.name}' (${channelEntity.parent.name})`);
     return [];
   }
 
   try {
-    const beforeFetchLatest = await scraper.beforeFetchLatest?.(site);
+    const beforeFetchLatest = await scraper.beforeFetchLatest?.(channelEntity);
 
-    const siteReleases = await scrapeSiteReleases(scraper, site, {
-      accSiteReleases,
+    const channelEntityReleases = await scrapeChannelReleases(scraper, channelEntity, {
+      accNetworkReleases,
       beforeFetchLatest,
     });
 
-    return siteReleases.map(release => ({ ...release, site }));
+    return channelEntityReleases.map(release => ({ ...release, channelEntity }));
   } catch (error) {
-    logger.error(`Failed to scrape releases from ${site.name} using ${scraper.slug}: ${error.message}`);
+    logger.error(`Failed to scrape releases from ${channelEntity.name} using ${scraper.slug}: ${error.message}`);
 
     return [];
   }
@@ -199,11 +199,11 @@ async function scrapeSite(site, accSiteReleases) {
 async function scrapeNetworkSequential(networkEntity) {
   return Promise.reduce(
     networkEntity.children,
-    async (chain, siteEntity) => {
-      const accSiteReleases = await chain;
-      const siteReleases = await scrapeSite(siteEntity, networkEntity, accSiteReleases);
+    async (chain, channelEntity) => {
+      const accNetworkReleases = await chain;
+      const channelReleases = await scrapeChannel(channelEntity, networkEntity, accNetworkReleases);
 
-      return accSiteReleases.concat(siteReleases);
+      return accNetworkReleases.concat(channelReleases);
     },
     Promise.resolve([]),
   );
@@ -212,21 +212,21 @@ async function scrapeNetworkSequential(networkEntity) {
 async function scrapeNetworkParallel(networkEntity) {
   return Promise.map(
     networkEntity.children,
-    async siteEntity => scrapeSite(siteEntity, networkEntity),
+    async channelEntity => scrapeChannel(channelEntity, networkEntity),
     { concurrency: 3 },
   );
 }
 
 async function fetchUpdates() {
-  const includedNetworks = argv.sites || argv.networks || argv.from
-    ? await fetchSitesFromArgv()
-    : await fetchSitesFromConfig();
+  const includedNetworks = argv.channels || argv.networks
+    ? await fetchChannelsFromArgv()
+    : await fetchChannelsFromConfig();
 
   const scrapedNetworks = await Promise.map(
     includedNetworks,
-    async network => (network.parameters?.sequential
-      ? scrapeNetworkSequential(network)
-      : scrapeNetworkParallel(network)),
+    async networkEntity => (networkEntity.parameters?.sequential
+      ? scrapeNetworkSequential(networkEntity)
+      : scrapeNetworkParallel(networkEntity)),
     { concurrency: 5 },
   );
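fetchUpdates now resolves the included networks from --channels/--networks (or from configuration) and scrapes each network either sequentially (Promise.reduce, one channel at a time) or in parallel (Promise.map with a concurrency of 3), based on the network's stored parameters; the concurrency option suggests Bluebird promises, though that is not confirmed by this diff. A hypothetical parameters value that would select the sequential path:

    // assumed contents of the entities.parameters column for a network that must be scraped one channel at a time
    { "sequential": true }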