Refactored deep and store modules to use entities.
src/deep.js (64 changed lines)
@@ -7,8 +7,6 @@ const include = require('./utils/argv-include')(argv);
 const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const scrapers = require('./scrapers/scrapers');
-const { curateSites } = require('./sites');
-const { curateNetworks } = require('./networks');
 
 function urlToSiteSlug(url) {
 	try {
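
Note: both curate imports are dropped because channels and networks now live in a single self-referencing `entities` table. A minimal sketch of the assumed schema as a knex migration (any column beyond the slug, type, and parent_id visible in this diff is hypothetical):

    // assumed schema sketch, not the actual migration from this repository
    exports.up = async (knex) => knex.schema.createTable('entities', (table) => {
        table.increments('id');
        table.string('slug');
        table.integer('type'); // 1 = network, 2 = channel, per the removed where-clauses below
        table.integer('parent_id').references('id').inTable('entities'); // a channel's network
    });

    exports.down = async (knex) => knex.schema.dropTable('entities');
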
@@ -19,40 +17,31 @@ function urlToSiteSlug(url) {
 
 		return slug;
 	} catch (error) {
-		logger.warn(`Failed to derive site slug from '${url}': ${error.message}`);
+		logger.warn(`Failed to derive entity slug from '${url}': ${error.message}`);
 
 		return null;
 	}
 }
 
-async function findSites(baseReleases) {
-	const baseReleasesWithoutSite = baseReleases.filter(release => release.url && !release.site);
+async function findEntities(baseReleases) {
+	const baseReleasesWithoutEntity = baseReleases.filter(release => release.url && !release.site && !release.entity);
 
-	const siteSlugs = Array.from(new Set(
-		baseReleasesWithoutSite
+	const entitySlugs = Array.from(new Set(
+		baseReleasesWithoutEntity
 			.map(baseRelease => urlToSiteSlug(baseRelease.url))
 			.filter(Boolean),
 	));
 
-	const siteEntries = await knex('entities')
+	const entities = await knex('entities')
+		.select(knex.raw('entities.*, row_to_json(parents) as parent'))
 		.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
-		.select('entities.*', 'parents.id as network_id', 'parents.name as network_name', 'parents.slug as network_slug', 'parents.url as network_url', 'parents.parameters as network_parameters', 'parents.description as network_description')
-		.where('entities.type', 2)
-		.whereIn('entities.slug', siteSlugs);
+		.whereIn('entities.slug', entitySlugs)
+		.orderBy('entities.type', 'asc');
 
-	const networkEntries = await knex('entities')
-		.where('type', 1)
-		.whereIn('slug', siteSlugs);
+	// channel entity will overwrite network entity
+	const entitiesBySlug = entities.reduce((accEntities, entity) => ({ ...accEntities, [entity.slug]: entity }), {});
 
-	const sites = await curateSites(siteEntries, true, false);
-	const networks = await curateNetworks(networkEntries, true, false, false);
-	const markedNetworks = networks.map(network => ({ ...network, isNetwork: true }));
-
-	const sitesBySlug = []
-		.concat(markedNetworks, sites)
-		.reduce((accSites, site) => ({ ...accSites, [site.slug]: site }), {});
-
-	return sitesBySlug;
+	return entitiesBySlug;
 }
 
 function toBaseReleases(baseReleasesOrUrls) {
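
Note: findEntities now fetches channels and networks in one query and keys them by slug. Because the rows come back ordered by `entities.type` ascending, a channel (type 2) that shares its slug with a network (type 1) is reduced last and wins, which is what the "channel entity will overwrite network entity" comment relies on. A standalone illustration with invented sample rows:

    // rows as the query would return them with .orderBy('entities.type', 'asc')
    const rows = [
        { slug: 'example', type: 1, parent: null },      // network
        { slug: 'example', type: 2, parent: { id: 1 } }, // channel with the same slug
    ];

    // same reduce as in findEntities: later rows overwrite earlier ones per slug
    const bySlug = rows.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {});

    console.log(bySlug.example.type); // 2 — the channel entity won
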
@@ -92,23 +81,22 @@ function toBaseReleases(baseReleasesOrUrls) {
 		.filter(Boolean);
 }
 
-async function scrapeRelease(baseRelease, sites, type = 'scene') {
-	const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];
+async function scrapeRelease(baseRelease, entities, type = 'scene') {
+	const entity = baseRelease.entity || baseRelease.site || entities[urlToSiteSlug(baseRelease.url)];
 
-	if (!site) {
-		logger.warn(`No site available for ${baseRelease.url}`);
+	if (!entity) {
+		logger.warn(`No entity available for ${baseRelease.url}`);
 		return baseRelease;
 	}
 
 	if ((!baseRelease.url && !baseRelease.path) || !argv.deep) {
 		return {
 			...baseRelease,
-			site,
+			entity,
 		};
 	}
 
-	const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
-	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
+	const scraper = scrapers.releases[entity.slug] || scrapers.releases[entity.parent?.slug];
 
 	if (!scraper) {
 		logger.warn(`Could not find scraper for ${baseRelease.url}`);
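
Note: the removed siteWithFallbackNetwork shim existed so that `site.network.slug` would not throw when a network entity stood in for a channel. The rewritten lookup gets the same safety from optional chaining, since `row_to_json(parents)` yields a null `parent` for top-level entities. A standalone sketch (entity shapes invented):

    const channel = { slug: 'example', parent: { slug: 'example-network' } };
    const network = { slug: 'example-network', parent: null };

    // ?. short-circuits to undefined instead of throwing a TypeError on null
    console.log(channel.parent?.slug); // 'example-network'
    console.log(network.parent?.slug); // undefined, so the scraper lookup simply misses
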
@@ -116,7 +104,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 	}
 
 	if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
-		logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
+		logger.warn(`The '${entity.name}'-scraper cannot fetch individual ${type}s`);
 		return baseRelease;
 	}
 
@@ -124,14 +112,14 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 	logger.verbose(`Fetching ${type} ${baseRelease.url}`);
 
 	const scrapedRelease = type === 'scene'
-		? await scraper.fetchScene(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include)
-		: await scraper.fetchMovie(baseRelease.url, siteWithFallbackNetwork, baseRelease, null, include);
+		? await scraper.fetchScene(baseRelease.url, entity, baseRelease, null, include)
+		: await scraper.fetchMovie(baseRelease.url, entity, baseRelease, null, include);
 
 	const mergedRelease = {
 		...baseRelease,
 		...scrapedRelease,
 		deep: !!scrapedRelease,
-		site,
+		entity,
 	};
 
 	if (!mergedRelease.entryId) {
@@ -155,19 +143,19 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 	}
 }
 
-async function scrapeReleases(baseReleases, sites, type) {
+async function scrapeReleases(baseReleases, entities, type) {
 	return Promise.map(
 		baseReleases,
-		async baseRelease => scrapeRelease(baseRelease, sites, type),
+		async baseRelease => scrapeRelease(baseRelease, entities, type),
 		{ concurrency: 10 },
 	);
 }
 
 async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
 	const baseReleases = toBaseReleases(baseReleasesOrUrls);
-	const sites = await findSites(baseReleases);
+	const entities = await findEntities(baseReleases);
 
-	const deepReleases = await scrapeReleases(baseReleases, sites, type);
+	const deepReleases = await scrapeReleases(baseReleases, entities, type);
 
 	return deepReleases.filter(Boolean);
 }
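
Note: Promise.map with a `concurrency` option is not part of the native Promise API; given the call shape, this is presumably bluebird's Promise.map, imported elsewhere in the module. A self-contained sketch of the pattern, assuming bluebird is installed:

    const Promise = require('bluebird');

    async function scrapeAll(items) {
        // run the mapper over all items, but at most 10 at a time
        return Promise.map(
            items,
            async (item) => `scraped ${item}`, // stand-in for scrapeRelease
            { concurrency: 10 },
        );
    }

    scrapeAll(['a', 'b', 'c']).then((results) => console.log(results));
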