'use strict';

const config = require('config');
const Promise = require('bluebird');

const logger = require('./logger')(__filename);
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
const { storeReleases } = require('./releases');
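
// Resolve which site a release belongs to. A site already attached to the
// release wins; otherwise the URL is matched against known sites, then
// against networks as a fallback (flagged with isFallback so callers can
// treat a network-level match differently).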
async function findSite(url, release) {
  if (release?.site) return release.site;
  if (!url) return null;

  const site = await findSiteByUrl(url);

  if (site) {
    return site;
  }

  const network = await findNetworkByUrl(url);

  if (network) {
    return {
      ...network,
      network,
      isFallback: true,
    };
  }

  return null;
}
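
// Scrape a single release. `source` may be a URL or a pre-scraped release
// object, `basicRelease` carries any data already known about the release,
// and `type` selects scene or movie scraping.
// Hypothetical example (any URL matching a configured site or network works):
//   await scrapeRelease('https://www.example.com/scene/12345', null, 'scene');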
async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) {
  // profile scraper may return either URLs or pre-scraped scenes
  const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
  const url = sourceIsUrlOrEmpty ? source : source?.url;
  const release = sourceIsUrlOrEmpty ? basicRelease : source;

  const site = basicRelease?.site || await findSite(url, release);

  if (!site) {
    throw new Error(`Could not find site ${url} in database`);
  }
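
  // without --deep, return any pre-scraped release as-is instead of
  // re-fetching it from its source page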
  if (!argv.deep && release) {
    return {
      ...release,
      site,
    };
  }
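
  // prefer a scraper dedicated to the site, fall back to the network's scraper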
  const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];

  if (!scraper) {
    throw new Error('Could not find scraper for URL');
  }
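
  // not every scraper can fetch individual scenes or movies; warn and skip
  // when a pre-scraped release is available, otherwise fail hard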
  if (type === 'scene' && !scraper.fetchScene) {
    if (release) {
      logger.warn(`The '${site.name}'-scraper cannot fetch individual scenes`);
      return null;
    }

    throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
  }

  if (type === 'movie' && !scraper.fetchMovie) {
    if (release) {
      logger.warn(`The '${site.name}'-scraper cannot fetch individual movies`);
      return null;
    }

    throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
  }

  if (!release) {
    logger.info(`Scraping release from ${url}`);
  }

  const scrapedRelease = type === 'scene'
    ? await scraper.fetchScene(url, site, release, preflight)
    : await scraper.fetchMovie(url, site, release, preflight);
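
  // layer the scraped data over the base release; tags from both sources are
  // concatenated rather than overwritten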
  return {
    ...release,
    ...scrapedRelease,
    ...(scrapedRelease && release?.tags && {
      tags: release.tags.concat(scrapedRelease.tags),
    }),
    site,
  };
}
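
// Scrape multiple releases concurrently (at most five at a time) and, when
// --save is set, persist them and log a link to each stored scene.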
async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) {
  const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), {
    concurrency: 5,
  }).filter(Boolean);
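
  // attach the scrape type so storage knows whether these are scenes or movies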
|
2019-11-16 02:33:36 +00:00
|
|
|
|
2020-01-02 23:59:02 +00:00
|
|
|
const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));

  if (argv.scene && argv.inspect) {
    // only show when fetching from URL
    console.log(curatedReleases);
  }

  if (argv.save) {
    /*
    const movie = scrapedRelease.movie
      ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie')
      : null;

    if (movie) {
      const { releases: [storedMovie] } = await storeReleases([movie]);
      curatedRelease.parentId = storedMovie.id;
    }
    */
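
    // store in one batch and log a local web link for each stored scene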
    const { releases: storedReleases } = await storeReleases(curatedReleases);

    if (storedReleases) {
      logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
    }
  }
}
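
// Minimal usage sketch (hypothetical URL; the require path depends on where
// this file lives in the project):
//   const { scrapeReleases } = require('./scrape-releases');
//   await scrapeReleases(['https://www.example.com/scene/12345'], null, 'scene');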
module.exports = {
  scrapeRelease,
  scrapeReleases,
};