// Release scraping: resolves the site for a release URL, runs the matching scraper, and optionally stores the results.
'use strict';

// third-party dependencies; note that `Promise` is bluebird here, which provides Promise.map
const config = require('config');
const Promise = require('bluebird');

// project modules
const logger = require('./logger');
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
const { storeReleases } = require('./releases');
/**
 * Resolve the site record to scrape a release with.
 *
 * Lookup order: the site already attached to the release, then
 * findSiteByUrl(url), then findNetworkByUrl(url) as a fallback.
 *
 * @param {string} url - URL of the release.
 * @param {Object|null} [release] - Optional release; its `site` property wins when truthy.
 * @returns {Promise<Object|null>} The site, a network-derived fallback object
 *   (network fields plus `network` and `isFallback: true`), or null when nothing matched.
 */
async function findSite(url, release) {
	const site = release?.site || await findSiteByUrl(url);

	if (site) {
		return site;
	}

	const network = await findNetworkByUrl(url);

	if (network) {
		// present the network as if it were a site, and flag it so consumers can tell the difference
		return {
			...network,
			network,
			isFallback: true,
		};
	}

	return null;
}
/**
 * Scrape a single release (scene or movie) and merge the result with any
 * already-known release data.
 *
 * @param {string|Object} source - Either a URL, or a pre-scraped release object carrying a `url`.
 * @param {Object|null} [basicRelease=null] - Known release data, used only when `source` is a URL.
 * @param {string} [type='scene'] - 'scene' uses the scraper's fetchScene; any other value uses fetchMovie.
 * @returns {Promise<Object>} `url`, the known release fields, the scraped fields
 *   (scraped values win), merged `tags` when the known release has tags, and `site`.
 * @throws {Error} When no site or scraper can be found, or the scraper lacks the
 *   fetch method for the requested type.
 */
async function scrapeRelease(source, basicRelease = null, type = 'scene') {
	// profile scraper may return either URLs or pre-scraped scenes
	const sourceIsUrl = typeof source === 'string';
	const url = sourceIsUrl ? source : source.url;
	const release = sourceIsUrl ? basicRelease : source;

	const site = await findSite(url, release);

	if (!site) {
		throw new Error('Could not find site in database');
	}

	// a site may come without a network attached; guard the fallback lookup so a
	// missing scraper surfaces as the descriptive error below instead of a TypeError
	const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network?.slug];

	if (!scraper) {
		throw new Error('Could not find scraper for URL');
	}

	if (type === 'scene' && !scraper.fetchScene) {
		throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
	}

	if (type === 'movie' && !scraper.fetchMovie) {
		throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
	}

	if (!release) {
		logger.info(`Scraping release from ${url}`);
	}

	const scrapedRelease = type === 'scene'
		? await scraper.fetchScene(url, site, release)
		: await scraper.fetchMovie(url, site, release);

	return {
		url,
		...release,
		...scrapedRelease,
		...(scrapedRelease && release?.tags && {
			// concat(undefined) would append a literal undefined entry, so default to an empty list
			tags: release.tags.concat(scrapedRelease.tags || []),
		}),
		site,
	};
}
/**
 * Scrape a list of releases, at most five at a time, and optionally
 * print and/or store the results depending on the command line arguments.
 *
 * @param {Array<string|Object>} sources - URLs or pre-scraped release objects, passed to scrapeRelease.
 * @param {Object|null} [release=null] - Known release data handed to every scrapeRelease call.
 * @param {string} [type='scene'] - Release type; forwarded to scrapeRelease and stamped on every result.
 * @returns {Promise<void>} Resolves once scraping and the optional store step have finished.
 */
async function scrapeReleases(sources, release = null, type = 'scene') {
	// Promise is bluebird here; Promise.map caps the number of scrapes in flight
	const scrapedReleases = await Promise.map(sources, source => scrapeRelease(source, release, type), {
		concurrency: 5,
	});

	const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));

	if (argv.scene && argv.inspect) {
		// only show when fetching from URL
		console.log(curatedReleases);
	}

	if (argv.save) {
		// TODO: scrape and store the parent movie referenced by a release (scrapedRelease.movie)
		// and link the release to it through parentId

		const { releases: storedReleases } = await storeReleases(curatedReleases);

		if (storedReleases) {
			logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
		}
	}
}
module.exports = {
|
|
scrapeRelease,
|
|
scrapeReleases,
|
|
};