traxxx/src/scrape-releases.js

91 lines
2.4 KiB
JavaScript
Raw Normal View History

'use strict';
const config = require('config');
2020-01-02 23:59:02 +00:00
const Promise = require('bluebird');
const argv = require('./argv');
const scrapers = require('./scrapers/scrapers');
2019-11-17 02:56:45 +00:00
const { storeReleases } = require('./releases');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
/**
 * Resolve the site a release URL belongs to.
 *
 * A site already attached to `release` takes precedence; otherwise the site is
 * looked up by URL. When no site matches, the network matching the URL is
 * returned in site-like form: the network's own fields, a `network` property
 * pointing at the network itself, and `isFallback: true`.
 *
 * @param {string} url - URL of the release.
 * @param {Object} [release] - Optional release; its `site` property is used when set.
 * @returns {Promise<Object|null>} The site, the network fallback, or null when neither is found.
 */
async function findSite(url, release) {
    const attachedSite = release && release.site;
    const site = attachedSite || await findSiteByUrl(url);

    if (site) {
        return site;
    }

    const network = await findNetworkByUrl(url);

    if (!network) {
        return null;
    }

    // Present the network as a stand-in site so callers can treat both alike.
    return {
        ...network,
        network,
        isFallback: true,
    };
}
2020-01-02 23:59:02 +00:00
/**
 * Scrape a single release (scene or movie) from its URL.
 *
 * The site is resolved through `findSite`, and the scraper is looked up in
 * `scrapers.releases` by the site's slug, falling back to the slug of the
 * site's network when the site has one.
 *
 * @param {string} url - URL of the release to scrape.
 * @param {Object} [release] - Optional known release data, passed on to the scraper.
 * @param {string} [type='scene'] - 'scene' uses the scraper's `fetchScene`; any other value uses `fetchMovie`.
 * @returns {Promise<Object>} Whatever the scraper's fetch method resolves to.
 * @throws {Error} When no site or scraper is found, or the scraper lacks the
 *   fetch method for the requested type.
 */
async function scrapeRelease(url, release, type = 'scene') {
    const site = await findSite(url, release);

    if (!site) {
        throw new Error('Could not find site in database');
    }

    // A site is not guaranteed to carry a network; guard the fallback so a
    // missing scraper is reported as such instead of surfacing as a TypeError.
    const scraper = scrapers.releases[site.slug]
        || (site.network && scrapers.releases[site.network.slug]);

    if (!scraper) {
        throw new Error('Could not find scraper for URL');
    }

    if (type === 'scene' && !scraper.fetchScene) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
    }

    if (type === 'movie' && !scraper.fetchMovie) {
        throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
    }

    const scrapedRelease = type === 'scene'
        ? await scraper.fetchScene(url, site, release)
        : await scraper.fetchMovie(url, site, release);

    return scrapedRelease;
}
/**
 * Scrape several release URLs, tag each result with its type and, when the
 * `save` argument is set, store them and print a link for every stored release.
 *
 * @param {string[]} urls - URLs of the releases to scrape.
 * @param {Object} [release] - Optional known release data passed to every scrape.
 * @param {string} [type='scene'] - Release type, forwarded to `scrapeRelease` and set on each result.
 * @returns {Promise<Object[]>} The scraped releases, each with `type` added.
 */
async function scrapeReleases(urls, release, type = 'scene') {
    // Promise.map with a concurrency option: at most 5 scrapes in flight at once.
    const scrapedReleases = await Promise.map(urls, url => scrapeRelease(url, release, type), {
        concurrency: 5,
    });

    const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type }));

    if (argv.save) {
        // TODO: scrape and store the movie a scene belongs to (scrapedRelease.movie)
        // and link the scene to it through parentId.
        const stored = await storeReleases(curatedReleases);
        // Tolerate storeReleases resolving to nothing instead of failing on destructuring.
        const storedReleases = stored && stored.releases;

        if (storedReleases) {
            console.log(storedReleases.map(storedRelease => `http://${config.web.host}:${config.web.port}/scene/${storedRelease.id}`).join('\n'));
        }
    }

    return curatedReleases;
}
2020-01-02 23:59:02 +00:00
// Functions this module makes available to other modules.
module.exports = {
scrapeRelease,
scrapeReleases,
};