'use strict'; const config = require('config'); const Promise = require('bluebird'); const logger = require('./logger'); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const { findSiteByUrl } = require('./sites'); const { findNetworkByUrl } = require('./networks'); const { storeReleases } = require('./releases'); async function findSite(url, release) { const site = (release && release.site) || await findSiteByUrl(url); if (site) { return site; } const network = await findNetworkByUrl(url); if (network) { return { ...network, network, isFallback: true, }; } return null; } async function scrapeRelease(source, basicRelease = null, type = 'scene') { // profile scraper may return either URLs or pre-scraped scenes const sourceIsUrl = typeof source === 'string'; const url = sourceIsUrl ? source : source.url; const release = sourceIsUrl ? basicRelease : source; const site = await findSite(url, release); if (!site) { throw new Error('Could not find site in database'); } const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug]; if (!scraper) { throw new Error('Could not find scraper for URL'); } if (type === 'scene' && !scraper.fetchScene) { throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`); } if (type === 'movie' && !scraper.fetchMovie) { throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`); } if (!release) { logger.info(`Scraping release from ${url}`); } const scrapedRelease = type === 'scene' ? await scraper.fetchScene(url, site, release) : await scraper.fetchMovie(url, site, release); return { url, ...release, ...scrapedRelease, ...(scrapedRelease && release?.tags && { tags: release.tags.concat(scrapedRelease.tags), }), site, }; } async function scrapeReleases(sources, release = null, type = 'scene') { const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type), { concurrency: 5, }); const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type })); if (argv.scene && argv.inspect) { // only show when fetching from URL console.log(curatedReleases); } if (argv.save) { /* const movie = scrapedRelease.movie ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie') : null; if (movie) { const { releases: [storedMovie] } = await storeReleases([movie]); curatedRelease.parentId = storedMovie.id; } */ const { releases: storedReleases } = await storeReleases(curatedReleases); if (storedReleases) { logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join('')); } } } module.exports = { scrapeRelease, scrapeReleases, };