'use strict'; const config = require('config'); const Promise = require('bluebird'); const logger = require('./logger')(__filename); const argv = require('./argv'); const scrapers = require('./scrapers/scrapers'); const { findSiteByUrl } = require('./sites'); const { findNetworkByUrl } = require('./networks'); const { storeReleases } = require('./releases'); async function findSite(url, release) { if (release?.site) return release.site; if (!url) return null; const site = await findSiteByUrl(url); if (site) { return site; } const network = await findNetworkByUrl(url); if (network) { return { ...network, network, isFallback: true, }; } return null; } async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) { // profile scraper may return either URLs or pre-scraped scenes const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined; const url = sourceIsUrlOrEmpty ? source : source?.url; const release = sourceIsUrlOrEmpty ? basicRelease : source; const site = basicRelease?.site || await findSite(url, release); if (!site) { throw new Error(`Could not find site for ${url} in database`); } if (!argv.deep && release) { return { ...release, site, }; } const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug]; if (!scraper) { throw new Error(`Could not find scraper for ${url}`); } if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) { if (release) { logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`); return null; } throw new Error(`The '${site.name}'-scraper cannot fetch individual ${type}s`); } if (!release) { logger.info(`Scraping release from ${url}`); } const scrapedRelease = type === 'scene' ? await scraper.fetchScene(url, site, release, preflight) : await scraper.fetchMovie(url, site, release, preflight); return { ...release, ...scrapedRelease, ...(scrapedRelease && release?.tags && { tags: release.tags.concat(scrapedRelease.tags), }), site, }; } async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) { const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), { concurrency: 5, }).filter(Boolean); const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type })); if ((argv.scene || argv.movie) && argv.inspect) { // only show when fetching from URL } if (argv.save) { /* const movie = scrapedRelease.movie ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie') : null; if (movie) { const { releases: [storedMovie] } = await storeReleases([movie]); curatedRelease.parentId = storedMovie.id; } */ const { releases: storedReleases } = await storeReleases(curatedReleases); const movieScenes = storedReleases.map(movie => movie.scenes).flat(); // console.log(movieScenes); if (storedReleases) { logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join('')); } } } async function deepFetchReleases(baseReleases, beforeFetchLatest) { const deepReleases = await Promise.map(baseReleases, async (release) => { if (release.url || (release.path && release.site)) { try { const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest); if (fullRelease) { return { ...release, ...fullRelease, deep: true, }; } logger.warn(`Release scraper returned empty result for ${release.url}`); return release; } catch (error) { logger.error(`Failed to scrape ${release.url}: ${error}`); return { ...release, deep: false, }; } } return release; }, { concurrency: 2, }); // console.log(deepReleases); return deepReleases; } module.exports = { deepFetchReleases, scrapeRelease, scrapeReleases, };