'use strict'; const config = require('config'); const Promise = require('bluebird'); const logger = require('./logger')(__filename); const argv = require('./argv'); const include = require('./utils/argv-include')(argv); const knex = require('./knex'); const scrapers = require('./scrapers/scrapers'); const { findSiteByUrl } = require('./sites'); const { findNetworkByUrl } = require('./networks'); const { storeReleases } = require('./releases'); async function findSite(url, release) { if (release?.site) return release.site; if (!url) return null; const site = await findSiteByUrl(url); if (site) { return site; } const network = await findNetworkByUrl(url); if (network) { return { ...network, network, isFallback: true, }; } return null; } async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) { // profile scraper may return either URLs or pre-scraped scenes const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined; const url = sourceIsUrlOrEmpty ? source : source?.url; const release = sourceIsUrlOrEmpty ? basicRelease : source; const site = basicRelease?.site || await findSite(url, release); if (!site) { throw new Error(`Could not find site for ${url} in database`); } if (!argv.deep && release) { return { ...release, site, }; } const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug]; if (!scraper) { throw new Error(`Could not find scraper for ${url}`); } if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) { if (release) { logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`); return null; } throw new Error(`The '${site.name}'-scraper cannot fetch individual ${type}s`); } if (!release) { logger.info(`Scraping release from ${url}`); } const scrapedRelease = type === 'scene' ? await scraper.fetchScene(url, site, release, beforeFetchLatest, include) : await scraper.fetchMovie(url, site, release, beforeFetchLatest, include); return { ...release, ...scrapedRelease, ...(scrapedRelease && release?.tags && { tags: release.tags.concat(scrapedRelease.tags), }), site, }; } async function accumulateMovies(releases) { if (!argv.withMovies) return []; const moviesByUrl = releases.reduce((acc, release) => { if (!release.movie) return acc; const movie = release.movie.url ? release.movie : { url: release.movie }; if (!acc[movie.url]) { acc[movie.url] = { ...movie, type: 'movie', sceneIds: [], }; } acc[movie.url].sceneIds = acc[movie.url].sceneIds.concat(release.id); return acc; }, {}); const movies = await Promise.map(Object.values(moviesByUrl), async movie => scrapeRelease(movie, null, 'movie')); const { releases: storedMovies } = await storeReleases(movies); const movieAssociations = storedMovies.reduce((acc, movie) => acc.concat(movie.sceneIds.map(sceneId => ({ movie_id: movie.id, scene_id: sceneId, }))), []); await knex('releases_movies').insert(movieAssociations); // console.log(moviesByUrl); return movies; } async function scrapeReleases(sources, type = 'scene') { const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, null, type), { concurrency: 5, }).filter(Boolean); const curatedReleases = scrapedReleases.map(scrapedRelease => ({ ...scrapedRelease, type })); if ((argv.scene || argv.movie) && argv.inspect) { // only show when fetching from URL } if (argv.save) { const { releases: storedReleases } = await storeReleases(curatedReleases); await accumulateMovies(storedReleases); if (storedReleases) { logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join('')); } return storedReleases; } return curatedReleases; } async function scrapeScenes(sources) { return scrapeReleases(sources, 'scene'); } async function scrapeMovies(sources) { return scrapeReleases(sources, 'movie'); } async function deepFetchReleases(baseReleases, beforeFetchLatest) { const deepReleases = await Promise.map(baseReleases, async (release) => { if (release.url || (release.path && release.site)) { try { const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest); if (fullRelease) { return { ...release, ...fullRelease, deep: true, }; } logger.warn(`Release scraper returned empty result for ${release.url}`); return release; } catch (error) { logger.error(`Failed to scrape ${release.url}: ${error}`); return { ...release, deep: false, }; } } return release; }, { concurrency: 2, }); return deepReleases; } module.exports = { deepFetchReleases, scrapeMovies, scrapeRelease, scrapeReleases, scrapeScenes, };