'use strict';

const config = require('config');
const moment = require('moment');

const argv = require('./argv');
const knex = require('./knex');
const scrapers = require('./scrapers');

/**
 * Partitions the configured include list into network IDs and site IDs.
 * An array entry is interpreted as `[networkId, siteIds]`, meaning only the
 * listed sites of that network are included; any other entry is a network ID
 * all of whose sites are included.
 * @param {Array} networks Raw entries from `config.include`.
 * @returns {{networks: Array, sites: Array}} Separated ID lists.
 */
function destructConfigNetworks(networks) {
  return networks.reduce((acc, network) => {
    if (Array.isArray(network)) {
      // network specifies sites
      return {
        ...acc,
        sites: [...acc.sites, ...network[1]],
      };
    }

    return {
      ...acc,
      networks: [...acc.networks, network],
    };
  }, {
    networks: [],
    sites: [],
  });
}

/**
 * Maps raw site rows (joined with their network name) into the site shape
 * used by the scrapers. `parameters` is stored as a JSON string in the DB
 * and is parsed here.
 * @param {Array<Object>} sites Raw rows from the `sites` table.
 * @returns {Array<Object>} Curated site objects.
 */
function curateSites(sites) {
  return sites.map(site => ({
    id: site.id,
    name: site.name,
    description: site.description,
    url: site.url,
    network: {
      id: site.network_id,
      name: site.network_name,
    },
    parameters: JSON.parse(site.parameters),
  }));
}

/**
 * Resolves the set of sites to scrape. CLI arguments (`--sites`,
 * `--networks`) take precedence; otherwise the `include` list from the
 * config file is used.
 * @returns {Promise<Array<Object>>} Curated site objects.
 */
async function accumulateIncludedSites() {
  if (argv.networks || argv.sites) {
    const rawSites = await knex('sites')
      .select('sites.*', 'networks.name as network_name')
      .whereIn('sites.id', argv.sites || [])
      .orWhereIn('network_id', argv.networks || [])
      .leftJoin('networks', 'sites.network_id', 'networks.id');

    return curateSites(rawSites);
  }

  const included = destructConfigNetworks(config.include);

  const rawSites = await knex('sites')
    .select('sites.*', 'networks.name as network_name')
    .whereIn('sites.id', included.sites)
    .orWhereIn('network_id', included.networks)
    .leftJoin('networks', 'sites.network_id', 'networks.id');

  return curateSites(rawSites);
}

/**
 * Fetches stored releases matching any of the scraped releases' shoot or
 * entry IDs, so already-seen releases can be filtered out.
 * @param {Array<Object>} latestReleases Scraped releases.
 * @param {*} _siteId Unused; kept for call-site compatibility.
 * @returns {Promise<Array<Object>>} Matching rows from `releases`.
 */
async function findDuplicateReleases(latestReleases, _siteId) {
  const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(release => release !== undefined);
  const latestReleasesEntryIds = latestReleases.map(release => release.entryId).filter(release => release !== undefined);

  return knex('releases')
    .whereIn('shoot_id', latestReleasesShootIds)
    .orWhereIn('entry_id', latestReleasesEntryIds);
}

/**
 * Persists scraped releases, ignoring rows that collide with existing ones.
 * @param {Array<Object>} releases Scraped releases to store.
 * @returns {Promise<Array<Object>>} The curated rows that were submitted
 *   for insertion (empty array when there was nothing to save).
 */
async function storeReleases(releases) {
  const curatedReleases = releases.map(release => ({
    site_id: release.site.id,
    shoot_id: release.shootId || null,
    entry_id: release.entryId || null,
    url: release.url,
    title: release.title,
    date: release.date,
    description: release.description,
    director: release.director,
    duration: release.duration,
    likes: release.rating && release.rating.likes,
    dislikes: release.rating && release.rating.dislikes,
    rating: release.rating && release.rating.stars,
  }));

  if (curatedReleases.length) {
    console.log(`Saving ${curatedReleases.length} new releases to database`);

    // Rewrite the generated INSERT into INSERT OR IGNORE so duplicate rows
    // are skipped silently. NOTE(review): this is SQLite syntax — assumes
    // the knex connection targets SQLite; confirm against ./knex config.
    const insertQuery = knex('releases').insert(curatedReleases).toString();
    await knex.raw(insertQuery.replace('insert', 'INSERT OR IGNORE'));

    return curatedReleases;
  }

  return [];
}

/**
 * Recursively pages through a site's latest releases, collecting releases
 * newer than `afterDate` that are not already in the database.
 * @param {Object} scraper Scraper module exposing `fetchLatest(site, page)`.
 * @param {Object} site Curated site object.
 * @param {Date} afterDate Only releases strictly after this date are kept.
 * @param {Array<Object>} accReleases Releases accumulated from earlier pages.
 * @param {number} page 1-based page number to fetch.
 * @returns {Promise<Array<Object>>} Unique recent releases across all pages.
 */
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
  const latestReleases = await scraper.fetchLatest(site, page);

  if (latestReleases.length === 0) {
    // Fix: an empty page previously returned [], discarding everything
    // accumulated from earlier pages.
    return accReleases;
  }

  const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);

  // Normalize every ID through String() so numeric DB columns still match
  // the String() comparisons below; drop null/undefined before insertion.
  // Accumulated releases are included to prevent an infinite loop if the
  // next page contains the same releases as the previous.
  const duplicateReleasesIds = new Set(
    duplicateReleases
      .map(release => release.shoot_id || release.entry_id)
      .concat(accReleases.map(release => release.shootId || release.entryId))
      .filter(id => id !== undefined && id !== null)
      .map(String),
  );

  const uniqueReleases = latestReleases.filter(release => !duplicateReleasesIds.has(String(release.shootId))
    && !duplicateReleasesIds.has(String(release.entryId))
    && moment(release.date).isAfter(afterDate));

  console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

  const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;

  if (uniqueReleases.length > 0 && moment(oldestReleaseOnPage).isAfter(afterDate)) {
    return fetchNewReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
  }

  return accReleases.concat(uniqueReleases);
}

/**
 * Entry point: scrapes recent and upcoming releases for every included
 * site in parallel, optionally saves them (`--save`), tears down the DB
 * connection, and returns all releases sorted newest-first.
 * @returns {Promise<Array<Object>>} Releases tagged with their network;
 *   upcoming releases carry `upcoming: true`.
 */
async function fetchReleases() {
  const sites = await accumulateIncludedSites();

  const scenesPerSite = await Promise.all(sites.map(async (site) => {
    // Site-specific scraper wins over the network-wide one.
    const scraper = scrapers[site.id] || scrapers[site.network.id];

    if (scraper) {
      try {
        const afterDate = moment.utc().subtract(...argv.after.split(' ')).toDate();

        const [newReleases, upcomingReleases] = await Promise.all([
          fetchNewReleases(scraper, site, afterDate),
          scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
        ]);

        console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

        if (argv.save) {
          await storeReleases(newReleases);
        }

        return [
          ...newReleases.map(release => ({
            ...release,
            network: site.network,
          })),
          ...upcomingReleases.map(release => ({
            ...release,
            network: site.network,
            upcoming: true,
          })),
        ];
      } catch (error) {
        if (argv.debug) {
          console.error(`${site.id}: Failed to fetch releases`, error);
          return [];
        }

        console.log(`${site.id}: Failed to fetch releases`);
        return [];
      }
    }

    return [];
  }));

  // Per-site failures are swallowed above, so Promise.all resolves and the
  // pool can be torn down here.
  await knex.destroy();

  const accumulatedScenes = scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
  const sortedScenes = accumulatedScenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));

  return sortedScenes;
}

module.exports = fetchReleases;