'use strict';

const config = require('config');
const moment = require('moment');

const argv = require('./argv');
const knex = require('./knex');
const scrapers = require('./scrapers');

/**
 * Split a list of include entries into plain network entries and site IDs.
 *
 * An entry that is an array contributes the items of its second element to
 * `sites`; any other entry is appended to `networks` as-is.
 *
 * @param {Array} networks - Include entries (read from `config.include` by the caller).
 * @returns {{ networks: Array, sites: Array }} The separated entries.
 */
function destructConfigNetworks(networks) {
    return networks.reduce((acc, network) => {
        if (Array.isArray(network)) {
            // network specifies sites
            return {
                ...acc,
                sites: [...acc.sites, ...network[1]],
            };
        }

        return {
            ...acc,
            networks: [...acc.networks, network],
        };
    }, {
        networks: [],
        sites: [],
    });
}

/**
 * Map raw `sites` rows to the camelCased shape used by the rest of this module.
 *
 * @param {Array<Object>} sites - Rows selected from the `sites` table.
 * @returns {Array<Object>} Sites with `networkId` instead of `network_id` and
 *   `parameters` run through `JSON.parse`.
 */
function curateSites(sites) {
    return sites.map(site => ({
        id: site.id,
        name: site.name,
        description: site.description,
        url: site.url,
        networkId: site.network_id,
        parameters: JSON.parse(site.parameters),
    }));
}

/**
 * Load the sites to scrape.
 *
 * When `argv.networks` or `argv.sites` is set, those take precedence over
 * `config.include`; otherwise the configured include list is used.
 *
 * @returns {Promise<Array<Object>>} Curated sites whose ID or network ID is included.
 */
async function accumulateIncludedSites() {
    const included = (argv.networks || argv.sites)
        ? { sites: argv.sites || [], networks: argv.networks || [] }
        : destructConfigNetworks(config.include);

    const rawSites = await knex('sites')
        .whereIn('id', included.sites)
        .orWhereIn('network_id', included.networks);

    return curateSites(rawSites);
}

/**
 * Look up stored releases that share a shoot ID with the given scraped releases.
 *
 * @param {Array<Object>} latestReleases - Scraped releases, each optionally carrying `shootId`.
 * @returns {Promise<Array<Object>>} Matching rows from the `releases` table.
 */
async function findDuplicateReleases(latestReleases) {
    // A release without a shoot ID cannot match a stored shoot_id, so it is
    // left out of the lookup instead of being passed to the query as a binding.
    const shootIds = latestReleases
        .map(release => release.shootId)
        .filter(shootId => shootId !== undefined && shootId !== null);

    if (shootIds.length === 0) {
        return [];
    }

    return knex('releases')
        .whereIn('shoot_id', shootIds);
}

/**
 * Persist scraped releases in the `releases` table.
 *
 * @param {Array<Object>} releases - Scraped releases; each must carry `site.id`.
 * @returns {Promise<Array<Object>>} The rows handed to the insert, or an empty
 *   array when there was nothing to store.
 */
async function storeReleases(releases) {
    const curatedReleases = releases.map(release => ({
        site_id: release.site.id,
        shoot_id: release.shootId || null,
        url: release.url,
        title: release.title,
        date: release.date,
        description: release.description,
        director: release.director,
        duration: release.duration,
        likes: release.rating && release.rating.likes,
        dislikes: release.rating && release.rating.dislikes,
        rating: release.rating && release.rating.stars,
    }));

    if (curatedReleases.length === 0) {
        return [];
    }

    console.log(`Saving ${curatedReleases.length} new releases to database`);

    // The generated insert statement is rewritten into INSERT OR IGNORE so
    // conflicting rows are skipped rather than failing the whole batch.
    // NOTE(review): this is SQLite-style syntax - confirm the target database.
    const insertQuery = knex('releases').insert(curatedReleases).toString();
    await knex.raw(insertQuery.replace('insert', 'INSERT OR IGNORE'));

    return curatedReleases;
}

/**
 * Scrape a site page by page, collecting releases that are newer than
 * `afterDate` and not yet known.
 *
 * Paging continues while the current page yielded at least one new release
 * and its last release is still newer than `afterDate`.
 *
 * @param {Object} scraper - Scraper exposing `fetchLatest(site, page)`.
 * @param {Object} site - Curated site to scrape.
 * @param {Date} afterDate - Only releases dated after this moment are kept.
 * @param {Array<Object>} [accReleases=[]] - Releases collected on previous pages.
 * @param {number} [page=1] - Page to scrape.
 * @returns {Promise<Array<Object>>} All new releases collected so far.
 */
async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);
    const duplicateReleases = await findDuplicateReleases(latestReleases);

    // IDs are normalised to strings on both sides of the comparison, so a
    // numeric shoot ID still matches its stored or accumulated counterpart.
    const knownShootIds = new Set(
        duplicateReleases
            .map(release => release.shoot_id)
            // exclude accumulated releases to prevent an infinite loop if the next page contains the same releases as the previous
            .concat(accReleases.map(release => release.shootId))
            .filter(shootId => shootId !== undefined && shootId !== null)
            .map(shootId => String(shootId)),
    );

    const uniqueReleases = latestReleases.filter(release => !knownShootIds.has(String(release.shootId))
        && moment(release.date).isAfter(afterDate));

    console.log(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique releases`);

    const releases = accReleases.concat(uniqueReleases);

    // An empty page yields no unique releases, so the last release is only
    // read when the page is known to contain at least one entry.
    if (uniqueReleases.length > 0) {
        const oldestReleaseOnPage = latestReleases[latestReleases.length - 1].date;

        if (moment(oldestReleaseOnPage).isAfter(afterDate)) {
            return fetchNewReleases(scraper, site, afterDate, releases, page + 1);
        }
    }

    return releases;
}

/**
 * Fetch upcoming releases when the scraper supports it.
 *
 * Declared `async` so that a synchronous throw from the scraper surfaces as a
 * rejection of the returned promise.
 *
 * @param {Object} scraper - Scraper that may expose `fetchUpcoming(site)`.
 * @param {Object} site - Curated site to scrape.
 * @returns {Promise<Array<Object>>} Upcoming releases, or an empty array.
 */
async function fetchUpcomingReleases(scraper, site) {
    return scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [];
}

/**
 * Scrape new and upcoming releases for every included site.
 *
 * New releases are stored when `argv.save` is set. A site whose scrape fails
 * is logged and contributes no releases; the remaining sites are unaffected.
 *
 * @returns {Promise<Array<Object>>} New and upcoming releases of all sites,
 *   newest first; upcoming releases carry `upcoming: true`.
 */
async function fetchReleases() {
    const sites = await accumulateIncludedSites();

    const scenesPerSite = await Promise.all(sites.map(async (site) => {
        const scraper = scrapers[site.id] || scrapers[site.networkId];

        if (!scraper) {
            return [];
        }

        try {
            // argv.after is split on spaces and spread into moment's subtract().
            const afterDate = moment.utc().subtract(...argv.after.split(' ')).toDate();

            // Both promises are handed to Promise.all without awaiting either
            // first, so a rejection of one is never left unobserved while the
            // other is still pending.
            const [newReleases, upcomingReleases] = await Promise.all([
                fetchNewReleases(scraper, site, afterDate),
                fetchUpcomingReleases(scraper, site),
            ]);

            console.log(`${site.name}: Found ${newReleases.length} new releases, ${upcomingReleases.length} upcoming releases`);

            if (argv.save) {
                await storeReleases(newReleases);
            }

            return [
                ...newReleases,
                ...upcomingReleases.map(release => ({ ...release, upcoming: true })),
            ];
        } catch (error) {
            if (argv.debug) {
                console.error(`${site.id}: Failed to fetch releases`, error);
                return [];
            }

            console.log(`${site.id}: Failed to fetch releases`);
            return [];
        }
    }));

    const accumulatedScenes = scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
    const sortedScenes = accumulatedScenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));

    return sortedScenes;
}

module.exports = fetchReleases;