'use strict';

const config = require('config');
const fs = require('fs-extra');
const path = require('path');
const Promise = require('bluebird');
const moment = require('moment');
const mime = require('mime');
const bhttp = require('bhttp');

const argv = require('./argv');
const knex = require('./knex');
const scrapers = require('./scrapers');
const fetchScene = require('./fetch-scene');

// Split config.include into network and site slugs; an entry is either a network
// slug, or a [networkSlug, [siteSlug, ...]] tuple specifying individual sites
function destructConfigNetworks(networks) {
  return networks.reduce((acc, network) => {
    if (Array.isArray(network)) {
      // network specifies sites
      return {
        ...acc,
        sites: [...acc.sites, ...network[1]],
      };
    }

    return {
      ...acc,
      networks: [...acc.networks, network],
    };
  }, {
    networks: [],
    sites: [],
  });
}

function curateSites(sites) {
  return sites.map(site => ({
    id: site.id,
    name: site.name,
    slug: site.slug,
    description: site.description,
    url: site.url,
    network: {
      id: site.network_id,
      name: site.network_name,
      slug: site.network_slug,
      parameters: JSON.parse(site.network_parameters),
    },
    parameters: JSON.parse(site.parameters),
  }));
}

async function accumulateIncludedSites() {
  if (argv.networks || argv.sites) {
    const networks = await knex('networks').select('id').whereIn('slug', argv.networks || []);
    const networkIds = networks.map(network => network.id);

    const rawSites = await knex('sites')
      .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
      .whereIn('sites.slug', argv.sites || [])
      .orWhereIn('network_id', networkIds)
      .leftJoin('networks', 'sites.network_id', 'networks.id');

    return curateSites(rawSites);
  }

  const included = destructConfigNetworks(config.include);

  const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
  const networkIds = networks.map(network => network.id);

  // Select the same network columns as above; curateSites relies on all of them
  const rawSites = await knex('sites')
    .select('sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.parameters as network_parameters')
    .whereIn('sites.slug', included.sites || [])
    .orWhereIn('network_id', networkIds)
    .leftJoin('networks', 'sites.network_id', 'networks.id');

  return curateSites(rawSites);
}

async function findDuplicateReleases(latestReleases, _siteId) {
  const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(shootId => shootId !== undefined);
  const latestReleasesEntryIds = latestReleases.map(release => release.entryId).filter(entryId => entryId !== undefined);

  return knex('releases')
    .whereIn('shoot_id', latestReleasesShootIds)
    .orWhereIn('entry_id', latestReleasesEntryIds);
}

async function storeActors(release, releaseEntry) {
  const actors = await knex('actors').whereIn('name', release.actors);
  const newActors = release.actors.filter(actorName => !actors.some(actor => actor.name === actorName));

  // Interpolating the query builder renders its SQL, so ON CONFLICT can be appended
  const { rows: insertedActors } = newActors.length
    ? await knex.raw(`${knex('actors').insert(newActors.map(actorName => ({
      name: actorName,
      slug: actorName.toLowerCase().replace(/\s+/g, '-'),
    })))} ON CONFLICT DO NOTHING RETURNING *`)
    : { rows: [] };

  return knex('actors_associated').insert(actors.concat(insertedActors).map(actor => ({
    release_id: releaseEntry.id,
    actor_id: actor.id,
  })), '*');
}
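/*
 * A sketch of the SQL the raw query above renders to (assuming PostgreSQL, which
 * the ON CONFLICT and RETURNING clauses imply):
 *
 *   insert into "actors" ("name", "slug")
 *   values ('Jane Doe', 'jane-doe')
 *   ON CONFLICT DO NOTHING RETURNING *
 *
 * so re-importing an actor that already exists is silently skipped instead of
 * failing the whole insert with a unique-constraint violation.
 */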
async function storeTags(release, releaseEntry) {
  return knex('tags_associated').insert(release.tags.map(tagId => ({
    tag_id: tagId,
    release_id: releaseEntry.id,
  })));
}

async function storePhotos(release, releaseEntry) {
  console.log(`Storing ${release.photos.length} photos for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

  const files = await Promise.map(release.photos, async (photoUrl, index) => {
    const { pathname } = new URL(photoUrl);
    const mimetype = mime.getType(pathname);

    const res = await bhttp.get(photoUrl);

    if (res.statusCode === 200) {
      const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `${index + 1}.${mime.getExtension(mimetype)}`);
      await fs.writeFile(path.join(config.photoPath, filepath), res.body);

      return {
        filepath,
        mimetype,
        index,
      };
    }

    console.warn(`Failed to store photo ${index + 1} for (${release.site.name}, ${releaseEntry.id}) "${release.title}": ${res.statusCode}`);
    return null;
  }, {
    concurrency: 2,
  });

  // Use the original download index, so a failed photo does not shift the
  // stored index of the photos that follow it
  await knex('media').insert(files.filter(file => file).map(({ filepath, mimetype, index }) => ({
    path: filepath,
    mime: mimetype,
    index,
    domain: 'releases',
    target_id: releaseEntry.id,
    role: 'photo',
  })));
}

async function storePoster(release, releaseEntry) {
  console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

  const res = await bhttp.get(release.poster);

  if (res.statusCode === 200) {
    const { pathname } = new URL(release.poster);
    const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
    const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);

    await fs.writeFile(path.join(config.photoPath, filepath), res.body);

    await knex('media').insert({
      path: filepath,
      mime: mimetype,
      domain: 'releases',
      target_id: releaseEntry.id,
      role: 'poster',
    });

    return;
  }

  console.warn(`Failed to store poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}": ${res.statusCode}`);
}

async function storeTrailer(release, releaseEntry) {
  console.log(`Storing trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

  const { pathname } = new URL(release.trailer.src);
  const mimetype = release.trailer.type || mime.getType(pathname);

  const res = await bhttp.get(release.trailer.src);

  // Verify the response before writing, as storePhotos and storePoster do,
  // so an error page is not saved to disk as a trailer
  if (res.statusCode !== 200) {
    console.warn(`Failed to store trailer for (${release.site.name}, ${releaseEntry.id}) "${release.title}": ${res.statusCode}`);
    return;
  }

  const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
  await fs.writeFile(path.join(config.photoPath, filepath), res.body);

  await knex('media').insert({
    path: filepath,
    mime: mimetype,
    domain: 'releases',
    target_id: releaseEntry.id,
    role: 'trailer',
    quality: release.trailer.quality || null,
  });
}
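/*
 * storePhotos, storePoster and storeTrailer all write rows into the same `media`
 * table, distinguished by the `role` column, with the files themselves stored on
 * disk under config.photoPath/<site slug>/<release id>/.
 */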
async function storeReleases(releases = []) {
  return Promise.map(releases, async (release) => {
    const curatedRelease = {
      site_id: release.site.id,
      studio_id: release.studio ? release.studio.id : null,
      shoot_id: release.shootId || null,
      entry_id: release.entryId || null,
      url: release.url,
      title: release.title,
      date: release.date,
      description: release.description,
      // director: release.director,
      duration: release.duration,
      likes: release.rating && release.rating.likes,
      dislikes: release.rating && release.rating.dislikes,
      rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
      deep: argv.deep,
    };

    /*
    const releaseQuery = `${knex('releases').insert(curatedRelease).toString()} ON CONFLICT DO NOTHING RETURNING *`;
    const releaseEntry = await knex.raw(releaseQuery);
    */

    const releaseEntries = await knex('releases')
      .insert(curatedRelease)
      .returning('*');

    if (releaseEntries.length) {
      const releaseEntry = releaseEntries[0];

      console.log(`Stored (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);

      if (release.poster || (release.photos && release.photos.length)) {
        await fs.mkdir(path.join(config.photoPath, release.site.slug, releaseEntry.id.toString()), { recursive: true });
      }

      await Promise.all([
        release.actors && release.actors.length > 0 ? storeActors(release, releaseEntry) : Promise.resolve(),
        release.tags && release.tags.length > 0 ? storeTags(release, releaseEntry) : Promise.resolve(),
        release.photos && release.photos.length > 0 ? storePhotos(release, releaseEntry) : Promise.resolve(),
        release.poster ? storePoster(release, releaseEntry) : Promise.resolve(),
        release.trailer ? storeTrailer(release, releaseEntry) : Promise.resolve(),
      ]);

      return;
    }

    console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
  }, {
    concurrency: 2,
  });
}

async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
  const latestReleases = await scraper.fetchLatest(site, page);

  if (latestReleases.length === 0) {
    return [];
  }

  const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);

  // Normalize all known IDs to strings so the Set lookups below cannot miss on type
  const duplicateReleasesIds = new Set(
    duplicateReleases
      .map(release => release.shoot_id || release.entry_id)
      .concat(duplicateReleases.map(release => release.entry_id || release.shoot_id))
      // exclude accumulated releases to prevent an infinite loop if the next page contains the same releases as the previous
      .concat(accReleases.map(release => release.shootId || release.entryId))
      .filter(Boolean)
      .map(String),
  );

  const uniqueReleases = latestReleases.filter(release => !duplicateReleasesIds.has(String(release.shootId))
    && !duplicateReleasesIds.has(String(release.entryId))
    && moment(release.date).isAfter(afterDate));

  console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);

  const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;

  // Keep paginating while recent unique releases turn up; for scrapers that do
  // not supply dates, moment(undefined) is "now" and argv.pages caps the recursion
  if (uniqueReleases.length > 0
    && moment(oldestReleaseOnPage).isAfter(afterDate)
    && (oldestReleaseOnPage || page < argv.pages)) {
    return fetchNewReleases(scraper, site, afterDate, accReleases.concat(uniqueReleases), page + 1);
  }

  return accReleases.concat(uniqueReleases);
}
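/*
 * Example: with afterDate derived from an argv.after value of "1 month" (assuming
 * argv.after carries an "<amount> <unit>" string, as split in fetchReleases below),
 * fetchNewReleases walks back page by page until the oldest release on a page
 * predates one month ago, or until argv.pages is reached for scrapers that do not
 * report release dates.
 */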
async function fetchReleases() {
  const sites = await accumulateIncludedSites();

  if (sites.length === 0) {
    console.error('None of the specified sites are in the database');
    return [];
  }

  const scenesPerSite = await Promise.map(sites, async (site) => {
    // A site-specific scraper takes precedence over its network's scraper
    const scraper = scrapers[site.slug] || scrapers[site.network.slug];

    if (scraper) {
      try {
        const afterDate = moment.utc().subtract(...argv.after.split(' ')).toDate();

        const [newReleases, upcomingReleases] = await Promise.all([
          fetchNewReleases(scraper, site, afterDate),
          scraper.fetchUpcoming ? scraper.fetchUpcoming(site) : [],
        ]);

        console.log(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);

        const finalReleases = argv.deep
          ? await Promise.map(newReleases, async (release) => {
            if (release.url) {
              const scene = await fetchScene(release.url, release);

              return {
                ...release,
                ...scene,
              };
            }

            return release;
          }, {
            concurrency: 2,
          })
          : newReleases;

        if (argv.save) {
          await storeReleases(finalReleases);
        }

        return [
          ...finalReleases.map(release => ({
            ...release,
            network: site.network,
          })),
          ...upcomingReleases.map(release => ({
            ...release,
            network: site.network,
            upcoming: true,
          })),
        ];
      } catch (error) {
        if (argv.debug) {
          console.error(`${site.name}: Failed to fetch releases`, error);
          return [];
        }

        console.log(`${site.name}: Failed to fetch releases`);
        return [];
      }
    }

    console.error(`Could not find scraper for '${site.name}' (${site.slug})`);
    return [];
  }, {
    concurrency: 2,
  });

  const accumulatedScenes = scenesPerSite.reduce((acc, siteScenes) => ([...acc, ...siteScenes]), []);
  const sortedScenes = accumulatedScenes.sort(({ date: dateA }, { date: dateB }) => moment(dateB).diff(dateA));

  await knex.destroy();

  return sortedScenes;
}

module.exports = fetchReleases;
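/*
 * Usage sketch (hypothetical wiring; the actual entry point lives elsewhere):
 *
 *   const fetchReleases = require('./fetch-releases');
 *
 *   fetchReleases()
 *     .then(releases => console.log(`Fetched ${releases.length} releases`))
 *     .catch(error => console.error(error));
 *
 * Note that fetchReleases() destroys the knex connection pool before resolving,
 * so it is intended to run once per process.
 */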