'use strict'; const config = require('config'); const Promise = require('bluebird'); const moment = require('moment'); const logger = require('./logger'); const knex = require('./knex'); const argv = require('./argv'); const whereOr = require('./utils/where-or'); const { associateTags } = require('./tags'); const { associateActors, scrapeBasicActors } = require('./actors'); const { createMediaDirectory, storePhotos, // storeReleasePhotos, storeTrailer, } = require('./media'); const { fetchSites, findSiteByUrl } = require('./sites'); const slugify = require('./utils/slugify'); function commonQuery(queryBuilder, { filter = [], after = new Date(0), // January 1970 before = new Date(2 ** 44), // May 2109 limit = 100, }) { const finalFilter = [].concat(filter); // ensure filter is array queryBuilder .leftJoin('sites', 'releases.site_id', 'sites.id') .leftJoin('studios', 'releases.studio_id', 'studios.id') .leftJoin('networks', 'sites.network_id', 'networks.id') .select( 'releases.*', 'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters', 'studios.name as studio_name', 'sites.slug as site_slug', 'studios.url as studio_url', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', ) .whereNotExists((builder) => { // apply tag filters builder .select('*') .from('tags_associated') .leftJoin('tags', 'tags_associated.tag_id', 'tags.id') .whereIn('tags.slug', finalFilter) .where('tags_associated.domain', 'releases') .whereRaw('tags_associated.target_id = releases.id'); }) .andWhere('releases.date', '>', after) .andWhere('releases.date', '<=', before) .orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }]) .limit(limit); } async function curateRelease(release) { const [actors, tags, media] = await Promise.all([ knex('actors_associated') .select( 'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate', 'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias', 'media.thumbnail as avatar', ) .where({ release_id: release.id }) .leftJoin('actors', 'actors.id', 'actors_associated.actor_id') .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2') .leftJoin('media', (builder) => { builder .on('media.target_id', 'actors.id') .andOnVal('media.domain', 'actors') .andOnVal('media.index', '0'); }) .orderBy('actors.gender'), knex('tags_associated') .select('tags.name', 'tags.slug') .where({ domain: 'releases', target_id: release.id, }) .leftJoin('tags', 'tags.id', 'tags_associated.tag_id') .orderBy('tags.priority', 'desc'), knex('media') .where({ target_id: release.id, domain: 'releases', }) .orderBy(['role', 'index']), ]); const curatedRelease = { id: release.id, type: release.type, title: release.title, date: release.date, dateAdded: release.created_at, description: release.description, url: release.url, shootId: release.shoot_id, entryId: release.entry_id, actors: actors.map(actor => ({ id: actor.id, slug: actor.slug, name: actor.name, gender: actor.gender, birthdate: actor.birthdate, age: moment().diff(actor.birthdate, 'years'), ageThen: moment(release.date).diff(actor.birthdate, 'years'), avatar: actor.avatar, origin: actor.birth_country_alpha2 ? { country: { name: actor.birth_country_alias, alpha2: actor.birth_country_alpha2, }, } : null, })), director: release.director, tags, duration: release.duration, photos: media.filter(item => item.role === 'photo'), poster: media.filter(item => item.role === 'poster')[0], covers: media.filter(item => item.role === 'cover'), trailer: media.filter(item => item.role === 'trailer')[0], site: { id: release.site_id, name: release.site_name, independent: release.site_parameters ? (JSON.parse(release.site_parameters).independent || false) : false, slug: release.site_slug, url: release.site_url, }, studio: release.studio_id ? { id: release.studio_id, name: release.studio_name, slug: release.studio_slug, url: release.studio_url, } : null, network: { id: release.network_id, name: release.network_name, description: release.network_description, slug: release.network_slug, url: release.network_url, }, }; return curatedRelease; } function curateReleases(releases) { return Promise.all(releases.map(async release => curateRelease(release))); } async function attachChannelSite(release) { if (!release.site.isFallback) { return release; } if (!release.channel) { throw new Error(`Unable to derive channel site from generic URL: ${release.url}`); } const [site] = await fetchSites({ name: release.channel, slug: release.channel, }); if (site) { return { ...release, site, }; } try { const urlSite = await findSiteByUrl(release.channel); return { ...release, site: urlSite, }; } catch (error) { throw new Error(`Unable to derive channel site from generic URL: ${release.url}`); } } async function attachStudio(release) { if (!release.studio) { return release; } const studio = await knex('studios') .where('name', release.studio) .orWhere('slug', release.studio) .orWhere('url', release.studio) .first(); return { ...release, studio, }; } async function curateReleaseEntry(release) { const slug = slugify(release.title, { encode: true, limit: config.titleSlugLength, }); const curatedRelease = { site_id: release.site.id, studio_id: release.studio ? release.studio.id : null, shoot_id: release.shootId || null, entry_id: release.entryId || null, parent_id: release.parentId, type: release.type, url: release.url, title: release.title, slug, date: release.date, description: release.description, // director: release.director, duration: release.duration, // likes: release.rating && release.rating.likes, // dislikes: release.rating && release.rating.dislikes, // rating: release.rating && release.rating.stars && Math.floor(release.rating.stars), deep: typeof release.deep === 'boolean' ? release.deep : false, }; return curatedRelease; } async function fetchReleases(queryObject = {}, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .andWhere(builder => whereOr(queryObject, 'releases', builder)); return curateReleases(releases); } async function fetchSiteReleases(queryObject, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'sites', builder)); return curateReleases(releases); } async function fetchNetworkReleases(queryObject, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'networks', builder)); return curateReleases(releases); } async function fetchActorReleases(queryObject, options = {}) { const releases = await knex('actors_associated') .leftJoin('releases', 'actors_associated.release_id', 'releases.id') .leftJoin('actors', 'actors_associated.actor_id', 'actors.id') .select( 'actors.name as actor_name', ) .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'actors', builder)); return curateReleases(releases); } async function fetchTagReleases(queryObject, options = {}) { const releases = await knex('tags_associated') .leftJoin('releases', 'tags_associated.target_id', 'releases.id') .leftJoin('tags', 'tags_associated.tag_id', 'tags.id') .select( 'tags.name as tag_name', ) .modify(commonQuery, options) .where('tags_associated.domain', 'releases') .where(builder => whereOr(queryObject, 'tags', builder)); return curateReleases(releases); } function accumulateActors(releases) { return releases.reduce((acc, release) => { if (!Array.isArray(release.actors)) return acc; release.actors.forEach((actor) => { const actorName = actor.name ? actor.name.trim() : actor.trim(); if (!acc[actorName]) acc[actorName] = []; acc[actorName].push(release.id); }); return acc; }, {}); } function accumulateMovies(releases) { return releases.reduce((acc, release) => { if (release.movie) { if (acc[release.movie]) { acc[release.movie] = acc[release.movie].concat(release.id); return acc; } acc[release.movie] = [release.id]; } return acc; }, {}); } async function storeReleaseAssets(releases) { // await storeReleasePhotos(releases); await Promise.map(releases, async (release) => { const subpath = `${release.site.network.slug}/${release.site.slug}/${release.id}/`; const identifier = `"${release.title}" (${release.id})`; try { await createMediaDirectory('releases', subpath); // don't use Promise.all to prevent concurrency issues with duplicate detection if (release.poster) { await storePhotos([release.poster], { role: 'poster', targetId: release.id, subpath, }, identifier); } await storePhotos(release.photos, { targetId: release.id, subpath, primaryRole: release.poster ? null : 'poster', }, identifier); await storePhotos(release.covers, { role: 'cover', targetId: release.id, subpath, }, identifier); await storeTrailer(release.trailer, { targetId: release.id, subpath, role: 'trailer', }, identifier); await storeTrailer(release.teaser, { targetId: release.id, subpath, role: 'teaser', }, identifier); } catch (error) { logger.error(error.message); } }, { concurrency: 10, }); } async function storeRelease(release) { const existingRelease = await knex('releases') .where({ entry_id: release.entryId, site_id: release.site.id, }) .first(); const curatedRelease = await curateReleaseEntry(release); if (existingRelease && !argv.redownload) { return existingRelease.id; } if (existingRelease && argv.redownload) { const [updatedRelease] = await knex('releases') .where('id', existingRelease.id) .update({ ...existingRelease, ...curatedRelease, }) .returning('*'); if (updatedRelease) { await associateTags(release, updatedRelease.id); logger.info(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`); } await associateTags(release, existingRelease.id); return existingRelease.id; } const [releaseEntry] = await knex('releases') .insert(curatedRelease) .returning('*'); await associateTags(release, releaseEntry.id); logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`); return releaseEntry; } async function storeReleases(releases) { const storedReleases = await Promise.map(releases, async (release) => { try { const releaseWithChannelSite = await attachChannelSite(release); const releaseWithStudio = await attachStudio(releaseWithChannelSite); const { id, slug } = await storeRelease(releaseWithStudio); return { id, slug, ...releaseWithChannelSite, }; } catch (error) { logger.error(error.message); return null; } }, { concurrency: 10, }).filter(release => release); const actors = accumulateActors(storedReleases); const movies = accumulateMovies(storedReleases); await Promise.all([ associateActors(actors, storedReleases), storeReleaseAssets(storedReleases), ]); if (argv.withProfiles && Object.keys(actors).length > 0) { await scrapeBasicActors(); } return { releases: storedReleases, actors, movies, }; } module.exports = { fetchReleases, fetchActorReleases, fetchSiteReleases, fetchNetworkReleases, fetchTagReleases, storeRelease, storeReleases, };