'use strict'; const config = require('config'); const Promise = require('bluebird'); const moment = require('moment'); const logger = require('./logger')(__filename); const knex = require('./knex'); const argv = require('./argv'); const whereOr = require('./utils/where-or'); const { associateTags } = require('./tags'); const { associateActors, scrapeBasicActors } = require('./actors'); const { pluckItems, storeMedia, associateMedia, } = require('./media'); const { fetchSites, findSiteByUrl } = require('./sites'); const slugify = require('./utils/slugify'); const capitalize = require('./utils/capitalize'); function commonQuery(queryBuilder, { filter = [], after = new Date(0), // January 1970 before = new Date(2 ** 44), // May 2109 limit = 100, }) { const finalFilter = [].concat(filter); // ensure filter is array queryBuilder .leftJoin('sites', 'releases.site_id', 'sites.id') .leftJoin('studios', 'releases.studio_id', 'studios.id') .leftJoin('networks', 'sites.network_id', 'networks.id') .select( 'releases.*', 'sites.name as site_name', 'sites.slug as site_slug', 'sites.url as site_url', 'sites.network_id', 'sites.parameters as site_parameters', 'studios.name as studio_name', 'sites.slug as site_slug', 'studios.url as studio_url', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', ) .whereNotExists((builder) => { // apply tag filters builder .select('*') .from('tags_associated') .leftJoin('tags', 'tags_associated.tag_id', 'tags.id') .whereIn('tags.slug', finalFilter) .where('tags_associated.domain', 'releases') .whereRaw('tags_associated.target_id = releases.id'); }) .andWhere('releases.date', '>', after) .andWhere('releases.date', '<=', before) .orderBy([{ column: 'date', order: 'desc' }, { column: 'created_at', order: 'desc' }]) .limit(limit); } async function curateRelease(release) { const [actors, tags, media] = await Promise.all([ knex('actors_associated') .select( 'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate', 'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias', 'media.thumbnail as avatar', ) .where({ release_id: release.id }) .leftJoin('actors', 'actors.id', 'actors_associated.actor_id') .leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2') .leftJoin('media', (builder) => { builder .on('media.target_id', 'actors.id') .andOnVal('media.domain', 'actors') .andOnVal('media.index', '0'); }) .orderBy('actors.gender'), knex('tags_associated') .select('tags.name', 'tags.slug') .where({ domain: 'releases', target_id: release.id, }) .leftJoin('tags', 'tags.id', 'tags_associated.tag_id') .orderBy('tags.priority', 'desc'), knex('media') .where({ target_id: release.id, domain: 'releases', }) .orderBy(['role', 'index']), ]); const curatedRelease = { id: release.id, type: release.type, title: release.title, date: release.date, dateAdded: release.created_at, description: release.description, url: release.url, shootId: release.shoot_id, entryId: release.entry_id, actors: actors.map(actor => ({ id: actor.id, slug: actor.slug, name: actor.name, gender: actor.gender, birthdate: actor.birthdate, age: moment().diff(actor.birthdate, 'years'), ageThen: moment(release.date).diff(actor.birthdate, 'years'), avatar: actor.avatar, origin: actor.birth_country_alpha2 ? { country: { name: actor.birth_country_alias, alpha2: actor.birth_country_alpha2, }, } : null, })), director: release.director, tags, duration: release.duration, photos: media.filter(item => item.role === 'photo'), poster: media.filter(item => item.role === 'poster')[0], covers: media.filter(item => item.role === 'cover'), trailer: media.filter(item => item.role === 'trailer')[0], site: { id: release.site_id, name: release.site_name, independent: !!release.site_parameters?.independent, slug: release.site_slug, url: release.site_url, }, studio: release.studio_id ? { id: release.studio_id, name: release.studio_name, slug: release.studio_slug, url: release.studio_url, } : null, network: { id: release.network_id, name: release.network_name, description: release.network_description, slug: release.network_slug, url: release.network_url, }, }; return curatedRelease; } function curateReleases(releases) { return Promise.all(releases.map(async release => curateRelease(release))); } async function attachChannelSite(release) { if (!release.site?.isFallback && !release.channel?.force) { return release; } if (!release.channel) { throw new Error(`Unable to derive channel site from generic URL: ${release.url}`); } const [site] = await fetchSites({ name: release.channel.name || release.channel, slug: release.channel.slug || release.channel, }); if (site) { return { ...release, site, }; } try { const urlSite = await findSiteByUrl(release.channel.url || release.channel); return { ...release, site: urlSite, }; } catch (error) { throw new Error(`Unable to derive channel site from generic URL: ${release.url}`); } } async function attachStudio(release) { if (!release.studio) { return release; } const studio = await knex('studios') .where('name', release.studio) .orWhere('slug', release.studio) .orWhere('url', release.studio) .first(); return { ...release, studio, }; } async function curateReleaseEntry(release, batchId, existingRelease) { const slug = slugify(release.title, { encode: true, limit: config.titleSlugLength, }); const curatedRelease = { site_id: release.site.id, studio_id: release.studio ? release.studio.id : null, shoot_id: release.shootId || null, entry_id: release.entryId || null, parent_id: release.parentId, type: release.type, url: release.url, title: release.title, slug, date: release.date, description: release.description, // director: release.director, duration: release.duration, // likes: release.rating && release.rating.likes, // dislikes: release.rating && release.rating.dislikes, // rating: release.rating && release.rating.stars && Math.floor(release.rating.stars), deep: typeof release.deep === 'boolean' ? release.deep : false, deep_url: release.deepUrl, updated_batch_id: batchId, ...(!existingRelease && { created_batch_id: batchId }), }; return curatedRelease; } async function fetchReleases(queryObject = {}, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .andWhere(builder => whereOr(queryObject, 'releases', builder)); return curateReleases(releases); } async function fetchSiteReleases(queryObject, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'sites', builder)); return curateReleases(releases); } async function fetchNetworkReleases(queryObject, options = {}) { const releases = await knex('releases') .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'networks', builder)); return curateReleases(releases); } async function fetchActorReleases(queryObject, options = {}) { const releases = await knex('actors_associated') .leftJoin('releases', 'actors_associated.release_id', 'releases.id') .leftJoin('actors', 'actors_associated.actor_id', 'actors.id') .select( 'actors.name as actor_name', ) .modify(commonQuery, options) .where(builder => whereOr(queryObject, 'actors', builder)); return curateReleases(releases); } async function fetchTagReleases(queryObject, options = {}) { const releases = await knex('tags_associated') .leftJoin('releases', 'tags_associated.target_id', 'releases.id') .leftJoin('tags', 'tags_associated.tag_id', 'tags.id') .select( 'tags.name as tag_name', ) .modify(commonQuery, options) .where('tags_associated.domain', 'releases') .where(builder => whereOr(queryObject, 'tags', builder)); return curateReleases(releases); } function accumulateActors(releases) { return releases.reduce((acc, release) => { if (!Array.isArray(release.actors)) return acc; release.actors.forEach((actor) => { const actorName = actor.name ? actor.name.trim() : actor.trim(); const actorSlug = slugify(actorName); if (!actorSlug) return; if (!acc[actorSlug]) { acc[actorSlug] = { name: actorName, slug: actorSlug, releaseIds: new Set(), avatars: [], }; } acc[actorSlug].releaseIds.add(release.id); if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info if (actor.avatar) { const avatar = Array.isArray(actor.avatar) ? actor.avatar.map(avatarX => ({ src: avatarX.src || avatarX, copyright: avatarX.copyright === undefined ? capitalize(release.site?.network?.name) : avatarX.copyright, })) : { src: actor.avatar.src || actor.avatar, copyright: actor.avatar.copyright === undefined ? capitalize(release.site?.network?.name) : actor.avatar.copyright, }; acc[actorSlug].avatars = acc[actorSlug].avatars.concat([avatar]); // don't flatten fallbacks } }); return acc; }, {}); } function accumulateMovies(releases) { return releases.reduce((acc, release) => { if (release.movie) { if (acc[release.movie]) { acc[release.movie] = acc[release.movie].concat(release.id); return acc; } acc[release.movie] = [release.id]; } return acc; }, {}); } async function storeReleaseAssets(releases) { if (!argv.media) { return; } const releasePostersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.poster] }), {}); const releaseCoversById = releases.reduce((acc, release) => ({ ...acc, [release.id]: release.covers }), {}); const releaseTrailersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.trailer] }), {}); const releaseTeasersById = releases.reduce((acc, release) => ({ ...acc, [release.id]: [release.teaser] }), {}); const releasePhotosById = releases.reduce((acc, release) => ({ ...acc, [release.id]: pluckItems(release.photos), }), {}); if (argv.images && argv.posters) { const posters = await storeMedia(Object.values(releasePostersById).flat(), 'release', 'poster'); if (posters) await associateMedia(releasePostersById, posters, 'release', 'poster'); } if (argv.images && argv.covers) { const covers = await storeMedia(Object.values(releaseCoversById).flat(), 'release', 'cover'); if (covers) await associateMedia(releaseCoversById, covers, 'release', 'cover'); } if (argv.images && argv.photos) { const photos = await storeMedia(Object.values(releasePhotosById).flat(), 'release', 'photo'); if (photos) await associateMedia(releasePhotosById, photos, 'release', 'photo'); } if (argv.videos && argv.trailers) { const trailers = await storeMedia(Object.values(releaseTrailersById).flat(), 'release', 'trailer'); if (trailers) await associateMedia(releaseTrailersById, trailers, 'release', 'trailer'); } if (argv.videos && argv.teasers) { const teasers = await storeMedia(Object.values(releaseTeasersById).flat(), 'release', 'teaser'); if (teasers) await associateMedia(releaseTeasersById, teasers, 'release', 'teaser'); } } async function updateReleasesSearch(releaseIds) { const documents = await knex.raw(` SELECT releases.id as release_id, to_tsvector( 'traxxx', releases.title || ' ' || sites.name || ' ' || sites.slug || ' ' || networks.name || ' ' || networks.slug || ' ' || coalesce(releases.shoot_id, '') || ' ' || EXTRACT(YEAR FROM releases.date) || ' ' || CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR) || ' ' || CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR) || ' ' || SUBSTRING(CAST(EXTRACT(YEAR FROM releases.date) AS VARCHAR) FROM 3 for 2) || ' ' || LPAD(CAST(EXTRACT(MONTH FROM releases.date) AS VARCHAR), 2, '0') || ' ' || LPAD(CAST(EXTRACT(DAY FROM releases.date) AS VARCHAR), 2, '0') || ' ' || string_agg(coalesce(actors.name, ''), ' ') || ' ' || string_agg(coalesce(tags.name, ''), ' ') || ' ' || string_agg(coalesce(tags_aliases.name, ''), ' ') ) as document FROM releases LEFT JOIN sites ON releases.site_id = sites.id LEFT JOIN networks ON sites.network_id = networks.id LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN tags ON local_tags.tag_id = tags.id LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for WHERE releases.id = ANY(?) GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug; `, [releaseIds]); if (documents.rows?.length > 0) { const query = knex('releases_search').insert(documents.rows).toString(); await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`); } } async function storeRelease(release, batchId) { if (!release.entryId) { logger.warn(`Missing entry ID, unable to store ${release.url}`); return null; } const existingRelease = await knex('releases') .where({ entry_id: release.entryId, site_id: release.site.id, }) .first(); const curatedRelease = await curateReleaseEntry(release, batchId, existingRelease); if (existingRelease && !argv.redownload) { return existingRelease; } if (existingRelease && argv.redownload) { const [updatedRelease] = await knex('releases') .where('id', existingRelease.id) .update({ ...existingRelease, ...curatedRelease, }) .returning('*'); if (updatedRelease) { await associateTags(release, updatedRelease.id); logger.info(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`); } await associateTags(release, existingRelease.id); return existingRelease; } const [releaseEntry] = await knex('releases') .insert(curatedRelease) .returning('*'); await associateTags(release, releaseEntry.id); logger.info(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`); return releaseEntry; } async function storeReleases(releases) { const [batchId] = await knex('batches').insert({ comment: null }).returning('id'); const storedReleases = await Promise.map(releases, async (release) => { try { const releaseWithChannelSite = await attachChannelSite(release); const releaseWithStudio = await attachStudio(releaseWithChannelSite); const storedRelease = await storeRelease(releaseWithStudio, batchId); return storedRelease && { id: storedRelease.id, slug: storedRelease.slug, ...releaseWithChannelSite, }; } catch (error) { logger.error(error); return null; } }, { concurrency: 10, }).filter(Boolean); logger.info(`Stored ${storedReleases.length} new releases`); const actors = accumulateActors(storedReleases); const movies = accumulateMovies(storedReleases); await associateActors(actors, storedReleases); await Promise.all([ // actors need to be stored before generating search updateReleasesSearch(storedReleases.map(release => release.id)), storeReleaseAssets(storedReleases), ]); if (argv.withProfiles && Object.keys(actors).length > 0) { await scrapeBasicActors(); } return { releases: storedReleases, actors, movies, }; } module.exports = { fetchReleases, fetchActorReleases, fetchSiteReleases, fetchNetworkReleases, fetchTagReleases, storeRelease, storeReleases, };