'use strict'; const config = require('config'); const Promise = require('bluebird'); const { decode } = require('html-entities'); const argv = require('./argv'); const logger = require('./logger')(__filename); const knex = require('./knex'); const slugify = require('./utils/slugify'); const bulkInsert = require('./utils/bulk-insert'); const resolvePlace = require('./utils/resolve-place'); const chunk = require('./utils/chunk'); const { formatDate } = require('./utils/qu'); const { associateActors, associateDirectors, scrapeActors, toBaseActors } = require('./actors'); const { associateReleaseTags } = require('./tags'); const { curateEntity } = require('./entities'); const { associateReleaseMedia } = require('./media'); const { updateSceneSearch, updateMovieSearch } = require('./update-search'); const { notify } = require('./alerts'); async function curateReleaseEntry(release, batchId, existingRelease, type = 'scene') { const slugBase = release.title || (release.actors?.length && `${release.entity.slug} ${release.actors.map((actor) => actor.name).join(' ')}`) || (release.date && `${release.entity.slug} ${formatDate(release.date, 'YYYY MM DD')}`) || null; const slug = slugify(slugBase, '-', { encode: true, limit: config.titleSlugLength, }); const curatedRelease = { title: decode(release.title), alt_titles: release.altTitles?.map((title) => decode(title)), entry_id: release.entryId || null, entity_id: release.entity.id, studio_id: release.studio?.id || null, url: release.url, date: Number(release.date) ? release.date : null, date_precision: release.datePrecision, slug, description: decode(release.description), comment: release.comment, photo_count: Number(release.photoCount) || null, deep: typeof release.deep === 'boolean' ? release.deep : false, deep_url: release.deepUrl, updated_batch_id: batchId, }; if (release.id) { // release is updated curatedRelease.id = release.id; } if (type === 'scene') { curatedRelease.shoot_id = release.shootId || null; curatedRelease.production_date = Number(release.productionDate) ? release.productionDate : null; curatedRelease.duration = Number(release.duration) || null; curatedRelease.qualities = Array.from(new Set(release.qualities?.map(Number).filter(Boolean))).sort((qualityA, qualityB) => qualityB - qualityA); } if (release.productionLocation) { curatedRelease.production_location = decode(release.productionLocation); if (argv.resolvePlace) { const productionLocation = await resolvePlace(decode(release.productionLocation)); if (productionLocation) { curatedRelease.production_city = productionLocation.city; curatedRelease.production_state = productionLocation.state; curatedRelease.production_country_alpha2 = productionLocation.country; } } } if (!existingRelease && !release.id) { curatedRelease.created_batch_id = batchId; } return curatedRelease; } async function attachChannelEntities(releases) { const releasesWithoutEntity = releases.filter((release) => release.channel && (!release.entity || release.entity.type === 'network')); const channelEntities = await knex('entities') .select(knex.raw('entities.*, row_to_json(parents) as parent')) .whereIn('entities.slug', releasesWithoutEntity.map((release) => release.channel)) .where('entities.type', 'channel') .leftJoin('entities AS parents', 'parents.id', 'entities.parent_id'); const channelEntitiesBySlug = channelEntities.reduce((acc, entity) => ({ ...acc, [entity.slug]: entity }), {}); const releasesWithChannelEntity = await Promise.all(releases .map(async (release) => { if (release.channel && channelEntitiesBySlug[release.channel]) { const curatedEntity = await curateEntity(channelEntitiesBySlug[release.channel]); return { ...release, entity: curatedEntity, }; } if (release.entity) { return release; } logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`); return null; })); return releasesWithChannelEntity.filter(Boolean); } async function attachStudios(releases) { const studioSlugs = releases.map((release) => release.studio).filter(Boolean); const studios = await knex('entities') .whereIn('slug', studioSlugs) .where('type', 'studio'); const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {}); const releasesWithStudio = releases.map((release) => { if (release.studio && studioBySlug[release.studio]) { return { ...release, studio: studioBySlug[release.studio], }; } if (release.studio) { logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`); } return release; }); return releasesWithStudio; } function attachReleaseIds(releases, storedReleases, batchId) { const storedReleaseIdsByEntityIdAndEntryId = storedReleases.reduce((acc, release) => { if (!acc[release.entity_id]) acc[release.entity_id] = {}; acc[release.entity_id][release.entry_id] = release.id; return acc; }, {}); const releasesWithId = releases.map((release) => { if (!release.entity) { logger.error(`No entity available for ${release.url}`); return null; } const id = storedReleaseIdsByEntityIdAndEntryId[release.entity.id]?.[release.entryId] || storedReleaseIdsByEntityIdAndEntryId[release.entity.parent?.id]?.[release.entryId]; if (id) { return { ...release, id, batchId, }; } return null; }).filter(Boolean); return releasesWithId; } function filterInternalDuplicateReleases(releases) { const releasesByEntityIdAndEntryId = releases.reduce((acc, release) => { if (!release.entity) { return acc; } if (!release.entryId) { logger.warn(`No entry ID supplied for "${release.title}" from '${release.entity.name}'`); return acc; } if (!acc[release.entity.id]) { acc[release.entity.id] = {}; } acc[release.entity.id][release.entryId] = release; return acc; }, {}); return Object.values(releasesByEntityIdAndEntryId) .map((entityReleases) => Object.values(entityReleases)) .flat(); } async function filterDuplicateReleases(releases, domain = 'releases') { const internalUniqueReleases = filterInternalDuplicateReleases(releases); const internalUniqueReleaseChunks = chunk(internalUniqueReleases); const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex(domain) .whereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk.map((release) => [release.entryId, release.entity.id])) .orWhereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk // scene IDs shared across network, mark as duplicate so scene can be updated with channel if only available on release day (i.e. Perv City) .filter((release) => release.entity.parent?.parameters?.networkEntryIds) .map((release) => [release.entryId, release.entity.parent.id])), { concurrency: 10 }); const duplicateReleaseEntries = duplicateReleaseEntryChunks.flat(); const duplicateReleasesByEntityIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => { if (!acc[release.entity_id]) acc[release.entity_id] = {}; acc[release.entity_id][release.entry_id] = true; return acc; }, {}); const duplicateReleases = internalUniqueReleases.filter((release) => duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId] || duplicateReleasesByEntityIdAndEntryId[release.entity.parent?.id]?.[release.entryId]); const uniqueReleases = internalUniqueReleases.filter((release) => !duplicateReleasesByEntityIdAndEntryId[release.entity.id]?.[release.entryId] && !duplicateReleasesByEntityIdAndEntryId[release.entity.parent?.id]?.[release.entryId]); return { uniqueReleases, duplicateReleases, duplicateReleaseEntries, }; } async function storeChapters(releases) { const chapters = releases .map((release) => release.chapters?.map((chapter, index) => ({ releaseId: release.id, index: index + 1, time: chapter.time, duration: chapter.duration, title: chapter.title, description: chapter.description, poster: chapter.poster, photos: chapter.photos, tags: chapter.tags, }))) .flat() .filter(Boolean) .sort((chapterA, chapterB) => chapterA.time - chapterB.time); const curatedChapterEntries = chapters.map((chapter) => ({ index: chapter.index, time: chapter.time, duration: chapter.duration, title: chapter.title, description: chapter.description, release_id: chapter.releaseId, })); const storedChapters = await bulkInsert('chapters', curatedChapterEntries, ['release_id', 'index']); const chapterIdsByReleaseIdAndChapter = storedChapters.reduce((acc, chapter) => ({ ...acc, [chapter.release_id]: { ...acc[chapter.release_id], [chapter.index]: chapter.id, }, }), {}); const chaptersWithId = chapters.map((chapter) => ({ ...chapter, id: chapterIdsByReleaseIdAndChapter[chapter.releaseId][chapter.index], })); await associateReleaseTags(chaptersWithId, 'chapter'); // media is more error-prone, associate separately await associateReleaseMedia(chaptersWithId, 'chapter'); } async function associateMovieScenes(movies, movieScenes) { const moviesByEntityIdAndEntryId = movies.reduce((acc, movie) => ({ ...acc, [movie.entity.id]: { ...acc[movie.entity.id], [movie.entryId]: movie, }, }), {}); const associations = movieScenes .toSorted((sceneA, sceneB) => { return (sceneA.sceneIndex || 1) - (sceneB.sceneIndex || 1); }) .map((scene) => { if (!scene.movie) { return null; } const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId] || moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId]; if (sceneMovie?.id) { return { movie_id: sceneMovie.id, scene_id: scene.id, }; } return null; }) .filter(Boolean); await bulkInsert('movies_scenes', associations, false); } async function associateSerieScenes(series, serieScenes) { const seriesByEntityIdAndEntryId = series.reduce((acc, serie) => ({ ...acc, [serie.entity.id]: { ...acc[serie.entity.id], [serie.entryId]: serie, }, }), {}); const associations = serieScenes.map((scene) => { if (!scene.serie) { return null; } const sceneSerie = seriesByEntityIdAndEntryId[scene.entity.id]?.[scene.serie.entryId] || seriesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.serie.entryId]; if (sceneSerie?.id) { return { serie_id: sceneSerie.id, scene_id: scene.id, }; } return null; }).filter(Boolean); await bulkInsert('series_scenes', associations, false); } async function storeMovies(movies, useBatchId) { if (!movies || movies.length === 0) { return []; } const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie'))); const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true); const moviesWithId = attachReleaseIds(movies, storedMovies); await associateReleaseTags(moviesWithId, 'movie'); await updateMovieSearch(moviesWithId.map((movie) => movie.id)); await associateReleaseMedia(moviesWithId, 'movie'); return moviesWithId; } async function storeSeries(series, useBatchId) { if (!series || series.length === 0) { return []; } const { uniqueReleases } = await filterDuplicateReleases(series, 'series'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const curatedSerieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'serie'))); const storedSeries = await bulkInsert('series', curatedSerieEntries, ['entity_id', 'entry_id'], true); const seriesWithId = attachReleaseIds(series, storedSeries); await updateMovieSearch(seriesWithId.map((serie) => serie.id), 'serie'); await associateReleaseMedia(seriesWithId, 'serie'); return seriesWithId; } async function storeScenes(releases, useBatchId) { if (!releases || releases.length === 0) { return []; } const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const releasesWithChannels = await attachChannelEntities(releases); const releasesWithBaseActors = releasesWithChannels.map((release) => ({ ...release, actors: toBaseActors(release.actors) })); const releasesWithStudios = await attachStudios(releasesWithBaseActors); // uniqueness is entity ID + entry ID, filter uniques after adding entities const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios, 'releases'); const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId))); const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries); const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : []; const uniqueReleasesWithId = attachReleaseIds(uniqueReleases, storedReleaseEntries, batchId); const duplicateReleasesWithId = attachReleaseIds(duplicateReleases, duplicateReleaseEntries, batchId); const curatedDuplicateReleases = await Promise.all(duplicateReleasesWithId.map((release) => curateReleaseEntry(release, batchId))); const releasesWithId = uniqueReleasesWithId.concat(duplicateReleasesWithId); const updated = await knex.raw(` UPDATE releases SET url = COALESCE(new.url, releases.url), date = COALESCE(new.date, releases.date), entity_id = COALESCE((new.entity->>'id')::integer, releases.entity_id), title = COALESCE(new.title, releases.title), description = COALESCE(new.description, releases.description), duration = COALESCE(new.duration, releases.duration), deep = new.url IS NOT NULL, updated_at = NOW() FROM json_to_recordset(:scenes) AS new(id int, url text, date timestamptz, entity json, title text, description text, duration integer, deep boolean) WHERE releases.id = new.id `, { scenes: JSON.stringify(curatedDuplicateReleases), }); const [actors, storedSeries] = await Promise.all([ associateActors(releasesWithId, batchId), storeSeries(releasesWithId.map((release) => release.serie && { ...release.serie, entity: release.entity }).filter(Boolean), batchId), associateReleaseTags(releasesWithId), storeChapters(releasesWithId), ]); await associateSerieScenes(storedSeries, releasesWithId); await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time await updateSceneSearch(releasesWithId.map((release) => release.id)); // media is more error-prone, associate separately await associateReleaseMedia(releasesWithId); if (argv.sceneActors && actors) { await scrapeActors(actors.map((actor) => actor.name)); } logger.info(`Stored ${storedReleaseEntries.length}, updated ${updated.rowCount} releases`); await notify(releasesWithId); return releasesWithId; } module.exports = { associateMovieScenes, storeScenes, storeMovies, updateSceneSearch, updateMovieSearch, };