'use strict'; const config = require('config'); const logger = require('./logger')(__filename); const knex = require('./knex'); const slugify = require('./utils/slugify'); const { associateTags } = require('./tags'); const { associateActors } = require('./actors'); function curateReleaseEntry(release, batchId, existingRelease) { const slug = slugify(release.title, '-', { encode: true, limit: config.titleSlugLength, }); const curatedRelease = { title: release.title, entry_id: release.entryId || null, site_id: release.site.id, shoot_id: release.shootId || null, studio_id: release.studio?.id || null, url: release.url, date: release.date, slug, description: release.description, duration: release.duration, type: release.type, // director: release.director, // likes: release.rating && release.rating.likes, // dislikes: release.rating && release.rating.dislikes, // rating: release.rating && release.rating.stars && Math.floor(release.rating.stars), deep: typeof release.deep === 'boolean' ? release.deep : false, deep_url: release.deepUrl, updated_batch_id: batchId, }; if (!existingRelease && !release.id) { curatedRelease.created_batch_id = batchId; } return curatedRelease; } async function attachChannelSites(releases) { const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isFallback)); const channelSites = await knex('sites').whereIn('slug', releasesWithoutSite.map(release => release.channel)); const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {}); const releasesWithChannelSite = releases .map((release) => { if (release.site && !release.site.isFallback) { return release; } if (release.channel && channelSitesBySlug[release.channel]) { return { ...release, site: channelSitesBySlug[release.channel], }; } logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`); return null; }) .filter(Boolean); return releasesWithChannelSite; } async function attachStudios(releases) { const studioSlugs = releases.map(release => release.studio).filter(Boolean); const studios = await knex('studios').whereIn('slug', studioSlugs); const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {}); const releasesWithStudio = releases.map((release) => { if (release.studio && studioBySlug[release.studio]) { return { ...release, studio: release.studio, }; } if (release.studio) { logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`); } return release; }); return releasesWithStudio; } function attachReleaseIds(releases, storedReleases) { const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => { if (!acc[release.site_id]) acc[release.site_id] = {}; acc[release.site_id][release.entry_id] = release.id; return acc; }, {}); const releasesWithId = releases.map(release => ({ ...release, id: storedReleaseIdsBySiteIdAndEntryId[release.site.id][release.entryId], })); return releasesWithId; } function filterInternalDuplicateReleases(releases) { const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => { if (!acc[release.site.id]) { acc[release.site.id] = {}; } acc[release.site.id][release.entryId] = release; return acc; }, {}); return Object.values(releasesBySiteIdAndEntryId) .map(siteReleases => Object.values(siteReleases)) .flat(); } async function filterDuplicateReleases(releases) { const internalUniqueReleases = filterInternalDuplicateReleases(releases); const duplicateReleaseEntries = await knex('releases') .whereIn(['entry_id', 'site_id'], internalUniqueReleases.map(release => [release.entryId, release.site.id])); const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => { if (!acc[release.site_id]) acc[release.site_id] = {}; acc[release.site_id][release.entry_id] = true; return acc; }, {}); const duplicateReleases = internalUniqueReleases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]); const uniqueReleases = internalUniqueReleases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]); return { uniqueReleases, duplicateReleases, duplicateReleaseEntries, }; } async function storeReleases(releases) { const [batchId] = await knex('batches').insert({ comment: null }).returning('id'); const releasesWithSites = await attachChannelSites(releases); const releasesWithStudios = await attachStudios(releasesWithSites); // uniqueness is site ID + entry ID, filter uniques after adding sites const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios); const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId)); // console.log(curatedNewReleaseEntries); const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*'); // TODO: update duplicate releases const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : []; const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries)); await Promise.all([ associateTags(releasesWithId), associateActors(releasesWithId), ]); logger.info(`Stored ${storedReleaseEntries.length} releases`); return releasesWithId; } module.exports = { storeReleases, };