'use strict';

const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const slugify = require('./utils/slugify');

// Map a scraped release onto the columns of the releases table.
// created_batch_id is only set when the release is not already stored.
function curateReleaseEntry(release, batchId, existingRelease) {
  const slug = slugify(release.title, '-', {
    encode: true,
    limit: config.titleSlugLength,
  });

  const curatedRelease = {
    title: release.title,
    entry_id: release.entryId || null,
    site_id: release.site.id,
    shoot_id: release.shootId || null,
    studio_id: release.studio?.id || null,
    url: release.url,
    date: release.date,
    slug,
    description: release.description,
    duration: release.duration,
    type: release.type,
    // director: release.director,
    // likes: release.rating && release.rating.likes,
    // dislikes: release.rating && release.rating.dislikes,
    // rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
    deep: typeof release.deep === 'boolean' ? release.deep : false,
    deep_url: release.deepUrl,
    updated_batch_id: batchId,
  };

  if (!existingRelease) {
    curatedRelease.created_batch_id = batchId;
  }

  return curatedRelease;
}

// Resolve channel slugs to site records for releases that were scraped from a
// generic URL and therefore only carry a fallback site. Releases whose channel
// cannot be matched are dropped.
async function attachChannelSites(releases) {
  const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isFallback));

  const channelSites = await knex('sites').whereIn('slug', releasesWithoutSite.map(release => release.channel));
  const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});

  const releasesWithChannelSite = releases
    .map((release) => {
      if (release.site && !release.site.isFallback) {
        return release;
      }

      if (release.channel && channelSitesBySlug[release.channel]) {
        return {
          ...release,
          site: channelSitesBySlug[release.channel],
        };
      }

      logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);

      return null;
    })
    .filter(Boolean);

  return releasesWithChannelSite;
}

// Resolve studio slugs to studio records so curateReleaseEntry can reference
// studio IDs. Releases with an unmatched studio are kept, but logged.
async function attachStudios(releases) {
  const studioSlugs = releases.map(release => release.studio).filter(Boolean);

  const studios = await knex('studios').whereIn('slug', studioSlugs);
  const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});

  const releasesWithStudio = releases.map((release) => {
    if (release.studio && studioBySlug[release.studio]) {
      return {
        ...release,
        studio: studioBySlug[release.studio],
      };
    }

    if (release.studio) {
      logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);
    }

    return release;
  });

  return releasesWithStudio;
}

// Split releases into those already stored (matched on site ID + entry ID) and
// those that are new.
async function extractUniqueReleases(releases) {
  const duplicateReleaseEntries = await knex('releases')
    .whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));

  const duplicateReleaseEntryKeys = new Set(duplicateReleaseEntries.map(releaseEntry => `${releaseEntry.site_id}_${releaseEntry.entry_id}`));

  const duplicateReleases = releases.filter(release => duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
  const uniqueReleases = releases.filter(release => !duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));

  return { duplicateReleases, uniqueReleases };
}

async function storeReleases(releases) {
  const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

  const releasesWithSites = await attachChannelSites(releases);
  const releasesWithStudios = await attachStudios(releasesWithSites);

  // uniqueness is site ID + entry ID, filter uniques after adding sites
  const { uniqueReleases, duplicateReleases } = await extractUniqueReleases(releasesWithStudios);

  console.log(argv.redownload, duplicateReleases);

  const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

  const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');

  if (Array.isArray(storedReleases)) {
    return storedReleases;
  }

  // nothing inserted
  return [];
}

module.exports = {
  storeReleases,
};