traxxx/src/store-releases.js

'use strict';
const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const slugify = require('./utils/slugify');
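
// Map a scraped release onto the releases table row format; only releases without an existing row get a created_batch_id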
function curateReleaseEntry(release, batchId, existingRelease) {
	const slug = slugify(release.title, '-', {
		encode: true,
		limit: config.titleSlugLength,
	});

	const curatedRelease = {
		title: release.title,
		entry_id: release.entryId || null,
		site_id: release.site.id,
		shoot_id: release.shootId || null,
		studio_id: release.studio?.id || null,
		url: release.url,
		date: release.date,
		slug,
		description: release.description,
		duration: release.duration,
		type: release.type,
		// director: release.director,
		// likes: release.rating && release.rating.likes,
		// dislikes: release.rating && release.rating.dislikes,
		// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
		deep: typeof release.deep === 'boolean' ? release.deep : false,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
	};

	if (!existingRelease) {
		curatedRelease.created_batch_id = batchId;
	}

	return curatedRelease;
}
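
// Match releases that only carry a channel slug (or a fallback site) to their channel's site record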
async function attachChannelSites(releases) {
	const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isFallback));

	const channelSites = await knex('sites').whereIn('slug', releasesWithoutSite.map(release => release.channel));
	const channelSitesBySlug = channelSites.reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});

	const releasesWithChannelSite = releases
		.map((release) => {
			if (release.site && !release.site.isFallback) {
				return release;
			}

			if (release.channel && channelSitesBySlug[release.channel]) {
				return {
					...release,
					site: channelSitesBySlug[release.channel],
				};
			}

			logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);

			return null;
		})
		.filter(Boolean);

	return releasesWithChannelSite;
}
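
// Replace studio slugs on releases with the matching studio records from the database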
async function attachStudios(releases) {
	const studioSlugs = releases.map(release => release.studio).filter(Boolean);

	const studios = await knex('studios').whereIn('slug', studioSlugs);
	const studioBySlug = studios.reduce((acc, studio) => ({ ...acc, [studio.slug]: studio }), {});

	const releasesWithStudio = releases.map((release) => {
		if (release.studio && studioBySlug[release.studio]) {
			// swap the studio slug for the matched studio record, so curateReleaseEntry can read its ID
			return {
				...release,
				studio: studioBySlug[release.studio],
			};
		}

		if (release.studio) {
			logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);
		}

		return release;
	});

	return releasesWithStudio;
}
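
// Split releases into ones already stored (same site ID and entry ID) and ones not yet in the database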
async function extractUniqueReleases(releases) {
	const duplicateReleaseEntries = await knex('releases')
		.whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));

	const duplicateReleaseEntryKeys = new Set(duplicateReleaseEntries.map(releaseEntry => `${releaseEntry.site_id}_${releaseEntry.entry_id}`));

	const duplicateReleases = releases.filter(release => duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
	const uniqueReleases = releases.filter(release => !duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));

	return { duplicateReleases, uniqueReleases };
}
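
// Persist scraped releases: create a batch, resolve channel sites and studios, skip releases already
// stored for the same site and entry ID, and insert the remainder.
// Typical call (sketch, caller assumed): const stored = await storeReleases(scrapedReleases);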
async function storeReleases(releases) {
	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const releasesWithSites = await attachChannelSites(releases);
	const releasesWithStudios = await attachStudios(releasesWithSites);

	// uniqueness is site ID + entry ID, so filter uniques only after channel sites are attached
	const { uniqueReleases, duplicateReleases } = await extractUniqueReleases(releasesWithStudios);

	// duplicates are only dumped for inspection for now; the redownload flag is not acted on here yet
	console.log(argv.redownload, duplicateReleases);

	const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

	const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');

	if (Array.isArray(storedReleases)) {
		return storedReleases;
	}

	// nothing inserted
	return [];
}
module.exports = {
	storeReleases,
};