Improved release storage module. Added new tags module. Added movie scraping.

This commit is contained in:
2020-03-22 03:50:24 +01:00
parent d765543b30
commit fdb2b132f6
24 changed files with 343 additions and 141 deletions

View File

@@ -2,10 +2,10 @@
const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const slugify = require('./utils/slugify');
const { associateTags } = require('./tags');
function curateReleaseEntry(release, batchId, existingRelease) {
const slug = slugify(release.title, '-', {
@@ -34,7 +34,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
updated_batch_id: batchId,
};
if (!existingRelease) {
if (!existingRelease && !release.id) {
curatedRelease.created_batch_id = batchId;
}
@@ -60,7 +60,7 @@ async function attachChannelSites(releases) {
};
}
logger.error(`Unable to match channel '${release.channel.slug || release.channel}' from generic URL ${release.url}`);
logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
return null;
})
@@ -93,15 +93,41 @@ async function attachStudios(releases) {
return releasesWithStudio;
}
/**
 * Attaches the ID of the matching stored release to each release.
 *
 * Releases are matched to stored rows on site ID plus entry ID. A release
 * without a matching stored row gets `id: undefined` rather than raising a
 * TypeError when its site has no stored rows at all.
 *
 * @param {Object[]} releases - releases carrying `site.id` and `entryId`
 * @param {Object[]} storedReleases - rows carrying `site_id`, `entry_id` and `id`
 * @returns {Object[]} shallow copies of the releases with an added `id` property
 */
function attachReleaseIds(releases, storedReleases) {
  // nested lookup: site_id -> entry_id -> stored release id
  const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
    if (!acc[release.site_id]) acc[release.site_id] = {};
    acc[release.site_id][release.entry_id] = release.id;

    return acc;
  }, {});

  return releases.map(release => ({
    ...release,
    // the site bucket is absent when no stored row exists for that site, so guard the outer lookup
    id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
  }));
}
/**
 * Splits the given releases into those that already have a row in the
 * `releases` table and those that do not, matching on entry ID plus site ID.
 *
 * NOTE(review): this span reads like a rendered diff with removed and added
 * lines interleaved: `duplicateReleases` and `uniqueReleases` are each declared
 * twice, and a `return { duplicateReleases, uniqueReleases }` sits inside the
 * reduce callback. Confirm against the committed file which lines survive.
 *
 * @param {Object[]} releases - releases carrying `entryId` and `site.id`
 * @returns {Promise<Object>} presumably `{ uniqueReleases, duplicateReleases,
 *   duplicateReleaseEntries }`, per the final return statement; verify
 */
async function extractUniqueReleases(releases) {
// fetch the stored rows whose (entry_id, site_id) pair matches any incoming release
const duplicateReleaseEntries = await knex('releases')
.whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));
// set of `${site_id}_${entry_id}` keys for the stored rows
const duplicateReleaseEntryKeys = new Set(duplicateReleaseEntries.map(releaseEntry => `${releaseEntry.site_id}_${releaseEntry.entry_id}`));
const duplicateReleases = releases.filter(release => duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
const uniqueReleases = releases.filter(release => !duplicateReleaseEntryKeys.has(`${release.site.id}_${release.entryId}`));
// nested lookup: site_id -> entry_id -> true for every stored row
const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
if (!acc[release.site_id]) acc[release.site_id] = {};
acc[release.site_id][release.entry_id] = true;
return { duplicateReleases, uniqueReleases };
return acc;
}, {});
// optional chaining keeps the lookup safe for sites that have no stored rows
const duplicateReleases = releases.filter(release => duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
const uniqueReleases = releases.filter(release => !duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);
return {
uniqueReleases,
duplicateReleases,
duplicateReleaseEntries,
};
}
async function storeReleases(releases) {
@@ -111,19 +137,19 @@ async function storeReleases(releases) {
const releasesWithStudios = await attachStudios(releasesWithSites);
// uniqueness is site ID + entry ID, filter uniques after adding sites
const { uniqueReleases, duplicateReleases } = await extractUniqueReleases(releasesWithStudios);
const { uniqueReleases, duplicateReleaseEntries } = await extractUniqueReleases(releasesWithStudios);
console.log(argv.redownload, duplicateReleases);
const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');
const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
// TODO: update duplicate releases
if (Array.isArray(storedReleases)) {
return storedReleases;
}
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
const releasesWithId = attachReleaseIds(releases, [].concat(storedReleaseEntries, duplicateReleaseEntries));
// nothing inserted
return [];
await associateTags(releasesWithId);
return releasesWithId;
}
module.exports = {