Improved release storage module. Added new tags module. Added movie scraping.
This commit is contained in:
@@ -2,10 +2,10 @@
|
||||
|
||||
const config = require('config');
|
||||
|
||||
const argv = require('./argv');
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const slugify = require('./utils/slugify');
|
||||
const { associateTags } = require('./tags');
|
||||
|
||||
function curateReleaseEntry(release, batchId, existingRelease) {
|
||||
const slug = slugify(release.title, '-', {
|
||||
@@ -34,7 +34,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
|
||||
updated_batch_id: batchId,
|
||||
};
|
||||
|
||||
if (!existingRelease) {
|
||||
if (!existingRelease && !release.id) {
|
||||
curatedRelease.created_batch_id = batchId;
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ async function attachChannelSites(releases) {
|
||||
};
|
||||
}
|
||||
|
||||
logger.error(`Unable to match channel '${release.channel.slug || release.channel}' from generic URL ${release.url}`);
|
||||
logger.error(`Unable to match channel '${release.channel?.slug || release.channel}' from generic URL ${release.url}`);
|
||||
|
||||
return null;
|
||||
})
|
||||
@@ -93,15 +93,41 @@ async function attachStudios(releases) {
|
||||
return releasesWithStudio;
|
||||
}
|
||||
|
||||
/**
 * Attach database IDs to releases by matching each release's site ID and
 * entry ID against a list of stored release rows.
 *
 * @param {Object[]} releases - Releases carrying `site.id` and `entryId`.
 * @param {Object[]} storedReleases - Rows carrying `id`, `site_id` and `entry_id`.
 * @returns {Object[]} Shallow copies of the releases with an `id` property;
 *   `id` is undefined when no stored row matches the release.
 */
function attachReleaseIds(releases, storedReleases) {
  // Null-prototype tables: an entry ID such as 'constructor' must not resolve
  // to an inherited Object.prototype member.
  const storedReleaseIdsBySiteIdAndEntryId = storedReleases.reduce((acc, release) => {
    if (!acc[release.site_id]) acc[release.site_id] = Object.create(null);

    acc[release.site_id][release.entry_id] = release.id;

    return acc;
  }, Object.create(null));

  return releases.map(release => ({
    ...release,
    // A site without any stored rows has no inner table; yield undefined
    // instead of throwing a TypeError for the whole batch.
    id: storedReleaseIdsBySiteIdAndEntryId[release.site.id]?.[release.entryId],
  }));
}
|
||||
|
||||
/**
 * Split releases into those already present in the `releases` table and those
 * that are new, where identity is the combination of site ID and entry ID.
 *
 * @param {Object[]} releases - Releases carrying `site.id` and `entryId`.
 * @returns {Promise<{uniqueReleases: Object[], duplicateReleases: Object[], duplicateReleaseEntries: Object[]}>}
 *   `uniqueReleases` have no matching row, `duplicateReleases` do, and
 *   `duplicateReleaseEntries` are the matching rows as returned by the query.
 */
async function extractUniqueReleases(releases) {
  if (releases.length === 0) {
    // Nothing to look up, so skip the query.
    return {
      uniqueReleases: [],
      duplicateReleases: [],
      duplicateReleaseEntries: [],
    };
  }

  const duplicateReleaseEntries = await knex('releases')
    .whereIn(['entry_id', 'site_id'], releases.map(release => [release.entryId, release.site.id]));

  // Nested lookup table: site ID -> entry ID -> true for every stored row.
  const duplicateReleasesBySiteIdAndEntryId = duplicateReleaseEntries.reduce((acc, release) => {
    if (!acc[release.site_id]) acc[release.site_id] = {};

    acc[release.site_id][release.entry_id] = true;

    return acc;
  }, {});

  const isDuplicate = release => Boolean(duplicateReleasesBySiteIdAndEntryId[release.site.id]?.[release.entryId]);

  const duplicateReleases = releases.filter(release => isDuplicate(release));
  const uniqueReleases = releases.filter(release => !isDuplicate(release));

  return {
    uniqueReleases,
    duplicateReleases,
    duplicateReleaseEntries,
  };
}
|
||||
|
||||
async function storeReleases(releases) {
|
||||
@@ -111,19 +137,19 @@ async function storeReleases(releases) {
|
||||
const releasesWithStudios = await attachStudios(releasesWithSites);
|
||||
|
||||
// uniqueness is site ID + entry ID, filter uniques after adding sites
|
||||
const { uniqueReleases, duplicateReleases } = await extractUniqueReleases(releasesWithStudios);
|
||||
const { uniqueReleases, duplicateReleaseEntries } = await extractUniqueReleases(releasesWithStudios);
|
||||
|
||||
console.log(argv.redownload, duplicateReleases);
|
||||
const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
|
||||
|
||||
const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
|
||||
const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');
|
||||
const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
|
||||
// TODO: update duplicate releases
|
||||
|
||||
if (Array.isArray(storedReleases)) {
|
||||
return storedReleases;
|
||||
}
|
||||
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
|
||||
const releasesWithId = attachReleaseIds(releases, [].concat(storedReleaseEntries, duplicateReleaseEntries));
|
||||
|
||||
// nothing inserted
|
||||
return [];
|
||||
await associateTags(releasesWithId);
|
||||
|
||||
return releasesWithId;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user