Improved scraping and association behavior.

This commit is contained in:
2019-11-19 04:36:15 +01:00
parent 7e9fd19c2a
commit 3ec7b15886
10 changed files with 166 additions and 94 deletions

View File

@@ -4,8 +4,8 @@ const Promise = require('bluebird');
const knex = require('./knex');
const argv = require('./argv');
const whereOr = require('./utils/where-or');
const { storeTags } = require('./tags');
const { storeActors } = require('./actors');
const { associateTags } = require('./tags');
const { associateActors } = require('./actors');
const {
createMediaDirectory,
storePoster,
@@ -141,15 +141,6 @@ async function fetchReleases(queryObject = {}, options = {}) {
return curateReleases(releases);
}
/**
 * Look up releases whose `entry_id` is one of the given entry IDs.
 *
 * @param {Array} entryIds - values matched against the `entry_id` column.
 * @param {Object} [queryObject={}] - extra criteria handed to `whereOr` for the `releases` table.
 * @param {Object} [options={}] - forwarded to the `commonQuery` modifier.
 * @returns {Promise<*>} whatever `curateReleases` produces for the matching rows.
 */
async function fetchReleasesByEntryIds(entryIds, queryObject = {}, options = {}) {
    // Named separately so the query chain below reads as a plain list of steps.
    const applyCriteria = builder => whereOr(queryObject, 'releases', builder);

    const query = knex('releases')
        .modify(commonQuery, options)
        .whereIn('entry_id', entryIds)
        .andWhere(applyCriteria);

    const rows = await query;

    return curateReleases(rows);
}
async function fetchSiteReleases(queryObject, options = {}) {
const releases = await knex('releases')
.modify(commonQuery, options)
@@ -192,41 +183,52 @@ async function fetchTagReleases(queryObject, options = {}) {
return curateReleases(releases);
}
/**
 * Store everything that hangs off a release row.
 *
 * The media directory is created first; actor and tag association and the
 * photo, poster and trailer storage are then started together and awaited
 * as a group.
 *
 * @param {Object} release - the scraped release, passed through to every step.
 * @param {*} releaseId - identifier of the release, passed through to every step.
 * @returns {Promise<void>}
 */
async function storeReleaseAssets(release, releaseId) {
    await createMediaDirectory(release, releaseId);

    // Every step takes the same (release, releaseId) pair, so start them from one list.
    const assetTasks = [
        associateActors,
        associateTags,
        storePhotos,
        storePoster,
        storeTrailer,
    ].map(task => task(release, releaseId));

    await Promise.all(assetTasks);
}
/**
 * Store a scraped release, or refresh it when it is already known.
 *
 * - A release already present (matched on `entry_id`) is left untouched
 *   unless `argv.redownload` is set.
 * - With `argv.redownload`, the existing row is updated with the freshly
 *   curated data and its assets are stored again.
 * - An unknown release is inserted and its assets are stored.
 *
 * @param {Object} release - scraped release; `entryId`, `title` and `site.name` are read here.
 * @returns {Promise<*>} the id of the release row, or null when the insert yields no row.
 */
async function storeRelease(release) {
    const existingRelease = await knex('releases').where('entry_id', release.entryId).first();
    const curatedRelease = curateScrapedRelease(release);

    if (existingRelease && !argv.redownload) {
        return existingRelease.id;
    }

    if (existingRelease) {
        // Target the row by its own id: existingRelease.id is a row id, not an
        // entry_id, so filtering `entry_id` on it would miss the intended row.
        await knex('releases')
            .where('id', existingRelease.id)
            .update({
                ...existingRelease,
                ...curatedRelease,
            });

        await storeReleaseAssets(release, existingRelease.id);
        console.log(`Updated release "${release.title}" (${existingRelease.id}, ${release.site.name})`);

        // Return the id, like every other branch, so callers get one consistent type.
        return existingRelease.id;
    }

    const [releaseEntry] = await knex('releases')
        .insert(curatedRelease)
        .returning('*');

    if (!releaseEntry) {
        // Nothing came back from the insert; report it instead of failing on `releaseEntry.id`.
        console.error(`Unable to save scene to database, possible collision: "${release.title}" (${release.site.name})`);
        return null;
    }

    await storeReleaseAssets(release, releaseEntry.id);
    console.log(`Stored release "${release.title}" (${releaseEntry.id}, ${release.site.name})`);

    return releaseEntry.id;
}
async function storeReleases(releases) {
const existingReleases = await fetchReleasesByEntryIds(releases.map(release => release.entryId));
console.log(existingReleases);
return Promise.map(releases, async (release) => {
try {
const releaseId = await storeRelease(release);