Refactoring deep scrape. Added tag posters.

This commit is contained in:
2020-03-16 04:10:52 +01:00
parent c8ebe7892a
commit 0f09fd53eb
31 changed files with 851 additions and 589 deletions

71
src/store-releases.js Normal file
View File

@@ -0,0 +1,71 @@
'use strict';
const config = require('config');
const knex = require('./knex');
const slugify = require('./utils/slugify');
/**
 * Convert a scraped release into the row object inserted into the `releases` table.
 *
 * @param {Object} release - Scraped release; `release.site.id` must be present.
 * @param {*} batchId - ID of the batch this run belongs to; always written to `updated_batch_id`.
 * @param {*} [existingRelease] - When truthy the release is treated as already stored,
 *   and `created_batch_id` is left out of the result.
 * @returns {Object} Row object with snake_case keys.
 */
function curateReleaseEntry(release, batchId, existingRelease) {
	const slugOptions = {
		encode: true,
		limit: config.titleSlugLength,
	};

	const titleSlug = slugify(release.title, '-', slugOptions);

	// director and rating (likes, dislikes, stars) are not written to the row yet.
	return {
		title: release.title,
		entry_id: release.entryId || null,
		site_id: release.site.id,
		shoot_id: release.shootId || null,
		studio_id: release.studio?.id || null,
		url: release.url,
		date: release.date,
		slug: titleSlug,
		description: release.description,
		duration: release.duration,
		type: release.type,
		// anything other than a real boolean counts as a shallow scrape
		deep: release.deep === true,
		deep_url: release.deepUrl,
		updated_batch_id: batchId,
		// only releases that are new get a creation batch
		...(existingRelease ? {} : { created_batch_id: batchId }),
	};
}
/**
 * Unfinished placeholder for attaching site data to releases.
 *
 * At present it only selects the releases that have no site, or whose site is
 * flagged `isFallback`. The selection is not used and nothing is returned.
 *
 * @param {Object[]} releases - Scraped releases.
 * @returns {Promise<undefined>}
 */
async function attachSite(releases) {
	// TODO: resolve a site for these releases and return the completed list.
	// eslint-disable-next-line no-unused-vars
	const releasesWithoutSite = releases.filter(({ site }) => !(site && !site.isFallback));
}
/**
 * Drop releases that are already stored, as well as repeats within the given list.
 *
 * A release is identified by the combination of its site ID and entry ID. Of
 * several releases sharing that combination only the first is kept. Releases
 * without an entry ID cannot be identified, so they are never looked up and
 * are always passed through.
 *
 * @param {Object[]} releases - Scraped releases; those with an `entryId` must also have `site.id`.
 * @returns {Promise<Object[]>} The releases not yet present in the `releases` table, in input order.
 */
async function extractUniqueReleases(releases) {
	const getReleaseKey = release => `${release.site.id}_${release.entryId}`;
	const seenKeys = new Set();

	// Remove repeats within the batch itself; the stored-release lookup below cannot catch those.
	const batchUniqueReleases = releases.filter((release) => {
		if (!release.entryId) {
			return true;
		}

		const releaseKey = getReleaseKey(release);

		if (seenKeys.has(releaseKey)) {
			return false;
		}

		seenKeys.add(releaseKey);
		return true;
	});

	// Nothing identifiable to look up, so skip the query entirely.
	if (seenKeys.size === 0) {
		return batchUniqueReleases;
	}

	const identifiableReleases = batchUniqueReleases.filter(release => release.entryId);

	const duplicateReleaseEntries = await knex('releases')
		.whereIn(['entry_id', 'site_id'], identifiableReleases.map(release => [release.entryId, release.site.id]));

	const duplicateReleaseEntryKeys = new Set(duplicateReleaseEntries.map(releaseEntry => `${releaseEntry.site_id}_${releaseEntry.entry_id}`));

	return batchUniqueReleases.filter(release => !release.entryId || !duplicateReleaseEntryKeys.has(getReleaseKey(release)));
}
/**
 * Store scraped releases that are not in the database yet.
 *
 * Registers a new batch, filters out releases that already exist, and inserts
 * the remaining ones into the `releases` table tagged with the new batch ID.
 *
 * @param {Object[]} releases - Scraped releases.
 * @returns {Promise<undefined>} Resolves once the releases have been inserted.
 */
async function storeReleases(releases) {
	const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

	const uniqueReleases = await extractUniqueReleases(releases);

	// attachSite() does not return anything yet, so its result is not bound.
	await attachSite(releases);

	// Every release was a duplicate (or none were given): do not issue an empty insert.
	if (uniqueReleases.length === 0) {
		return;
	}

	// Store every unique release, not only the first two.
	const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

	await knex('releases').insert(curatedReleaseEntries);
}
// Public interface of this module.
module.exports = { storeReleases };