Attaching channel site and studio to stored releases.
This commit is contained in:
parent
0f09fd53eb
commit
e4b269956e
10
src/deep.js
10
src/deep.js
|
@ -1,5 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const Promise = require('bluebird');
|
||||||
|
|
||||||
const argv = require('./argv');
|
const argv = require('./argv');
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
|
@ -120,8 +122,6 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
||||||
mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags);
|
mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(mergedRelease);
|
|
||||||
|
|
||||||
return mergedRelease;
|
return mergedRelease;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
|
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
|
||||||
|
@ -130,7 +130,11 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Deep-scrape a list of base releases with a bounded number of scrapes
 * in flight at the same time.
 *
 * @param {Array<Object>} baseReleases - releases to pass to `scrapeRelease`, one call each
 * @param {*} sites - forwarded unchanged as the second argument of `scrapeRelease`
 * @param {number} [concurrency=10] - maximum number of simultaneous `scrapeRelease` calls
 * @returns {Promise<Array>} the `scrapeRelease` results, in the order of `baseReleases`
 */
async function scrapeReleases(baseReleases, sites, concurrency = 10) {
  // `Promise` is bluebird here; its `map` caps the number of pending mapper
  // calls, unlike `Promise.all`, which would start every scrape at once.
  return Promise.map(
    baseReleases,
    baseRelease => scrapeRelease(baseRelease, sites),
    { concurrency },
  );
}
|
||||||
|
|
||||||
async function fetchReleases(baseReleasesOrUrls) {
|
async function fetchReleases(baseReleasesOrUrls) {
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
|
|
||||||
|
const argv = require('./argv');
|
||||||
|
const logger = require('./logger');
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const slugify = require('./utils/slugify');
|
const slugify = require('./utils/slugify');
|
||||||
|
|
||||||
|
@ -39,10 +41,56 @@ function curateReleaseEntry(release, batchId, existingRelease) {
|
||||||
return curatedRelease;
|
return curatedRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Replace a missing or fallback site on each release by the site whose slug
 * matches the release's channel.
 *
 * Releases that already carry a non-fallback site are returned untouched.
 * Releases whose channel cannot be matched to a row in the `sites` table
 * (including releases without any channel) are logged and dropped.
 *
 * @param {Array<Object>} releases - releases with optional `site` and `channel`
 * @returns {Promise<Array<Object>>} releases that have a resolved site
 */
async function attachChannelSites(releases) {
  const hasOwnSite = release => Boolean(release.site) && !release.site.isFallback;

  // the channel may be a plain slug or an object carrying a `slug` property
  const getChannelSlug = release => (release.channel && release.channel.slug) || release.channel || null;

  const channelSlugs = [...new Set(
    releases
      .filter(release => !hasOwnSite(release))
      .map(getChannelSlug)
      .filter(Boolean),
  )];

  const channelSites = await knex('sites').whereIn('slug', channelSlugs);

  // a Map avoids false hits on inherited keys such as 'constructor'
  const channelSitesBySlug = new Map(channelSites.map(site => [site.slug, site]));

  return releases
    .map((release) => {
      if (hasOwnSite(release)) {
        return release;
      }

      const channelSlug = getChannelSlug(release);

      if (channelSlug && channelSitesBySlug.has(channelSlug)) {
        return {
          ...release,
          site: channelSitesBySlug.get(channelSlug),
        };
      }

      // also reached when the release has no channel at all; must not throw
      logger.error(`Unable to match channel '${channelSlug}' from generic URL ${release.url}`);

      return null;
    })
    .filter(Boolean);
}
|
||||||
|
|
||||||
|
/**
 * Replace the studio slug on each release by the matching row from the
 * `studios` table.
 *
 * Releases without a studio are returned untouched. Releases whose studio
 * slug has no matching row keep their original `studio` value and a warning
 * is logged.
 *
 * @param {Array<Object>} releases - releases with an optional `studio` slug
 * @returns {Promise<Array<Object>>} one release per input release, same order
 */
async function attachStudios(releases) {
  const studioSlugs = [...new Set(releases.map(release => release.studio).filter(Boolean))];

  const studios = await knex('studios').whereIn('slug', studioSlugs);

  // a Map avoids false hits on inherited keys such as 'constructor'
  const studioBySlug = new Map(studios.map(studio => [studio.slug, studio]));

  return releases.map((release) => {
    if (!release.studio) {
      return release;
    }

    if (studioBySlug.has(release.studio)) {
      return {
        ...release,
        // attach the fetched record; re-assigning the slug would discard the lookup
        studio: studioBySlug.get(release.studio),
      };
    }

    logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);

    return release;
  });
}
|
||||||
|
|
||||||
async function extractUniqueReleases(releases) {
|
async function extractUniqueReleases(releases) {
|
||||||
|
@ -58,12 +106,18 @@ async function extractUniqueReleases(releases) {
|
||||||
/**
 * Store scraped releases under a newly created batch.
 *
 * Creates a batch row, attaches channel sites and studios, removes releases
 * that are not unique (unless `argv.redownload` is set), curates the
 * remaining releases into table entries and inserts them.
 *
 * @param {Array<Object>} releases - scraped releases to store
 * @returns {Promise<Array<Object>>} the rows returned by the `releases`
 *   insert, or an empty array when nothing was left to insert
 */
async function storeReleases(releases) {
  const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

  const releasesWithSites = await attachChannelSites(releases);
  const releasesWithStudios = await attachStudios(releasesWithSites);

  // uniqueness is site ID + entry ID, filter uniques after adding sites
  const uniqueReleases = argv.redownload
    ? releasesWithStudios
    : await extractUniqueReleases(releasesWithStudios);

  // every unique release is stored; the former two-item cap was a debugging aid
  const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

  if (curatedReleaseEntries.length === 0) {
    // nothing to insert, skip the query rather than sending an empty insert
    return [];
  }

  const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');

  return storedReleases;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
Loading…
Reference in New Issue