Attaching channel site and studio to stored releases.
This commit is contained in:
		
							parent
							
								
									0f09fd53eb
								
							
						
					
					
						commit
						e4b269956e
					
				
							
								
								
									
										10
									
								
								src/deep.js
								
								
								
								
							
							
						
						
									
										10
									
								
								src/deep.js
								
								
								
								
							| 
						 | 
				
			
			@ -1,5 +1,7 @@
 | 
			
		|||
'use strict';
 | 
			
		||||
 | 
			
		||||
const Promise = require('bluebird');
 | 
			
		||||
 | 
			
		||||
const argv = require('./argv');
 | 
			
		||||
const logger = require('./logger')(__filename);
 | 
			
		||||
const knex = require('./knex');
 | 
			
		||||
| 
						 | 
				
			
			@ -120,8 +122,6 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 | 
			
		|||
            mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        console.log(mergedRelease);
 | 
			
		||||
 | 
			
		||||
        return mergedRelease;
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
        logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
 | 
			
		||||
| 
						 | 
				
			
			@ -130,7 +130,11 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Deep-scrape a list of base releases with a bounded number of scrapes in flight.
 *
 * Every base release is handed to scrapeRelease together with `sites`.
 *
 * @param {Array<Object>} baseReleases - Releases to scrape in depth.
 * @param {*} sites - Forwarded unchanged to scrapeRelease for every release.
 * @returns {Promise<Array>} Resolves with one scrapeRelease result per base release.
 */
async function scrapeReleases(baseReleases, sites) {
    // Promise is bluebird in this file, so Promise.map is available. It caps the
    // number of concurrent scrapes at ten, whereas Promise.all over a plain map()
    // would start every scrape at once.
    return Promise.map(
        baseReleases,
        async baseRelease => scrapeRelease(baseRelease, sites),
        { concurrency: 10 },
    );
}
 | 
			
		||||
 | 
			
		||||
async function fetchReleases(baseReleasesOrUrls) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,6 +2,8 @@
 | 
			
		|||
 | 
			
		||||
const config = require('config');
 | 
			
		||||
 | 
			
		||||
const argv = require('./argv');
 | 
			
		||||
const logger = require('./logger');
 | 
			
		||||
const knex = require('./knex');
 | 
			
		||||
const slugify = require('./utils/slugify');
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -39,10 +41,56 @@ function curateReleaseEntry(release, batchId, existingRelease) {
 | 
			
		|||
    return curatedRelease;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function attachSite(releases) {
 | 
			
		||||
    const releasesWithoutSite = releases.filter(release => !release.site || release.site.isFallback);
 | 
			
		||||
/**
 * Attach the site matching each release's channel slug.
 *
 * Releases that already carry a non-fallback site are passed through untouched.
 * For the others, `release.channel` is looked up as a slug in the `sites` table
 * and the matching row replaces `release.site`. Releases whose channel cannot
 * be matched are logged and dropped from the result.
 *
 * @param {Array<Object>} releases - Releases, optionally with `site` and `channel`.
 * @returns {Promise<Array<Object>>} The releases that have a usable site.
 */
async function attachChannelSites(releases) {
    const hasOwnSite = release => Boolean(release.site) && !release.site.isFallback;

    const channelSlugs = releases
        .filter(release => release.channel && !hasOwnSite(release))
        .map(release => release.channel);

    const channelSites = await knex('sites').whereIn('slug', channelSlugs);

    // A Map avoids false hits on inherited object keys (e.g. a channel named
    // 'constructor') and is built in a single pass.
    const channelSitesBySlug = new Map(channelSites.map(site => [site.slug, site]));

    return releases
        .map((release) => {
            if (hasOwnSite(release)) {
                return release;
            }

            const channelSite = channelSitesBySlug.get(release.channel);

            if (release.channel && channelSite) {
                return {
                    ...release,
                    site: channelSite,
                };
            }

            // release.channel may be missing altogether; guard the property access so
            // an unmatched release is reported and dropped instead of throwing.
            const channel = (release.channel && release.channel.slug) || release.channel;

            logger.error(`Unable to match channel '${channel}' from generic URL ${release.url}`);

            return null;
        })
        .filter(Boolean);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Replace each release's studio slug with the matching row from the `studios` table.
 *
 * Releases without a studio are returned as-is. Releases whose studio slug has
 * no matching row are also returned unchanged (still carrying the slug), after
 * a warning is logged.
 *
 * @param {Array<Object>} releases - Releases, optionally with a `studio` slug.
 * @returns {Promise<Array<Object>>} One entry per input release, in the same order.
 */
async function attachStudios(releases) {
    // Deduplicate so the whereIn list does not grow with repeated studios.
    const studioSlugs = [...new Set(releases.map(release => release.studio).filter(Boolean))];

    const studios = await knex('studios').whereIn('slug', studioSlugs);

    // A Map avoids false hits on inherited object keys and is built in a single pass.
    const studioBySlug = new Map(studios.map(studio => [studio.slug, studio]));

    return releases.map((release) => {
        if (!release.studio) {
            return release;
        }

        const studio = studioBySlug.get(release.studio);

        if (studio) {
            // Attach the fetched record; re-assigning release.studio here would
            // leave the release exactly as it came in.
            return {
                ...release,
                studio,
            };
        }

        logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);

        return release;
    });
}
 | 
			
		||||
 | 
			
		||||
async function extractUniqueReleases(releases) {
 | 
			
		||||
| 
						 | 
				
			
			@ -58,12 +106,18 @@ async function extractUniqueReleases(releases) {
 | 
			
		|||
/**
 * Store scraped releases under a freshly created batch.
 *
 * Steps: insert a batch row, attach channel sites and studios, filter out
 * releases that extractUniqueReleases rejects (skipped when `argv.redownload`
 * is set), curate each release with curateReleaseEntry and insert the entries
 * into the `releases` table.
 *
 * @param {Array<Object>} releases - Scraped releases to store.
 * @returns {Promise<Array<Object>>} The rows returned by the insert; empty when
 *   there was nothing to store.
 */
async function storeReleases(releases) {
    const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

    const releasesWithSites = await attachChannelSites(releases);
    const releasesWithStudios = await attachStudios(releasesWithSites);

    // uniqueness is site ID + entry ID, filter uniques after adding sites
    const uniqueReleases = argv.redownload
        ? releasesWithStudios
        : await extractUniqueReleases(releasesWithStudios);

    // Every unique release is stored; a leftover debugging limit used to cut
    // this list down to its first two entries.
    const curatedReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));

    if (curatedReleaseEntries.length === 0) {
        return [];
    }

    const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');

    return storedReleases;
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue