Attaching channel site and studio to stored releases.
This commit is contained in:
		
							parent
							
								
									0f09fd53eb
								
							
						
					
					
						commit
						e4b269956e
					
				
							
								
								
									
										10
									
								
								src/deep.js
								
								
								
								
							
							
						
						
									
										10
									
								
								src/deep.js
								
								
								
								
							|  | @ -1,5 +1,7 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const Promise = require('bluebird'); | ||||
| 
 | ||||
| const argv = require('./argv'); | ||||
| const logger = require('./logger')(__filename); | ||||
| const knex = require('./knex'); | ||||
|  | @ -120,8 +122,6 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') { | |||
|             mergedRelease.tags = baseRelease.tags.concat(scrapedRelease.tags); | ||||
|         } | ||||
| 
 | ||||
|         console.log(mergedRelease); | ||||
| 
 | ||||
|         return mergedRelease; | ||||
|     } catch (error) { | ||||
|         logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`); | ||||
|  | @ -130,7 +130,11 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') { | |||
| } | ||||
| 
 | ||||
/**
 * Deep-scrape a list of base releases with bounded concurrency.
 *
 * `Promise` is the bluebird implementation required at the top of this
 * module; its `Promise.map` limits how many mapper calls are in flight at
 * once, unlike `Promise.all` over a mapped array, which starts all of them.
 *
 * @param {Array<Object>} baseReleases - releases to pass to scrapeRelease
 * @param {*} sites - forwarded unchanged to scrapeRelease
 * @returns {Promise<Array>} the scrapeRelease results, in input order
 */
async function scrapeReleases(baseReleases, sites) {
    return Promise.map(
        baseReleases,
        baseRelease => scrapeRelease(baseRelease, sites),
        { concurrency: 10 },
    );
}
| 
 | ||||
| async function fetchReleases(baseReleasesOrUrls) { | ||||
|  |  | |||
|  | @ -2,6 +2,8 @@ | |||
| 
 | ||||
| const config = require('config'); | ||||
| 
 | ||||
| const argv = require('./argv'); | ||||
| const logger = require('./logger'); | ||||
| const knex = require('./knex'); | ||||
| const slugify = require('./utils/slugify'); | ||||
| 
 | ||||
|  | @ -39,10 +41,56 @@ function curateReleaseEntry(release, batchId, existingRelease) { | |||
|     return curatedRelease; | ||||
| } | ||||
| 
 | ||||
| async function attachSite(releases) { | ||||
|     const releasesWithoutSite = releases.filter(release => !release.site || release.site.isFallback); | ||||
/**
 * Attach a site row to releases that only carry a channel slug.
 *
 * - A release with a `site` that is not flagged `isFallback` is returned as-is.
 * - Otherwise, if `release.channel` matches the `slug` of a row in the `sites`
 *   table, a copy of the release is returned with `site` set to that row.
 * - Any other release is logged as an error and dropped from the result.
 *
 * @param {Array<Object>} releases - releases to resolve
 * @returns {Promise<Array<Object>>} releases that have a resolved site
 */
async function attachChannelSites(releases) {
    const needsChannelSite = release => !release.site || release.site.isFallback;

    // look every channel slug up once, and skip the query when nothing needs resolving
    const channelSlugs = [...new Set(
        releases
            .filter(release => release.channel && needsChannelSite(release))
            .map(release => release.channel),
    )];

    const channelSites = channelSlugs.length > 0
        ? await knex('sites').whereIn('slug', channelSlugs)
        : [];

    // a Map avoids accidental hits on Object.prototype keys such as 'constructor'
    const channelSitesBySlug = new Map(channelSites.map(site => [site.slug, site]));

    return releases
        .map((release) => {
            if (!needsChannelSite(release)) {
                return release;
            }

            if (release.channel && channelSitesBySlug.has(release.channel)) {
                return {
                    ...release,
                    site: channelSitesBySlug.get(release.channel),
                };
            }

            // the channel may be missing entirely, so guard before reading .slug
            const channelLabel = (release.channel && release.channel.slug) || release.channel;

            logger.error(`Unable to match channel '${channelLabel}' from generic URL ${release.url}`);

            return null;
        })
        .filter(Boolean);
}
| 
 | ||||
/**
 * Replace each release's studio slug with the matching row from `studios`.
 *
 * Releases whose `studio` slug has no matching row are returned unchanged
 * and a warning is logged; releases without a `studio` are returned
 * unchanged silently. No release is dropped.
 *
 * @param {Array<Object>} releases - releases whose `studio`, if set, is a slug
 * @returns {Promise<Array<Object>>} releases, with `studio` set to the studio row where matched
 */
async function attachStudios(releases) {
    const studioSlugs = [...new Set(releases.map(release => release.studio).filter(Boolean))];

    const studios = studioSlugs.length > 0
        ? await knex('studios').whereIn('slug', studioSlugs)
        : [];

    // a Map avoids accidental hits on Object.prototype keys such as 'constructor'
    const studioBySlug = new Map(studios.map(studio => [studio.slug, studio]));

    return releases.map((release) => {
        if (!release.studio) {
            return release;
        }

        if (studioBySlug.has(release.studio)) {
            return {
                ...release,
                // attach the fetched row; copying release.studio back onto itself was a no-op
                studio: studioBySlug.get(release.studio),
            };
        }

        logger.warn(`Unable to match studio '${release.studio}' for ${release.url}`);

        return release;
    });
}
| 
 | ||||
| async function extractUniqueReleases(releases) { | ||||
|  | @ -58,12 +106,18 @@ async function extractUniqueReleases(releases) { | |||
/**
 * Store scraped releases under a newly created batch.
 *
 * Order matters: sites and studios are attached first, because uniqueness
 * is site ID + entry ID and can only be checked once the site is known.
 * When `argv.redownload` is set the uniqueness filter is skipped.
 *
 * @param {Array<Object>} releases - scraped releases to store
 * @returns {Promise<void>}
 */
async function storeReleases(releases) {
    const [batchId] = await knex('batches').insert({ comment: null }).returning('id');

    const releasesWithSites = await attachChannelSites(releases);
    const releasesWithStudios = await attachStudios(releasesWithSites);

    // uniqueness is site ID + entry ID, filter uniques after adding sites
    const uniqueReleases = argv.redownload
        ? releasesWithStudios
        : await extractUniqueReleases(releasesWithStudios);

    // NOTE(review): slice(0, 2) caps every batch at two releases; looks like a debugging limit, confirm before release
    const curatedReleaseEntries = uniqueReleases.slice(0, 2).map(release => curateReleaseEntry(release, batchId));

    // insert once and read the stored rows back; a second bare insert would duplicate them
    const storedReleases = await knex('releases').insert(curatedReleaseEntries).returning('*');

    // NOTE(review): debug output; presumably should go through logger or be returned to the caller
    console.log(storedReleases);
}
| 
 | ||||
| module.exports = { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue