forked from DebaucheryLibrarian/traxxx

Split up profile scrape runner. Fixed wrong search document date key. Added search update CLI.

parent 791a6c1c72
commit d97b1ab894
@@ -376,6 +376,48 @@ async function mergeProfiles(profiles, actor) {
     return mergedProfile;
 }
 
+async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
+    return Promise.map(sources, async (source) => {
+        //  const [scraperSlug, scraper] = source;
+        const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
+
+        try {
+            return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
+                if (!scraper) {
+                    logger.warn(`No profile scraper available for ${scraperSlug}`);
+                    throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
+                }
+
+                logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
+
+                const site = sitesBySlug[scraperSlug] || null;
+                const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
+
+                if (profile) {
+                    logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
+
+                    return {
+                        ...profile,
+                        name: actorName,
+                        scraper: scraperSlug,
+                        site,
+                    };
+                }
+
+                logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
+                throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
+            }), Promise.reject(new Error()));
+        } catch (error) {
+            if (error.warn !== false) {
+                logger.warn(`Error in scraper ${source}: ${error.message}`);
+                // logger.error(error.stack);
+            }
+        }
+
+        return null;
+    });
+}
+
 async function scrapeActors(actorNames) {
     return Promise.map(actorNames || argv.actors, async (actorName) => {
         try {
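The reduce in the new scrapeProfiles implements a sequential fallback: the per-source scraper list is folded into a promise chain seeded with a rejection, so each .catch() attempts the next scraper and the chain settles on the first profile found. A minimal standalone sketch of the same pattern; firstSuccessful and attempt are illustrative names, not identifiers from this codebase:

// Sketch of the fallback pattern: seed with a rejected promise, then let each
// .catch() try the next source. Resolves with the first successful result and
// rejects only if every source fails (or the list is empty).
async function firstSuccessful(sources, attempt) {
    return sources.reduce(
        (chain, source) => chain.catch(() => attempt(source)),
        Promise.reject(new Error('no sources attempted')),
    );
}

// firstSuccessful(['a', 'b'], fetchFromSource) tries 'a' first and only
// consults 'b' if fetching from 'a' throws or rejects.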
@@ -399,46 +441,7 @@ async function scrapeActors(actorNames) {
             const sites = await curateSites(siteEntries, true);
             const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
 
-            const profiles = await Promise.map(finalSources, async (source) => {
-                //  const [scraperSlug, scraper] = source;
-                const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
-
-                try {
-                    return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
-                        if (!scraper) {
-                            logger.warn(`No profile profile scraper available for ${scraperSlug}`);
-                            throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
-                        }
-
-                        logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
-
-                        const site = sitesBySlug[scraperSlug] || null;
-                        const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
-
-                        if (profile) {
-                            logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
-
-                            return {
-                                ...profile,
-                                name: actorName,
-                                scraper: scraperSlug,
-                                site,
-                            };
-                        }
-
-                        logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
-                        throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
-                    }), Promise.reject(new Error()));
-                } catch (error) {
-                    if (error.warn !== false) {
-                        logger.warn(`Error in scraper ${source}: ${error.message}`);
-                        // logger.error(error.stack);
-                    }
-                }
-
-                return null;
-            });
-
+            const profiles = await scrapeProfiles(finalSources, actorName, actorEntry, sitesBySlug);
             const profile = await mergeProfiles(profiles, actorEntry);
 
             if (profile === null) {
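Note that Promise.map, used by both the removed inline block and the new scrapeProfiles, is not part of the JavaScript standard library; in this codebase it presumably comes from Bluebird, which also accepts a concurrency option. A rough built-in equivalent, with scrapeSource standing in for the per-source mapper:

// Bluebird: Promise.map(sources, scrapeSource, { concurrency: 5 });
// Closest built-in equivalent, without a concurrency cap:
const profiles = await Promise.all(sources.map(source => scrapeSource(source)));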
@@ -6,7 +6,7 @@ const initServer = require('./web/server');
 
 const scrapeSites = require('./scrape-sites');
 const { scrapeScenes, scrapeMovies, deepFetchReleases } = require('./scrape-releases');
-const { storeReleases } = require('./releases');
+const { storeReleases, updateReleasesSearch } = require('./releases');
 const { scrapeActors, scrapeBasicActors } = require('./actors');
 
 if (process.env.NODE_ENV === 'development') {
@@ -41,6 +41,10 @@ async function init() {
         await scrapeBasicActors();
     }
 
+    if (argv.updateSearch) {
+        await updateReleasesSearch();
+    }
+
     if (argv.server) {
         await initServer();
         return;
@@ -171,8 +171,8 @@ const { argv } = yargs
         type: 'boolean',
         default: process.env.NODE_ENV === 'development',
     })
-    .option('dummy', {
-        describe: 'Generate dummy data during seed',
+    .option('update-search', {
+        describe: 'Update search documents for all releases.',
         type: 'boolean',
         default: false,
     });
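yargs exposes dash-separated flags under camel-cased keys by default, which is why the option is declared as update-search here but read as argv.updateSearch in init(). A minimal sketch of the wiring, assuming only the yargs package:

// `node app --update-search` sets argv.updateSearch === true, because yargs
// camel-cases dash-separated option names by default.
const { argv } = require('yargs')
    .option('update-search', {
        describe: 'Update search documents for all releases.',
        type: 'boolean',
        default: false,
    });

if (argv.updateSearch) {
    // e.g. await updateReleasesSearch(); as wired up in init() above
}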
@@ -367,6 +367,8 @@ async function storeReleaseAssets(releases) {
 }
 
 async function updateReleasesSearch(releaseIds) {
+    logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all'} releases`);
+
     const documents = await knex.raw(`
         SELECT
             releases.id AS release_id,
@@ -378,7 +380,7 @@ async function updateReleasesSearch(releaseIds) {
                 networks.name || ' ' ||
                 networks.slug || ' ' ||
                 COALESCE(releases.shoot_id, '') || ' ' ||
-                COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMD'), '') || ' ' ||
+                COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
                 STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
                 STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
                 STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
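The one-character FMD-to-FMDD change is the "wrong search document date key" fix from the commit message: in PostgreSQL's TO_CHAR, D is the day of the week (Sunday = 1) while DD is the day of the month, and the FM prefix suppresses zero-padding, so the old pattern indexed releases by weekday number instead of day of month. A quick illustration, assuming a configured knex instance inside an async context:

// 2020-01-05 is a Sunday: FMD yields the weekday number, FMDD the day of month.
const { rows } = await knex.raw(`
    SELECT
        TO_CHAR(DATE '2020-01-05', 'FMD')  AS day_of_week,  -- '1'
        TO_CHAR(DATE '2020-01-05', 'FMDD') AS day_of_month  -- '5'
`);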
@@ -391,9 +393,9 @@ async function updateReleasesSearch(releaseIds) {
         LEFT JOIN actors ON local_actors.actor_id = actors.id
         LEFT JOIN tags ON local_tags.tag_id = tags.id
         LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
-        WHERE releases.id = ANY(?)
+        ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
         GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
-    `, [releaseIds]);
+    `, releaseIds && [releaseIds]);
 
     if (documents.rows?.length > 0) {
         const query = knex('releases_search').insert(documents.rows).toString();
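This hunk makes both the WHERE clause and its bindings conditional: with no releaseIds, the template literal drops the clause and releaseIds && [releaseIds] short-circuits to undefined, so knex.raw runs the query unfiltered with no bindings. The same pattern in isolation; fetchReleaseIds is an illustrative name:

// When ids is undefined the WHERE clause is omitted and `ids && [ids]`
// evaluates to undefined, so knex.raw receives no bindings array.
async function fetchReleaseIds(knex, ids) {
    return knex.raw(`
        SELECT releases.id FROM releases
        ${ids ? 'WHERE releases.id = ANY(?)' : ''}
    `, ids && [ids]);
}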
@@ -507,4 +509,5 @@ module.exports = {
     fetchTagReleases,
     storeRelease,
     storeReleases,
+    updateReleasesSearch,
 };

@@ -11,6 +11,7 @@ const PgConnectionFilterPlugin = require('postgraphile-plugin-connection-filter');
 const PgSimplifyInflectorPlugin = require('@graphile-contrib/pg-simplify-inflector');
 const PgOrderByRelatedPlugin = require('@graphile-contrib/pg-order-by-related');
 
+const logger = require('../logger');
 const { ActorPlugins, SitePlugins, ReleasePlugins } = require('./plugins/plugins');
 
 const {
@@ -100,7 +101,7 @@ function initServer() {
     const server = app.listen(config.web.port, config.web.host, () => {
         const { address, port } = server.address();
 
-        console.log(`Web server listening on ${address}:${port}`);
+        logger.info(`Web server listening on ${address}:${port}`);
     });
 }
 