diff --git a/src/actors.js b/src/actors.js
index 4b1fc7b7..e13c805b 100644
--- a/src/actors.js
+++ b/src/actors.js
@@ -376,6 +376,61 @@ async function mergeProfiles(profiles, actor) {
     return mergedProfile;
 }
 
+/**
+ * Scrape an actor profile from each of the given sources.
+ *
+ * A source is either a single scraper slug or an array of slugs. Within an array the
+ * scrapers are tried in order, moving on only when the previous one failed or found nothing.
+ *
+ * @param {Array<string|string[]>} sources - Scraper slugs, or groups of fallback slugs.
+ * @param {string} actorName - Name used in log messages and set on the returned profile.
+ * @param {Object} [actorEntry] - When truthy, its name is passed to the scraper instead of actorName.
+ * @param {Object} sitesBySlug - Sites and networks keyed by slug; a missing slug yields a null site.
+ * @returns {Promise<Array<Object|null>>} One entry per source: the profile, or null if none was found.
+ */
+async function scrapeProfiles(sources, actorName, actorEntry, sitesBySlug) {
+    return Promise.map(sources, async (source) => {
+        // normalize a single slug and a group of slugs to the same shape
+        const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
+
+        try {
+            // seed with a rejection so the first scraper runs; each next one only runs if the previous rejected
+            return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
+                if (!scraper) {
+                    logger.warn(`No profile scraper available for ${scraperSlug}`);
+                    throw new Error(`No profile scraper available for ${scraperSlug}`);
+                }
+
+                logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
+
+                const site = sitesBySlug[scraperSlug] || null;
+                const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
+
+                if (profile) {
+                    logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
+
+                    return {
+                        ...profile,
+                        name: actorName,
+                        scraper: scraperSlug,
+                        site,
+                    };
+                }
+
+                logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
+                throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
+            }), Promise.reject(new Error()));
+        } catch (error) {
+            // a plain miss is flagged warn: false and was already logged at verbose level
+            if (error.warn !== false) {
+                logger.warn(`Error in scraper ${source}: ${error.message}`);
+            }
+        }
+
+        return null;
+    });
+}
+
 async function scrapeActors(actorNames) {
     return Promise.map(actorNames || argv.actors, async (actorName) => {
         try {
@@ -399,46 +454,7 @@ async function scrapeActors(actorNames) {
             const sites = await curateSites(siteEntries, true);
             const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
 
-            const profiles = await Promise.map(finalSources, async (source) => {
-                // const [scraperSlug, scraper] = source;
-                const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
-
-                try {
-                    return await profileScrapers.reduce(async (outcome, { scraper, scraperSlug }) => outcome.catch(async () => {
-                        if (!scraper) {
-                            logger.warn(`No profile profile scraper available for ${scraperSlug}`);
-                            throw Object.assign(new Error(`No profile scraper available for ${scraperSlug}`));
-                        }
-
-                        logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
-
-                        const site = sitesBySlug[scraperSlug] || null;
-                        const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
-
-                        if (profile) {
-                            logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
-
-                            return {
-                                ...profile,
-                                name: actorName,
-                                scraper: scraperSlug,
-                                site,
-                            };
-                        }
-
-                        logger.verbose(`No profile for '${actorName}' available on ${scraperSlug}`);
-                        throw Object.assign(new Error(`Profile for ${actorName} not available on ${scraperSlug}`), { warn: false });
-                    }), Promise.reject(new Error()));
-                } catch (error) {
-                    if (error.warn !== false) {
-                        logger.warn(`Error in scraper ${source}: ${error.message}`);
-                        // logger.error(error.stack);
-                    }
-                }
-
-                return null;
-            });
-
+            const profiles = await scrapeProfiles(finalSources, actorName, actorEntry, sitesBySlug);
             const profile = await mergeProfiles(profiles, actorEntry);
 
             if (profile === null) {
diff --git a/src/app.js b/src/app.js
index 31b754eb..95824c24 100644
--- a/src/app.js
+++ b/src/app.js
@@ -6,7 +6,7 @@ const initServer = require('./web/server');
 
 const scrapeSites = require('./scrape-sites');
 const { scrapeScenes, scrapeMovies, deepFetchReleases } = require('./scrape-releases');
-const { storeReleases } = require('./releases');
+const { storeReleases, updateReleasesSearch } = require('./releases');
 const { scrapeActors, scrapeBasicActors } = require('./actors');
 
 if (process.env.NODE_ENV === 'development') {
@@ -41,6 +41,10 @@ async function init() {
         await scrapeBasicActors();
     }
 
+    if (argv.updateSearch) {
+        await updateReleasesSearch();
+    }
+
     if (argv.server) {
         await initServer();
         return;
diff --git a/src/argv.js b/src/argv.js
index 94578fb1..9abef0d1 100644
--- a/src/argv.js
+++ b/src/argv.js
@@ -171,8 +171,8 @@ const { argv } = yargs
         type: 'boolean',
         default: process.env.NODE_ENV === 'development',
     })
-    .option('dummy', {
-        describe: 'Generate dummy data during seed',
+    .option('update-search', {
+        describe: 'Update search documents for all releases.',
         type: 'boolean',
         default: false,
     });
diff --git a/src/releases.js b/src/releases.js
index d982ab67..5acd7980 100644
--- a/src/releases.js
+++ b/src/releases.js
@@ -367,6 +367,8 @@ async function storeReleaseAssets(releases) {
 }
 
 async function updateReleasesSearch(releaseIds) {
+    logger.info(`Updating search documents for ${releaseIds ? releaseIds.length : 'all'} releases`);
+
     const documents = await knex.raw(`
         SELECT
             releases.id AS release_id,
@@ -378,7 +380,7 @@ async function updateReleasesSearch(releaseIds) {
                 networks.name || ' ' ||
                 networks.slug || ' ' ||
                 COALESCE(releases.shoot_id, '') || ' ' ||
-                COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMD'), '') || ' ' ||
+                COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' ||
                 STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' ||
                 STRING_AGG(COALESCE(tags.name, ''), ' ') || ' ' ||
                 STRING_AGG(COALESCE(tags_aliases.name, ''), ' ')
@@ -391,9 +393,9 @@ async function updateReleasesSearch(releaseIds) {
         LEFT JOIN actors ON local_actors.actor_id = actors.id
         LEFT JOIN tags ON local_tags.tag_id = tags.id
         LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for
-        WHERE releases.id = ANY(?)
+        ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''}
         GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug;
-    `, [releaseIds]);
+    `, releaseIds && [releaseIds]);
 
     if (documents.rows?.length > 0) {
         const query = knex('releases_search').insert(documents.rows).toString();
@@ -507,4 +509,5 @@ module.exports = {
     fetchTagReleases,
     storeRelease,
     storeReleases,
+    updateReleasesSearch,
 };
diff --git a/src/web/server.js b/src/web/server.js
index 6c3eff0b..f0b067b3 100644
--- a/src/web/server.js
+++ b/src/web/server.js
@@ -11,6 +11,7 @@ const PgConnectionFilterPlugin = require('postgraphile-plugin-connection-filter'
 const PgSimplifyInflectorPlugin = require('@graphile-contrib/pg-simplify-inflector');
 const PgOrderByRelatedPlugin = require('@graphile-contrib/pg-order-by-related');
 
+const logger = require('../logger');
 const { ActorPlugins, SitePlugins, ReleasePlugins } = require('./plugins/plugins');
 
 const {
@@ -100,7 +101,7 @@ function initServer() {
     const server = app.listen(config.web.port, config.web.host, () => {
         const { address, port } = server.address();
 
-        console.log(`Web server listening on ${address}:${port}`);
+        logger.info(`Web server listening on ${address}:${port}`);
     });
 }
 