Use the common submodule for place resolution.

This commit is contained in:
DebaucheryLibrarian 2024-10-31 03:01:13 +01:00
parent fd39fa93d9
commit 1aa876acd9
5 changed files with 36 additions and 98 deletions

2
common

@ -1 +1 @@
Subproject commit c068c759b2bb62fad44c87226a780532b771479b Subproject commit 4316b69a43daeaa7ddadb428ccc6cb3eb10f555e

View File

@ -9,12 +9,14 @@ const DOMPurify = require('dompurify');
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const omit = require('object.omit'); const omit = require('object.omit');
const inquirer = require('inquirer'); const inquirer = require('inquirer');
const unprint = require('unprint');
const { window } = new JSDOM(''); const { window } = new JSDOM('');
const domPurify = DOMPurify(window); const domPurify = DOMPurify(window);
// const logger = require('./logger')(__filename); // const logger = require('./logger')(__filename);
const knex = require('./knex'); const knex = require('./knex');
const redis = require('./redis');
const scrapers = require('./scrapers/scrapers').actors; const scrapers = require('./scrapers/scrapers').actors;
const argv = require('./argv'); const argv = require('./argv');
@ -29,13 +31,24 @@ const { fetchEntitiesBySlug } = require('./entities');
const { deleteScenes } = require('./releases'); const { deleteScenes } = require('./releases');
const actorsCommon = import('../common/actors.mjs'); // eslint-disable-line import/extensions, import/no-relative-packages const actorsCommon = import('../common/actors.mjs'); // eslint-disable-line import/extensions, import/no-relative-packages
const geoCommon = import('../common/geo.mjs'); // eslint-disable-line import/extensions, import/no-relative-packages
const slugify = require('./utils/slugify'); const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize'); const capitalize = require('./utils/capitalize');
const resolvePlace = require('./utils/resolve-place');
const { resolveLayoutScraper } = require('./scrapers/resolve'); const { resolveLayoutScraper } = require('./scrapers/resolve');
const getRecursiveParameters = require('./utils/get-recursive-parameters'); const getRecursiveParameters = require('./utils/get-recursive-parameters');
// Dependencies bundled into a single object and handed to the helpers loaded from ../common
// (interpolateProfiles from actors.mjs and resolvePlace from geo.mjs), which receive them as an argument.
// NOTE(review): the `logger` require earlier in this file appears commented out in this view — confirm `logger` is in scope here.
const commonContext = {
argv,
knex,
redis,
logger,
moment,
slugify,
omit,
unprint,
};
const hairColors = { const hairColors = {
'jet-black': 'black', 'jet-black': 'black',
'red-head': 'red', 'red-head': 'red',
@ -364,13 +377,7 @@ async function interpolateProfiles(actorIdsOrNames, refreshView) {
const { interpolateProfiles: interpolateProfilesUtil } = await actorsCommon; const { interpolateProfiles: interpolateProfilesUtil } = await actorsCommon;
try { try {
await interpolateProfilesUtil(actorIdsOrNames, { await interpolateProfilesUtil(actorIdsOrNames, commonContext, { refreshView });
knex,
logger,
moment,
slugify,
omit,
}, { refreshView });
} catch (error) { } catch (error) {
console.log(error); console.log(error);
} }
@ -381,6 +388,8 @@ async function curateProfile(profile, actor) {
return null; return null;
} }
const { resolvePlace } = await geoCommon;
try { try {
const curatedProfile = { const curatedProfile = {
// id: profile.id, // id: profile.id,
@ -465,8 +474,8 @@ async function curateProfile(profile, actor) {
if (argv.resolvePlace) { if (argv.resolvePlace) {
const [placeOfBirth, placeOfResidence] = await Promise.all([ const [placeOfBirth, placeOfResidence] = await Promise.all([
resolvePlace(profile.birthPlace), resolvePlace(profile.birthPlace, commonContext),
resolvePlace(profile.residencePlace), resolvePlace(profile.residencePlace, commonContext),
]); ]);
curatedProfile.placeOfBirth = placeOfBirth; curatedProfile.placeOfBirth = placeOfBirth;

View File

@ -187,6 +187,7 @@ async function fetchLatest(channel, page = 1) {
return res.status; return res.status;
} }
async function fetchUpcoming(channel) { async function fetchUpcoming(channel) {
const url = `${channel.url}&by=date.desc&early-access=true`; const url = `${channel.url}&by=date.desc&early-access=true`;
const res = await unprint.get(url, { selectAll: '.video_container' }); const res = await unprint.get(url, { selectAll: '.video_container' });

View File

@ -3,14 +3,15 @@
const config = require('config'); const config = require('config');
const Promise = require('bluebird'); const Promise = require('bluebird');
const { decode } = require('html-entities'); const { decode } = require('html-entities');
const unprint = require('unprint');
const argv = require('./argv'); const argv = require('./argv');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
const knex = require('./knex'); const knex = require('./knex');
const slugify = require('./utils/slugify'); const slugify = require('./utils/slugify');
const bulkInsert = require('./utils/bulk-insert'); const bulkInsert = require('./utils/bulk-insert');
const resolvePlace = require('./utils/resolve-place');
const chunk = require('./utils/chunk'); const chunk = require('./utils/chunk');
const redis = require('./redis');
const { formatDate } = require('./utils/qu'); const { formatDate } = require('./utils/qu');
const { associateActors, associateDirectors, scrapeActors, toBaseActors } = require('./actors'); const { associateActors, associateDirectors, scrapeActors, toBaseActors } = require('./actors');
const { associateReleaseTags } = require('./tags'); const { associateReleaseTags } = require('./tags');
@ -19,6 +20,17 @@ const { associateReleaseMedia } = require('./media');
const { updateSceneSearch, updateMovieSearch } = require('./update-search'); const { updateSceneSearch, updateMovieSearch } = require('./update-search');
const { notify } = require('./alerts'); const { notify } = require('./alerts');
const geoCommon = import('../common/geo.mjs'); // eslint-disable-line import/extensions, import/no-relative-packages
// Dependencies bundled into a single object and passed as the second argument to resolvePlace,
// which is loaded from ../common/geo.mjs.
const commonContext = {
argv,
knex,
redis,
logger,
slugify,
unprint,
};
async function curateReleaseEntry(release, batchId, existingRelease, type = 'scene') { async function curateReleaseEntry(release, batchId, existingRelease, type = 'scene') {
const slugBase = release.title const slugBase = release.title
|| (release.actors?.length && `${release.entity.slug} ${release.actors.map((actor) => actor.name).join(' ')}`) || (release.actors?.length && `${release.entity.slug} ${release.actors.map((actor) => actor.name).join(' ')}`)
@ -64,7 +76,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
curatedRelease.production_location = decode(release.productionLocation); curatedRelease.production_location = decode(release.productionLocation);
if (argv.resolvePlace) { if (argv.resolvePlace) {
const productionLocation = await resolvePlace(decode(release.productionLocation)); const { resolvePlace } = await geoCommon;
const productionLocation = await resolvePlace(decode(release.productionLocation), commonContext);
if (productionLocation) { if (productionLocation) {
curatedRelease.production_city = productionLocation.city; curatedRelease.production_city = productionLocation.city;

View File

@ -1,85 +0,0 @@
'use strict';
const config = require('config');
const knex = require('../knex');
const logger = require('../logger')(__filename);
const http = require('./http');
const slugify = require('./slugify');
const argv = require('../argv');
const redis = require('../redis');
/**
 * Resolve a free-form location string to a structured place.
 *
 * Lookup order:
 *   1. the redis hash cached under `place-<slug>` (skipped when `argv.placeCache` is `false`),
 *   2. the `countries` table, matching nationality, alpha3 or alpha2,
 *   3. the Nominatim search API, whose first result is cached for 30 days.
 *
 * @param {string} query - Location text, e.g. a city, a country or a nationality.
 * @returns {Promise<object|null>} An object with any of `place`, `city`, `state`, `country`
 *   and `continent`, or `null` when the query is empty, nothing usable was found or the
 *   lookup failed (failures are logged, not thrown).
 */
async function resolvePlace(query) {
	if (!query) {
		return null;
	}

	const cacheKey = `place-${slugify(query)}`;
	const cacheTtl = 3600 * 24 * 30; // 30 days, in seconds

	// only read the cache when it has not been disabled
	if (argv.placeCache !== false) {
		const cachedPlace = await redis.hGetAll(cacheKey);

		// a missing hash comes back without fields, so a separate exists() round trip is not needed
		if (cachedPlace && Object.keys(cachedPlace).length > 0) {
			await redis.expire(cacheKey, cacheTtl);

			logger.debug(`Using cached place '${cacheKey}' for query '${query}': ${JSON.stringify(cachedPlace)}`);

			return cachedPlace;
		}
	}

	// query is a nationality, lookup would get weird results (British resolves to British, Northern Ireland)
	const country = await knex('countries')
		.where('nationality', 'ilike', `%${query}%`)
		.orWhere('alpha3', 'ilike', `%${query}%`)
		.orWhere('alpha2', 'ilike', `%${query}%`)
		.orderBy('priority', 'desc')
		.first();

	if (country) {
		return {
			country: country.alpha2,
		};
	}

	try {
		// https://operations.osmfoundation.org/policies/nominatim/
		// encodeURIComponent, not encodeURI: the query is a single parameter value, so '&', '#', '=' and '+' must be escaped
		const res = await http.get(`https://nominatim.openstreetmap.org/search?q=${encodeURIComponent(query)}&format=json&accept-language=en&addressdetails=1`, {
			headers: {
				'User-Agent': config.location.userAgent,
			},
			interval: 1000,
			concurrency: 1,
		});

		const [item] = res.body;

		if (item && item.address) {
			const rawPlace = item.address;
			const place = {};

			if (item.class === 'place' || item.class === 'boundary') {
				const location = rawPlace[item.type] || rawPlace.city || rawPlace.place || rawPlace.town;

				if (location) {
					place.place = location;
					place.city = rawPlace.city || location;
				}
			}

			if (rawPlace.state) place.state = rawPlace.state;
			if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
			if (rawPlace.continent) place.continent = rawPlace.continent;

			// nothing usable in the address; an empty hash cannot be stored, so treat it as unresolved
			if (Object.keys(place).length === 0) {
				return null;
			}

			logger.debug(`Resolved place '${query}' to ${JSON.stringify(place)}`);

			await redis.hSet(cacheKey, place);
			await redis.expire(cacheKey, cacheTtl);

			return place;
		}
	} catch (error) {
		logger.error(`Failed to resolve place '${query}': ${error.message}`);
	}

	return null;
}
module.exports = resolvePlace;