Scraping and storing actor profiles.
This commit is contained in:
@@ -8,10 +8,10 @@ const moment = require('moment');
|
||||
const argv = require('../argv');
|
||||
const knex = require('../knex');
|
||||
|
||||
async function init() {
|
||||
async function actorPosters(actorNames) {
|
||||
const posters = await knex('actors')
|
||||
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'media.index', 'sites.name as site_name', 'networks.name as network_name')
|
||||
.whereIn('actors.name', (argv.actors || []).concat(argv._))
|
||||
.whereIn('actors.name', actorNames)
|
||||
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
||||
.join('releases', 'releases_actors.release_id', 'releases.id')
|
||||
.join('sites', 'sites.id', 'releases.site_id')
|
||||
@@ -37,4 +37,41 @@ async function init() {
|
||||
knex.destroy();
|
||||
}
|
||||
|
||||
/**
 * Copy the poster of every release belonging to the given sites into
 * `<config.media.path>/extracted/<site name>/`.
 *
 * Each copy is named `<site name> - <YYYY-MM-DD> - <title>.jpeg`. The date is
 * formatted in UTC, and slashes and dots in the title are replaced with
 * underscores. The `.jpeg` extension is hard-coded.
 *
 * The list of written target paths is printed to stdout.
 *
 * @param {string[]} siteSlugs - Values matched against `sites.slug`.
 * @returns {Promise<void>} Settles after the knex connection pool has been
 *   destroyed; rejects if the query or any copy fails.
 */
async function sitePosters(siteSlugs) {
  try {
    const posters = await knex('sites')
      .select('sites.name as site_name', 'releases.title', 'releases.date', 'media.path')
      .whereIn('sites.slug', siteSlugs)
      .join('releases', 'releases.site_id', 'sites.id')
      .join('releases_posters', 'releases_posters.release_id', 'releases.id')
      .join('media', 'releases_posters.media_id', 'media.id');
    // .where('releases.date', '<', '2020-01-01');

    const files = await Promise.all(posters.map(async (poster) => {
      const directory = path.join(config.media.path, 'extracted', poster.site_name);

      const source = path.join(config.media.path, poster.path);
      const target = path.join(directory, `${poster.site_name} - ${moment.utc(poster.date).format('YYYY-MM-DD')} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`);

      // The per-site directory may not exist yet; recursive mkdir is a no-op when it does.
      await fs.mkdir(directory, { recursive: true });
      await fs.copyFile(source, target);

      return target;
    }));

    console.log(files);
  } finally {
    // Release the connection pool on failure as well as on success, so a
    // failed query or copy does not leave open connections behind.
    await knex.destroy();
  }
}
|
||||
|
||||
/**
 * Dispatch on the command-line arguments: extract posters for the requested
 * actors, or otherwise for the requested sites.
 *
 * Actors take precedence: when `argv.actors` is set, `argv.sites` is ignored.
 * When neither is set, nothing happens.
 *
 * @returns {Promise<void>}
 */
async function init() {
  const { actors, sites } = argv;

  if (actors) {
    await actorPosters(actors);
  } else if (sites) {
    await sitePosters(sites);
  }
}
|
||||
|
||||
// Script entry point: run the poster extraction selected by the command-line arguments.
init();
|
||||
|
||||
@@ -1,25 +1,34 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const logger = require('../logger')(__filename);
|
||||
const http = require('./http');
|
||||
|
||||
async function resolvePlace(query) {
|
||||
if (!query) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const res = await bhttp.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`);
|
||||
const [item] = res.body;
|
||||
try {
|
||||
// https://operations.osmfoundation.org/policies/nominatim/
|
||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||
'User-Agent': 'contact at moonloop.adult@protonmail.com',
|
||||
});
|
||||
|
||||
if (item && item.address) {
|
||||
const rawPlace = item.address;
|
||||
const place = {};
|
||||
const [item] = res.body;
|
||||
|
||||
if (rawPlace.city) place.city = rawPlace.city;
|
||||
if (rawPlace.state) place.state = rawPlace.state;
|
||||
if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
|
||||
if (rawPlace.continent) place.continent = rawPlace.continent;
|
||||
if (item && item.address) {
|
||||
const rawPlace = item.address;
|
||||
const place = {};
|
||||
|
||||
return place;
|
||||
if (rawPlace.city) place.city = rawPlace.city;
|
||||
if (rawPlace.state) place.state = rawPlace.state;
|
||||
if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase();
|
||||
if (rawPlace.continent) place.continent = rawPlace.continent;
|
||||
|
||||
return place;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Failed to resolve place '${query}': ${error.message}`);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
Reference in New Issue
Block a user