Added scene count to actor inspect output. The MindGeek scraper now prefers the network slug over the data brand when building scene URLs, because milehighmedia.com's data brand is "milehigh", which resulted in the incorrect domain milehigh.com.

This commit is contained in:
ThePendulum 2020-02-09 03:09:06 +01:00
parent 2068202ca6
commit 9d9eda29be
4 changed files with 11 additions and 9 deletions

View File

@ -387,6 +387,7 @@ async function scrapeActors(actorNames) {
if (argv.inspect) {
console.log(profile);
logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
}
if (profile === null) {

View File

@ -39,7 +39,7 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
const site = await findSite(url, release);
if (!site) {
throw new Error('Could not find site in database');
throw new Error(`Could not find site ${url} in database`);
}
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];

View File

@ -73,7 +73,7 @@ async function scrapeLatest(items, site) {
return latestReleases.filter(Boolean);
}
function scrapeScene(data, url, _site) {
function scrapeScene(data, url, _site, networkName) {
const release = {};
const { id: entryId, title, description } = data;
@ -100,7 +100,7 @@ function scrapeScene(data, url, _site) {
const siteName = data.collections[0]?.name || data.brand;
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
release.url = url || `https://www.${data.brand}.com/scene/${entryId}/`;
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
return release;
}
@ -139,7 +139,7 @@ async function getSession(url) {
return { session, instanceToken };
}
function scrapeProfile(data, html, releases = []) {
function scrapeProfile(data, html, releases = [], networkName) {
const { qa, qd } = ex(html);
const profile = {
@ -170,7 +170,7 @@ function scrapeProfile(data, html, releases = []) {
const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
profile.releases = releases.map(release => scrapeScene(release));
profile.releases = releases.map(release => scrapeScene(release, null, null, networkName));
return profile;
}
@ -247,11 +247,11 @@ async function fetchProfile(actorName, networkName, actorPath = 'model') {
]);
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result);
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkName);
}
if (actorRes.statusCode === 200) {
return scrapeProfile(actorData, actorRes.body.toString());
return scrapeProfile(actorData, actorRes.body.toString(), null, networkName);
}
}
}

View File

@ -3,13 +3,14 @@
const config = require('config');
const path = require('path');
const fs = require('fs-extra');
const moment = require('moment');
const argv = require('../argv');
const knex = require('../knex');
async function init() {
const posters = await knex('actors')
.select('actors.name as actor_name', 'releases.title', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
.whereIn('actors.name', argv.actors)
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
.join('releases', 'releases_actors.release_id', 'releases.id')
@ -20,7 +21,7 @@ async function init() {
await Promise.all(posters.map(async (poster) => {
const source = path.join(config.media.path, poster.path);
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`);
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')}).jpeg`);
const file = await fs.readFile(source);
await fs.writeFile(target, file);