forked from DebaucheryLibrarian/traxxx
Added the scene count to the actor inspect output. The MindGeek scraper now prefers the network slug over the data brand when building scene URLs, since milehighmedia.com's brand is milehigh, which resulted in milehigh.com.
This commit is contained in:
parent
2068202ca6
commit
9d9eda29be
|
@ -387,6 +387,7 @@ async function scrapeActors(actorNames) {
|
|||
|
||||
if (argv.inspect) {
|
||||
console.log(profile);
|
||||
logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
|
||||
}
|
||||
|
||||
if (profile === null) {
|
||||
|
|
|
@ -39,7 +39,7 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
|||
const site = await findSite(url, release);
|
||||
|
||||
if (!site) {
|
||||
throw new Error('Could not find site in database');
|
||||
throw new Error(`Could not find site ${url} in database`);
|
||||
}
|
||||
|
||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||
|
|
|
@ -73,7 +73,7 @@ async function scrapeLatest(items, site) {
|
|||
return latestReleases.filter(Boolean);
|
||||
}
|
||||
|
||||
function scrapeScene(data, url, _site) {
|
||||
function scrapeScene(data, url, _site, networkName) {
|
||||
const release = {};
|
||||
|
||||
const { id: entryId, title, description } = data;
|
||||
|
@ -100,7 +100,7 @@ function scrapeScene(data, url, _site) {
|
|||
const siteName = data.collections[0]?.name || data.brand;
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
|
||||
release.url = url || `https://www.${data.brand}.com/scene/${entryId}/`;
|
||||
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||
|
||||
return release;
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ async function getSession(url) {
|
|||
return { session, instanceToken };
|
||||
}
|
||||
|
||||
function scrapeProfile(data, html, releases = []) {
|
||||
function scrapeProfile(data, html, releases = [], networkName) {
|
||||
const { qa, qd } = ex(html);
|
||||
|
||||
const profile = {
|
||||
|
@ -170,7 +170,7 @@ function scrapeProfile(data, html, releases = []) {
|
|||
const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
|
||||
if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
|
||||
|
||||
profile.releases = releases.map(release => scrapeScene(release));
|
||||
profile.releases = releases.map(release => scrapeScene(release, null, null, networkName));
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
@ -247,11 +247,11 @@ async function fetchProfile(actorName, networkName, actorPath = 'model') {
|
|||
]);
|
||||
|
||||
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
||||
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result);
|
||||
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkName);
|
||||
}
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorData, actorRes.body.toString());
|
||||
return scrapeProfile(actorData, actorRes.body.toString(), null, networkName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,13 +3,14 @@
|
|||
const config = require('config');
|
||||
const path = require('path');
|
||||
const fs = require('fs-extra');
|
||||
const moment = require('moment');
|
||||
|
||||
const argv = require('../argv');
|
||||
const knex = require('../knex');
|
||||
|
||||
async function init() {
|
||||
const posters = await knex('actors')
|
||||
.select('actors.name as actor_name', 'releases.title', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
|
||||
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
|
||||
.whereIn('actors.name', argv.actors)
|
||||
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
||||
.join('releases', 'releases_actors.release_id', 'releases.id')
|
||||
|
@ -20,7 +21,7 @@ async function init() {
|
|||
|
||||
await Promise.all(posters.map(async (poster) => {
|
||||
const source = path.join(config.media.path, poster.path);
|
||||
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`);
|
||||
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')}).jpeg`);
|
||||
|
||||
const file = await fs.readFile(source);
|
||||
await fs.writeFile(target, file);
|
||||
|
|
Loading…
Reference in New Issue