forked from DebaucheryLibrarian/traxxx
Added scene count to the actor inspect output. The MindGeek scraper now prefers the network slug over the data brand when building scene URLs, since milehighmedia.com's brand is "milehigh", which resulted in the incorrect domain milehigh.com.
This commit is contained in:
parent
2068202ca6
commit
9d9eda29be
|
@ -387,6 +387,7 @@ async function scrapeActors(actorNames) {
|
||||||
|
|
||||||
if (argv.inspect) {
|
if (argv.inspect) {
|
||||||
console.log(profile);
|
console.log(profile);
|
||||||
|
logger.info(`Found ${profile.releases.length} releases for ${actorName}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (profile === null) {
|
if (profile === null) {
|
||||||
|
|
|
@ -39,7 +39,7 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
||||||
const site = await findSite(url, release);
|
const site = await findSite(url, release);
|
||||||
|
|
||||||
if (!site) {
|
if (!site) {
|
||||||
throw new Error('Could not find site in database');
|
throw new Error(`Could not find site ${url} in database`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||||
|
|
|
@ -73,7 +73,7 @@ async function scrapeLatest(items, site) {
|
||||||
return latestReleases.filter(Boolean);
|
return latestReleases.filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene(data, url, _site) {
|
function scrapeScene(data, url, _site, networkName) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const { id: entryId, title, description } = data;
|
const { id: entryId, title, description } = data;
|
||||||
|
@ -100,7 +100,7 @@ function scrapeScene(data, url, _site) {
|
||||||
const siteName = data.collections[0]?.name || data.brand;
|
const siteName = data.collections[0]?.name || data.brand;
|
||||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||||
|
|
||||||
release.url = url || `https://www.${data.brand}.com/scene/${entryId}/`;
|
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -139,7 +139,7 @@ async function getSession(url) {
|
||||||
return { session, instanceToken };
|
return { session, instanceToken };
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile(data, html, releases = []) {
|
function scrapeProfile(data, html, releases = [], networkName) {
|
||||||
const { qa, qd } = ex(html);
|
const { qa, qd } = ex(html);
|
||||||
|
|
||||||
const profile = {
|
const profile = {
|
||||||
|
@ -170,7 +170,7 @@ function scrapeProfile(data, html, releases = []) {
|
||||||
const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
|
const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
|
||||||
if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
|
if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
|
||||||
|
|
||||||
profile.releases = releases.map(release => scrapeScene(release));
|
profile.releases = releases.map(release => scrapeScene(release, null, null, networkName));
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
@ -247,11 +247,11 @@ async function fetchProfile(actorName, networkName, actorPath = 'model') {
|
||||||
]);
|
]);
|
||||||
|
|
||||||
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
||||||
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result);
|
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkName);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (actorRes.statusCode === 200) {
|
if (actorRes.statusCode === 200) {
|
||||||
return scrapeProfile(actorData, actorRes.body.toString());
|
return scrapeProfile(actorData, actorRes.body.toString(), null, networkName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,13 +3,14 @@
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
|
const moment = require('moment');
|
||||||
|
|
||||||
const argv = require('../argv');
|
const argv = require('../argv');
|
||||||
const knex = require('../knex');
|
const knex = require('../knex');
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
const posters = await knex('actors')
|
const posters = await knex('actors')
|
||||||
.select('actors.name as actor_name', 'releases.title', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
|
.select('actors.name as actor_name', 'releases.title', 'releases.date', 'media.path', 'sites.name as site_name', 'networks.name as network_name')
|
||||||
.whereIn('actors.name', argv.actors)
|
.whereIn('actors.name', argv.actors)
|
||||||
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
.join('releases_actors', 'releases_actors.actor_id', 'actors.id')
|
||||||
.join('releases', 'releases_actors.release_id', 'releases.id')
|
.join('releases', 'releases_actors.release_id', 'releases.id')
|
||||||
|
@ -20,7 +21,7 @@ async function init() {
|
||||||
|
|
||||||
await Promise.all(posters.map(async (poster) => {
|
await Promise.all(posters.map(async (poster) => {
|
||||||
const source = path.join(config.media.path, poster.path);
|
const source = path.join(config.media.path, poster.path);
|
||||||
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')}.jpeg`);
|
const target = path.join(config.media.path, 'posters', `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')}).jpeg`);
|
||||||
|
|
||||||
const file = await fs.readFile(source);
|
const file = await fs.readFile(source);
|
||||||
await fs.writeFile(target, file);
|
await fs.writeFile(target, file);
|
||||||
|
|
Loading…
Reference in New Issue