Passing matching site to profile scrapers. Allowing scrapers to pass avatar metadata. Added scraper and copyright properties to media. Auto-adding copyright from site or scraper to avatars. Separated Porn Pros from Whale Member.

This commit is contained in:
2020-02-24 03:12:58 +01:00
parent 73443b77a8
commit 6d1f30f703
40 changed files with 232 additions and 123 deletions

View File

@@ -12,6 +12,7 @@ const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or');
const resolvePlace = require('./utils/resolve-place');
const slugify = require('./utils/slugify');
const capitalize = require('./utils/capitalize');
// const { createMediaDirectory, storePhotos } = require('./media_legacy');
const { storeMedia, associateMedia } = require('./media');
@@ -94,10 +95,7 @@ function curateActors(releases) {
function curateActorEntry(actor, scraped, scrapeSuccess) {
const curatedActor = {
name: actor.name
.split(' ')
.map(segment => `${segment.charAt(0).toUpperCase()}${segment.slice(1)}`)
.join(' '),
name: capitalize(actor.name),
slug: slugify(actor.name),
birthdate: actor.birthdate,
description: actor.description,
@@ -305,12 +303,12 @@ async function mergeProfiles(profiles, actor) {
return prevProfile;
}
return {
const accProfile = {
id: actor ? actor.id : null,
name: actor ? actor.name : (prevProfile.name || profile.name),
description: prevProfile.description || profile.description,
gender: prevProfile.gender || profile.gender,
birthdate: Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
birthdate: !prevProfile.birthdate || Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
birthPlace: prevProfile.birthPlace || profile.birthPlace,
residencePlace: prevProfile.residencePlace || profile.residencePlace,
nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
@@ -328,9 +326,28 @@ async function mergeProfiles(profiles, actor) {
piercings: prevProfile.piercings || profile.piercings,
tattoos: prevProfile.tattoos || profile.tattoos,
social: prevProfile.social.concat(profile.social || []),
avatars: prevProfile.avatars.concat(profile.avatar ? [{ src: profile.avatar }] : []), // don't flatten fallbacks
releases: prevProfile.releases.concat(profile.releases ? profile.releases : []), // don't flatten fallbacks
};
// Normalize the avatar(s) of this profile into { src, scraper, copyright } entries
// and append them to the avatars accumulated from the previous profiles.
if (profile.avatar) {
  // An avatar may be a bare source or an object carrying its own metadata.
  // A copyright that is explicitly set on the avatar (including null) is kept as-is;
  // only an undefined copyright falls back to the capitalized site name or scraper slug.
  const curateAvatar = avatarX => ({
    src: avatarX.src || avatarX,
    scraper: profile.scraper,
    copyright: avatarX.copyright === undefined
      ? capitalize(profile.site?.name || profile.scraper)
      : avatarX.copyright, // read from the individual avatar; the enclosing array has no copyright property
  });

  // Wrap the callback so the index and array that map() passes along are not forwarded.
  const avatar = Array.isArray(profile.avatar)
    ? profile.avatar.map(avatarX => curateAvatar(avatarX))
    : curateAvatar(profile.avatar);

  accProfile.avatars = prevProfile.avatars.concat([avatar]); // don't flatten fallbacks
} else {
  accProfile.avatars = prevProfile.avatars;
}
return accProfile;
}, {
social: [],
avatars: [],
@@ -368,6 +385,9 @@ async function scrapeActors(actorNames) {
const finalSources = argv.withReleases ? sources.flat() : sources; // ignore race-to-success grouping when scenes are requested
const [sites, networks] = await Promise.all([knex('sites').select('*').whereIn('slug', finalSources.flat()), knex('networks').select('*').whereIn('slug', finalSources.flat())]);
const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
const profiles = await Promise.map(finalSources, async (source) => {
// const [scraperSlug, scraper] = source;
const profileScrapers = [].concat(source).map(slug => ({ scraperSlug: slug, scraper: scrapers.actors[slug] }));
@@ -381,7 +401,8 @@ async function scrapeActors(actorNames) {
logger.verbose(`Searching '${actorName}' on ${scraperSlug}`);
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, argv.withReleases);
const site = sitesBySlug[scraperSlug] || null;
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug, site, argv.withReleases);
if (profile) {
logger.verbose(`Found profile for '${actorName}' on ${scraperSlug}`);
@@ -390,6 +411,7 @@ async function scrapeActors(actorNames) {
...profile,
name: actorName,
scraper: scraperSlug,
site,
};
}