Added profile interpolation.
This commit is contained in:
155
src/actors.js
155
src/actors.js
@@ -2,6 +2,7 @@
|
||||
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
|
||||
// const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
@@ -10,12 +11,46 @@ const scrapers = require('./scrapers/scrapers').actors;
|
||||
const argv = require('./argv');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const logger = require('./logger')(__filename);
|
||||
|
||||
const { toBaseReleases } = require('./deep');
|
||||
const { associateAvatars } = require('./media');
|
||||
|
||||
const slugify = require('./utils/slugify');
|
||||
const capitalize = require('./utils/capitalize');
|
||||
const resolvePlace = require('./utils/resolve-place');
|
||||
const { associateAvatars } = require('./media');
|
||||
|
||||
const { toBaseReleases } = require('./deep');
|
||||
/**
 * Return the value that occurs most often in a list.
 *
 * Values are tallied in a Map, so falsy values such as `0`, `false` or `''`
 * are valid winners and values of different types (`1` vs `'1'`) are counted
 * separately. On a tie, the value that reached the winning count first is kept.
 *
 * @param {Array} items - Values to tally; a missing list is treated as empty.
 * @returns {*} The most frequent value, or null when there are no items.
 */
function getMostFrequent(items) {
  const counts = new Map();

  let mostFrequent = null;
  let highestCount = 0;

  (items || []).forEach((item) => {
    const count = (counts.get(item) || 0) + 1;

    counts.set(item, count);

    // Compare against the tracked count instead of the truthiness of the
    // current winner, so that a falsy winner is never displaced by default.
    if (count > highestCount) {
      mostFrequent = item;
      highestCount = count;
    }
  });

  return mostFrequent;
}
|
||||
|
||||
/**
 * Build a date out of the most frequent year, month and day-of-month found
 * in a list of dates. Each component is chosen independently of the others.
 *
 * @param {Date[]} dates - Dates to combine; a missing list is treated as empty.
 * @returns {Date|null} The combined date, or null when there are no dates or
 *   the independently chosen components do not form a valid calendar date.
 */
function getMostFrequentDate(dates) {
  // Without this guard every component would be null, and moment fills
  // missing units from the current date, producing a bogus "today" value.
  if (!dates || dates.length === 0) {
    return null;
  }

  const year = getMostFrequent(dates.map(dateX => dateX.getFullYear()));
  const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
  const date = getMostFrequent(dates.map(dateX => dateX.getDate()));

  const mostFrequentDate = moment({ year, month, date });

  // Components picked separately may combine into an impossible date,
  // e.g. the 31st of a 30-day month; report that as "unknown".
  return mostFrequentDate.isValid() ? mostFrequentDate.toDate() : null;
}
|
||||
|
||||
/**
 * Return the item with the greatest `length`, without reordering the input.
 *
 * On a tie the earliest item wins. An empty result (no items, or a longest
 * item that is itself falsy such as `''`) is reported as null.
 *
 * @param {Array<string|Array>} items - Candidates; a missing list is treated as empty.
 * @returns {string|Array|null} The longest item, or null.
 */
function getLongest(items) {
  const longest = (items || []).reduce(
    (acc, item) => (acc === null || item.length > acc.length ? item : acc),
    null,
  );

  return longest || null;
}
|
||||
|
||||
/**
 * Compute the arithmetic mean of a list of numbers, rounded to the nearest integer.
 *
 * @param {number[]} items - Numbers to average; a missing list is treated as empty.
 * @returns {number|null} The rounded mean, or null when there is nothing to
 *   average (instead of the NaN that dividing by a zero length would give).
 */
function getAverage(items) {
  if (!items || items.length === 0) {
    return null;
  }

  const sum = items.reduce((acc, item) => acc + item, 0);

  return Math.round(sum / items.length);
}
|
||||
|
||||
function toBaseActors(actorsOrNames, release) {
|
||||
return actorsOrNames.map((actorOrName) => {
|
||||
@@ -64,10 +99,10 @@ function curateProfileEntry(profile) {
|
||||
description: profile.description,
|
||||
birth_city: profile.placeOfBirth?.city || null,
|
||||
birth_state: profile.placeOfBirth?.state || null,
|
||||
birth_country_alpha2: profile.placeOfBirth?.country?.alpha2 || null,
|
||||
birth_country_alpha2: profile.placeOfBirth?.country || null,
|
||||
residence_city: profile.placeOfResidence?.city || null,
|
||||
residence_state: profile.placeOfResidence?.state || null,
|
||||
residence_country_alpha2: profile.placeOfResidence?.country?.alpha2 || null,
|
||||
residence_country_alpha2: profile.placeOfResidence?.country || null,
|
||||
cup: profile.cup,
|
||||
bust: profile.bust,
|
||||
waist: profile.waist,
|
||||
@@ -131,13 +166,15 @@ async function curateProfile(profile) {
|
||||
curatedProfile.hasTattoos = typeof profile.hasTattoos === 'boolean' ? profile.hasTattoos : null;
|
||||
curatedProfile.hasPiercings = typeof profile.hasPiercings === 'boolean' ? profile.hasPiercings : null;
|
||||
|
||||
const [placeOfBirth, placeOfResidence] = await Promise.all([
|
||||
resolvePlace(profile.birthPlace),
|
||||
resolvePlace(profile.residencePlace),
|
||||
]);
|
||||
if (argv.resolvePlace) {
|
||||
const [placeOfBirth, placeOfResidence] = await Promise.all([
|
||||
resolvePlace(profile.birthPlace),
|
||||
resolvePlace(profile.residencePlace),
|
||||
]);
|
||||
|
||||
curatedProfile.placeOfBirth = placeOfBirth;
|
||||
curatedProfile.placeOfResidence = placeOfResidence;
|
||||
curatedProfile.placeOfBirth = placeOfBirth;
|
||||
curatedProfile.placeOfResidence = placeOfResidence;
|
||||
}
|
||||
|
||||
if (!curatedProfile.placeOfBirth && curatedProfile.nationality) {
|
||||
const country = await knex('countries')
|
||||
@@ -164,6 +201,10 @@ async function curateProfile(profile) {
|
||||
|
||||
curatedProfile.releases = toBaseReleases(profile.releases);
|
||||
|
||||
if (argv.inspect) {
|
||||
console.log(curatedProfile);
|
||||
}
|
||||
|
||||
return curatedProfile;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
|
||||
@@ -172,6 +213,91 @@ async function curateProfile(profile) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Combine all stored profiles of the given actors into one set of values per
 * actor and write those values to the matching rows of the `actors` table.
 *
 * Per property the combined value is the most frequent one, except for
 * `weight` (rounded average), `tattoos` / `piercings` (longest text) and the
 * avatar (the one with the greatest height). Null values are ignored.
 * All updates run in a single transaction; on failure the error is logged
 * and the transaction is rolled back.
 *
 * @param {Array<{id: number}>} actors - Actors whose profiles should be combined.
 * @returns {Promise<void>}
 */
async function interpolateProfiles(actors) {
  // Nothing to combine; avoid a pointless query and an empty transaction.
  if (!actors || actors.length === 0) {
    return;
  }

  const profiles = await knex('actors_profiles')
    .select(['actors_profiles.*', 'media.width as avatar_width', 'media.height as avatar_height', 'media.size as avatar_size'])
    .whereIn('actor_id', actors.map(actor => actor.id))
    .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');

  // Group the profile rows per actor; arrays are extended in place rather
  // than re-spread on every step, which was quadratic in the row count.
  const profilesByActorId = profiles.reduce((acc, profile) => {
    if (!acc[profile.actor_id]) {
      acc[profile.actor_id] = [];
    }

    acc[profile.actor_id].push(profile);

    return acc;
  }, {});

  const interpolatedProfiles = Object.entries(profilesByActorId).map(([actorId, actorProfiles]) => {
    // Collect the non-null values of every column across this actor's profiles.
    const valuesByProperty = actorProfiles.reduce((acc, profile) => {
      Object.entries(profile).forEach(([property, value]) => {
        if (!acc[property]) {
          acc[property] = [];
        }

        if (value !== null) {
          acc[property].push(value);
        }
      });

      return acc;
    }, {});

    const avatars = actorProfiles.map(profile => profile.avatar_media_id && ({
      id: profile.avatar_media_id,
      width: profile.avatar_width,
      height: profile.avatar_height,
      size: profile.avatar_size,
    })).filter(Boolean);

    const profile = {
      id: actorId,
    };

    profile.gender = getMostFrequent(valuesByProperty.gender);
    profile.ethnicity = getMostFrequent(valuesByProperty.ethnicity.map(ethnicity => ethnicity.toLowerCase()));

    profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
    profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);

    profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
    profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
    profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);

    profile.residence_city = getMostFrequent(valuesByProperty.residence_city);
    profile.residence_state = getMostFrequent(valuesByProperty.residence_state);
    profile.residence_country_alpha2 = getMostFrequent(valuesByProperty.residence_country_alpha2);

    profile.cup = getMostFrequent(valuesByProperty.cup);
    profile.bust = getMostFrequent(valuesByProperty.bust);
    profile.waist = getMostFrequent(valuesByProperty.waist);
    profile.hip = getMostFrequent(valuesByProperty.hip);
    profile.natural_boobs = getMostFrequent(valuesByProperty.natural_boobs);

    profile.hair = getMostFrequent(valuesByProperty.hair.map(hair => hair.toLowerCase()));
    profile.eyes = getMostFrequent(valuesByProperty.eyes.map(eyes => eyes.toLowerCase()));

    profile.weight = getAverage(valuesByProperty.weight);
    profile.height = getMostFrequent(valuesByProperty.height);

    profile.has_tattoos = getMostFrequent(valuesByProperty.has_tattoos);
    profile.has_piercings = getMostFrequent(valuesByProperty.has_piercings);

    profile.tattoos = getLongest(valuesByProperty.tattoos);
    profile.piercings = getLongest(valuesByProperty.piercings);

    // An actor may have no avatar on any profile; indexing the empty list
    // directly would throw and abort the interpolation of every actor.
    const [largestAvatar] = avatars.sort((avatarA, avatarB) => avatarB.height - avatarA.height);

    profile.avatar_media_id = largestAvatar ? largestAvatar.id : null;

    return profile;
  });

  const transaction = await knex.transaction();

  try {
    await Promise.all(interpolatedProfiles.map(profile => knex('actors')
      .where('id', profile.id)
      .update(profile)
      .transacting(transaction)));

    await transaction.commit();
  } catch (error) {
    // Still best-effort like before (the error is not rethrown), but the
    // failure is now visible in the log instead of being silently dropped.
    logger.error(`Failed to interpolate profiles: ${error.message}`);

    await transaction.rollback(error);
  }
}
|
||||
|
||||
async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
|
||||
const profiles = Promise.map(sources, async (source) => {
|
||||
try {
|
||||
@@ -217,7 +343,9 @@ async function scrapeProfiles(actor, sources, networksBySlug, sitesBySlug) {
|
||||
return profiles.filter(Boolean);
|
||||
}
|
||||
|
||||
async function upsertProfiles(curatedProfileEntries) {
|
||||
async function upsertProfiles(profiles) {
|
||||
const curatedProfileEntries = profiles.map(profile => curateProfileEntry(profile));
|
||||
|
||||
const existingProfiles = await knex('actors_profiles')
|
||||
.whereIn(['actor_id', 'network_id'], curatedProfileEntries.map(entry => [entry.actor_id, entry.network_id]))
|
||||
.orWhereIn(['actor_id', 'site_id'], curatedProfileEntries.map(entry => [entry.actor_id, entry.site_id]));
|
||||
@@ -311,9 +439,8 @@ async function scrapeActors(actorNames) {
|
||||
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
|
||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
|
||||
const curatedProfileEntries = profilesWithAvatarIds.map(profile => curateProfileEntry(profile));
|
||||
|
||||
await upsertProfiles(curatedProfileEntries);
|
||||
await upsertProfiles(profilesWithAvatarIds);
|
||||
await interpolateProfiles(actors);
|
||||
}
|
||||
|
||||
async function getOrCreateActors(baseActors, batchId) {
|
||||
|
||||
@@ -177,6 +177,11 @@ const { argv } = yargs
|
||||
type: 'string',
|
||||
default: process.env.NODE_ENV === 'development' ? 'silly' : 'info',
|
||||
})
|
||||
.option('resolve-place', {
|
||||
describe: 'Call OSM Nominatim API for actor place of birth and residence. Raw value discarded if disabled.',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('debug', {
|
||||
describe: 'Show error stack traces',
|
||||
type: 'boolean',
|
||||
|
||||
@@ -52,6 +52,10 @@ async function findSites(baseReleases) {
|
||||
}
|
||||
|
||||
function toBaseReleases(baseReleasesOrUrls) {
|
||||
if (!baseReleasesOrUrls) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return baseReleasesOrUrls
|
||||
.map((baseReleaseOrUrl) => {
|
||||
if (baseReleaseOrUrl.url) {
|
||||
|
||||
@@ -141,7 +141,7 @@ async function fetchActorReleases({ qu, html }, accReleases = []) {
|
||||
return accReleases.concat(releases);
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, url, actorName) {
|
||||
async function scrapeProfile(html, url, actorName, include) {
|
||||
const qProfile = ex(html);
|
||||
const { q, qa } = qProfile;
|
||||
|
||||
@@ -175,7 +175,9 @@ async function scrapeProfile(html, url, actorName) {
|
||||
const avatarEl = q('.big-pic-model-container img');
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
if (include.releases) {
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
@@ -198,7 +200,7 @@ async function fetchScene(url, site) {
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
async function fetchProfile(actorName, scraperSlug, siteOrNetwork, include) {
|
||||
const searchUrl = 'https://brazzers.com/pornstars-search/';
|
||||
const searchRes = await bhttp.get(searchUrl, {
|
||||
headers: {
|
||||
@@ -212,7 +214,7 @@ async function fetchProfile(actorName) {
|
||||
const url = `https://brazzers.com${actorLink}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeProfile(res.body.toString(), url, actorName);
|
||||
return scrapeProfile(res.body.toString(), url, actorName, include);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -368,7 +368,7 @@ function scrapeApiProfile(data, releases, siteSlug) {
|
||||
const avatarPaths = Object.values(data.pictures).reverse();
|
||||
if (avatarPaths.length > 0) profile.avatar = avatarPaths.map(avatarPath => `https://images01-evilangel.gammacdn.com/actors${avatarPath}`);
|
||||
|
||||
profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
|
||||
if (releases) profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
|
||||
|
||||
return profile;
|
||||
}
|
||||
@@ -579,7 +579,7 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl, getActorReleasesU
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchApiProfile(actorName, siteSlug) {
|
||||
async function fetchApiProfile(actorName, siteSlug, site, include) {
|
||||
const actorSlug = encodeURI(actorName);
|
||||
const referer = `https://www.${siteSlug}.com/en/search`;
|
||||
|
||||
@@ -603,7 +603,7 @@ async function fetchApiProfile(actorName, siteSlug) {
|
||||
const actorData = res.body.results[0].hits.find(actor => slugify(actor.name) === slugify(actorName));
|
||||
|
||||
if (actorData) {
|
||||
const actorScenes = await fetchActorScenes(actorData.name, apiUrl, siteSlug);
|
||||
const actorScenes = include.releases && await fetchActorScenes(actorData.name, apiUrl, siteSlug);
|
||||
|
||||
return scrapeApiProfile(actorData, actorScenes, siteSlug);
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||
}
|
||||
|
||||
extend type Actor {
|
||||
age: Int @requires(columns: ["date_of_birth"])
|
||||
age: Int @requires(columns: ["dateOfBirth"])
|
||||
height(units:Units): String @requires(columns: ["height"])
|
||||
weight(units:Units): String @requires(columns: ["weight"])
|
||||
}
|
||||
@@ -20,9 +20,9 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||
resolvers: {
|
||||
Actor: {
|
||||
age(parent, _args, _context, _info) {
|
||||
if (!parent.birthdate) return null;
|
||||
if (!parent.dateOfBirth) return null;
|
||||
|
||||
return moment().diff(parent.birthdate, 'years');
|
||||
return moment().diff(parent.dateOfBirth, 'years');
|
||||
},
|
||||
height(parent, args, _context, _info) {
|
||||
if (!parent.height) return null;
|
||||
|
||||
Reference in New Issue
Block a user