Storing actor profile URL when provided from scene page.

This commit is contained in:
DebaucheryLibrarian
2020-08-31 02:43:41 +02:00
parent 1bfdf4b232
commit f6353ca14c
9 changed files with 99 additions and 3 deletions

View File

@@ -148,6 +148,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
id: actor.id,
name: actor.name,
slug: actor.slug,
url: actor.url,
gender: actor.gender,
entityId: actor.entity_id,
aliasFor: actor.alias_for,
@@ -238,6 +239,7 @@ function curateProfileEntry(profile) {
entity_id: profile.entity?.id || null,
date_of_birth: profile.dateOfBirth,
date_of_death: profile.dateOfDeath,
url: profile.url,
gender: profile.gender,
ethnicity: profile.ethnicity,
description: profile.description,
@@ -552,7 +554,10 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
logger.verbose(`Searching profile for '${actor.name}' on '${label}'`);
const profile = await scraper.fetchProfile(actor, context, include);
const profile = await scraper.fetchProfile(curateActor({
...existingProfile,
...actor,
}), context, include);
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
@@ -657,7 +662,11 @@ async function scrapeActors(argNames) {
const actors = existingActorEntries.concat(Array.isArray(newActorEntries) ? newActorEntries : []);
const existingProfiles = await knex('actors_profiles').whereIn('actor_id', actors.map(actor => actor.id));
const existingProfiles = await knex('actors_profiles')
.select(knex.raw('actors_profiles.*, row_to_json(avatars) as avatar'))
.whereIn('actor_id', actors.map(actor => actor.id))
.leftJoin('media as avatars', 'avatars.id', 'actors_profiles.avatar_media_id');
const existingProfilesByActorEntityId = existingProfiles.reduce((acc, profile) => ({
...acc,
[profile.actor_id]: {
@@ -724,6 +733,8 @@ async function getOrCreateActors(baseActors, batchId) {
}))
.filter(actor => !!actor.id);
console.log(newActorIdsByEntityIdAndSlug, newActorProfiles);
await storeProfiles(newActorProfiles);
if (Array.isArray(newActors)) {

View File

@@ -2,6 +2,7 @@
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
@@ -58,6 +59,7 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, type =
return {
name: qu.query.cnt(el, 'span'),
url: qu.query.url(el, 'a', 'href', { origin: channel.url }),
avatar: [
avatar.replace(/\/actor\/\d+/, '/actor/1600'),
avatar,
@@ -124,6 +126,68 @@ function scrapeMovies(movies, channel) {
});
}
/**
 * Convert the scene tiles listed for an actor into base release objects.
 *
 * @param {object[]} scenes - Scraped tile contexts; each one carries a `query` exposing `url`, `cnt` and `img`.
 * @param {object} channel - Channel whose `url` is passed as the origin when resolving tile links.
 * @returns {object[]} One release per tile with `url`, `entryId` (null when it cannot be derived),
 *   `title` and, only when an image was found, `poster`.
 */
function scrapeActorScenes(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a', 'href', { origin: channel.url });

    // A tile without a link, or whose link has no numeric path segment, must not abort the whole listing.
    release.entryId = release.url
      ? new URL(release.url).pathname.match(/\/(\d+)/)?.[1] ?? null
      : null;

    release.title = query.cnt('.grid-item-title');

    const poster = query.img('a img');

    if (poster) {
      // Try the 1600 variant first and keep the originally linked image as fallback.
      release.poster = [
        poster.replace(/\/\d+\//, '/1600/'),
        poster,
      ];
    }

    return release;
  });
}
/**
 * Build an actor profile from a scraped performer page.
 *
 * @param {object} context - Scraped page context; its `query` exposes `cnts` and `img`.
 * @param {string} url - Profile URL; its first numeric path segment is used as the cast ID for the scene listing.
 * @param {object} channel - Channel forwarded to `scrapeActorScenes`.
 * @param {*} include - When truthy, the actor's scenes are fetched and attached as `profile.releases`.
 * @returns {Promise} Resolves to the profile: measurements (when parseable), hair, eyes, ethnicity,
 *   height, weight, avatar and, optionally, releases.
 */
async function scrapeProfile({ query }, url, channel, include) {
  const profile = {};

  const bio = query.cnts('.performer-page-header li').reduce((acc, info) => {
    // Split on the first colon only, so a value that itself contains a colon stays intact.
    const separatorIndex = info.indexOf(':');

    if (separatorIndex === -1) {
      // Not a "Key: value" entry; skip it rather than crash on a missing value.
      return acc;
    }

    return {
      ...acc,
      [slugify(info.slice(0, separatorIndex), '_')]: info.slice(separatorIndex + 1).trim(),
    };
  }, {});

  const measurements = bio.meas?.match(/(\d+)(\w+)-(\d+)-(\d+)/);

  if (measurements) {
    [profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1);
  }

  profile.hair = bio.hair;
  profile.eyes = bio.eyes;
  profile.ethnicity = bio.ethnicity;

  profile.height = feetInchesToCm(bio.height);
  profile.weight = lbsToKg(bio.weight);

  profile.avatar = query.img('picture img');

  if (include) {
    const actorId = new URL(url).pathname.match(/\/(\d+)/)?.[1];

    // Without a numeric ID in the profile URL there is no cast listing to request.
    if (actorId) {
      const res = await qu.getAll(`https://www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, {
        rejectUnauthorized: false,
      });

      if (res.ok) {
        profile.releases = scrapeActorScenes(res.items, channel);
      }
    }
  }

  return profile;
}
async function fetchLatest(channel, page = 1) {
const url = `${channel.url}/tour?page=${page}`;
const res = await qu.getAll(url, '.scene-update', null, {
@@ -177,9 +241,26 @@ async function fetchMovies(channel, page = 1) {
return res.status;
}
/**
 * Fetch an actor's profile page from the URL stored on the actor and scrape it.
 *
 * @param {object} baseActor - Actor whose `url` points at the profile page.
 * @param {object} channel - Channel forwarded to `scrapeProfile`.
 * @param {*} include - Forwarded to `scrapeProfile` unchanged.
 * @returns {Promise} Resolves to null when the actor has no URL, to the scraped profile when the
 *   request succeeded, or to the response status otherwise.
 */
async function fetchProfile(baseActor, channel, include) {
  const { url: profileUrl } = baseActor;

  // There is nothing to look up unless a profile URL was stored for this actor.
  if (!profileUrl) {
    return null;
  }

  // NOTE(review): presumably disables TLS certificate verification for this request; confirm against the q utility.
  const requestOptions = { rejectUnauthorized: false };
  const res = await qu.get(profileUrl, '.performer-page', null, requestOptions);

  return res.ok
    ? scrapeProfile(res.item, profileUrl, channel, include)
    : res.status;
}
// Functions exported by this scraper module, including the fetchProfile defined in this file.
module.exports = {
fetchLatest,
fetchScene,
fetchMovies,
fetchMovie,
fetchProfile,
};

View File

@@ -178,6 +178,7 @@ module.exports = {
devilsfilm: famedigital,
digitalplayground,
dtfsluts: fullpornnetwork,
elegantangel,
evilangel,
eyeontheguy: hush,
fakehub,