Updated profile scrapers to use base actor instead of actor name. Fixes for Reality Kings and Cherry Pimps scrapers.

This commit is contained in:
DebaucheryLibrarian
2020-07-21 01:44:51 +02:00
parent 939eba8e61
commit dff4d15872
46 changed files with 91 additions and 94 deletions

View File

@@ -1,31 +1,31 @@
'use strict';
const { get, geta, ctxa, ed } = require('../utils/q');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
const url = qu.url('.text-thumb a');
return scenes.map(({ query }) => {
const url = query.url('.text-thumb a');
const { pathname } = new URL(url);
const channelUrl = qu.url('.badge');
const channelUrl = query.url('.badge');
if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
if (site?.parameters?.extract && query.q('.badge', true) !== site.name) {
return null;
}
const release = {};
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
release.title = qu.q('.text-thumb a', true);
release.entryId = pathname.match(/\/trailers\/(.*).html/)[1];
release.title = query.q('.text-thumb a', true);
release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
release.date = query.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = query.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
release.actors = qu.all('.category a', true);
release.actors = query.all('.category a', true);
release.poster = qu.img('img.video_placeholder, .video-images img');
release.teaser = { src: qu.trailer() };
release.poster = query.img('img.video_placeholder, .video-images img');
release.teaser = { src: query.trailer() };
return release;
}).filter(Boolean);
@@ -56,18 +56,18 @@ function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
return release;
}
function scrapeProfile({ q, qa, qtx }) {
function scrapeProfile({ query }) {
const profile = {};
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
const keys = query.all('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = query.all('.model-descr_line:not(.model-descr_rait) p.text').map(el => query.text(el));
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)\s*cm\)/)?.[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)?.[1]);
if (bio.race) profile.ethnicity = bio.race;
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.measurements) {
@@ -96,11 +96,11 @@ function scrapeProfile({ q, qa, qtx }) {
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
const avatar = q('.model-img img');
const avatar = query.q('.model-img img');
profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;
const releases = qa('.video-thumb');
profile.releases = scrapeAll(ctxa(releases));
const releases = query.all('.video-thumb');
profile.releases = scrapeAll(qu.initAll(releases));
return profile;
}
@@ -109,18 +109,18 @@ async function fetchLatest(site, page = 1) {
const url = site.parameters?.extract
? `https://cherrypimps.com/categories/movies_${page}.html`
: `${site.url}/categories/movies_${page}.html`;
const res = await geta(url, 'div.video-thumb');
const res = await qu.getAll(url, 'div.video-thumb');
return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetches a single scene page and turns it into a release via `scrapeScene`.
 *
 * @param {*} url - Passed to `qu.get` and forwarded to `scrapeScene`; presumably the scene page URL.
 * @param {*} site - Forwarded unchanged to `scrapeScene`.
 * @param {*} release - Forwarded unchanged to `scrapeScene` as its last argument.
 * @returns {Promise<*>} The `scrapeScene` result when `res.ok` is truthy, otherwise `res.status`.
 */
async function fetchScene(url, site, release) {
	// `res` is declared exactly once: the superseded bare `get(url)` call is dropped in favour of
	// `qu.get(url)`, since two `const res` declarations in the same scope do not parse.
	const res = await qu.get(url);

	return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
}
async function fetchProfile(actorName, { site, network, scraper }) {
async function fetchProfile({ name: actorName }, { site, network, scraper }) {
const actorSlug = slugify(actorName);
const actorSlug2 = slugify(actorName, '');
@@ -130,10 +130,10 @@ async function fetchProfile(actorName, { site, network, scraper }) {
? [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`]
: [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`];
const res = await get(url);
const res = await qu.get(url);
if (res.ok) return scrapeProfile(res.item);
const res2 = await get(url2);
const res2 = await qu.get(url2);
return res2.ok ? scrapeProfile(res2.item) : res2.status;
}