Updated profile scrapers to use base actor instead of actor name. Fixes for Reality Kings and Cherry Pimps scrapers.

This commit is contained in:
DebaucheryLibrarian 2020-07-21 01:44:51 +02:00
parent 939eba8e61
commit dff4d15872
46 changed files with 91 additions and 94 deletions

View File

@ -610,7 +610,7 @@ async function scrapeActors(actorNames) {
},
}), {});
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug][baseActor.entryId]);
const newBaseActors = baseActors.filter(baseActor => !existingActorEntriesBySlugAndEntryId[baseActor.slug]?.[baseActor.entryId]);
const [batchId] = newBaseActors.length > 0 ? await knex('batches').insert({ comment: null }).returning('id') : [null];
const curatedActorEntries = batchId && curateActorEntries(newBaseActors, batchId);

View File

@ -126,7 +126,7 @@ async function fetchScene(url, site) {
return res.status;
}
async function fetchProfile(actorName, { site }) {
async function fetchProfile({ name: actorName }, { site }) {
const actorSlug = slugify(actorName, '');
const url = `${site.url}/tour/models/${actorSlug}.html`;
const res = await get(url, '.page-content .row');

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'babes');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'babes');
}
module.exports = {

View File

@ -132,7 +132,7 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
async function fetchProfile(actorName, { site }, include) {
async function fetchProfile({ name: actorName }, { site }, include) {
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);

View File

@ -278,7 +278,7 @@ async function fetchScene(url) {
return scrapeScene(res.body._source); // eslint-disable-line no-underscore-dangle
}
async function fetchProfile(actorName, context, include) {
async function fetchProfile({ name: actorName }, context, include) {
const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
size: 5,
sort: [{

View File

@ -221,7 +221,7 @@ async function fetchScene(url, site, release) {
return scrapeScene(res.item.html, url, site);
}
async function fetchProfile(actorName, scope) {
async function fetchProfile({ name: actorName }, scope) {
const actorSlug = slugify(actorName);
const url = `https://bangbros.com/search/${actorSlug}`;
const res = await bhttp.get(url);

View File

@ -25,8 +25,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
}
async function networkFetchProfile(actorName, context, include) {
return fetchProfile(actorName, context, null, getActorReleasesUrl, include);
async function networkFetchProfile({ name: actorName }, context, include) {
return fetchProfile({ name: actorName }, context, null, getActorReleasesUrl, include);
}
module.exports = {

View File

@ -78,7 +78,7 @@ function scrapeProfile(html) {
return profile;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlug = actorName.replace(/\s+/, '_');
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);

View File

@ -185,7 +185,7 @@ async function fetchScene(url, site) {
return res.status;
}
async function fetchProfile(actorName, context, include) {
async function fetchProfile({ name: actorName }, context, include) {
const searchRes = await qu.get('https://brazzers.com/pornstars-search/', `a[title="${actorName}" i]`, {
Cookie: `textSearch=${encodeURIComponent(actorName)};`,
});

View File

@ -1,31 +1,31 @@
'use strict';
const { get, geta, ctxa, ed } = require('../utils/q');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
const url = qu.url('.text-thumb a');
return scenes.map(({ query }) => {
const url = query.url('.text-thumb a');
const { pathname } = new URL(url);
const channelUrl = qu.url('.badge');
const channelUrl = query.url('.badge');
if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
if (site?.parameters?.extract && query.q('.badge', true) !== site.name) {
return null;
}
const release = {};
release.url = channelUrl ? `${channelUrl}${pathname}` : url;
release.entryId = pathname.match(/\/\d+/)[0].slice(1);
release.title = qu.q('.text-thumb a', true);
release.entryId = pathname.match(/\/trailers\/(.*).html/)[1];
release.title = query.q('.text-thumb a', true);
release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
release.date = query.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = query.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);
release.actors = qu.all('.category a', true);
release.actors = query.all('.category a', true);
release.poster = qu.img('img.video_placeholder, .video-images img');
release.teaser = { src: qu.trailer() };
release.poster = query.img('img.video_placeholder, .video-images img');
release.teaser = { src: query.trailer() };
return release;
}).filter(Boolean);
@ -56,18 +56,18 @@ function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
return release;
}
function scrapeProfile({ q, qa, qtx }) {
function scrapeProfile({ query }) {
const profile = {};
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
const keys = query.all('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = query.all('.model-descr_line:not(.model-descr_rait) p.text').map(el => query.text(el));
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)\s*cm\)/)?.[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)?.[1]);
if (bio.race) profile.ethnicity = bio.race;
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.measurements) {
@ -96,11 +96,11 @@ function scrapeProfile({ q, qa, qtx }) {
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
const avatar = q('.model-img img');
const avatar = query.q('.model-img img');
profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;
const releases = qa('.video-thumb');
profile.releases = scrapeAll(ctxa(releases));
const releases = query.all('.video-thumb');
profile.releases = scrapeAll(qu.initAll(releases));
return profile;
}
@ -109,18 +109,18 @@ async function fetchLatest(site, page = 1) {
const url = site.parameters?.extract
? `https://cherrypimps.com/categories/movies_${page}.html`
: `${site.url}/categories/movies_${page}.html`;
const res = await geta(url, 'div.video-thumb');
const res = await qu.getAll(url, 'div.video-thumb');
return res.ok ? scrapeAll(res.items, site) : res.status;
}
async function fetchScene(url, site, release) {
const res = await get(url);
const res = await qu.get(url);
return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
}
async function fetchProfile(actorName, { site, network, scraper }) {
async function fetchProfile({ name: actorName }, { site, network, scraper }) {
const actorSlug = slugify(actorName);
const actorSlug2 = slugify(actorName, '');
@ -130,10 +130,10 @@ async function fetchProfile(actorName, { site, network, scraper }) {
? [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`]
: [`${origin}/models/${actorSlug}.html`, `${origin}/models/${actorSlug2}.html`];
const res = await get(url);
const res = await qu.get(url);
if (res.ok) return scrapeProfile(res.item);
const res2 = await get(url2);
const res2 = await qu.get(url2);
return res2.ok ? scrapeProfile(res2.item) : res2.status;
}

View File

@ -156,7 +156,7 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
{
type: 'hints',

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'digitalplayground', 'modelprofile');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'digitalplayground', 'modelprofile');
}
module.exports = {

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'fakehub', 'modelprofile');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'fakehub', 'modelprofile');
}
module.exports = {

View File

@ -90,10 +90,10 @@ async function fetchClassicProfile(actorName, { site }) {
return null;
}
async function networkFetchProfile(actorName, context, include) {
async function networkFetchProfile({ name: actorName }, context, include) {
const profile = await ((context.site.parameters?.api && fetchApiProfile(actorName, context, include))
|| (context.site.parameters?.classic && include.scenes && fetchClassicProfile(actorName, context, include)) // classic profiles only have scenes, no bio
|| fetchProfile(actorName, context, true, getActorReleasesUrl, include));
|| fetchProfile({ name: actorName }, context, true, getActorReleasesUrl, include));
return profile;
}

View File

@ -63,7 +63,7 @@ function scrapeSearch(html) {
return document.querySelector('a.image-link')?.href || null;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

View File

@ -107,7 +107,7 @@ async function scrapeProfileBio(html, frontpageProfile, url, name) {
return profile;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const slug = actorName.replace(' ', '_');
const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

View File

@ -96,7 +96,7 @@ async function fetchScene(url, site) {
return res.ok && res.item ? scrapeScene(res.item, url, site) : res.status;
}
async function fetchProfile(actorName, { site }) {
async function fetchProfile({ name: actorName }, { site }) {
const actorSlug = slugify(actorName, '');
const url = `${site.url}/1/model/${actorSlug}`;

View File

@ -559,7 +559,7 @@ async function fetchActorScenes(actorName, apiUrl, siteSlug) {
return [];
}
async function fetchProfile(actorName, context, altSearchUrl, getActorReleasesUrl, include) {
async function fetchProfile({ name: actorName }, context, altSearchUrl, getActorReleasesUrl, include) {
const siteSlug = context.entity.slug || context.site?.slug || context.network?.slug;
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');

View File

@ -381,7 +381,7 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
return scrapeScene(res.item, site, url, baseRelease);
}
async function fetchProfile(actorName, { site }) {
async function fetchProfile({ name: actorName }, { site }) {
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);

View File

@ -2,8 +2,8 @@
const { fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'iconmale');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'iconmale');
}
module.exports = {

View File

@ -381,7 +381,7 @@ async function fetchMovie(url, site) {
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlugA = slugify(actorName, '-');
const actorSlugB = slugify(actorName, '');

View File

@ -154,7 +154,7 @@ async function fetchScene(url, channel, baseRelease) {
return res.ok ? scrapeScene(res.item, url, baseRelease) : res.status;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlug = slugify(actorName);
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
'X-Requested-With': 'XMLHttpRequest',

View File

@ -103,7 +103,7 @@ async function fetchScene(url, channel) {
return res.ok ? scrapeScene(res.item, url, channel) : res.status;
}
async function fetchProfile(actorName, entity, include) {
async function fetchProfile({ name: actorName }, entity, include) {
const url = `http://killergram.com/episodes.asp?page=episodes&model=${encodeURI(actorName)}&ct=model`;
const res = await qu.get(url, '#content', null, {
followRedirects: false,

View File

@ -148,7 +148,7 @@ async function fetchScene(url, site) {
return res.status;
}
async function fetchProfile(actorName, entity, include) {
async function fetchProfile({ name: actorName }, entity, include) {
const searchRes = await qu.getAll(`https://kink.com/search?type=performers&q=${actorName}`, '.model');
if (searchRes.ok) {

View File

@ -179,7 +179,7 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site, useGallery);
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`);
const data = res.body;

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'men', 'modelprofile');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'men', 'modelprofile');
}
module.exports = {

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'devianthardcore');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'devianthardcore');
}
module.exports = {

View File

@ -86,7 +86,7 @@ async function fetchScene(url, channel) {
}
/* API protected
async function fetchProfile(actorName, context , site) {
async function fetchProfile({ name: actorName }, context , site) {
const session = bhttp.session();
await session.get(`https://tour.${site.slug}.com`);

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'milehighmedia');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'milehighmedia');
}
module.exports = {

View File

@ -1,4 +1,3 @@
'use strict';
/* eslint-disable newline-per-chained-call */
@ -7,7 +6,7 @@ const bhttp = require('bhttp');
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
const moment = require('moment');
const { ex } = require('../utils/q');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { inchesToCm, lbsToKg } = require('../utils/convert');
const { cookieToData } = require('../utils/cookies');
@ -140,7 +139,7 @@ async function getSession(url) {
}
function scrapeProfile(data, html, releases = [], networkName) {
const { qa, qd } = ex(html);
const { query } = qu.extract(html);
const profile = {
description: data.bio,
@ -169,8 +168,8 @@ function scrapeProfile(data, html, releases = [], networkName) {
|| data.images.card_main_rect[0].xs?.url;
}
const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
const birthdate = query.all('li').find(el => /Date of Birth/.test(el.textContent));
if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY');
profile.releases = releases.map(release => scrapeScene(release, null, null, networkName));
@ -222,7 +221,7 @@ async function fetchScene(url, site) {
return null;
}
async function fetchProfile(actorName, networkSlug, actorPath = 'model') {
async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model') {
const url = `https://www.${networkSlug}.com`;
const { session, instanceToken } = await getSession(url);

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'mofos');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'mofos');
}
module.exports = {

View File

@ -139,7 +139,7 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlug = slugify(actorName);
const res = await bhttp.get(`https://www.naughtyamerica.com/pornstar/${actorSlug}`);

View File

@ -136,7 +136,7 @@ async function fetchScene(url, site) {
return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
async function fetchProfile(actorName, { site }) {
async function fetchProfile({ name: actorName }, { site }) {
const firstLetter = actorName.charAt(0).toLowerCase();
const origin = slugUrlMap[site.slug] || site.url;

View File

@ -100,7 +100,7 @@ async function fetchScene(url, entity) {
return res.ok ? scrapeScene(res.item, entity) : res.status;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const url = `https://pervcity.com/models/${slugify(actorName)}.html`;
const res = await qu.get(url);

View File

@ -100,8 +100,6 @@ async function scrapeProfile({ query }, url, include) {
profile.releases = await fetchActorReleases({ query }, url);
}
console.log(profile);
return profile;
}
@ -117,7 +115,7 @@ async function fetchScene(url, channel) {
return res.ok ? scrapeScene(res.item, url, channel) : res.status;
}
async function fetchProfile(actorName, entity, include) {
async function fetchProfile({ name: actorName }, entity, include) {
const url = `http://letsdoeit.com/models/${slugify(actorName)}.en.html`;
const res = await qu.get(url);

View File

@ -51,7 +51,7 @@ async function scrapeProfile(html, _url, actorName) {
return profile;
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
/* Model pages are not reliably associated with actual porn stars

View File

@ -175,7 +175,7 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
async function fetchProfile(actorName) {
async function fetchProfile({ name: actorName }) {
const actorSearchSlug = slugify(actorName, '+');
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
const modelRes = await geta(url, '.model h3 a');

View File

@ -41,8 +41,8 @@ async function fetchLatestWrap(site, page = 1) {
return fetchLatest(site, page);
}
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'realitykings');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'realitykings');
}
module.exports = {

View File

@ -217,7 +217,7 @@ async function fetchScene(url, site) {
return null;
}
async function fetchProfile(actorName, context, include, page = 1, source = 0) {
async function fetchProfile({ name: actorName }, context, include, page = 1, source = 0) {
const letter = actorName.charAt(0).toUpperCase();
const sources = [
@ -244,11 +244,11 @@ async function fetchProfile(actorName, context, include, page = 1, source = 0) {
return null;
}
return fetchProfile(actorName, context, include, page + 1, source);
return fetchProfile({ name: actorName }, context, include, page + 1, source);
}
if (sources[source + 1]) {
return fetchProfile(actorName, context, include, 1, source + 1);
return fetchProfile({ name: actorName }, context, include, 1, source + 1);
}
return null;

View File

@ -134,7 +134,7 @@ async function fetchScene(url, entity) {
return res.status;
}
async function fetchProfile(actorName, { entity }, include) {
async function fetchProfile({ name: actorName }, { entity }, include) {
const res = await http.get(`https://teencoreclub.com/api/actors?query=${actorName}`);
if (res.ok) {

View File

@ -79,7 +79,7 @@ async function fetchScene(url, channel) {
return res.status;
}
async function fetchProfile(actorName, entity, include) {
async function fetchProfile({ name: actorName }, entity, include) {
const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
const res = await qu.get(url);

View File

@ -2,8 +2,8 @@
const { fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'transangels');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'transangels');
}
module.exports = {

View File

@ -2,8 +2,8 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
async function networkFetchProfile(actorName) {
return fetchProfile(actorName, 'twistys');
async function networkFetchProfile({ name: actorName }) {
return fetchProfile({ name: actorName }, 'twistys');
}
module.exports = {

View File

@ -233,7 +233,7 @@ async function fetchScene(url, site, baseRelease) {
return res.code;
}
async function fetchProfile(actorName, { site }, include) {
async function fetchProfile({ name: actorName }, { site }, include) {
const origin = site.url;
const actorSlug = slugify(actorName);
const url = `${origin}/api/${actorSlug}`;

View File

@ -23,8 +23,8 @@ function getActorReleasesUrl(actorPath, page = 1) {
return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
}
async function networkFetchProfile(actorName, context, include) {
return fetchProfile(actorName, context, null, getActorReleasesUrl, include);
async function networkFetchProfile(baseActor, context, include) {
return fetchProfile(baseActor, context, null, getActorReleasesUrl, include);
}
module.exports = {

View File

@ -326,7 +326,7 @@ function init(element, window) {
const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context
.reduce((acc, [key, func]) => ({
...acc,
[key]: (...args) => (args[0].nodeType === undefined // allow for different context
[key]: (...args) => (args[0]?.nodeType === undefined // allow for different context
? func(element, ...args)
: func(...args)),
}), {});