Added profile scraper tests (WIP), fixed some profile scrapers. Fixed slugify not breaking existing slugs.

This commit is contained in:
DebaucheryLibrarian 2026-01-10 02:58:50 +01:00
parent 5acc2c607b
commit bddc33a734
12 changed files with 293 additions and 111 deletions

View File

@ -430,13 +430,13 @@ const networks = [
{ {
slug: 'hussiepass', slug: 'hussiepass',
name: 'Hussie Pass', name: 'Hussie Pass',
url: 'https://www.hussiepass.com', url: 'https://hussiepass.com',
parent: 'hush', parent: 'hush',
}, },
{ {
slug: 'hushpass', slug: 'hushpass',
name: 'Hush Pass', name: 'Hush Pass',
url: 'https://www.hushpass.com', url: 'https://hushpass.com',
parent: 'hush', parent: 'hush',
parameters: { parameters: {
t1: true, t1: true,

View File

@ -4785,7 +4785,7 @@ const sites = [
{ {
slug: 'hussiepass', slug: 'hussiepass',
name: 'Hussie Pass', name: 'Hussie Pass',
url: 'https://www.hussiepass.com', url: 'https://hussiepass.com',
parent: 'hussiepass', parent: 'hussiepass',
}, },
{ {

View File

@ -216,7 +216,8 @@ function getUrl(site) {
} }
async function getSession(site, parameters, url) { async function getSession(site, parameters, url) {
if (site.slug === 'mindgeek' || site.parameters?.parentSession === false) { // if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
if (site.slug === 'aylo') {
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels // most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
return null; return null;
} }
@ -224,7 +225,7 @@ async function getSession(site, parameters, url) {
const cookieJar = new CookieJar(); const cookieJar = new CookieJar();
const session = http.session({ cookieJar }); const session = http.session({ cookieJar });
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession) const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
? site.parent.url ? site.parent.url
: (url || site.url); : (url || site.url);
@ -360,7 +361,12 @@ function scrapeProfile(data, networkName, _releases = []) {
}; };
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
if (profile.gender === 'male') {
profile.penisLength = Number(data.measurements);
} else {
profile.measurements = data.measurements; profile.measurements = data.measurements;
}
profile.dateOfBirth = qu.parseDate(data.birthday); profile.dateOfBirth = qu.parseDate(data.birthday);
profile.birthPlace = data.birthPlace; profile.birthPlace = data.birthPlace;

View File

@ -254,7 +254,7 @@ async function scrapeProfile({ query, el }, channel, options) {
}; };
}, {}); }, {});
if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); if (bio.date_of_birth) profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
if (bio.birthplace) profile.birthPlace = bio.birthplace; if (bio.birthplace) profile.birthPlace = bio.birthplace;
if (bio.fun_fact) profile.description = bio.fun_fact; if (bio.fun_fact) profile.description = bio.fun_fact;
@ -262,6 +262,7 @@ async function scrapeProfile({ query, el }, channel, options) {
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]); if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]); if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
if (bio.shoe_size) profile.foot = Number(bio.shoe_size);
profile.measurements = bio.measurements; profile.measurements = bio.measurements;
@ -280,7 +281,7 @@ async function scrapeProfile({ query, el }, channel, options) {
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim()); if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean); profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
profile.avatar = [ profile.avatar = [
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }), query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
@ -327,29 +328,29 @@ async function fetchScene(url, site, baseRelease) {
return scrapeScene(res.item, site, url, baseRelease); return scrapeScene(res.item, site, url, baseRelease);
} }
async function fetchProfile({ name: actorName }, { site }, options) { async function fetchProfile({ name: actorName }, { channel }, options) {
const actorSlugA = slugify(actorName, ''); const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName); const actorSlugB = slugify(actorName);
const t1 = site.parameters?.t1 ? 't1/' : ''; const t1 = channel.parameters?.t1 ? 't1/' : '';
const res1 = site.parameters?.profile const res1 = channel.parameters?.profile
? await qu.get(util.format(site.parameters.profile, actorSlugA)) ? await qu.get(util.format(channel.parameters.profile, actorSlugA))
: await qu.get(`${site.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false }); : await qu.get(`${channel.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false });
const res = (res1.ok && res1) const res = (res1.ok && res1)
|| (site.parameters?.profile && await qu.get(util.format(site.parameters.profile, actorSlugB))) || (channel.parameters?.profile && await qu.get(util.format(channel.parameters.profile, actorSlugB)))
|| await qu.get(`${site.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false }); || await qu.get(`${channel.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false });
if (!res.ok) { if (!res.ok) {
return res.status; return res.status;
} }
if (site.parameters?.t1) { if (channel.parameters?.t1) {
return scrapeProfileT1(res.item, site); return scrapeProfileT1(res.item, channel);
} }
return scrapeProfile(res.item, site, options); return scrapeProfile(res.item, channel, options);
} }
module.exports = { module.exports = {

View File

@ -208,7 +208,7 @@ async function fetchProfile({ name: actorName }, { entity }) {
const actorSlug = slugify(actorName); const actorSlug = slugify(actorName);
// 8K sites don't have avatar or interview on model page, always use 5K site // 8K sites don't have avatar or interview on model page, always use 5K site
const res = await unprint.get(`${entity.slug === '5kvids' ? 'https://www.5kporn.com' : entity.url}/models/${actorSlug}`, { const res = await unprint.get(`${entity.slug === '8kmembers' ? 'https://www.8kmilfs.com' : entity.url}/models/${actorSlug}`, {
headers: { headers: {
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}, },

View File

@ -3,7 +3,6 @@
const unprint = require('unprint'); const unprint = require('unprint');
const http = require('../utils/http'); const http = require('../utils/http');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert'); const { convert } = require('../utils/convert');
function scrapeAll(scenes, channel) { function scrapeAll(scenes, channel) {
@ -76,41 +75,6 @@ async function scrapeScene({ query }, url, channel) {
return release; return release;
} }
/**
 * Build an actor profile from a model page.
 *
 * The `.model-info li, .model-desc li` items are read into a bio object whose
 * keys are the slugified `span` content of each item and whose values are the
 * item text. The avatar is taken from `.model-photo img, img[alt="model"]`.
 *
 * @param {object} context - page context
 * @param {object} context.query - query helper providing `all()` and `img()`
 * @returns {Promise<object>} profile; empty when neither avatar nor bio is found
 */
async function scrapeProfile({ query }) {
  const bioEntries = query
    .all('.model-info li, .model-desc li')
    .map((item) => [
      slugify(unprint.query.content(item, 'span')),
      unprint.query.text(item),
    ]);

  const bio = Object.fromEntries(bioEntries);
  const avatarUrl = query.img('.model-photo img, img[alt="model"]');

  // first candidate has the `-<width>x<height>` suffix removed, second is the URL as found
  const profile = avatarUrl
    ? { avatar: [avatarUrl.replace(/-\d+x\d+/, ''), avatarUrl] }
    : {};

  if (Object.keys(bio).length === 0) {
    return profile;
  }

  return Object.assign(profile, {
    description: bio.bio,
    dateOfBirth: bio.birthdate && unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
    birthPlace: bio.born,
    measurements: bio.measurements,
    height: convert(bio.height, 'cm'),
    weight: convert(bio.weight, 'lb', 'kg'),
    eyes: bio.eyes,
    hairColor: bio.hair,
  });
}
async function fetchLatestContent(url, parameters) { async function fetchLatestContent(url, parameters) {
if (parameters.useBrowser) { if (parameters.useBrowser) {
const res = await http.get(url, { const res = await http.get(url, {
@ -187,16 +151,54 @@ async function fetchScene(url, channel) {
return res.status; return res.status;
} }
/**
 * Map a model data object onto an actor profile.
 *
 * @param {object} data - model record; key casing is unreliable, so keys are lowercased first
 * @returns {Promise<object>} profile with `hasTattoos` / `hasPiercings` only set when tagged
 */
async function scrapeProfile(data) {
  // unreliable key case, lowercase all
  const lowercasedEntries = Object.entries(data).map(([key, value]) => [key.toLowerCase(), value]);
  const bio = Object.fromEntries(lowercasedEntries);

  const profile = {
    entryId: bio.id,
    gender: bio.gender,
    description: bio.bio,
    birthPlace: bio.born,
    dateOfBirth: unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
    age: bio.age,
    measurements: bio.measurements,
    height: convert(bio.height, 'cm'),
    weight: convert(bio.weight, 'lb', 'kg'),
    eyes: bio.eyes,
    hairColor: bio.hair,
    avatar: bio.thumb,
  };

  // tags are matched exactly as split on ',', without trimming
  const tags = new Set(bio.tags?.split(',') || []);

  if (tags.has('tattoos')) {
    profile.hasTattoos = true;
  }

  if (tags.has('piercing')) {
    profile.hasPiercings = true;
  }

  return profile;
}
async function fetchProfile(actor, context) { async function fetchProfile(actor, context) {
const session = http.session();
await http.get(context.channel.url, { session });
const url = `${context.channel.url}/models/${actor.slug}`; const url = `${context.channel.url}/models/${actor.slug}`;
const res = await unprint.get(url);
const res = await unprint.get(url, {
parser: {
runScripts: 'dangerously',
},
});
if (res.ok) { if (res.ok) {
return scrapeProfile(res.context, context.channel); const data = res.context.query.json('#__NEXT_DATA__');
if (data.props.pageProps.model) {
return scrapeProfile(data.props.pageProps.model, context.channel);
}
return null;
} }
return res.status; return res.status;

View File

@ -220,8 +220,6 @@ const scrapers = {
bang, bang,
bangbros: aylo, bangbros: aylo,
bjraw: radical, bjraw: radical,
blacked: vixen,
blackedraw: vixen,
bluedonkeymedia, bluedonkeymedia,
delphine: modelmedia, delphine: modelmedia,
meidenvanholland: bluedonkeymedia, meidenvanholland: bluedonkeymedia,
@ -233,7 +231,6 @@ const scrapers = {
burningangel: gamma, burningangel: gamma,
cherrypimps, cherrypimps,
cumlouder, cumlouder,
deeper: vixen,
deeplush: nubiles, deeplush: nubiles,
devilsfilm: famedigital, devilsfilm: famedigital,
digitalplayground: aylo, digitalplayground: aylo,
@ -276,6 +273,7 @@ const scrapers = {
kink, kink,
kinkmen: kink, kinkmen: kink,
kinkvr: kink, kinkvr: kink,
letsdoeit: aylo,
loveherfilms, loveherfilms,
loveherfeet: loveherfilms, loveherfeet: loveherfilms,
shelovesblack: loveherfilms, shelovesblack: loveherfilms,
@ -287,7 +285,6 @@ const scrapers = {
mariskax, mariskax,
metrohd: aylo, metrohd: aylo,
milehighmedia: aylo, milehighmedia: aylo,
milfy: vixen,
milfvr: wankzvr, milfvr: wankzvr,
missax, missax,
mofos: aylo, mofos: aylo,
@ -299,7 +296,6 @@ const scrapers = {
nfbusty: nubiles, nfbusty: nubiles,
nubilefilms: nubiles, nubilefilms: nubiles,
nubiles, nubiles,
nubilesporn: nubiles,
nympho: mikeadriano, nympho: mikeadriano,
onlyprince: fullpornnetwork, onlyprince: fullpornnetwork,
pascalssubsluts, pascalssubsluts,
@ -353,15 +349,22 @@ const scrapers = {
transbella: porndoe, transbella: porndoe,
tranzvr: wankzvr, tranzvr: wankzvr,
trueanal: mikeadriano, trueanal: mikeadriano,
tushy: vixen,
tushyraw: vixen,
twistys: aylo, twistys: aylo,
vipsexvault: porndoe, vipsexvault: porndoe,
virtualtaboo, virtualtaboo,
darkroomvr: virtualtaboo, darkroomvr: virtualtaboo,
onlytarts: virtualtaboo, onlytarts: virtualtaboo,
oopsfamily: virtualtaboo, oopsfamily: virtualtaboo,
// vixen
vixen, vixen,
blacked: vixen,
blackedraw: vixen,
tushy: vixen,
tushyraw: vixen,
deeper: vixen,
milfy: vixen,
slayed: vixen,
wifey: vixen,
vrcosplayx: badoink, vrcosplayx: badoink,
wankzvr, wankzvr,
wicked: gamma, wicked: gamma,

View File

@ -31,12 +31,26 @@ function scrapeAll(scenes) {
}); });
} }
/**
 * Fetch one page of a channel's latest releases.
 *
 * @param {object} channel - entity with `url` and optional `parameters.latest` path template
 * @param {number} page - page number substituted into the path template
 * @returns {Promise<*>} the `scrapeAll` result on success, otherwise the response status
 */
async function fetchLatest(channel, page) {
  const pathTemplate = channel.parameters?.latest || '/categories/movies_{page}_d.html';
  const latestUrl = `${channel.url}${format(pathTemplate, { page })}`;

  const res = await unprint.get(latestUrl, {
    selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
  });

  return res.ok
    ? scrapeAll(res.context, channel)
    : res.status;
}
function scrapeScene({ query }, { url, entity }) { function scrapeScene({ query }, { url, entity }) {
const release = {}; const release = {};
release.entryId = getEntryId(url); release.entryId = getEntryId(url);
release.title = query.content(['#media-holder .title', '.content-holder h1', '#scene h1', 'h2.titular', 'title'])?.replace(/\s+-$/, ''); release.title = query.content(['#media-holder .title', '.content-holder h1', '#scene h1', 'h2.titular', 'title'])?.replace(/\s+-$/, '');
console.log(release);
release.date = query.date('#sceneInfo .date, #trailer-data .date', 'YYYY-MM-DD'); release.date = query.date('#sceneInfo .date, #trailer-data .date', 'YYYY-MM-DD');
release.duration = query.duration('#sceneInfo .data-others, #trailer-data', /\d+:\d+/); release.duration = query.duration('#sceneInfo .data-others, #trailer-data', /\d+:\d+/);
@ -67,6 +81,28 @@ function scrapeScene({ query }, { url, entity }) {
return release; return release;
} }
/**
 * Build avatar source candidates from an image URL by removing its resize
 * query parameters (`imgh`, `imgw`, `imgq`; presumably height, width and
 * quality, to be confirmed against the CDN).
 *
 * @param {string} [source] - absolute image URL
 * @returns {string[]} the stripped URL followed by the original URL, a single
 *   entry when stripping changed nothing, or an empty list when `source` is
 *   missing or is not a valid absolute URL
 */
function stripSizeParams(source) {
  if (!source) {
    return [];
  }

  let url;

  try {
    url = new URL(source);
  } catch (_error) {
    // best effort: a source that cannot be parsed yields no candidates
    return [];
  }

  const params = url.searchParams;

  params.delete('imgh');
  params.delete('imgw');
  params.delete('imgq');

  // only append the separator when parameters remain, to avoid a dangling '?'
  const query = params.toString();
  const stripped = query
    ? `${url.origin}${url.pathname}?${query}`
    : `${url.origin}${url.pathname}`;

  // don't list the same URL twice when there was nothing to strip
  return stripped === source
    ? [source]
    : [stripped, source];
}
function scrapeProfile({ query }) { function scrapeProfile({ query }) {
const profile = {}; const profile = {};
const bioKeys = query.contents('.statsText b'); const bioKeys = query.contents('.statsText b');
@ -77,13 +113,14 @@ function scrapeProfile({ query }) {
[slugify(key, '_')]: bioValues[index], [slugify(key, '_')]: bioValues[index],
}), {}); }), {});
profile.description = query.contents('.descriptionText'); profile.description = query.content('.descriptionText');
profile.avatar = [ profile.avatar = [
...stripSizeParams(query.img('.model-bio-pic img', { attribute: 'src' })), // not available on e.g. Raw Attack
query.img('.model-bio-pic img', { attribute: 'src0_3x' }),
query.img('.model-bio-pic img', { attribute: 'src0_2x' }), query.img('.model-bio-pic img', { attribute: 'src0_2x' }),
query.img('.model-bio-pic img', { attribute: 'src0_3x' }), // unnecessarily big
query.img('.model-bio-pic img', { attribute: 'src0_1x' }), query.img('.model-bio-pic img', { attribute: 'src0_1x' }),
]; ].filter(Boolean);
profile.height = Number(bio.height?.match(/(\d+)\s?cm/i)?.[1]); profile.height = Number(bio.height?.match(/(\d+)\s?cm/i)?.[1]);
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
@ -108,18 +145,6 @@ function scrapeProfile({ query }) {
return profile; return profile;
} }
/**
 * Fetch one page of a channel's latest releases.
 *
 * @param {object} channel - entity with `url` and optional `parameters.latest` path template
 * @param {number} page - page number substituted into the path template
 * @returns {Promise<*>} the `scrapeAll` result on success, otherwise the response status
 */
async function fetchLatest(channel, page) {
  const pathTemplate = channel.parameters?.latest || '/categories/movies_{page}_d.html';
  const latestUrl = `${channel.url}${format(pathTemplate, { page })}`;

  const res = await unprint.get(latestUrl, {
    selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
  });

  return res.ok
    ? scrapeAll(res.context, channel)
    : res.status;
}
async function fetchProfile(actor, channel) { async function fetchProfile(actor, channel) {
if (actor.url) { if (actor.url) {
const res = await unprint.get(actor.url); const res = await unprint.get(actor.url);

View File

@ -411,19 +411,16 @@ async function fetchScene(url, channel, baseRelease, options) {
return res.status; return res.status;
} }
async function scrapeProfile(data, channel) { async function scrapeProfile(data, _channel) {
const model = data.model; const model = data.model;
const profile = {}; const profile = {};
// most details seemingly unavailable in graphql
if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
profile.gender = genderMap[model.sex]; profile.gender = genderMap[model.sex];
profile.hair = model.hairColour;
profile.nationality = model.nationality;
if (model.biography.trim().length > 0) profile.description = model.biography; if (model.biography.trim().length > 0) profile.description = model.biography;
// most details seemingly unavailable in graphql
if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`; if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
if (model.waistMeasurment) profile.waist = model.waistMeasurment; if (model.waistMeasurment) profile.waist = model.waistMeasurment;
if (model.hipMeasurment) profile.hip = model.hipMeasurment; if (model.hipMeasurment) profile.hip = model.hipMeasurment;
@ -432,9 +429,11 @@ async function scrapeProfile(data, channel) {
profile.poster = getAvatarFallbacks(model.images.profile); profile.poster = getAvatarFallbacks(model.images.profile);
profile.banner = getAvatarFallbacks(model.images.poster); profile.banner = getAvatarFallbacks(model.images.poster);
/*
if (model.videos) { if (model.videos) {
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel); profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
} }
*/
return profile; return profile;
} }
@ -558,6 +557,7 @@ async function fetchProfile(actor, { channel }) {
) { ) {
model: findOneModel(input: { slug: $slug, site: $site }) { model: findOneModel(input: { slug: $slug, site: $site }) {
name name
sex
biography biography
images { images {
listing { listing {

View File

@ -1,6 +1,7 @@
'use strict'; 'use strict';
const { convert, convertMany } = require('convert'); const { convert, convertMany } = require('convert');
const { decode } = require('html-entities');
const logger = require('../logger')(__filename); const logger = require('../logger')(__filename);
@ -60,18 +61,20 @@ function kgToLbs(kgs) {
function convertManyApi(input, to) { function convertManyApi(input, to) {
const curatedInput = input const curatedInput = input
.replace('\'', 'ft') .replace(/[']\s*/, 'ft ') // ensure 1 space
.replace(/"|''/, 'in') .replace(/["”]|('')/, 'in') // 54”
.replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol .replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol
return Math.round(convertMany(curatedInput).to(to)) || null; return Math.round(convertMany(curatedInput).to(to)) || null;
} }
function convertApi(input, fromOrTo, to) { function convertApi(rawInput, fromOrTo, to) {
if (!input) { if (!rawInput) {
return null; return null;
} }
const input = decode(rawInput); // remove html entities, e.g. 5' 8" for 5' 8"
try { try {
if (typeof input === 'string' && to === undefined) { if (typeof input === 'string' && to === undefined) {
return convertManyApi(input, fromOrTo); return convertManyApi(input, fromOrTo);

View File

@ -42,7 +42,7 @@ const accentMap = {
}; };
const plainCharRegex = /[a-zA-Z0-9]/; const plainCharRegex = /[a-zA-Z0-9]/;
const defaultPunctuationRegex = /[.,?!:;&'"“”…()[]{}<>\/*—-]/; const defaultPunctuationRegex = /[.,?!:;&'"“”…()[]{}<>\/*—]/;
const defaultSymbolRegex = /[@$€£#%^+=\\~]/; const defaultSymbolRegex = /[@$€£#%^+=\\~]/;
function slugify(strings, delimiter = '-', { function slugify(strings, delimiter = '-', {
@ -66,6 +66,7 @@ function slugify(strings, delimiter = '-', {
: string; : string;
const normalized = casedString const normalized = casedString
.replace(/[_-]/g, ' ')
.split('') .split('')
.map((char) => { .map((char) => {
if (char === ' ') { if (char === ' ') {

View File

@ -4,10 +4,124 @@ const test = require('node:test');
const assert = require('node:assert/strict'); const assert = require('node:assert/strict');
const argv = require('../src/argv'); const argv = require('../src/argv');
const include = require('../src/utils/argv-include')(argv);
const slugify = require('../src/utils/slugify');
const scrapers = require('../src/scrapers/scrapers'); const scrapers = require('../src/scrapers/scrapers');
const { fetchEntitiesBySlug } = require('../src/entities'); const { fetchEntitiesBySlug } = require('../src/entities');
const { resolveLayoutScraper } = require('../src/scrapers/resolve');
const getRecursiveParameters = require('../src/utils/get-recursive-parameters');
const knex = require('../src/knex');
// Profile scraper fixtures: for each entity slug, an actor name to look up and
// the profile fields that scraper is expected to return for that actor.
// Each field should be listed once per entry; every listed field is validated.
const actors = [
  // jules jordan
  { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
  // gamma
  { entity: 'wicked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'xempire', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  // vixen
  { entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'tushy', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'tushyraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'blacked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'blackedraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
  { entity: 'slayed', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] },
  { entity: 'deeper', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] },
  { entity: 'milfy', name: 'Clea Gaultier', fields: ['gender', 'avatar', 'description'] },
  { entity: 'wifey', name: 'Danielle Renae', fields: ['gender', 'avatar', 'description'] },
  // teamskeet
  { entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] },
  { entity: 'teamskeet', name: 'Kali Roses', fields: ['description', 'avatar', 'measurements', 'nationality', 'ethnicity', 'hairColor', 'hasPiercings', 'hasTattoos'] }, // tattoos
  // analvids
  { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases'] },
  // mike adriano
  { entity: 'trueanal', name: 'Brenna McKenna', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },
  { entity: 'analonly', name: 'Lilith Grace', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
  { entity: 'allanal', name: 'Lexi Lore', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
  { entity: 'swallowed', name: 'Brooklyn Gray', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },
  { entity: 'nympho', name: 'Gianna Dior', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
  { entity: 'dirtyauditions', name: 'Nicole Kitt', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
  // spizoo
  { entity: 'spizoo', name: 'Charlotte Sins', fields: ['description', 'avatar', 'dateOfBirth', 'ethnicity', 'nationality', 'height', 'measurements', 'hasTattoos', 'hasPiercings', 'hairColor', 'eyes', 'butt', 'pussy'] },
  { entity: 'rawattack', name: 'Kitana Montana', fields: ['avatar', 'dateOfBirth', 'nationality', 'measurements', 'eyes', 'height', 'hairColor', 'hasTattoos'] },
  // hush / hussiepass
  { entity: 'hussiepass', name: 'Roxie Sinner', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'foot', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
  { entity: 'eyeontheguy', name: 'Tommy Gunn', fields: ['avatar'] },
  { entity: 'interracialpovs', name: 'Nia Nacci', fields: ['avatar', 'aliases', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
  { entity: 'povpornstars', name: 'Anna Bell Peaks', fields: ['avatar', 'aliases', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
  { entity: 'seehimfuck', name: 'Sheem The Dream', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'penisLength', 'circumcised', 'socials'] },
  { entity: 'hushpass', name: 'Dylan Ryder', fields: ['avatar'] },
  { entity: 'interracialpass', name: 'Aidra Fox', fields: ['avatar', 'height', 'measurements'] },
  // kelly madison / 8K
  { entity: 'kellymadison', name: 'Ava Addams', fields: ['avatar', 'description', 'age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
  { entity: '8kmembers', name: 'Angie Lynx', fields: ['age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
  // aylo
  { entity: 'brazzers', name: 'Lexi Lore', fields: ['avatar', 'description', 'gender', 'height', 'weight', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity', 'hairColor', 'hasTattoos', 'hasPiercings'] },
  { entity: 'digitalplayground', name: 'Elly Clutch', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] },
  { entity: 'realitykings', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] },
  { entity: 'fakehub', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] },
  { entity: 'babes', name: 'Alina Lopez', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity', 'hasTattoos', 'hasPiercings'] },
  { entity: 'letsdoeit', name: 'Nicole Doshi', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] },
  { entity: 'men', name: 'Cade Maddox', fields: ['avatar', 'description', 'gender', 'height', 'ethnicity', 'penisLength', 'dateOfBirth', 'weight', 'hairColor', 'hasTattoos'] },
];
const actorScrapers = scrapers.actors; const actorScrapers = scrapers.actors;
const source = argv.source?.[0] || null;
/**
 * Check that a URL is well-formed and that fetching it yields the expected
 * content type.
 *
 * @param {string|object} url - URL string, or an object with `src` and an
 *   optional `expectType` map from the served content type to the type it
 *   should be treated as
 * @param {string} [mime='image/'] - substring the resolved content type must contain
 * @returns {Promise<boolean>} false for a missing or malformed URL, or when the
 *   response has no matching content type
 */
async function validateUrl(url, mime = 'image/') {
  if (!url) {
    return false;
  }

  const href = url.src || url;

  try {
    new URL(href); // eslint-disable-line no-new
  } catch (_error) {
    return false;
  }

  const res = await fetch(href);
  const type = res.headers.get('content-type');
  const resolvedType = url.expectType?.[type] || type;

  // headers.get() returns null when the server sends no content-type
  return typeof resolvedType === 'string' && resolvedType.includes(mime);
}
// string that is longer than the given number of characters
const isTextLongerThan = (minLength) => (value) => typeof value === 'string' && value.length > minLength;
const isBoolean = (value) => typeof value === 'boolean';
const isNonZeroNumber = (value) => Boolean(Number(value));

// run the check on each URL in order, stopping at the first one that fails
const validateSequentially = (value, check) => [].concat(value).reduce(async (pending, url) => {
  const validSoFar = await pending;

  return validSoFar
    ? check(url)
    : validSoFar;
}, Promise.resolve(true));

// Per-field profile validators, keyed by profile field name. Each returns a
// truthy value (or a promise of one) when the scraped value looks valid.
const validators = {
  age: isNonZeroNumber,
  gender: (value) => value && ['female', 'male', 'transsexual'].includes(value.toLowerCase()),
  description: isTextLongerThan(3),
  birthPlace: isTextLongerThan(3),
  birthCountry: isTextLongerThan(1),
  nationality: isTextLongerThan(3),
  height: (value) => isNonZeroNumber(value) || /\d'\d{1,2}"/.test(value),
  weight: isNonZeroNumber,
  eyes: isTextLongerThan(3),
  hairColor: isTextLongerThan(3),
  measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
  dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
  hasTattoos: isBoolean,
  hasPiercings: isBoolean,
  avatar: async (value) => validateSequentially(value, (url) => validateUrl(url)),
  socials: async (value) => validateSequentially(value, (url) => validateUrl(url, 'text/html')),
};
// profiler in this context is shorthand for profile scraper // profiler in this context is shorthand for profile scraper
async function init() { async function init() {
@ -17,31 +131,58 @@ async function init() {
await chain; await chain;
const entity = entitiesBySlug[entitySlug] || null; const entity = entitiesBySlug[entitySlug] || null;
const fetchProfile = resolveLayoutScraper(entity, scraper)?.fetchProfile;
const profilers = Array.from(new Set(Object.entries(scraper) // some layouts will use the same profiler const tests = actors.filter((actor) => actor.entity === entitySlug);
.flatMap(([fnKey, fnOrLayout]) => {
if (fnOrLayout.fetchProfile) { // TODO: remove when all tests are written
// layout if (tests.length === 0) {
return fnOrLayout.fetchProfile; console.log('TODO', entitySlug);
return;
} }
if (fnKey === 'fetchProfile') { if (source && source !== entitySlug) {
// primary console.log('____', entitySlug);
return fnOrLayout; return;
} }
return null;
}).filter(Boolean)));
await test(`${entitySlug} (${entity?.name})`, async () => { await test(`${entitySlug} (${entity?.name})`, async () => {
await test('has entity', () => assert.notEqual(entity, null)); await test(`${entitySlug} has scraper`, () => assert.notEqual(fetchProfile, null));
await test('has profilers', () => assert.ok(profilers.length > 0)); await test(`${entitySlug} has entity`, () => assert.notEqual(entity, null));
await test(`${entitySlug} has tests`, () => assert.notEqual(tests.length, 0));
await test('foo', () => { await test(`${entitySlug} has valid fields`, async () => Promise.all(tests.map(async (actor) => {
assert.strictEqual(5, 5); const profile = await fetchProfile({
}); name: actor.name,
slug: slugify(actor.name),
}, {
...entity,
entity,
channel: entity,
network: entity.parent,
parameters: getRecursiveParameters(entity),
}, include);
console.log(profile);
console.log('Untested fields', Object.keys(profile).filter((field) => !actor.fields.includes(field)).join(', '));
if (!profile) {
assert.fail('profile not found');
}
await Promise.all(actor.fields.map(async (field) => {
assert.ok(
validators[field]
? await validators[field](profile[field])
: typeof profile[field] !== 'undefined',
`broken field ${field}, got ${profile[field]}`,
);
}));
})));
}); });
}, Promise.resolve()); }, Promise.resolve());
await knex.destroy();
} }
init(); init();