forked from DebaucheryLibrarian/traxxx
Added profile scraper tests (WIP), fixed some profile scrapers. Fixed slugify not breaking existing slugs.
This commit is contained in:
parent
5acc2c607b
commit
bddc33a734
|
|
@ -430,13 +430,13 @@ const networks = [
|
||||||
{
|
{
|
||||||
slug: 'hussiepass',
|
slug: 'hussiepass',
|
||||||
name: 'Hussie Pass',
|
name: 'Hussie Pass',
|
||||||
url: 'https://www.hussiepass.com',
|
url: 'https://hussiepass.com',
|
||||||
parent: 'hush',
|
parent: 'hush',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'hushpass',
|
slug: 'hushpass',
|
||||||
name: 'Hush Pass',
|
name: 'Hush Pass',
|
||||||
url: 'https://www.hushpass.com',
|
url: 'https://hushpass.com',
|
||||||
parent: 'hush',
|
parent: 'hush',
|
||||||
parameters: {
|
parameters: {
|
||||||
t1: true,
|
t1: true,
|
||||||
|
|
|
||||||
|
|
@ -4785,7 +4785,7 @@ const sites = [
|
||||||
{
|
{
|
||||||
slug: 'hussiepass',
|
slug: 'hussiepass',
|
||||||
name: 'Hussie Pass',
|
name: 'Hussie Pass',
|
||||||
url: 'https://www.hussiepass.com',
|
url: 'https://hussiepass.com',
|
||||||
parent: 'hussiepass',
|
parent: 'hussiepass',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -216,7 +216,8 @@ function getUrl(site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getSession(site, parameters, url) {
|
async function getSession(site, parameters, url) {
|
||||||
if (site.slug === 'mindgeek' || site.parameters?.parentSession === false) {
|
// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
|
||||||
|
if (site.slug === 'aylo') {
|
||||||
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
|
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -224,7 +225,7 @@ async function getSession(site, parameters, url) {
|
||||||
const cookieJar = new CookieJar();
|
const cookieJar = new CookieJar();
|
||||||
const session = http.session({ cookieJar });
|
const session = http.session({ cookieJar });
|
||||||
|
|
||||||
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession)
|
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
|
||||||
? site.parent.url
|
? site.parent.url
|
||||||
: (url || site.url);
|
: (url || site.url);
|
||||||
|
|
||||||
|
|
@ -360,7 +361,12 @@ function scrapeProfile(data, networkName, _releases = []) {
|
||||||
};
|
};
|
||||||
|
|
||||||
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
||||||
profile.measurements = data.measurements;
|
|
||||||
|
if (profile.gender === 'male') {
|
||||||
|
profile.penisLength = Number(data.measurements);
|
||||||
|
} else {
|
||||||
|
profile.measurements = data.measurements;
|
||||||
|
}
|
||||||
|
|
||||||
profile.dateOfBirth = qu.parseDate(data.birthday);
|
profile.dateOfBirth = qu.parseDate(data.birthday);
|
||||||
profile.birthPlace = data.birthPlace;
|
profile.birthPlace = data.birthPlace;
|
||||||
|
|
|
||||||
|
|
@ -254,7 +254,7 @@ async function scrapeProfile({ query, el }, channel, options) {
|
||||||
};
|
};
|
||||||
}, {});
|
}, {});
|
||||||
|
|
||||||
if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
if (bio.date_of_birth) profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||||
if (bio.fun_fact) profile.description = bio.fun_fact;
|
if (bio.fun_fact) profile.description = bio.fun_fact;
|
||||||
|
|
||||||
|
|
@ -262,6 +262,7 @@ async function scrapeProfile({ query, el }, channel, options) {
|
||||||
|
|
||||||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||||
|
if (bio.shoe_size) profile.foot = Number(bio.shoe_size);
|
||||||
|
|
||||||
profile.measurements = bio.measurements;
|
profile.measurements = bio.measurements;
|
||||||
|
|
||||||
|
|
@ -280,7 +281,7 @@ async function scrapeProfile({ query, el }, channel, options) {
|
||||||
|
|
||||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
||||||
|
|
||||||
profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
|
profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
|
||||||
|
|
||||||
profile.avatar = [
|
profile.avatar = [
|
||||||
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
||||||
|
|
@ -327,29 +328,29 @@ async function fetchScene(url, site, baseRelease) {
|
||||||
return scrapeScene(res.item, site, url, baseRelease);
|
return scrapeScene(res.item, site, url, baseRelease);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, { site }, options) {
|
async function fetchProfile({ name: actorName }, { channel }, options) {
|
||||||
const actorSlugA = slugify(actorName, '');
|
const actorSlugA = slugify(actorName, '');
|
||||||
const actorSlugB = slugify(actorName);
|
const actorSlugB = slugify(actorName);
|
||||||
|
|
||||||
const t1 = site.parameters?.t1 ? 't1/' : '';
|
const t1 = channel.parameters?.t1 ? 't1/' : '';
|
||||||
|
|
||||||
const res1 = site.parameters?.profile
|
const res1 = channel.parameters?.profile
|
||||||
? await qu.get(util.format(site.parameters.profile, actorSlugA))
|
? await qu.get(util.format(channel.parameters.profile, actorSlugA))
|
||||||
: await qu.get(`${site.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false });
|
: await qu.get(`${channel.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false });
|
||||||
|
|
||||||
const res = (res1.ok && res1)
|
const res = (res1.ok && res1)
|
||||||
|| (site.parameters?.profile && await qu.get(util.format(site.parameters.profile, actorSlugB)))
|
|| (channel.parameters?.profile && await qu.get(util.format(channel.parameters.profile, actorSlugB)))
|
||||||
|| await qu.get(`${site.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false });
|
|| await qu.get(`${channel.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false });
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (site.parameters?.t1) {
|
if (channel.parameters?.t1) {
|
||||||
return scrapeProfileT1(res.item, site);
|
return scrapeProfileT1(res.item, channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
return scrapeProfile(res.item, site, options);
|
return scrapeProfile(res.item, channel, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,7 @@ async function fetchProfile({ name: actorName }, { entity }) {
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
|
|
||||||
// 8K sites don't have avatar or interview on model page, always use 5K site
|
// 8K sites don't have avatar or interview on model page, always use 5K site
|
||||||
const res = await unprint.get(`${entity.slug === '5kvids' ? 'https://www.5kporn.com' : entity.url}/models/${actorSlug}`, {
|
const res = await unprint.get(`${entity.slug === '8kmembers' ? 'https://www.8kmilfs.com' : entity.url}/models/${actorSlug}`, {
|
||||||
headers: {
|
headers: {
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
const unprint = require('unprint');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
|
||||||
const { convert } = require('../utils/convert');
|
const { convert } = require('../utils/convert');
|
||||||
|
|
||||||
function scrapeAll(scenes, channel) {
|
function scrapeAll(scenes, channel) {
|
||||||
|
|
@ -76,41 +75,6 @@ async function scrapeScene({ query }, url, channel) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile({ query }) {
|
|
||||||
const profile = {};
|
|
||||||
|
|
||||||
const bio = Object.fromEntries(query.all('.model-info li, .model-desc li').map((el) => [
|
|
||||||
slugify(unprint.query.content(el, 'span')),
|
|
||||||
unprint.query.text(el),
|
|
||||||
]));
|
|
||||||
|
|
||||||
const avatar = query.img('.model-photo img, img[alt="model"]');
|
|
||||||
|
|
||||||
if (avatar) {
|
|
||||||
profile.avatar = [
|
|
||||||
avatar.replace(/-\d+x\d+/, ''),
|
|
||||||
avatar,
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bio && Object.keys(bio).length > 0) {
|
|
||||||
profile.description = bio.bio;
|
|
||||||
|
|
||||||
profile.dateOfBirth = bio.birthdate && unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
|
|
||||||
profile.birthPlace = bio.born;
|
|
||||||
|
|
||||||
profile.measurements = bio.measurements;
|
|
||||||
|
|
||||||
profile.height = convert(bio.height, 'cm');
|
|
||||||
profile.weight = convert(bio.weight, 'lb', 'kg');
|
|
||||||
|
|
||||||
profile.eyes = bio.eyes;
|
|
||||||
profile.hairColor = bio.hair;
|
|
||||||
}
|
|
||||||
|
|
||||||
return profile;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchLatestContent(url, parameters) {
|
async function fetchLatestContent(url, parameters) {
|
||||||
if (parameters.useBrowser) {
|
if (parameters.useBrowser) {
|
||||||
const res = await http.get(url, {
|
const res = await http.get(url, {
|
||||||
|
|
@ -187,16 +151,54 @@ async function fetchScene(url, channel) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Map a model record (taken from the page's embedded JSON) onto a profile object.
 *
 * @param {Object} data - Model record. Key casing is unreliable, so every key is
 *   lowercased before it is read.
 * @returns {Promise<Object>} Profile holding the fields assigned below; fields the
 *   record lacks are left undefined (dateOfBirth is null when no birthdate is given).
 */
async function scrapeProfile(data) {
	const profile = {};

	// unreliable key case, lowercase all
	const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value]));

	profile.entryId = bio.id;

	profile.gender = bio.gender;
	profile.description = bio.bio;

	profile.birthPlace = bio.born;
	// skip date parsing entirely when the record carries no birthdate
	profile.dateOfBirth = bio.birthdate ? unprint.extractDate(bio.birthdate, 'YYYY-MM-DD') : null;
	profile.age = bio.age;

	profile.measurements = bio.measurements;
	profile.height = convert(bio.height, 'cm');
	profile.weight = convert(bio.weight, 'lb', 'kg');

	profile.eyes = bio.eyes;
	profile.hairColor = bio.hair;

	profile.avatar = bio.thumb;

	// NOTE(review): tags presumably arrive as one comma-separated string — confirm against the site data.
	// An array is accepted too, and entries are trimmed and lowercased so that
	// e.g. 'Tattoos, Piercing' still matches.
	const rawTags = Array.isArray(bio.tags) ? bio.tags : String(bio.tags ?? '').split(',');
	const tags = new Set(rawTags.map((tag) => String(tag).trim().toLowerCase()));

	if (tags.has('tattoos')) profile.hasTattoos = true;
	if (tags.has('piercing')) profile.hasPiercings = true;

	return profile;
}
|
||||||
|
|
||||||
async function fetchProfile(actor, context) {
|
async function fetchProfile(actor, context) {
|
||||||
const session = http.session();
|
|
||||||
|
|
||||||
await http.get(context.channel.url, { session });
|
|
||||||
|
|
||||||
const url = `${context.channel.url}/models/${actor.slug}`;
|
const url = `${context.channel.url}/models/${actor.slug}`;
|
||||||
const res = await unprint.get(url);
|
|
||||||
|
const res = await unprint.get(url, {
|
||||||
|
parser: {
|
||||||
|
runScripts: 'dangerously',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeProfile(res.context, context.channel);
|
const data = res.context.query.json('#__NEXT_DATA__');
|
||||||
|
|
||||||
|
if (data.props.pageProps.model) {
|
||||||
|
return scrapeProfile(data.props.pageProps.model, context.channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
|
|
||||||
|
|
@ -220,8 +220,6 @@ const scrapers = {
|
||||||
bang,
|
bang,
|
||||||
bangbros: aylo,
|
bangbros: aylo,
|
||||||
bjraw: radical,
|
bjraw: radical,
|
||||||
blacked: vixen,
|
|
||||||
blackedraw: vixen,
|
|
||||||
bluedonkeymedia,
|
bluedonkeymedia,
|
||||||
delphine: modelmedia,
|
delphine: modelmedia,
|
||||||
meidenvanholland: bluedonkeymedia,
|
meidenvanholland: bluedonkeymedia,
|
||||||
|
|
@ -233,7 +231,6 @@ const scrapers = {
|
||||||
burningangel: gamma,
|
burningangel: gamma,
|
||||||
cherrypimps,
|
cherrypimps,
|
||||||
cumlouder,
|
cumlouder,
|
||||||
deeper: vixen,
|
|
||||||
deeplush: nubiles,
|
deeplush: nubiles,
|
||||||
devilsfilm: famedigital,
|
devilsfilm: famedigital,
|
||||||
digitalplayground: aylo,
|
digitalplayground: aylo,
|
||||||
|
|
@ -276,6 +273,7 @@ const scrapers = {
|
||||||
kink,
|
kink,
|
||||||
kinkmen: kink,
|
kinkmen: kink,
|
||||||
kinkvr: kink,
|
kinkvr: kink,
|
||||||
|
letsdoeit: aylo,
|
||||||
loveherfilms,
|
loveherfilms,
|
||||||
loveherfeet: loveherfilms,
|
loveherfeet: loveherfilms,
|
||||||
shelovesblack: loveherfilms,
|
shelovesblack: loveherfilms,
|
||||||
|
|
@ -287,7 +285,6 @@ const scrapers = {
|
||||||
mariskax,
|
mariskax,
|
||||||
metrohd: aylo,
|
metrohd: aylo,
|
||||||
milehighmedia: aylo,
|
milehighmedia: aylo,
|
||||||
milfy: vixen,
|
|
||||||
milfvr: wankzvr,
|
milfvr: wankzvr,
|
||||||
missax,
|
missax,
|
||||||
mofos: aylo,
|
mofos: aylo,
|
||||||
|
|
@ -299,7 +296,6 @@ const scrapers = {
|
||||||
nfbusty: nubiles,
|
nfbusty: nubiles,
|
||||||
nubilefilms: nubiles,
|
nubilefilms: nubiles,
|
||||||
nubiles,
|
nubiles,
|
||||||
nubilesporn: nubiles,
|
|
||||||
nympho: mikeadriano,
|
nympho: mikeadriano,
|
||||||
onlyprince: fullpornnetwork,
|
onlyprince: fullpornnetwork,
|
||||||
pascalssubsluts,
|
pascalssubsluts,
|
||||||
|
|
@ -353,15 +349,22 @@ const scrapers = {
|
||||||
transbella: porndoe,
|
transbella: porndoe,
|
||||||
tranzvr: wankzvr,
|
tranzvr: wankzvr,
|
||||||
trueanal: mikeadriano,
|
trueanal: mikeadriano,
|
||||||
tushy: vixen,
|
|
||||||
tushyraw: vixen,
|
|
||||||
twistys: aylo,
|
twistys: aylo,
|
||||||
vipsexvault: porndoe,
|
vipsexvault: porndoe,
|
||||||
virtualtaboo,
|
virtualtaboo,
|
||||||
darkroomvr: virtualtaboo,
|
darkroomvr: virtualtaboo,
|
||||||
onlytarts: virtualtaboo,
|
onlytarts: virtualtaboo,
|
||||||
oopsfamily: virtualtaboo,
|
oopsfamily: virtualtaboo,
|
||||||
|
// vixen
|
||||||
vixen,
|
vixen,
|
||||||
|
blacked: vixen,
|
||||||
|
blackedraw: vixen,
|
||||||
|
tushy: vixen,
|
||||||
|
tushyraw: vixen,
|
||||||
|
deeper: vixen,
|
||||||
|
milfy: vixen,
|
||||||
|
slayed: vixen,
|
||||||
|
wifey: vixen,
|
||||||
vrcosplayx: badoink,
|
vrcosplayx: badoink,
|
||||||
wankzvr,
|
wankzvr,
|
||||||
wicked: gamma,
|
wicked: gamma,
|
||||||
|
|
|
||||||
|
|
@ -31,12 +31,26 @@ function scrapeAll(scenes) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the channel's latest-releases listing and scrape its scene tiles.
 *
 * @param {Object} channel - Channel entity; `url` is the base, `parameters.latest`
 *   optionally overrides the listing path template.
 * @param {number} page - Page number substituted for `{page}` in the path template.
 * @returns {Promise<*>} Result of `scrapeAll` on success, otherwise the response status.
 */
async function fetchLatest(channel, page) {
	const pathTemplate = channel.parameters?.latest || '/categories/movies_{page}_d.html';
	const listingUrl = `${channel.url}${format(pathTemplate, { page })}`;

	const res = await unprint.get(listingUrl, {
		selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||||
|
|
||||||
function scrapeScene({ query }, { url, entity }) {
|
function scrapeScene({ query }, { url, entity }) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = getEntryId(url);
|
release.entryId = getEntryId(url);
|
||||||
release.title = query.content(['#media-holder .title', '.content-holder h1', '#scene h1', 'h2.titular', 'title'])?.replace(/\s+-$/, '');
|
release.title = query.content(['#media-holder .title', '.content-holder h1', '#scene h1', 'h2.titular', 'title'])?.replace(/\s+-$/, '');
|
||||||
|
|
||||||
|
console.log(release);
|
||||||
|
|
||||||
release.date = query.date('#sceneInfo .date, #trailer-data .date', 'YYYY-MM-DD');
|
release.date = query.date('#sceneInfo .date, #trailer-data .date', 'YYYY-MM-DD');
|
||||||
release.duration = query.duration('#sceneInfo .data-others, #trailer-data', /\d+:\d+/);
|
release.duration = query.duration('#sceneInfo .data-others, #trailer-data', /\d+:\d+/);
|
||||||
|
|
||||||
|
|
@ -67,6 +81,28 @@ function scrapeScene({ query }, { url, entity }) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build image source candidates by removing the resize/quality query
 * parameters (`imgh`, `imgw`, `imgq`) from an image URL.
 *
 * @param {string} [source] - Absolute image URL.
 * @returns {string[]} The stripped URL followed by the original URL, a single
 *   entry when stripping changed nothing, or an empty array when `source` is
 *   missing or cannot be parsed as an absolute URL.
 */
function stripSizeParams(source) {
	if (!source) {
		return [];
	}

	try {
		const url = new URL(source);
		const params = url.searchParams;

		params.delete('imgh');
		params.delete('imgw');
		params.delete('imgq');

		// only append the separator when parameters remain, to avoid a dangling '?'
		const query = params.toString();
		const stripped = `${url.origin}${url.pathname}${query ? `?${query}` : ''}`;

		// don't offer the same URL twice as a fallback
		return stripped === source
			? [source]
			: [stripped, source];
	} catch (_error) {
		// relative or malformed URL: nothing usable to offer
		return [];
	}
}
|
||||||
|
|
||||||
function scrapeProfile({ query }) {
|
function scrapeProfile({ query }) {
|
||||||
const profile = {};
|
const profile = {};
|
||||||
const bioKeys = query.contents('.statsText b');
|
const bioKeys = query.contents('.statsText b');
|
||||||
|
|
@ -77,13 +113,14 @@ function scrapeProfile({ query }) {
|
||||||
[slugify(key, '_')]: bioValues[index],
|
[slugify(key, '_')]: bioValues[index],
|
||||||
}), {});
|
}), {});
|
||||||
|
|
||||||
profile.description = query.contents('.descriptionText');
|
profile.description = query.content('.descriptionText');
|
||||||
|
|
||||||
profile.avatar = [
|
profile.avatar = [
|
||||||
|
...stripSizeParams(query.img('.model-bio-pic img', { attribute: 'src' })), // not available on e.g. Raw Attack
|
||||||
|
query.img('.model-bio-pic img', { attribute: 'src0_3x' }),
|
||||||
query.img('.model-bio-pic img', { attribute: 'src0_2x' }),
|
query.img('.model-bio-pic img', { attribute: 'src0_2x' }),
|
||||||
query.img('.model-bio-pic img', { attribute: 'src0_3x' }), // unnecessarily big
|
|
||||||
query.img('.model-bio-pic img', { attribute: 'src0_1x' }),
|
query.img('.model-bio-pic img', { attribute: 'src0_1x' }),
|
||||||
];
|
].filter(Boolean);
|
||||||
|
|
||||||
profile.height = Number(bio.height?.match(/(\d+)\s?cm/i)?.[1]);
|
profile.height = Number(bio.height?.match(/(\d+)\s?cm/i)?.[1]);
|
||||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||||
|
|
@ -108,18 +145,6 @@ function scrapeProfile({ query }) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page) {
|
|
||||||
const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, {
|
|
||||||
selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res.ok) {
|
|
||||||
return scrapeAll(res.context, channel);
|
|
||||||
}
|
|
||||||
|
|
||||||
return res.status;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchProfile(actor, channel) {
|
async function fetchProfile(actor, channel) {
|
||||||
if (actor.url) {
|
if (actor.url) {
|
||||||
const res = await unprint.get(actor.url);
|
const res = await unprint.get(actor.url);
|
||||||
|
|
|
||||||
|
|
@ -411,19 +411,16 @@ async function fetchScene(url, channel, baseRelease, options) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(data, channel) {
|
async function scrapeProfile(data, _channel) {
|
||||||
const model = data.model;
|
const model = data.model;
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
// most details seemingly unavailable in graphql
|
|
||||||
if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
|
|
||||||
profile.gender = genderMap[model.sex];
|
profile.gender = genderMap[model.sex];
|
||||||
|
|
||||||
profile.hair = model.hairColour;
|
|
||||||
profile.nationality = model.nationality;
|
|
||||||
|
|
||||||
if (model.biography.trim().length > 0) profile.description = model.biography;
|
if (model.biography.trim().length > 0) profile.description = model.biography;
|
||||||
|
|
||||||
|
// most details seemingly unavailable in graphql
|
||||||
|
if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
|
||||||
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
|
if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
|
||||||
if (model.waistMeasurment) profile.waist = model.waistMeasurment;
|
if (model.waistMeasurment) profile.waist = model.waistMeasurment;
|
||||||
if (model.hipMeasurment) profile.hip = model.hipMeasurment;
|
if (model.hipMeasurment) profile.hip = model.hipMeasurment;
|
||||||
|
|
@ -432,9 +429,11 @@ async function scrapeProfile(data, channel) {
|
||||||
profile.poster = getAvatarFallbacks(model.images.profile);
|
profile.poster = getAvatarFallbacks(model.images.profile);
|
||||||
profile.banner = getAvatarFallbacks(model.images.poster);
|
profile.banner = getAvatarFallbacks(model.images.poster);
|
||||||
|
|
||||||
|
/*
|
||||||
if (model.videos) {
|
if (model.videos) {
|
||||||
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
|
profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
@ -558,6 +557,7 @@ async function fetchProfile(actor, { channel }) {
|
||||||
) {
|
) {
|
||||||
model: findOneModel(input: { slug: $slug, site: $site }) {
|
model: findOneModel(input: { slug: $slug, site: $site }) {
|
||||||
name
|
name
|
||||||
|
sex
|
||||||
biography
|
biography
|
||||||
images {
|
images {
|
||||||
listing {
|
listing {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const { convert, convertMany } = require('convert');
|
const { convert, convertMany } = require('convert');
|
||||||
|
const { decode } = require('html-entities');
|
||||||
|
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
|
|
||||||
|
|
@ -60,18 +61,20 @@ function kgToLbs(kgs) {
|
||||||
|
|
||||||
/**
 * Convert a compound imperial length such as `5'4"`, `5’4”`, `5' 4''` or `5ft 4`
 * into a single unit, by rewriting the foot/inch marks into `ft`/`in` and
 * handing the result to `convertMany`.
 *
 * @param {string} input - Length string using foot/inch marks or unit names.
 * @param {string} to - Target unit passed to `convertMany(...).to()`.
 * @returns {number|null} Rounded converted value, or null when the result is 0 or NaN.
 */
function convertManyApi(input, to) {
	const curatedInput = input
		.replace(/["”]|''/, 'in') // inch marks first, so a double apostrophe is not mistaken for a foot mark (4'' is 4in, not 4ft ')
		.replace(/['’]\s*/, 'ft ') // ensure 1 space
		.replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol

	return Math.round(convertMany(curatedInput).to(to)) || null;
}
|
||||||
|
|
||||||
function convertApi(input, fromOrTo, to) {
|
function convertApi(rawInput, fromOrTo, to) {
|
||||||
if (!input) {
|
if (!rawInput) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const input = decode(rawInput); // remove html entities, e.g. 5' 8" for 5' 8"
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (typeof input === 'string' && to === undefined) {
|
if (typeof input === 'string' && to === undefined) {
|
||||||
return convertManyApi(input, fromOrTo);
|
return convertManyApi(input, fromOrTo);
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ const accentMap = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const plainCharRegex = /[a-zA-Z0-9]/;
|
const plainCharRegex = /[a-zA-Z0-9]/;
|
||||||
const defaultPunctuationRegex = /[.,?!:;&'‘’"“”…()[]{}<>\/*—-]/;
|
const defaultPunctuationRegex = /[.,?!:;&'‘’"“”…()[]{}<>\/*—]/;
|
||||||
const defaultSymbolRegex = /[@$€£#%^+=\\~]/;
|
const defaultSymbolRegex = /[@$€£#%^+=\\~]/;
|
||||||
|
|
||||||
function slugify(strings, delimiter = '-', {
|
function slugify(strings, delimiter = '-', {
|
||||||
|
|
@ -66,6 +66,7 @@ function slugify(strings, delimiter = '-', {
|
||||||
: string;
|
: string;
|
||||||
|
|
||||||
const normalized = casedString
|
const normalized = casedString
|
||||||
|
.replace(/[_-]/g, ' ')
|
||||||
.split('')
|
.split('')
|
||||||
.map((char) => {
|
.map((char) => {
|
||||||
if (char === ' ') {
|
if (char === ' ') {
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,124 @@ const test = require('node:test');
|
||||||
const assert = require('node:assert/strict');
|
const assert = require('node:assert/strict');
|
||||||
|
|
||||||
const argv = require('../src/argv');
|
const argv = require('../src/argv');
|
||||||
|
const include = require('../src/utils/argv-include')(argv);
|
||||||
|
const slugify = require('../src/utils/slugify');
|
||||||
const scrapers = require('../src/scrapers/scrapers');
|
const scrapers = require('../src/scrapers/scrapers');
|
||||||
const { fetchEntitiesBySlug } = require('../src/entities');
|
const { fetchEntitiesBySlug } = require('../src/entities');
|
||||||
|
const { resolveLayoutScraper } = require('../src/scrapers/resolve');
|
||||||
|
const getRecursiveParameters = require('../src/utils/get-recursive-parameters');
|
||||||
|
const knex = require('../src/knex');
|
||||||
|
|
||||||
|
const actors = [
|
||||||
|
// jules jordan
|
||||||
|
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
|
||||||
|
// gamma
|
||||||
|
{ entity: 'wicked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'xempire', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
// vixen
|
||||||
|
{ entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'tushy', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'tushyraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'blacked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'blackedraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'slayed', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'deeper', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'milfy', name: 'Clea Gaultier', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
{ entity: 'wifey', name: 'Danielle Renae', fields: ['gender', 'avatar', 'description'] },
|
||||||
|
// teamskeet
|
||||||
|
{ entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] },
|
||||||
|
{ entity: 'teamskeet', name: 'Kali Roses', fields: ['description', 'avatar', 'measurements', 'nationality', 'ethnicity', 'hairColor', 'hasPiercings', 'hasTattoos'] }, // tattoos
|
||||||
|
// analvids
|
||||||
|
{ entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] },
|
||||||
|
// mike adriano
|
||||||
|
{ entity: 'trueanal', name: 'Brenna McKenna', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },
|
||||||
|
{ entity: 'analonly', name: 'Lilith Grace', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
|
||||||
|
{ entity: 'allanal', name: 'Lexi Lore', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
|
||||||
|
{ entity: 'swallowed', name: 'Brooklyn Gray', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },
|
||||||
|
{ entity: 'nympho', name: 'Gianna Dior', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
|
||||||
|
{ entity: 'dirtyauditions', name: 'Nicole Kitt', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
|
||||||
|
// spizoo
|
||||||
|
{ entity: 'spizoo', name: 'Charlotte Sins', fields: ['description', 'avatar', 'dateOfBirth', 'ethnicity', 'nationality', 'height', 'measurements', 'hasTattoos', 'hasPiercings', 'hairColor', 'eyes', 'butt', 'pussy'] },
|
||||||
|
{ entity: 'rawattack', name: 'Kitana Montana', fields: ['avatar', 'dateOfBirth', 'nationality', 'measurements', 'eyes', 'height', 'hairColor', 'hasTattoos'] },
|
||||||
|
// hush / hussiepass
|
||||||
|
{ entity: 'hussiepass', name: 'Roxie Sinner', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'foot', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
|
||||||
|
{ entity: 'eyeontheguy', name: 'Tommy Gunn', fields: ['avatar'] },
|
||||||
|
{ entity: 'interracialpovs', name: 'Nia Nacci', fields: ['avatar', 'aliases', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
|
||||||
|
{ entity: 'povpornstars', name: 'Anna Bell Peaks', fields: ['avatar', 'aliases', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
|
||||||
|
{ entity: 'seehimfuck', name: 'Sheem The Dream', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'penisLength', 'circumcised', 'socials'] },
|
||||||
|
{ entity: 'hushpass', name: 'Dylan Ryder', fields: ['avatar'] },
|
||||||
|
{ entity: 'interracialpass', name: 'Aidra Fox', fields: ['avatar', 'height', 'measurements'] },
|
||||||
|
// kelly madison / 8K
|
||||||
|
{ entity: 'kellymadison', name: 'Ava Addams', fields: ['avatar', 'description', 'age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
|
||||||
|
{ entity: '8kmembers', name: 'Angie Lynx', fields: ['age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
|
||||||
|
// aylo
|
||||||
|
{ entity: 'brazzers', name: 'Lexi Lore', fields: ['avatar', 'description', 'gender', 'height', 'weight', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity', 'hairColor', 'hasTattoos', 'hasPiercings'] },
|
||||||
|
{ entity: 'digitalplayground', name: 'Elly Clutch', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] },
|
||||||
|
{ entity: 'realitykings', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] },
|
||||||
|
{ entity: 'fakehub', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] },
|
||||||
|
{ entity: 'babes', name: 'Alina Lopez', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity', 'hasTattoos', 'hasPiercings'] },
|
||||||
|
{ entity: 'letsdoeit', name: 'Nicole Doshi', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] },
|
||||||
|
{ entity: 'men', name: 'Cade Maddox', fields: ['avatar', 'description', 'gender', 'height', 'ethnicity', 'penisLength', 'dateOfBirth', 'weight', 'hairColor', 'hasTattoos'] },
|
||||||
|
];
|
||||||
|
|
||||||
const actorScrapers = scrapers.actors;
|
const actorScrapers = scrapers.actors;
|
||||||
|
// optional entity slug filter: first value of argv.source, or null when none was given; init() skips entities whose slug differs
const source = argv.source?.[0] || null;
|
||||||
|
|
||||||
|
/**
 * Check that a URL is well-formed and that fetching it yields the expected content type.
 *
 * @param {string|Object} url - Plain URL string, or an object carrying the URL in `src`
 *   and optionally an `expectType` map that translates the received content type
 *   into the type to compare against.
 * @param {string} [mime='image/'] - Substring the resolved content type must contain.
 * @returns {Promise<boolean>} True when the URL parses and the resolved content type
 *   contains `mime`; false for a missing or malformed URL or a missing content type.
 *   Rejects when the fetch itself fails.
 */
async function validateUrl(url, mime = 'image/') {
	if (!url) {
		return false;
	}

	const href = url.src || url;

	try {
		new URL(href); // eslint-disable-line no-new
	} catch (_error) {
		// not a parseable absolute URL, no point in fetching it
		return false;
	}

	const res = await fetch(href);

	const type = res.headers.get('content-type');
	const resolvedType = url.expectType?.[type] || type;

	// headers.get() yields null when the response carries no content-type header,
	// treat that as a failed validation rather than crashing on null.includes()
	if (!resolvedType) {
		return false;
	}

	return resolvedType.includes(mime);
}
|
||||||
|
|
||||||
|
/**
 * Validate one URL or a list of URLs one after another, stopping at the first failure.
 *
 * @param {*} value - Single URL (string or object accepted by validateUrl) or an array of them.
 * @param {string} [mime] - Content type substring forwarded to validateUrl; its default applies when omitted.
 * @returns {Promise<boolean>} True only when at least one URL is given and every URL validates.
 */
async function validateUrls(value, mime) {
	const urls = [].concat(value);

	// an empty list would otherwise pass vacuously, although the scraper returned nothing
	if (urls.length === 0) {
		return false;
	}

	return urls.reduce(async (chain, url) => {
		const isValid = await chain;

		if (!isValid) {
			return false;
		}

		return validateUrl(url, mime);
	}, Promise.resolve(true));
}

// Per-field checks applied to scraped profile values, keyed by profile field name.
// Each validator receives the field value and returns (a promise of) a truthy result when it looks sane.
const validators = {
	age: (value) => !!Number(value),
	gender: (value) => value && ['female', 'male', 'transsexual'].includes(value.toLowerCase()),
	description: (value) => typeof value === 'string' && value.length > 3,
	birthPlace: (value) => typeof value === 'string' && value.length > 3,
	birthCountry: (value) => typeof value === 'string' && value.length > 1,
	nationality: (value) => typeof value === 'string' && value.length > 3,
	// either a plain number or a feet/inches notation such as 5'7"
	height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value),
	weight: (value) => !!Number(value),
	eyes: (value) => typeof value === 'string' && value.length > 3,
	hairColor: (value) => typeof value === 'string' && value.length > 3,
	measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
	// an invalid Date has a NaN year
	dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
	hasTattoos: (value) => typeof value === 'boolean',
	hasPiercings: (value) => typeof value === 'boolean',
	// avatars must resolve to images, which is validateUrl's default mime
	avatar: async (value) => validateUrls(value),
	// social links must resolve to HTML pages
	socials: async (value) => validateUrls(value, 'text/html'),
};
|
||||||
|
|
||||||
// profiler in this context is shorthand for profile scraper
|
// profiler in this context is shorthand for profile scraper
|
||||||
async function init() {
|
async function init() {
|
||||||
|
|
@ -17,31 +131,58 @@ async function init() {
|
||||||
await chain;
|
await chain;
|
||||||
|
|
||||||
const entity = entitiesBySlug[entitySlug] || null;
|
const entity = entitiesBySlug[entitySlug] || null;
|
||||||
|
const fetchProfile = resolveLayoutScraper(entity, scraper)?.fetchProfile;
|
||||||
|
|
||||||
const profilers = Array.from(new Set(Object.entries(scraper) // some layouts will use the same profiler
|
const tests = actors.filter((actor) => actor.entity === entitySlug);
|
||||||
.flatMap(([fnKey, fnOrLayout]) => {
|
|
||||||
if (fnOrLayout.fetchProfile) {
|
|
||||||
// layout
|
|
||||||
return fnOrLayout.fetchProfile;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fnKey === 'fetchProfile') {
|
// TODO: remove when all tests are written
|
||||||
// primary
|
if (tests.length === 0) {
|
||||||
return fnOrLayout;
|
console.log('TODO', entitySlug);
|
||||||
}
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
if (source && source !== entitySlug) {
|
||||||
}).filter(Boolean)));
|
console.log('____', entitySlug);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
await test(`${entitySlug} (${entity?.name})`, async () => {
|
await test(`${entitySlug} (${entity?.name})`, async () => {
|
||||||
await test('has entity', () => assert.notEqual(entity, null));
|
await test(`${entitySlug} has scraper`, () => assert.notEqual(fetchProfile, null));
|
||||||
await test('has profilers', () => assert.ok(profilers.length > 0));
|
await test(`${entitySlug} has entity`, () => assert.notEqual(entity, null));
|
||||||
|
await test(`${entitySlug} has tests`, () => assert.notEqual(tests.length, 0));
|
||||||
|
|
||||||
await test('foo', () => {
|
await test(`${entitySlug} has valid fields`, async () => Promise.all(tests.map(async (actor) => {
|
||||||
assert.strictEqual(5, 5);
|
const profile = await fetchProfile({
|
||||||
});
|
name: actor.name,
|
||||||
|
slug: slugify(actor.name),
|
||||||
|
}, {
|
||||||
|
...entity,
|
||||||
|
entity,
|
||||||
|
channel: entity,
|
||||||
|
network: entity.parent,
|
||||||
|
parameters: getRecursiveParameters(entity),
|
||||||
|
}, include);
|
||||||
|
|
||||||
|
console.log(profile);
|
||||||
|
console.log('Untested fields', Object.keys(profile).filter((field) => !actor.fields.includes(field)).join(', '));
|
||||||
|
|
||||||
|
if (!profile) {
|
||||||
|
assert.fail('profile not found');
|
||||||
|
}
|
||||||
|
|
||||||
|
await Promise.all(actor.fields.map(async (field) => {
|
||||||
|
assert.ok(
|
||||||
|
validators[field]
|
||||||
|
? await validators[field](profile[field])
|
||||||
|
: typeof profile[field] !== 'undefined',
|
||||||
|
`broken field ${field}, got ${profile[field]}`,
|
||||||
|
);
|
||||||
|
}));
|
||||||
|
})));
|
||||||
});
|
});
|
||||||
}, Promise.resolve());
|
}, Promise.resolve());
|
||||||
|
|
||||||
|
await knex.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
init();
|
init();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue