diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 6fee8c11..28c92616 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -430,13 +430,13 @@ const networks = [ { slug: 'hussiepass', name: 'Hussie Pass', - url: 'https://www.hussiepass.com', + url: 'https://hussiepass.com', parent: 'hush', }, { slug: 'hushpass', name: 'Hush Pass', - url: 'https://www.hushpass.com', + url: 'https://hushpass.com', parent: 'hush', parameters: { t1: true, diff --git a/seeds/02_sites.js b/seeds/02_sites.js index edc687c2..c892aa31 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -4785,7 +4785,7 @@ const sites = [ { slug: 'hussiepass', name: 'Hussie Pass', - url: 'https://www.hussiepass.com', + url: 'https://hussiepass.com', parent: 'hussiepass', }, { diff --git a/src/scrapers/aylo.js b/src/scrapers/aylo.js index b68b2784..733c6bc8 100755 --- a/src/scrapers/aylo.js +++ b/src/scrapers/aylo.js @@ -216,7 +216,8 @@ function getUrl(site) { } async function getSession(site, parameters, url) { - if (site.slug === 'mindgeek' || site.parameters?.parentSession === false) { + // if (site.slug === 'aylo' || site.parameters?.parentSession === false) { + if (site.slug === 'aylo') { // most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels return null; } @@ -224,7 +225,7 @@ async function getSession(site, parameters, url) { const cookieJar = new CookieJar(); const session = http.session({ cookieJar }); - const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession) + const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false) ? site.parent.url : (url || site.url); @@ -360,7 +361,12 @@ function scrapeProfile(data, networkName, _releases = []) { }; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; - profile.measurements = data.measurements; + + if (profile.gender === 'male') { + profile.penisLength = Number(data.measurements); + } else { + profile.measurements = data.measurements; + } profile.dateOfBirth = qu.parseDate(data.birthday); profile.birthPlace = data.birthPlace; diff --git a/src/scrapers/hush.js b/src/scrapers/hush.js index ef8eb297..93bc0f93 100755 --- a/src/scrapers/hush.js +++ b/src/scrapers/hush.js @@ -254,7 +254,7 @@ async function scrapeProfile({ query, el }, channel, options) { }; }, {}); - if (bio.date_of_birth) profile.birthdate = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); + if (bio.date_of_birth) profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); if (bio.birthplace) profile.birthPlace = bio.birthplace; if (bio.fun_fact) profile.description = bio.fun_fact; @@ -262,6 +262,7 @@ async function scrapeProfile({ query, el }, channel, options) { if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]); if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]); + if (bio.shoe_size) profile.foot = Number(bio.shoe_size); profile.measurements = bio.measurements; @@ -280,7 +281,7 @@ async function scrapeProfile({ query, el }, channel, options) { if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim()); - profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean); + profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean); profile.avatar = [ query.img('.profile-pic img', 'src0_3x', { origin: channel.url }), @@ -327,29 +328,29 @@ async function fetchScene(url, site, baseRelease) { return scrapeScene(res.item, site, url, baseRelease); } -async function fetchProfile({ name: actorName }, { site }, options) { +async function fetchProfile({ name: actorName }, { channel }, options) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName); - const t1 = site.parameters?.t1 ? 't1/' : ''; + const t1 = channel.parameters?.t1 ? 't1/' : ''; - const res1 = site.parameters?.profile - ? await qu.get(util.format(site.parameters.profile, actorSlugA)) - : await qu.get(`${site.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false }); + const res1 = channel.parameters?.profile + ? await qu.get(util.format(channel.parameters.profile, actorSlugA)) + : await qu.get(`${channel.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false }); const res = (res1.ok && res1) - || (site.parameters?.profile && await qu.get(util.format(site.parameters.profile, actorSlugB))) - || await qu.get(`${site.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false }); + || (channel.parameters?.profile && await qu.get(util.format(channel.parameters.profile, actorSlugB))) + || await qu.get(`${channel.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false }); if (!res.ok) { return res.status; } - if (site.parameters?.t1) { - return scrapeProfileT1(res.item, site); + if (channel.parameters?.t1) { + return scrapeProfileT1(res.item, channel); } - return scrapeProfile(res.item, site, options); + return scrapeProfile(res.item, channel, options); } module.exports = { diff --git a/src/scrapers/kellymadison.js b/src/scrapers/kellymadison.js index 9c43b5cd..0207c28c 100755 --- a/src/scrapers/kellymadison.js +++ b/src/scrapers/kellymadison.js @@ -208,7 +208,7 @@ async function fetchProfile({ name: actorName }, { entity }) { const actorSlug = slugify(actorName); // 8K sites don't have avatar or interview on model page, always use 5K site - const res = await unprint.get(`${entity.slug === '5kvids' ? 'https://www.5kporn.com' : entity.url}/models/${actorSlug}`, { + const res = await unprint.get(`${entity.slug === '8kmembers' ? 'https://www.8kmilfs.com' : entity.url}/models/${actorSlug}`, { headers: { 'X-Requested-With': 'XMLHttpRequest', }, diff --git a/src/scrapers/mikeadriano.js b/src/scrapers/mikeadriano.js index 8e4e7137..def1f1bd 100755 --- a/src/scrapers/mikeadriano.js +++ b/src/scrapers/mikeadriano.js @@ -3,7 +3,6 @@ const unprint = require('unprint'); const http = require('../utils/http'); -const slugify = require('../utils/slugify'); const { convert } = require('../utils/convert'); function scrapeAll(scenes, channel) { @@ -76,41 +75,6 @@ async function scrapeScene({ query }, url, channel) { return release; } -async function scrapeProfile({ query }) { - const profile = {}; - - const bio = Object.fromEntries(query.all('.model-info li, .model-desc li').map((el) => [ - slugify(unprint.query.content(el, 'span')), - unprint.query.text(el), - ])); - - const avatar = query.img('.model-photo img, img[alt="model"]'); - - if (avatar) { - profile.avatar = [ - avatar.replace(/-\d+x\d+/, ''), - avatar, - ]; - } - - if (bio && Object.keys(bio).length > 0) { - profile.description = bio.bio; - - profile.dateOfBirth = bio.birthdate && unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); - profile.birthPlace = bio.born; - - profile.measurements = bio.measurements; - - profile.height = convert(bio.height, 'cm'); - profile.weight = convert(bio.weight, 'lb', 'kg'); - - profile.eyes = bio.eyes; - profile.hairColor = bio.hair; - } - - return profile; -} - async function fetchLatestContent(url, parameters) { if (parameters.useBrowser) { const res = await http.get(url, { @@ -187,16 +151,54 @@ async function fetchScene(url, channel) { return res.status; } +async function scrapeProfile(data) { + const profile = {}; + // unreliable key case, lowercase all + const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); + + profile.entryId = bio.id; + + profile.gender = bio.gender; + profile.description = bio.bio; + + profile.birthPlace = bio.born; + profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); + profile.age = bio.age; + + profile.measurements = bio.measurements; + profile.height = convert(bio.height, 'cm'); + profile.weight = convert(bio.weight, 'lb', 'kg'); + + profile.eyes = bio.eyes; + profile.hairColor = bio.hair; + + profile.avatar = bio.thumb; + + const tags = bio.tags?.split(',') || []; + + if (tags.includes('tattoos')) profile.hasTattoos = true; + if (tags.includes('piercing')) profile.hasPiercings = true; + + return profile; +} + async function fetchProfile(actor, context) { - const session = http.session(); - - await http.get(context.channel.url, { session }); - const url = `${context.channel.url}/models/${actor.slug}`; - const res = await unprint.get(url); + + const res = await unprint.get(url, { + parser: { + runScripts: 'dangerously', + }, + }); if (res.ok) { - return scrapeProfile(res.context, context.channel); + const data = res.context.query.json('#__NEXT_DATA__'); + + if (data.props.pageProps.model) { + return scrapeProfile(data.props.pageProps.model, context.channel); + } + + return null; } return res.status; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 63b991f3..4390c096 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -220,8 +220,6 @@ const scrapers = { bang, bangbros: aylo, bjraw: radical, - blacked: vixen, - blackedraw: vixen, bluedonkeymedia, delphine: modelmedia, meidenvanholland: bluedonkeymedia, @@ -233,7 +231,6 @@ const scrapers = { burningangel: gamma, cherrypimps, cumlouder, - deeper: vixen, deeplush: nubiles, devilsfilm: famedigital, digitalplayground: aylo, @@ -276,6 +273,7 @@ const scrapers = { kink, kinkmen: kink, kinkvr: kink, + letsdoeit: aylo, loveherfilms, loveherfeet: loveherfilms, shelovesblack: loveherfilms, @@ -287,7 +285,6 @@ const scrapers = { mariskax, metrohd: aylo, milehighmedia: aylo, - milfy: vixen, milfvr: wankzvr, missax, mofos: aylo, @@ -299,7 +296,6 @@ const scrapers = { nfbusty: nubiles, nubilefilms: nubiles, nubiles, - nubilesporn: nubiles, nympho: mikeadriano, onlyprince: fullpornnetwork, pascalssubsluts, @@ -353,15 +349,22 @@ const scrapers = { transbella: porndoe, tranzvr: wankzvr, trueanal: mikeadriano, - tushy: vixen, - tushyraw: vixen, twistys: aylo, vipsexvault: porndoe, virtualtaboo, darkroomvr: virtualtaboo, onlytarts: virtualtaboo, oopsfamily: virtualtaboo, + // vixen vixen, + blacked: vixen, + blackedraw: vixen, + tushy: vixen, + tushyraw: vixen, + deeper: vixen, + milfy: vixen, + slayed: vixen, + wifey: vixen, vrcosplayx: badoink, wankzvr, wicked: gamma, diff --git a/src/scrapers/spizoo.js b/src/scrapers/spizoo.js index ac933a99..47ddd9fc 100755 --- a/src/scrapers/spizoo.js +++ b/src/scrapers/spizoo.js @@ -31,12 +31,26 @@ function scrapeAll(scenes) { }); } +async function fetchLatest(channel, page) { + const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, { + selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail', + }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + function scrapeScene({ query }, { url, entity }) { const release = {}; release.entryId = getEntryId(url); release.title = query.content(['#media-holder .title', '.content-holder h1', '#scene h1', 'h2.titular', 'title'])?.replace(/\s+-$/, ''); + console.log(release); + release.date = query.date('#sceneInfo .date, #trailer-data .date', 'YYYY-MM-DD'); release.duration = query.duration('#sceneInfo .data-others, #trailer-data', /\d+:\d+/); @@ -67,6 +81,28 @@ function scrapeScene({ query }, { url, entity }) { return release; } +function stripSizeParams(source) { + if (!source) { + return []; + } + + try { + const url = new URL(source); + const params = url.searchParams; + + params.delete('imgh'); + params.delete('imgw'); + params.delete('imgq'); + + return [ + `${url.origin}${url.pathname}?${params.toString()}`, + source, + ]; + } catch (error) { + return []; + } +} + function scrapeProfile({ query }) { const profile = {}; const bioKeys = query.contents('.statsText b'); @@ -77,13 +113,14 @@ function scrapeProfile({ query }) { [slugify(key, '_')]: bioValues[index], }), {}); - profile.description = query.contents('.descriptionText'); + profile.description = query.content('.descriptionText'); profile.avatar = [ + ...stripSizeParams(query.img('.model-bio-pic img', { attribute: 'src' })), // not available on e.g. Raw Attack + query.img('.model-bio-pic img', { attribute: 'src0_3x' }), query.img('.model-bio-pic img', { attribute: 'src0_2x' }), - query.img('.model-bio-pic img', { attribute: 'src0_3x' }), // unnecessarily big query.img('.model-bio-pic img', { attribute: 'src0_1x' }), - ]; + ].filter(Boolean); profile.height = Number(bio.height?.match(/(\d+)\s?cm/i)?.[1]); profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); @@ -108,18 +145,6 @@ function scrapeProfile({ query }) { return profile; } -async function fetchLatest(channel, page) { - const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, { - selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail', - }); - - if (res.ok) { - return scrapeAll(res.context, channel); - } - - return res.status; -} - async function fetchProfile(actor, channel) { if (actor.url) { const res = await unprint.get(actor.url); diff --git a/src/scrapers/vixen.js b/src/scrapers/vixen.js index bd28e57b..d13d1400 100755 --- a/src/scrapers/vixen.js +++ b/src/scrapers/vixen.js @@ -411,19 +411,16 @@ async function fetchScene(url, channel, baseRelease, options) { return res.status; } -async function scrapeProfile(data, channel) { +async function scrapeProfile(data, _channel) { const model = data.model; const profile = {}; - // most details seemingly unavailable in graphql - if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth); profile.gender = genderMap[model.sex]; - profile.hair = model.hairColour; - profile.nationality = model.nationality; - if (model.biography.trim().length > 0) profile.description = model.biography; + // most details seemingly unavailable in graphql + if (profile.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth); if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`; if (model.waistMeasurment) profile.waist = model.waistMeasurment; if (model.hipMeasurment) profile.hip = model.hipMeasurment; @@ -432,9 +429,11 @@ async function scrapeProfile(data, channel) { profile.poster = getAvatarFallbacks(model.images.profile); profile.banner = getAvatarFallbacks(model.images.poster); + /* if (model.videos) { profile.scenes = scrapeAll(model.videos.edges.map((edge) => edge.node), channel); } + */ return profile; } @@ -558,6 +557,7 @@ async function fetchProfile(actor, { channel }) { ) { model: findOneModel(input: { slug: $slug, site: $site }) { name + sex biography images { listing { diff --git a/src/utils/convert.js b/src/utils/convert.js index e893887a..7266dca5 100755 --- a/src/utils/convert.js +++ b/src/utils/convert.js @@ -1,6 +1,7 @@ 'use strict'; const { convert, convertMany } = require('convert'); +const { decode } = require('html-entities'); const logger = require('../logger')(__filename); @@ -60,18 +61,20 @@ function kgToLbs(kgs) { function convertManyApi(input, to) { const curatedInput = input - .replace('\'', 'ft') - .replace(/"|''/, 'in') + .replace(/['’]\s*/, 'ft ') // ensure 1 space + .replace(/["”]|('')/, 'in') // 5’4” .replace(/\d+ft\s*\d+\s*$/, (match) => `${match}in`); // height without any inch symbol return Math.round(convertMany(curatedInput).to(to)) || null; } -function convertApi(input, fromOrTo, to) { - if (!input) { +function convertApi(rawInput, fromOrTo, to) { + if (!rawInput) { return null; } + const input = decode(rawInput); // remove html entities, e.g. 5' 8" for 5' 8" + try { if (typeof input === 'string' && to === undefined) { return convertManyApi(input, fromOrTo); diff --git a/src/utils/slugify.js b/src/utils/slugify.js index 115efcae..324ed27d 100755 --- a/src/utils/slugify.js +++ b/src/utils/slugify.js @@ -42,7 +42,7 @@ const accentMap = { }; const plainCharRegex = /[a-zA-Z0-9]/; -const defaultPunctuationRegex = /[.,?!:;&'‘’"“”…()[]{}<>\/*—-]/; +const defaultPunctuationRegex = /[.,?!:;&'‘’"“”…()[]{}<>\/*—]/; const defaultSymbolRegex = /[@$€£#%^+=\\~]/; function slugify(strings, delimiter = '-', { @@ -66,6 +66,7 @@ function slugify(strings, delimiter = '-', { : string; const normalized = casedString + .replace(/[_-]/g, ' ') .split('') .map((char) => { if (char === ' ') { diff --git a/tests/profiles.js b/tests/profiles.js index d4370bd1..6d030e04 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -4,10 +4,124 @@ const test = require('node:test'); const assert = require('node:assert/strict'); const argv = require('../src/argv'); +const include = require('../src/utils/argv-include')(argv); +const slugify = require('../src/utils/slugify'); const scrapers = require('../src/scrapers/scrapers'); const { fetchEntitiesBySlug } = require('../src/entities'); +const { resolveLayoutScraper } = require('../src/scrapers/resolve'); +const getRecursiveParameters = require('../src/utils/get-recursive-parameters'); +const knex = require('../src/knex'); + +const actors = [ + // jules jordan + { entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] }, + // gamma + { entity: 'wicked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'xempire', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + // vixen + { entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'tushy', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'tushyraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'blacked', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'blackedraw', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] }, + { entity: 'slayed', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] }, + { entity: 'deeper', name: 'Vanna Bardot', fields: ['gender', 'avatar', 'description'] }, + { entity: 'milfy', name: 'Clea Gaultier', fields: ['gender', 'avatar', 'description'] }, + { entity: 'wifey', name: 'Danielle Renae', fields: ['gender', 'avatar', 'description'] }, + // teamskeet + { entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] }, + { entity: 'teamskeet', name: 'Kali Roses', fields: ['description', 'avatar', 'measurements', 'nationality', 'ethnicity', 'hairColor', 'hasPiercings', 'hasTattoos'] }, // tattoos + // analvids + { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] }, + // mike adriano + { entity: 'trueanal', name: 'Brenna McKenna', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] }, + { entity: 'analonly', name: 'Lilith Grace', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] }, + { entity: 'allanal', name: 'Lexi Lore', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] }, + { entity: 'swallowed', name: 'Brooklyn Gray', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] }, + { entity: 'nympho', name: 'Gianna Dior', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] }, + { entity: 'dirtyauditions', name: 'Nicole Kitt', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] }, + // spizoo + { entity: 'spizoo', name: 'Charlotte Sins', fields: ['description', 'avatar', 'dateOfBirth', 'ethnicity', 'nationality', 'height', 'measurements', 'hasTattoos', 'hasPiercings', 'hairColor', 'eyes', 'butt', 'pussy'] }, + { entity: 'rawattack', name: 'Kitana Montana', fields: ['avatar', 'dateOfBirth', 'nationality', 'measurements', 'eyes', 'height', 'hairColor', 'hasTattoos'] }, + // hush / hussiepass + { entity: 'hussiepass', name: 'Roxie Sinner', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'foot', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] }, + { entity: 'eyeontheguy', name: 'Tommy Gunn', fields: ['avatar'] }, + { entity: 'interracialpovs', name: 'Nia Nacci', fields: ['avatar', 'aliases', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] }, + { entity: 'povpornstars', name: 'Anna Bell Peaks', fields: ['avatar', 'aliases', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] }, + { entity: 'seehimfuck', name: 'Sheem The Dream', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'penisLength', 'circumcised', 'socials'] }, + { entity: 'hushpass', name: 'Dylan Ryder', fields: ['avatar'] }, + { entity: 'interracialpass', name: 'Aidra Fox', fields: ['avatar', 'height', 'measurements'] }, + // kelly madison / 8K + { entity: 'kellymadison', name: 'Ava Addams', fields: ['avatar', 'description', 'age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] }, + { entity: '8kmembers', name: 'Angie Lynx', fields: ['age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] }, + // aylo + { entity: 'brazzers', name: 'Lexi Lore', fields: ['avatar', 'description', 'gender', 'height', 'weight', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity', 'hairColor', 'hasTattoos', 'hasPiercings'] }, + { entity: 'digitalplayground', name: 'Elly Clutch', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] }, + { entity: 'realitykings', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] }, + { entity: 'fakehub', name: 'Abella Danger', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity'] }, + { entity: 'babes', name: 'Alina Lopez', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity', 'hasTattoos', 'hasPiercings'] }, + { entity: 'letsdoeit', name: 'Nicole Doshi', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] }, + { entity: 'men', name: 'Cade Maddox', fields: ['avatar', 'description', 'gender', 'height', 'ethnicity', 'penisLength', 'dateOfBirth', 'weight', 'hairColor', 'hasTattoos'] }, +]; const actorScrapers = scrapers.actors; +const source = argv.source?.[0] || null; + +async function validateUrl(url, mime = 'image/') { + if (!url) { + return false; + } + + const href = url.src || url; + + try { + new URL(href); // eslint-disable-line no-new + } catch (_error) { + return false; + } + + const res = await fetch(href); + + const type = res.headers.get('content-type'); + const resolvedType = url.expectType?.[type] || type; + + return resolvedType.includes(mime); +} + +const validators = { + age: (value) => !!Number(value), + gender: (value) => value && ['female', 'male', 'transsexual'].includes(value.toLowerCase()), + description: (value) => typeof value === 'string' && value.length > 3, + birthPlace: (value) => typeof value === 'string' && value.length > 3, + birthCountry: (value) => typeof value === 'string' && value.length > 1, + nationality: (value) => typeof value === 'string' && value.length > 3, + height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), + weight: (value) => !!Number(value), + eyes: (value) => typeof value === 'string' && value.length > 3, + hairColor: (value) => typeof value === 'string' && value.length > 3, + measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module + dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()), + hasTattoos: (value) => typeof value === 'boolean', + hasPiercings: (value) => typeof value === 'boolean', + avatar: async (value) => [].concat(value).reduce(async (chain, url) => { + const acc = await chain; + + if (!acc) { + return acc; + } + + return validateUrl(url); + }, Promise.resolve(true)), + socials: async (value) => [].concat(value).reduce(async (chain, url) => { + const acc = await chain; + + if (!acc) { + return acc; + } + + return validateUrl(url, 'text/html'); + }, Promise.resolve(true)), +}; // profiler in this context is shorthand for profile scraper async function init() { @@ -17,31 +131,58 @@ async function init() { await chain; const entity = entitiesBySlug[entitySlug] || null; + const fetchProfile = resolveLayoutScraper(entity, scraper)?.fetchProfile; - const profilers = Array.from(new Set(Object.entries(scraper) // some layouts will use the same profiler - .flatMap(([fnKey, fnOrLayout]) => { - if (fnOrLayout.fetchProfile) { - // layout - return fnOrLayout.fetchProfile; - } + const tests = actors.filter((actor) => actor.entity === entitySlug); - if (fnKey === 'fetchProfile') { - // primary - return fnOrLayout; - } + // TODO: remove when all tests are written + if (tests.length === 0) { + console.log('TODO', entitySlug); + return; + } - return null; - }).filter(Boolean))); + if (source && source !== entitySlug) { + console.log('____', entitySlug); + return; + } await test(`${entitySlug} (${entity?.name})`, async () => { - await test('has entity', () => assert.notEqual(entity, null)); - await test('has profilers', () => assert.ok(profilers.length > 0)); + await test(`${entitySlug} has scraper`, () => assert.notEqual(fetchProfile, null)); + await test(`${entitySlug} has entity`, () => assert.notEqual(entity, null)); + await test(`${entitySlug} has tests`, () => assert.notEqual(tests.length, 0)); - await test('foo', () => { - assert.strictEqual(5, 5); - }); + await test(`${entitySlug} has valid fields`, async () => Promise.all(tests.map(async (actor) => { + const profile = await fetchProfile({ + name: actor.name, + slug: slugify(actor.name), + }, { + ...entity, + entity, + channel: entity, + network: entity.parent, + parameters: getRecursiveParameters(entity), + }, include); + + console.log(profile); + console.log('Untested fields', Object.keys(profile).filter((field) => !actor.fields.includes(field)).join(', ')); + + if (!profile) { + assert.fail('profile not found'); + } + + await Promise.all(actor.fields.map(async (field) => { + assert.ok( + validators[field] + ? await validators[field](profile[field]) + : typeof profile[field] !== 'undefined', + `broken field ${field}, got ${profile[field]}`, + ); + })); + }))); }); }, Promise.resolve()); + + await knex.destroy(); } init();