Added profile scraper tests (WIP), fixed some profile scrapers. Fixed slugify not breaking existing slugs.

This commit is contained in:
DebaucheryLibrarian
2026-01-10 02:58:50 +01:00
parent 5acc2c607b
commit bddc33a734
12 changed files with 293 additions and 111 deletions

View File

@@ -3,7 +3,6 @@
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
function scrapeAll(scenes, channel) {
@@ -76,41 +75,6 @@ async function scrapeScene({ query }, url, channel) {
return release;
}
/**
 * Builds an actor profile from a model page.
 *
 * Every list item matched by `.model-info li, .model-desc li` becomes one bio
 * entry: the key is the slugified result of `unprint.query.content(el, 'span')`
 * and the value is `unprint.query.text(el)`.
 *
 * @param {Object} context - scrape context
 * @param {Object} context.query - query helper exposing `all(selector)` and `img(selector)`
 * @returns {Promise<Object>} profile; left empty when neither an avatar nor bio entries are found
 */
async function scrapeProfile({ query }) {
	const rows = query.all('.model-info li, .model-desc li');

	const bio = Object.fromEntries(rows.map((row) => {
		const label = slugify(unprint.query.content(row, 'span'));
		const value = unprint.query.text(row);

		return [label, value];
	}));

	const avatarUrl = query.img('.model-photo img, img[alt="model"]');
	const profile = {};

	if (avatarUrl) {
		// first candidate has the `-<width>x<height>` segment stripped, second is the URL as found
		const unsizedUrl = avatarUrl.replace(/-\d+x\d+/, '');

		profile.avatar = [unsizedUrl, avatarUrl];
	}

	// no bio entries on the page: return whatever was gathered so far
	if (!bio || Object.keys(bio).length === 0) {
		return profile;
	}

	return Object.assign(profile, {
		description: bio.bio,
		dateOfBirth: bio.birthdate && unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
		birthPlace: bio.born,
		measurements: bio.measurements,
		height: convert(bio.height, 'cm'),
		weight: convert(bio.weight, 'lb', 'kg'),
		eyes: bio.eyes,
		hairColor: bio.hair,
	});
}
async function fetchLatestContent(url, parameters) {
if (parameters.useBrowser) {
const res = await http.get(url, {
@@ -187,16 +151,54 @@ async function fetchScene(url, channel) {
return res.status;
}
/**
 * Builds an actor profile from a model data record.
 *
 * The record's keys are lowercased before lookup because their case is
 * unreliable. Tags are matched after trimming whitespace and lowercasing, and
 * may be supplied either as a comma-separated string or as a list.
 *
 * @param {Object} data - model record (e.g. with `id`, `gender`, `bio`, `born`,
 *   `birthdate`, `age`, `measurements`, `height`, `weight`, `eyes`, `hair`,
 *   `thumb` and `tags` keys in any letter case)
 * @returns {Promise<Object>} profile; `hasTattoos` / `hasPiercings` are only set when the matching tag is present
 */
async function scrapeProfile(data) {
	const profile = {};

	// unreliable key case, lowercase all
	const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value]));

	profile.entryId = bio.id;
	profile.gender = bio.gender;
	profile.description = bio.bio;
	profile.birthPlace = bio.born;
	profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
	profile.age = bio.age;
	profile.measurements = bio.measurements;
	profile.height = convert(bio.height, 'cm');
	profile.weight = convert(bio.weight, 'lb', 'kg');
	profile.eyes = bio.eyes;
	profile.hairColor = bio.hair;
	profile.avatar = bio.thumb;

	// normalize tags so values like 'Tattoos, piercing' still match, and a list is accepted as-is
	const rawTags = Array.isArray(bio.tags) ? bio.tags : String(bio.tags ?? '').split(',');
	const tags = new Set(rawTags.map((tag) => String(tag).trim().toLowerCase()));

	if (tags.has('tattoos')) profile.hasTattoos = true;
	if (tags.has('piercing')) profile.hasPiercings = true;

	return profile;
}
async function fetchProfile(actor, context) {
const session = http.session();
await http.get(context.channel.url, { session });
const url = `${context.channel.url}/models/${actor.slug}`;
const res = await unprint.get(url);
const res = await unprint.get(url, {
parser: {
runScripts: 'dangerously',
},
});
if (res.ok) {
return scrapeProfile(res.context, context.channel);
const data = res.context.query.json('#__NEXT_DATA__');
if (data.props.pageProps.model) {
return scrapeProfile(data.props.pageProps.model, context.channel);
}
return null;
}
return res.status;