forked from DebaucheryLibrarian/traxxx
Added profile scraper tests (WIP), fixed some profile scrapers. Fixed slugify not breaking existing slugs.
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
@@ -76,41 +75,6 @@ async function scrapeScene({ query }, url, channel) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * Every `<li>` under `.model-info` or `.model-desc` becomes one bio entry, keyed
 * by the slugified content of its `span` and valued by the item's text.
 *
 * @param {Object} context - Page context.
 * @param {Object} context.query - Query helper bound to the page; `all` and `img` are used.
 * @returns {Promise<Object>} Profile; only `avatar` is set when the page lists no bio entries.
 */
async function scrapeProfile({ query }) {
	const result = {};

	const rows = query.all('.model-info li, .model-desc li');
	const pairs = rows.map((row) => {
		const label = slugify(unprint.query.content(row, 'span'));
		const text = unprint.query.text(row);

		return [label, text];
	});
	const details = Object.fromEntries(pairs);

	const photo = query.img('.model-photo img, img[alt="model"]');

	if (photo) {
		// first candidate has the first `-<digits>x<digits>` segment removed
		// (presumably the unscaled image), the URL as found is the fallback
		result.avatar = [photo.replace(/-\d+x\d+/, ''), photo];
	}

	// nothing else to fill in when the page has no bio entries
	if (!details || Object.keys(details).length === 0) {
		return result;
	}

	return Object.assign(result, {
		description: details.bio,
		// only parse the date when the page actually lists one
		dateOfBirth: details.birthdate && unprint.extractDate(details.birthdate, 'YYYY-MM-DD'),
		birthPlace: details.born,
		measurements: details.measurements,
		height: convert(details.height, 'cm'),
		weight: convert(details.weight, 'lb', 'kg'),
		eyes: details.eyes,
		hairColor: details.hair,
	});
}
|
||||
|
||||
async function fetchLatestContent(url, parameters) {
|
||||
if (parameters.useBrowser) {
|
||||
const res = await http.get(url, {
|
||||
@@ -187,16 +151,54 @@ async function fetchScene(url, channel) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from a raw model record.
 *
 * The record's key casing is unreliable, so every key is lowercased before it
 * is read. Fields absent from the record are left undefined on the profile,
 * except `dateOfBirth`, which is null when no birthdate is present.
 *
 * @param {Object|null|undefined} data - Raw model record; a missing record yields a profile without data.
 * @returns {Promise<Object>} Profile with entryId, gender, description, birthPlace,
 *   dateOfBirth, age, measurements, height, weight, eyes, hairColor, avatar and,
 *   when tagged, hasTattoos / hasPiercings.
 */
async function scrapeProfile(data) {
	const profile = {};

	// unreliable key case, lowercase all; tolerate a missing record instead of throwing
	const bio = Object.fromEntries(Object.entries(data || {}).map(([key, value]) => [key.toLowerCase(), value]));

	profile.entryId = bio.id;

	profile.gender = bio.gender;
	profile.description = bio.bio;

	profile.birthPlace = bio.born;
	// only hand a value to the date parser when the record actually has one
	profile.dateOfBirth = bio.birthdate ? unprint.extractDate(bio.birthdate, 'YYYY-MM-DD') : null;
	profile.age = bio.age;

	profile.measurements = bio.measurements;
	profile.height = convert(bio.height, 'cm');
	profile.weight = convert(bio.weight, 'lb', 'kg');

	profile.eyes = bio.eyes;
	profile.hairColor = bio.hair;

	profile.avatar = bio.thumb;

	// accept a comma-separated string or a ready-made list, and normalise
	// whitespace and case so 'a, Tattoos' still matches 'tattoos'
	const rawTags = Array.isArray(bio.tags) ? bio.tags : String(bio.tags ?? '').split(',');
	const tags = rawTags.map((tag) => String(tag).trim().toLowerCase());

	if (tags.includes('tattoos')) profile.hasTattoos = true;
	if (tags.includes('piercing')) profile.hasPiercings = true;

	return profile;
}
|
||||
|
||||
async function fetchProfile(actor, context) {
|
||||
const session = http.session();
|
||||
|
||||
await http.get(context.channel.url, { session });
|
||||
|
||||
const url = `${context.channel.url}/models/${actor.slug}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
const res = await unprint.get(url, {
|
||||
parser: {
|
||||
runScripts: 'dangerously',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, context.channel);
|
||||
const data = res.context.query.json('#__NEXT_DATA__');
|
||||
|
||||
if (data.props.pageProps.model) {
|
||||
return scrapeProfile(data.props.pageProps.model, context.channel);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
Reference in New Issue
Block a user