Improved MindGeek actor scraper.

This commit is contained in:
DebaucheryLibrarian 2022-05-30 00:05:33 +02:00
parent 587c111449
commit 85c73bad77
2 changed files with 30 additions and 41 deletions

View File

@ -22,7 +22,7 @@
class="favicon"
>
<img
:src="`/img/logos/${actor.entity.slug}/favicon_dark.png`"
:src="`/img/logos/${actor.entity.slug}/favicon_light.png`"
class="favicon-icon"
>
</RouterLink>

View File

@ -242,29 +242,22 @@ async function getSession(site, parameters, url) {
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
}
function scrapeProfile(data, html, releases = [], networkName) {
const { query } = qu.extract(html);
function scrapeProfile(data, releases = [], networkName) {
const profile = {
description: data.bio,
aliases: data.aliases,
aliases: data.aliases.filter(Boolean),
};
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
profile.measurements = data.measurements;
if (data.measurements) {
const [bust, waist, hip] = data.measurements.split('-');
profile.dateOfBirth = qu.parseDate(data.birthday);
profile.birthPlace = data.birthPlace;
profile.height = inchesToCm(data.height);
profile.weight = lbsToKg(data.weight);
if (profile.gender === 'female') {
if (bust) profile.bust = bust.toUpperCase();
if (waist) profile.waist = waist;
if (hip) profile.hip = hip;
}
}
if (data.birthPlace) profile.birthPlace = data.birthPlace;
if (data.height) profile.height = inchesToCm(data.height);
if (data.weight) profile.weight = lbsToKg(data.weight);
profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name;
profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name;
if (data.images.card_main_rect?.[0]) {
profile.avatar = data.images.card_main_rect[0].xl?.url
@ -274,9 +267,6 @@ function scrapeProfile(data, html, releases = [], networkName) {
|| data.images.card_main_rect[0].xs?.url;
}
const birthdate = query.all('li').find((el) => /Date of Birth/.test(el.textContent));
if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY');
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
profile.naturalBoobs = true;
}
@ -285,6 +275,14 @@ function scrapeProfile(data, html, releases = [], networkName) {
profile.naturalBoobs = false;
}
if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) {
profile.hasTattoos = true;
}
if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) {
profile.hasPiercings = true;
}
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
return profile;
@ -377,7 +375,7 @@ async function fetchRelease(url, site, baseScene, options) {
return null;
}
async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, parameters }) {
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
const { session, instanceToken } = await getSession(entity, parameters);
@ -395,31 +393,22 @@ async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, para
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
if (actorData) {
const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/${actorSlug}`;
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
const [actorRes, actorReleasesRes] = await Promise.all([
http.get(actorUrl, {
interval: parameters.interval,
concurrency: parameters.concurrency,
}),
http.get(actorReleasesUrl, {
const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
session,
interval: parameters.interval,
concurrency: parameters.concurrency,
headers: {
Instance: instanceToken,
},
}),
]);
});
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, entity.slug);
if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, actorReleasesRes.body.result, entity.slug);
}
if (actorRes.statusCode === 200) {
return scrapeProfile(actorData, actorRes.body.toString(), null, entity.slug);
}
return scrapeProfile(actorData, [], entity.slug);
}
}