Improved MindGeek actor scraper.

This commit is contained in:
DebaucheryLibrarian 2022-05-30 00:05:33 +02:00
parent 587c111449
commit 85c73bad77
2 changed files with 30 additions and 41 deletions

View File

@ -22,7 +22,7 @@
class="favicon" class="favicon"
> >
<img <img
:src="`/img/logos/${actor.entity.slug}/favicon_dark.png`" :src="`/img/logos/${actor.entity.slug}/favicon_light.png`"
class="favicon-icon" class="favicon-icon"
> >
</RouterLink> </RouterLink>

View File

@ -242,29 +242,22 @@ async function getSession(site, parameters, url) {
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
} }
function scrapeProfile(data, html, releases = [], networkName) { function scrapeProfile(data, releases = [], networkName) {
const { query } = qu.extract(html);
const profile = { const profile = {
description: data.bio, description: data.bio,
aliases: data.aliases, aliases: data.aliases.filter(Boolean),
}; };
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
profile.measurements = data.measurements;
if (data.measurements) { profile.dateOfBirth = qu.parseDate(data.birthday);
const [bust, waist, hip] = data.measurements.split('-'); profile.birthPlace = data.birthPlace;
profile.height = inchesToCm(data.height);
profile.weight = lbsToKg(data.weight);
if (profile.gender === 'female') { profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name;
if (bust) profile.bust = bust.toUpperCase(); profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name;
if (waist) profile.waist = waist;
if (hip) profile.hip = hip;
}
}
if (data.birthPlace) profile.birthPlace = data.birthPlace;
if (data.height) profile.height = inchesToCm(data.height);
if (data.weight) profile.weight = lbsToKg(data.weight);
if (data.images.card_main_rect?.[0]) { if (data.images.card_main_rect?.[0]) {
profile.avatar = data.images.card_main_rect[0].xl?.url profile.avatar = data.images.card_main_rect[0].xl?.url
@ -274,9 +267,6 @@ function scrapeProfile(data, html, releases = [], networkName) {
|| data.images.card_main_rect[0].xs?.url; || data.images.card_main_rect[0].xs?.url;
} }
const birthdate = query.all('li').find((el) => /Date of Birth/.test(el.textContent));
if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY');
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
profile.naturalBoobs = true; profile.naturalBoobs = true;
} }
@ -285,6 +275,14 @@ function scrapeProfile(data, html, releases = [], networkName) {
profile.naturalBoobs = false; profile.naturalBoobs = false;
} }
if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) {
profile.hasTattoos = true;
}
if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) {
profile.hasPiercings = true;
}
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName)); profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
return profile; return profile;
@ -377,7 +375,7 @@ async function fetchRelease(url, site, baseScene, options) {
return null; return null;
} }
async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, parameters }) { async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
const { session, instanceToken } = await getSession(entity, parameters); const { session, instanceToken } = await getSession(entity, parameters);
@ -395,31 +393,22 @@ async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, para
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase()); const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
if (actorData) { if (actorData) {
const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/${actorSlug}`;
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
const [actorRes, actorReleasesRes] = await Promise.all([ const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
http.get(actorUrl, {
interval: parameters.interval,
concurrency: parameters.concurrency,
}),
http.get(actorReleasesUrl, {
session, session,
interval: parameters.interval, interval: parameters.interval,
concurrency: parameters.concurrency, concurrency: parameters.concurrency,
headers: { headers: {
Instance: instanceToken, Instance: instanceToken,
}, },
}), });
]);
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, entity.slug); return scrapeProfile(actorData, actorReleasesRes.body.result, entity.slug);
} }
if (actorRes.statusCode === 200) { return scrapeProfile(actorData, [], entity.slug);
return scrapeProfile(actorData, actorRes.body.toString(), null, entity.slug);
}
} }
} }