Improved MindGeek actor scraper.
This commit is contained in:
parent
587c111449
commit
85c73bad77
|
@ -22,7 +22,7 @@
|
||||||
class="favicon"
|
class="favicon"
|
||||||
>
|
>
|
||||||
<img
|
<img
|
||||||
:src="`/img/logos/${actor.entity.slug}/favicon_dark.png`"
|
:src="`/img/logos/${actor.entity.slug}/favicon_light.png`"
|
||||||
class="favicon-icon"
|
class="favicon-icon"
|
||||||
>
|
>
|
||||||
</RouterLink>
|
</RouterLink>
|
||||||
|
|
|
@ -242,29 +242,22 @@ async function getSession(site, parameters, url) {
|
||||||
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
|
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile(data, html, releases = [], networkName) {
|
function scrapeProfile(data, releases = [], networkName) {
|
||||||
const { query } = qu.extract(html);
|
|
||||||
|
|
||||||
const profile = {
|
const profile = {
|
||||||
description: data.bio,
|
description: data.bio,
|
||||||
aliases: data.aliases,
|
aliases: data.aliases.filter(Boolean),
|
||||||
};
|
};
|
||||||
|
|
||||||
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
||||||
|
profile.measurements = data.measurements;
|
||||||
|
|
||||||
if (data.measurements) {
|
profile.dateOfBirth = qu.parseDate(data.birthday);
|
||||||
const [bust, waist, hip] = data.measurements.split('-');
|
profile.birthPlace = data.birthPlace;
|
||||||
|
profile.height = inchesToCm(data.height);
|
||||||
|
profile.weight = lbsToKg(data.weight);
|
||||||
|
|
||||||
if (profile.gender === 'female') {
|
profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name;
|
||||||
if (bust) profile.bust = bust.toUpperCase();
|
profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name;
|
||||||
if (waist) profile.waist = waist;
|
|
||||||
if (hip) profile.hip = hip;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.birthPlace) profile.birthPlace = data.birthPlace;
|
|
||||||
if (data.height) profile.height = inchesToCm(data.height);
|
|
||||||
if (data.weight) profile.weight = lbsToKg(data.weight);
|
|
||||||
|
|
||||||
if (data.images.card_main_rect?.[0]) {
|
if (data.images.card_main_rect?.[0]) {
|
||||||
profile.avatar = data.images.card_main_rect[0].xl?.url
|
profile.avatar = data.images.card_main_rect[0].xl?.url
|
||||||
|
@ -274,9 +267,6 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
||||||
|| data.images.card_main_rect[0].xs?.url;
|
|| data.images.card_main_rect[0].xs?.url;
|
||||||
}
|
}
|
||||||
|
|
||||||
const birthdate = query.all('li').find((el) => /Date of Birth/.test(el.textContent));
|
|
||||||
if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY');
|
|
||||||
|
|
||||||
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
|
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
|
||||||
profile.naturalBoobs = true;
|
profile.naturalBoobs = true;
|
||||||
}
|
}
|
||||||
|
@ -285,6 +275,14 @@ function scrapeProfile(data, html, releases = [], networkName) {
|
||||||
profile.naturalBoobs = false;
|
profile.naturalBoobs = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) {
|
||||||
|
profile.hasTattoos = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) {
|
||||||
|
profile.hasPiercings = true;
|
||||||
|
}
|
||||||
|
|
||||||
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
|
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
|
@ -377,7 +375,7 @@ async function fetchRelease(url, site, baseScene, options) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, parameters }) {
|
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
||||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||||
const { session, instanceToken } = await getSession(entity, parameters);
|
const { session, instanceToken } = await getSession(entity, parameters);
|
||||||
|
|
||||||
|
@ -395,31 +393,22 @@ async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, para
|
||||||
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||||
|
|
||||||
if (actorData) {
|
if (actorData) {
|
||||||
const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/${actorSlug}`;
|
|
||||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||||
|
|
||||||
const [actorRes, actorReleasesRes] = await Promise.all([
|
const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
|
||||||
http.get(actorUrl, {
|
|
||||||
interval: parameters.interval,
|
|
||||||
concurrency: parameters.concurrency,
|
|
||||||
}),
|
|
||||||
http.get(actorReleasesUrl, {
|
|
||||||
session,
|
session,
|
||||||
interval: parameters.interval,
|
interval: parameters.interval,
|
||||||
concurrency: parameters.concurrency,
|
concurrency: parameters.concurrency,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
},
|
},
|
||||||
}),
|
});
|
||||||
]);
|
|
||||||
|
|
||||||
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
||||||
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, entity.slug);
|
return scrapeProfile(actorData, actorReleasesRes.body.result, entity.slug);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (actorRes.statusCode === 200) {
|
return scrapeProfile(actorData, [], entity.slug);
|
||||||
return scrapeProfile(actorData, actorRes.body.toString(), null, entity.slug);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue