From 85c73bad775bef7567bf44f1f145001f3fb6a439 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 30 May 2022 00:05:33 +0200 Subject: [PATCH] Improved MindGeek actor scraper. --- assets/components/actors/tile.vue | 2 +- src/scrapers/mindgeek.js | 69 +++++++++++++------------------ 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/assets/components/actors/tile.vue b/assets/components/actors/tile.vue index 7e1b70a7..7caa7eff 100644 --- a/assets/components/actors/tile.vue +++ b/assets/components/actors/tile.vue @@ -22,7 +22,7 @@ class="favicon" > diff --git a/src/scrapers/mindgeek.js b/src/scrapers/mindgeek.js index 6ca0a319..72e61a20 100644 --- a/src/scrapers/mindgeek.js +++ b/src/scrapers/mindgeek.js @@ -242,29 +242,22 @@ async function getSession(site, parameters, url) { throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); } -function scrapeProfile(data, html, releases = [], networkName) { - const { query } = qu.extract(html); - +function scrapeProfile(data, releases = [], networkName) { const profile = { description: data.bio, - aliases: data.aliases, + aliases: data.aliases.filter(Boolean), }; profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; + profile.measurements = data.measurements; - if (data.measurements) { - const [bust, waist, hip] = data.measurements.split('-'); + profile.dateOfBirth = qu.parseDate(data.birthday); + profile.birthPlace = data.birthPlace; + profile.height = inchesToCm(data.height); + profile.weight = lbsToKg(data.weight); - if (profile.gender === 'female') { - if (bust) profile.bust = bust.toUpperCase(); - if (waist) profile.waist = waist; - if (hip) profile.hip = hip; - } - } - - if (data.birthPlace) profile.birthPlace = data.birthPlace; - if (data.height) profile.height = inchesToCm(data.height); - if (data.weight) profile.weight = lbsToKg(data.weight); + profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name; + profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name; if (data.images.card_main_rect?.[0]) { profile.avatar = data.images.card_main_rect[0].xl?.url @@ -274,9 +267,6 @@ function scrapeProfile(data, html, releases = [], networkName) { || data.images.card_main_rect[0].xs?.url; } - const birthdate = query.all('li').find((el) => /Date of Birth/.test(el.textContent)); - if (birthdate) profile.birthdate = query.date(birthdate, 'span', 'MMMM Do, YYYY'); - if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { profile.naturalBoobs = true; } @@ -285,6 +275,14 @@ function scrapeProfile(data, html, releases = [], networkName) { profile.naturalBoobs = false; } + if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) { + profile.hasTattoos = true; + } + + if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) { + profile.hasPiercings = true; + } + profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName)); return profile; @@ -377,7 +375,7 @@ async function fetchRelease(url, site, baseScene, options) { return null; } -async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, parameters }) { +async function fetchProfile({ name: actorName }, { entity, parameters }, include) { // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; const { session, instanceToken } = await getSession(entity, parameters); @@ -395,31 +393,22 @@ async function fetchProfile({ name: actorName, slug: actorSlug }, { entity, para const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { - const actorUrl = `https://www.${entity.slug}.com/${entity.parameters?.actorPath || 'model'}/${actorData.id}/${actorSlug}`; const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; - const [actorRes, actorReleasesRes] = await Promise.all([ - http.get(actorUrl, { - interval: parameters.interval, - concurrency: parameters.concurrency, - }), - http.get(actorReleasesUrl, { - session, - interval: parameters.interval, - concurrency: parameters.concurrency, - headers: { - Instance: instanceToken, - }, - }), - ]); + const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, { + session, + interval: parameters.interval, + concurrency: parameters.concurrency, + headers: { + Instance: instanceToken, + }, + }); - if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { - return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, entity.slug); + if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { + return scrapeProfile(actorData, actorReleasesRes.body.result, entity.slug); } - if (actorRes.statusCode === 200) { - return scrapeProfile(actorData, actorRes.body.toString(), null, entity.slug); - } + return scrapeProfile(actorData, [], entity.slug); } }