From 6d4fd5fd7715bf862be2f0d7c4aa29500c2a9304 Mon Sep 17 00:00:00 2001
From: Niels Simenon
Date: Mon, 27 Jan 2020 22:54:14 +0100
Subject: [PATCH] Added MindGeek profile scraper for all MG sites.

---
 assets/components/tile/actor.vue  |  2 -
 src/scrapers/babes.js             |  7 ++-
 src/scrapers/ddfnetwork.js        |  4 +-
 src/scrapers/digitalplayground.js |  7 ++-
 src/scrapers/fakehub.js           |  7 ++-
 src/scrapers/milehighmedia.js     |  7 ++-
 src/scrapers/mindgeek.js          | 81 ++++++++++++++++++++++++++-----
 src/scrapers/mofos.js             |  7 ++-
 src/scrapers/realitykings.js      | 12 ++++-
 src/scrapers/scrapers.js          |  8 ++-
 src/utils/convert.js              |  5 ++
 11 files changed, 124 insertions(+), 23 deletions(-)

diff --git a/assets/components/tile/actor.vue b/assets/components/tile/actor.vue
index 794c45e3..9745141a 100644
--- a/assets/components/tile/actor.vue
+++ b/assets/components/tile/actor.vue
@@ -5,8 +5,6 @@ >
/Date of Birth/.test(el.textContent));
+  if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');
+
+  return profile;
+}
+
+async function fetchLatest(site, page = 1) {
+  const url = getUrl(site);
+  const { search } = new URL(url);
+  const siteId = new URLSearchParams(search).get('site');
+
+  const { session, instanceToken } = await getSession(url);
+
   const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
   const limit = 10;
   const apiUrl = `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
@@ -132,14 +171,7 @@ async function fetchLatest(site, page = 1) {
 
 async function fetchScene(url, site) {
   const entryId = url.match(/\d+/)[0];
-
-  const cookieJar = new CookieJar();
-  const session = bhttp.session({ cookieJar });
-
-  await session.get(url);
-
-  const cookieString = await cookieJar.getCookieStringAsync(url);
-  const { instance_token: instanceToken } = cookieToData(cookieString);
+  const { session, instanceToken } = await getSession(url);
 
   const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
     headers: {
@@ -150,8 +182,31 @@ async function fetchScene(url, site) {
   return scrapeScene(res.body.result, url, site);
 }
 
+async function fetchProfile(actorName, networkName, actorPath = 'model') {
+  const url = `https://www.${networkName}.com`;
+  const { session, instanceToken } = await getSession(url);
+
+  const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
+    headers: {
+      Instance: instanceToken,
+    },
+  });
+
+  if (res.statusCode === 200) {
+    const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());
+    const actorRes = await bhttp.get(`https://www.${networkName}.com/${actorPath}/${actorData.id}/`);
+
+    if (actorData && actorRes.statusCode === 200) {
+      return scrapeProfile(actorData, actorRes.body.toString());
+    }
+  }
+
+  return null;
+}
+
 module.exports = {
   scrapeLatestX,
   fetchLatest,
   fetchScene,
+  fetchProfile,
 };
diff --git a/src/scrapers/mofos.js b/src/scrapers/mofos.js
index db3d9416..274c6cda 100644
--- a/src/scrapers/mofos.js
+++ b/src/scrapers/mofos.js
@@ -1,8 +1,13 @@
 'use strict';
 
-const { fetchScene, fetchLatest } = require('./mindgeek');
+const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
+
+async function networkFetchProfile(actorName) {
+  return fetchProfile(actorName, 'mofos');
+}
 
 module.exports = {
   fetchLatest,
   fetchScene,
+  fetchProfile: networkFetchProfile,
 };
diff --git a/src/scrapers/realitykings.js b/src/scrapers/realitykings.js
index 53505e3e..c1b552a2 100644
--- a/src/scrapers/realitykings.js
+++ b/src/scrapers/realitykings.js
@@ -3,7 +3,12 @@
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 
-const { scrapeLatestX, fetchLatest, fetchScene } = require('./mindgeek');
+const {
+  scrapeLatestX,
+  fetchLatest,
+  fetchScene,
+  fetchProfile,
+} = require('./mindgeek');
 
 function scrapeLatestClassic(html, site) {
   const $ = cheerio.load(html, { normalizeWhitespace: true });
@@ -36,7 +41,12 @@ async function fetchLatestWrap(site, page = 1) {
   return fetchLatest(site, page);
 }
 
+async function networkFetchProfile(actorName) {
+  return fetchProfile(actorName, 'realitykings');
+}
+
 module.exports = {
   fetchLatest: fetchLatestWrap,
+  fetchProfile: networkFetchProfile,
   fetchScene,
 };
diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js
index b32cd800..f5694d69 100644
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -11,7 +11,6 @@ const jayrock = require('./jayrock');
 const kink = require('./kink');
 const mikeadriano = require('./mikeadriano');
 const milehighmedia = require('./milehighmedia');
-const mofos = require('./mofos');
 const perfectgonzo = require('./perfectgonzo');
 const pervcity = require('./pervcity');
 const pornpros = require('./pornpros');
@@ -29,6 +28,7 @@ const evilangel = require('./evilangel');
 const julesjordan = require('./julesjordan');
 const kellymadison = require('./kellymadison');
 const legalporno = require('./legalporno');
+const mofos = require('./mofos');
 const twentyonesextury = require('./21sextury');
 const xempire = require('./xempire');
 
@@ -76,6 +76,12 @@ module.exports = {
   evilangel,
   xempire,
   blowpass,
+  mofos,
+  realitykings,
+  digitalplayground,
+  fakehub,
+  babes,
+  milehighmedia,
   julesjordan,
   brazzers,
   boobpedia,
diff --git a/src/utils/convert.js b/src/utils/convert.js
index eb667a93..eb9894bc 100644
--- a/src/utils/convert.js
+++ b/src/utils/convert.js
@@ -1,5 +1,9 @@
 'use strict';
 
+function inchesToCm(inches) {
+  return Math.round(Number(inches) * 2.54);
+}
+
 function feetInchesToCm(feet, inches) {
   return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
 }
@@ -33,6 +37,7 @@ module.exports = {
   cmToFeetInches,
   feetInchesToCm,
   heightToCm,
+  inchesToCm,
   lbsToKg,
   kgToLbs,
 };
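
Reviewer note: the snippet below is a minimal, hypothetical usage sketch and is not part of the patch. It shows how the per-network fetchProfile wrappers added above are expected to be called through the scraper registry in src/scrapers/scrapers.js; the require path and the actor name are placeholders.

// Hypothetical usage sketch, assuming it is run from the repository root.
// The actor name is a placeholder, not taken from the patch.
const scrapers = require('./src/scrapers/scrapers');

async function main() {
  // mofos.fetchProfile is the wrapper added in src/scrapers/mofos.js; it
  // delegates to the shared MindGeek fetchProfile with networkName 'mofos'.
  const profile = await scrapers.mofos.fetchProfile('Jane Example');

  // fetchProfile resolves to a scraped profile object on an exact
  // (case-insensitive) name match, or null when no actor is found.
  console.log(profile);
}

main().catch(console.error);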