From a3c18ca57706b3c908ad16c25d31c7b0e4fa5cdc Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 4 Nov 2020 04:02:51 +0100 Subject: [PATCH] Rewrote Jay Rock scraper for new website. --- seeds/02_sites.js | 6 ++ seeds/04_media.js | 2 +- src/scrapers/jayrock-legacy.js | 124 ++++++++++++++++++++++ src/scrapers/jayrock.js | 183 +++++++++++++++------------------ 4 files changed, 213 insertions(+), 102 deletions(-) create mode 100644 src/scrapers/jayrock-legacy.js diff --git a/seeds/02_sites.js b/seeds/02_sites.js index c0cceab1..3d59cd4f 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -3447,6 +3447,9 @@ const sites = [ name: 'Jay\'s POV', url: 'https://jayspov.net', parent: 'jayrock', + parameters: { + referer: 'https://www.21sextury.com', + }, }, { slug: 'cospimps', @@ -3460,11 +3463,14 @@ const sites = [ url: 'https://www.blackforwife.com', parent: 'jayrock', parameters: { + referer: 'https://www.21sextury.com', + /* referer: 'https://freetour.adulttime.com/en/blackforwife', useGamma: true, scene: false, deep: 'https://21sextury.com/en/video', photos: false, + */ }, }, // JESSE LOADS MONSTER FACIALS diff --git a/seeds/04_media.js b/seeds/04_media.js index 4f6b041c..89173030 100644 --- a/seeds/04_media.js +++ b/seeds/04_media.js @@ -790,8 +790,8 @@ const tagPhotos = [ ['fake-boobs', 9, 'Putri Cinta for StasyQ'], ['fake-boobs', 11, 'Jessa Rhodes and Cali Carter in "Busty Anal Workout" for LesbianX'], ['fake-boobs', 13, 'Kitana Lure for Asshole Fever'], - ['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'], ['fake-boobs', 3, 'Ashly Anderson for Passion HD'], + ['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'], ['fake-boobs', 8, 'Amber Alena for Score'], ['fake-boobs', 4, 'Capri Cavanni for Big Tits in Sports'], // ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'], diff --git a/src/scrapers/jayrock-legacy.js b/src/scrapers/jayrock-legacy.js new file mode 100644 index 00000000..d492927e --- /dev/null +++ b/src/scrapers/jayrock-legacy.js @@ -0,0 +1,124 @@ +'use strict'; + +/* example for other ModelCentro scrapers */ +const Promise = require('bluebird'); +const bhttp = require('bhttp'); + +const logger = require('../logger')(__filename); +const slugify = require('../utils/slugify'); +const { fetchApiLatest, fetchScene } = require('./gamma'); + +async function fetchToken(site) { + const res = await bhttp.get(site.url); + const html = res.body.toString(); + + const time = html.match(/"aet":\d+/)[0].split(':')[1]; + const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1); + const token = ah.split('').reverse().join(''); + + return { time, token }; +} + +async function fetchActors(entryId, site, { token, time }) { + const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`; + const res = await bhttp.get(url); + + if (res.statusCode === 200 && res.body.status === true) { + return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName); + } + + return []; +} + +async function fetchTrailerLocation(entryId, site) { + const url = `${site.url}/api/download/${entryId}/hd1080/stream`; + + try { + const res = await bhttp.get(url, { + followRedirects: false, + }); + + if (res.statusCode === 302) { + return res.headers.location; + } + } catch (error) { + logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`); + } + + return null; +} + +async function scrapeScene(scene, site, tokens) { + const release = { + entryId: scene.id, + title: scene.title, + duration: scene.length, + site, + meta: { + tokens, // attach tokens to reduce number of requests required for deep fetching + }, + }; + + release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`; + release.date = new Date(scene.sites.collection[scene.id].publishDate); + release.poster = scene._resources.primary[0].url; + + if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias); + if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url); + + const [actors, trailer] = await Promise.all([ + fetchActors(release.entryId, site, tokens), + fetchTrailerLocation(release.entryId, site), + ]); + + release.actors = actors; + if (trailer) release.trailer = { src: trailer, quality: 1080 }; + + return release; +} + +function scrapeLatest(scenes, site, tokens) { + return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 }); +} + +async function fetchLatest(site, page = 1) { + if (site.parameters?.useGamma) { + return fetchApiLatest(site, page); + } + + const { time, token } = await fetchToken(site); + + // transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory + const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`; + const res = await bhttp.get(url); + + if (res.statusCode === 200 && res.body.status) { + return scrapeLatest(res.body.response.collection, site, { time, token }); + } + + return null; +} + +async function fetchNetworkScene(url, site, release) { + if (site.parameters?.useGamma) { + return fetchScene(url, site, release); + } + + const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching + const { pathname } = new URL(url); + const entryId = pathname.split('/')[2]; + + const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`; + const res = await bhttp.get(apiUrl); + + if (res.statusCode === 200 && res.body.status) { + return scrapeScene(res.body.response.collection[0], site, { time, token }); + } + + return null; +} + +module.exports = { + fetchLatest, + fetchScene: fetchNetworkScene, +}; diff --git a/src/scrapers/jayrock.js b/src/scrapers/jayrock.js index d492927e..4322a1d8 100644 --- a/src/scrapers/jayrock.js +++ b/src/scrapers/jayrock.js @@ -1,124 +1,105 @@ 'use strict'; -/* example for other ModelCentro scrapers */ -const Promise = require('bluebird'); -const bhttp = require('bhttp'); +const qu = require('../utils/qu'); -const logger = require('../logger')(__filename); -const slugify = require('../utils/slugify'); -const { fetchApiLatest, fetchScene } = require('./gamma'); +function scrapeLatest(items, channel) { + return items.map(({ query }) => { + const release = {}; -async function fetchToken(site) { - const res = await bhttp.get(site.url); - const html = res.body.toString(); + release.url = query.url('h5 a', null, { origin: channel.url }); + release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; - const time = html.match(/"aet":\d+/)[0].split(':')[1]; - const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1); - const token = ah.split('').reverse().join(''); + release.title = query.cnt('h5 a'); - return { time, token }; + [release.poster, ...release.photos] = query.imgs('.screenshot').map(src => [ + // unnecessarily large + // src.replace(/\/\d+/, 3840), + // src.replace(/\/\d+/, '/2000'), + src.replace(/\/\d+/, '/1500'), + src.replace(/\/\d+/, '/1000'), + src, + ]); + + return release; + }); } -async function fetchActors(entryId, site, { token, time }) { - const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`; - const res = await bhttp.get(url); +function scrapeScene({ query, html }, url, channel) { + const release = {}; - if (res.statusCode === 200 && res.body.status === true) { - return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName); + release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; + + release.title = query.cnt('h1.description'); + release.actors = query + .all('.video-performer') + .map((actorEl) => { + const actorUrl = query.url(actorEl, 'a', 'href', { origin: channel.url }); + const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1]; + const avatar = query.img(actorEl, 'img', 'data-bgsrc'); + + return { + name: query.cnt(actorEl, '.video-performer-name'), + gender: 'female', + avatar: [ + avatar.replace(/\/actor\/(\d+)/, '/actor/500'), + avatar, + ], + url: actorUrl, + entryId, + }; + }) + .concat({ name: 'Jay Rock', gender: 'male' }); + + release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); + release.duration = query.number('.release-date:last-child') * 60; + + release.studio = query.cnt('.studio span:nth-child(2)'); + release.director = query.text('.director'); + + release.tags = query.cnts('.tags a'); + + const poster = html.match(/url\((https.+\.jpg)\)/)?.[1]; + const photos = query.imgs('#moreScreenshots img'); + + [release.poster, ...release.photos] = [poster] + .concat(photos) + .filter(Boolean) + .map(src => [ + src.replace(/\/(\d+)\/\d+/, '/$1/1500'), + src.replace(/\/(\d+)\/\d+/, '/$1/1000'), + src, + ]); + + const videoId = html.match(/item: (\d+)/)?.[1]; + + if (videoId) { + release.trailer = { stream: `https://trailer.adultempire.com/hls/trailer/${videoId}/master.m3u8` }; } - return []; -} - -async function fetchTrailerLocation(entryId, site) { - const url = `${site.url}/api/download/${entryId}/hd1080/stream`; - - try { - const res = await bhttp.get(url, { - followRedirects: false, - }); - - if (res.statusCode === 302) { - return res.headers.location; - } - } catch (error) { - logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`); - } - - return null; -} - -async function scrapeScene(scene, site, tokens) { - const release = { - entryId: scene.id, - title: scene.title, - duration: scene.length, - site, - meta: { - tokens, // attach tokens to reduce number of requests required for deep fetching - }, - }; - - release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`; - release.date = new Date(scene.sites.collection[scene.id].publishDate); - release.poster = scene._resources.primary[0].url; - - if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias); - if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url); - - const [actors, trailer] = await Promise.all([ - fetchActors(release.entryId, site, tokens), - fetchTrailerLocation(release.entryId, site), - ]); - - release.actors = actors; - if (trailer) release.trailer = { src: trailer, quality: 1080 }; - return release; } -function scrapeLatest(scenes, site, tokens) { - return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 }); +async function fetchLatest(channel, page = 1) { + const res = await qu.getAll(`https://jayspov.net/jays-pov-updates.html?view=list&page=${page}`, '.item-grid-list-view > .grid-item'); + + if (res.ok) { + return scrapeLatest(res.items, channel); + } + + return res.status; } -async function fetchLatest(site, page = 1) { - if (site.parameters?.useGamma) { - return fetchApiLatest(site, page); +async function fetchScene(url, channel) { + const res = await qu.get(url); + + if (res.ok) { + return scrapeScene(res.item, url, channel); } - const { time, token } = await fetchToken(site); - - // transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory - const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`; - const res = await bhttp.get(url); - - if (res.statusCode === 200 && res.body.status) { - return scrapeLatest(res.body.response.collection, site, { time, token }); - } - - return null; -} - -async function fetchNetworkScene(url, site, release) { - if (site.parameters?.useGamma) { - return fetchScene(url, site, release); - } - - const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching - const { pathname } = new URL(url); - const entryId = pathname.split('/')[2]; - - const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`; - const res = await bhttp.get(apiUrl); - - if (res.statusCode === 200 && res.body.status) { - return scrapeScene(res.body.response.collection[0], site, { time, token }); - } - - return null; + return res.status; } module.exports = { fetchLatest, - fetchScene: fetchNetworkScene, + fetchScene, };