From 8a22ff07a60ea9bbe528797de7501f39075a3d3b Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Fri, 13 Nov 2020 01:16:17 +0100
Subject: [PATCH] Merged legacy JayRock scraper into new scraper for CosPimps.

---
Review notes (commentary only; text in this area is not part of the commit):
* src/utils/argv-include.js: the legacy keys (covers, media, photos, ...)
  now read from the `options` object. They previously read from `include`,
  which is the function itself, so every legacy key was undefined.
* src/scrapers/jayrock.js: the logger is created with
  require('../logger')(__filename), the same way the removed
  jayrock-legacy.js created it, so logger.warn() is callable in
  fetchTrailerLocation's catch block.
* Only text on added lines changed; hunk sizes and the diffstat are as before.
  Indentation was reconstructed as tabs, and the blob hashes on the `index`
  lines predate the two fixes above.

 seeds/02_sites.js              |  17 +++--
 src/scrapers/jayrock-legacy.js | 124 --------------------------------
 src/scrapers/jayrock.js        | 127 ++++++++++++++++++++++++++++++++-
 src/utils/argv-include.js      |  37 ++++++----
 4 files changed, 162 insertions(+), 143 deletions(-)
 delete mode 100644 src/scrapers/jayrock-legacy.js

diff --git a/seeds/02_sites.js b/seeds/02_sites.js
index 3d59cd4f..2448c469 100644
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -3447,15 +3447,25 @@ const sites = [
 		name: 'Jay\'s POV',
 		url: 'https://jayspov.net',
 		parent: 'jayrock',
+		/* more recent scenes on own site
 		parameters: {
 			referer: 'https://www.21sextury.com',
+			scene: false,
 		},
+		*/
 	},
 	{
 		slug: 'cospimps',
 		name: 'CosPimps',
 		url: 'https://cospimps.com',
 		parent: 'jayrock',
+		parameters: {
+			useApi: true,
+			/* Gamma scenes are out of date
+			referer: 'https://www.21sextury.com',
+			scene: false,
+			*/
+		},
 	},
 	{
 		slug: 'blackforwife',
@@ -3463,14 +3473,9 @@ const sites = [
 		url: 'https://www.blackforwife.com',
 		parent: 'jayrock',
 		parameters: {
-			referer: 'https://www.21sextury.com',
-			/*
-			referer: 'https://freetour.adulttime.com/en/blackforwife',
 			useGamma: true,
+			referer: 'https://www.21sextury.com',
 			scene: false,
-			deep: 'https://21sextury.com/en/video',
-			photos: false,
-			*/
 		},
 	},
 	// JESSE LOADS MONSTER FACIALS
diff --git a/src/scrapers/jayrock-legacy.js b/src/scrapers/jayrock-legacy.js
deleted file mode 100644
index d492927e..00000000
--- a/src/scrapers/jayrock-legacy.js
+++ /dev/null
@@ -1,124 +0,0 @@
-'use strict';
-
-/* example for other ModelCentro scrapers */
-const Promise = require('bluebird');
-const bhttp = require('bhttp');
-
-const logger = require('../logger')(__filename);
-const slugify = require('../utils/slugify');
-const { fetchApiLatest, fetchScene } = require('./gamma');
-
-async function fetchToken(site) {
-	const res = await bhttp.get(site.url);
-	const html = res.body.toString();
-
-	const time = html.match(/"aet":\d+/)[0].split(':')[1];
-	const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
-	const token = ah.split('').reverse().join('');
-
-	return { time, token };
-}
-
-async function fetchActors(entryId, site, { token, time }) {
-	const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
-	const res = await bhttp.get(url);
-
-	if (res.statusCode === 200 && res.body.status === true) {
-		return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
-	}
-
-	return [];
-}
-
-async function fetchTrailerLocation(entryId, site) {
-	const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
-
-	try {
-		const res = await bhttp.get(url, {
-			followRedirects: false,
-		});
-
-		if (res.statusCode === 302) {
-			return res.headers.location;
-		}
-	} catch (error) {
-		logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
-	}
-
-	return null;
-}
-
-async function scrapeScene(scene, site, tokens) {
-	const release = {
-		entryId: scene.id,
-		title: scene.title,
-		duration: scene.length,
-		site,
-		meta: {
-			tokens, // attach tokens to reduce number of requests required for deep fetching
-		},
-	};
-
-	release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
-	release.date = new Date(scene.sites.collection[scene.id].publishDate);
-	release.poster = scene._resources.primary[0].url;
-
-	if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
-	if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
-
-	const [actors, trailer] = await Promise.all([
-		fetchActors(release.entryId, site, tokens),
-		fetchTrailerLocation(release.entryId, site),
-	]);
-
-	release.actors = actors;
-	if (trailer) release.trailer = { src: trailer, quality: 1080 };
-
-	return release;
-}
-
-function scrapeLatest(scenes, site, tokens) {
-	return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
-}
-
-async function fetchLatest(site, page = 1) {
-	if (site.parameters?.useGamma) {
-		return fetchApiLatest(site, page);
-	}
-
-	const { time, token } = await fetchToken(site);
-
-	// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
-	const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
-	const res = await bhttp.get(url);
-
-	if (res.statusCode === 200 && res.body.status) {
-		return scrapeLatest(res.body.response.collection, site, { time, token });
-	}
-
-	return null;
-}
-
-async function fetchNetworkScene(url, site, release) {
-	if (site.parameters?.useGamma) {
-		return fetchScene(url, site, release);
-	}
-
-	const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
-	const { pathname } = new URL(url);
-	const entryId = pathname.split('/')[2];
-
-	const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
-	const res = await bhttp.get(apiUrl);
-
-	if (res.statusCode === 200 && res.body.status) {
-		return scrapeScene(res.body.response.collection[0], site, { time, token });
-	}
-
-	return null;
-}
-
-module.exports = {
-	fetchLatest,
-	fetchScene: fetchNetworkScene,
-};
diff --git a/src/scrapers/jayrock.js b/src/scrapers/jayrock.js
index d5d6e5f0..280381ee 100644
--- a/src/scrapers/jayrock.js
+++ b/src/scrapers/jayrock.js
@@ -1,6 +1,41 @@
 'use strict';
 
+const Promise = require('bluebird');
+
+const logger = require('../logger')(__filename);
+const { fetchApiLatest } = require('./gamma');
 const qu = require('../utils/qu');
+const http = require('../utils/http');
+const slugify = require('../utils/slugify');
+
+async function fetchActors(entryId, channel, { token, time }) {
+	const url = `${channel.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
+	const res = await http.get(url);
+
+	if (res.statusCode === 200 && res.body.status === true) {
+		return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
+	}
+
+	return [];
+}
+
+async function fetchTrailerLocation(entryId, channel) {
+	const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
+
+	try {
+		const res = await http.get(url, null, {
+			followRedirects: false,
+		});
+
+		if (res.statusCode === 302) {
+			return res.headers.location;
+		}
+	} catch (error) {
+		logger.warn(`${channel.name}: Unable to fetch trailer at '${url}': ${error.message}`);
+	}
+
+	return null;
+}
 
 function scrapeLatest(items, channel) {
 	return items.map(({ query }) => {
@@ -79,7 +114,78 @@ function scrapeScene({ query, html }, url, channel) {
 	return release;
 }
 
-async function fetchLatest(channel, page = 1) {
+async function scrapeSceneApi(scene, channel, tokens, deep) {
+	const release = {
+		entryId: scene.id,
+		title: scene.title,
+		duration: scene.length,
+		meta: {
+			tokens, // attach tokens to reduce number of requests required for deep fetching
+		},
+	};
+
+	release.url = `${channel.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
+	release.date = new Date(scene.sites.collection[scene.id].publishDate);
+	release.poster = scene._resources.primary[0].url;
+
+	if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
+	if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
+
+	if (deep) {
+		// don't make external requests during update scraping, as this would happen for every scene on the page
+		const [actors, trailer] = await Promise.all([
+			fetchActors(release.entryId, channel, tokens),
+			fetchTrailerLocation(release.entryId, channel),
+		]);
+
+		release.actors = actors;
+
+		if (trailer) {
+			release.trailer = { src: trailer, quality: 1080 };
+		}
+	}
+
+	return release;
+}
+
+function scrapeLatestApi(scenes, site, tokens) {
+	return Promise.map(scenes, async scene => scrapeSceneApi(scene, site, tokens, false), { concurrency: 10 });
+}
+
+async function fetchToken(channel) {
+	const res = await http.get(channel.url);
+	const html = res.body.toString();
+
+	const time = html.match(/"aet":\d+/)[0].split(':')[1];
+	const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
+	const token = ah.split('').reverse().join('');
+
+	return { time, token };
+}
+
+async function fetchLatestApi(channel, page = 1) {
+	const { time, token } = await fetchToken(channel);
+
+	// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
+	const url = `${channel.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
+	const res = await http.get(url);
+
+	if (res.statusCode === 200 && res.body.status) {
+		return scrapeLatestApi(res.body.response.collection, channel, { time, token });
+	}
+
+	return null;
+}
+
+async function fetchLatest(channel, page = 1, options, preData) {
+	if (channel.parameters?.useApi) {
+		return fetchLatestApi(channel, page, options, preData);
+	}
+
+	if (channel.parameters?.useGamma) {
+		return fetchApiLatest(channel, page, preData, options, false);
+	}
+
 	const res = await qu.getAll(`https://jayspov.net/jays-pov-updates.html?view=list&page=${page}`, '.item-grid-list-view > .grid-item');
 
 	if (res.ok) {
@@ -89,7 +195,26 @@ async function fetchLatest(channel, page = 1) {
 	return res.status;
 }
 
+async function fetchSceneApi(url, channel, baseRelease) {
+	const { time, token } = baseRelease?.meta.tokens || await fetchToken(channel); // use attached tokens when deep fetching
+	const { pathname } = new URL(url);
+	const entryId = pathname.split('/')[2];
+
+	const apiUrl = `${channel.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
+	const res = await http.get(apiUrl);
+
+	if (res.ok && res.body.status) {
+		return scrapeSceneApi(res.body.response.collection[0], channel, { time, token }, true);
+	}
+
+	return res.ok ? res.body.status : res.status;
+}
+
 async function fetchScene(url, channel) {
+	if (channel.parameters?.useApi) {
+		return fetchSceneApi(url, channel);
+	}
+
 	const res = await qu.get(url);
 
 	if (res.ok) {
diff --git a/src/utils/argv-include.js b/src/utils/argv-include.js
index 0ad65b3f..7c7853ba 100644
--- a/src/utils/argv-include.js
+++ b/src/utils/argv-include.js
@@ -1,19 +1,32 @@
 'use strict';
 
 function include(argv) {
+	const options = {
+		includeCovers: argv.media && argv.covers,
+		includeMedia: argv.media,
+		includePhotos: argv.media && argv.photos,
+		includeVideos: argv.media && argv.videos,
+		includePosters: argv.media && argv.posters,
+		includeTeasers: argv.media && argv.videos && argv.teasers,
+		includeTrailers: argv.media && argv.videos && argv.trailers,
+		includeActorScenes: argv.actorsScenes,
+	};
+
 	return {
-		covers: argv.media && argv.covers,
-		media: argv.media,
-		photos: argv.media && argv.photos,
-		poster: argv.media && argv.posters,
-		posters: argv.media && argv.posters,
-		releases: argv.actorsScenes,
-		scenes: argv.actorsScenes,
-		teaser: argv.media && argv.videos && argv.teasers,
-		teasers: argv.media && argv.videos && argv.teasers,
-		trailer: argv.media && argv.videos && argv.trailers,
-		trailers: argv.media && argv.videos && argv.trailers,
-		videos: argv.videos,
+		...options,
+		// legacy
+		covers: options.includeCovers,
+		media: options.includeMedia,
+		photos: options.includePhotos,
+		videos: options.includeVideos,
+		poster: options.includePosters,
+		posters: options.includePosters,
+		teaser: options.includeTeasers,
+		teasers: options.includeTeasers,
+		trailer: options.includeTrailers,
+		trailers: options.includeTrailers,
+		releases: options.includeActorScenes,
+		scenes: options.includeActorScenes,
 	};
 }
 