Merged legacy JayRock scraper into new scraper for CosPimps.
This commit is contained in:
parent
2063d66550
commit
8a22ff07a6
|
@ -3447,15 +3447,25 @@ const sites = [
|
|||
name: 'Jay\'s POV',
|
||||
url: 'https://jayspov.net',
|
||||
parent: 'jayrock',
|
||||
/* more recent scenes on own site
|
||||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
scene: false,
|
||||
},
|
||||
*/
|
||||
},
|
||||
{
|
||||
slug: 'cospimps',
|
||||
name: 'CosPimps',
|
||||
url: 'https://cospimps.com',
|
||||
parent: 'jayrock',
|
||||
parameters: {
|
||||
useApi: true,
|
||||
/* Gamma scenes are out of date
|
||||
referer: 'https://www.21sextury.com',
|
||||
scene: false,
|
||||
*/
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'blackforwife',
|
||||
|
@ -3463,14 +3473,9 @@ const sites = [
|
|||
url: 'https://www.blackforwife.com',
|
||||
parent: 'jayrock',
|
||||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
/*
|
||||
referer: 'https://freetour.adulttime.com/en/blackforwife',
|
||||
useGamma: true,
|
||||
referer: 'https://www.21sextury.com',
|
||||
scene: false,
|
||||
deep: 'https://21sextury.com/en/video',
|
||||
photos: false,
|
||||
*/
|
||||
},
|
||||
},
|
||||
// JESSE LOADS MONSTER FACIALS
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
/* example for other ModelCentro scrapers */
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const { fetchApiLatest, fetchScene } = require('./gamma');
|
||||
|
||||
/**
 * Read the time/token pair embedded in the site's landing page.
 *
 * The page markup is searched for `"aet":<digits>` (returned as `time`) and
 * `"ah":"<hash>"`; the returned `token` is that hash with its characters reversed.
 *
 * @param {object} site - entity whose `url` is requested
 * @returns {Promise<{time: string, token: string}>}
 */
async function fetchToken(site) {
  const { body } = await bhttp.get(site.url);
  const markup = body.toString();

  // each match looks like `"key":value`; the value is the part after the colon
  const [, time] = markup.match(/"aet":\d+/)[0].split(':');
  const [, quotedHash] = markup.match(/"ah":"[\w-]+"/)[0].split(':');

  // strip the surrounding quotes, then reverse
  const hash = quotedHash.slice(1, -1);
  const token = hash.split('').reverse().join('');

  return { time, token };
}
|
||||
|
||||
/**
 * Request the models linked to a content entry and return their stage names.
 *
 * @param {string|number} entryId - content ID passed as `transitParameters[contentId]`
 * @param {object} site - entity providing the base `url`
 * @param {{token: string, time: string}} tokens - values interpolated into the API path
 * @returns {Promise<string[]>} stage names, or an empty array unless the response
 *   is a 200 whose body has `status === true`
 */
async function fetchActors(entryId, site, { token, time }) {
  const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
  const { statusCode, body } = await bhttp.get(url);

  if (statusCode !== 200 || body.status !== true) {
    return [];
  }

  const entries = Object.values(body.response.collection);

  return entries.map((entry) => {
    // only the first model of each entry's nested collection is used
    const [model] = Object.values(entry.modelId.collection);

    return model.stageName;
  });
}
|
||||
|
||||
/**
 * Resolve the trailer URL for an entry by requesting the 1080p stream endpoint
 * without following redirects and reading the `location` header of a 302.
 *
 * @param {string|number} entryId - content ID interpolated into the download path
 * @param {object} site - entity providing `url` and `name`
 * @returns {Promise<string|null>} redirect target, or null when the response is
 *   not a 302 or the request throws (a warning is logged in the latter case)
 */
async function fetchTrailerLocation(entryId, site) {
  const url = `${site.url}/api/download/${entryId}/hd1080/stream`;

  try {
    const { statusCode, headers } = await bhttp.get(url, { followRedirects: false });

    if (statusCode === 302) {
      return headers.location;
    }
  } catch (error) {
    logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
  }

  return null;
}
|
||||
|
||||
/**
 * Convert one scene object from the content API into a release, then fetch its
 * actors and trailer location.
 *
 * @param {object} scene - scene entry from the API response
 * @param {object} site - entity the scene belongs to; stored on the release
 * @param {{token: string, time: string}} tokens - API tokens, stored under `meta.tokens`
 * @returns {Promise<object>} the assembled release
 */
async function scrapeScene(scene, site, tokens) {
  const { id: entryId, title, tags, _resources: resources } = scene;

  const release = {
    entryId,
    title,
    duration: scene.length,
    site,
    // attach tokens to reduce number of requests required for deep fetching
    meta: { tokens },
    url: `${site.url}/scene/${entryId}/${slugify(title, { encode: true })}`,
    date: new Date(scene.sites.collection[scene.id].publishDate),
    poster: resources.primary[0].url,
  };

  if (tags) {
    release.tags = Object.values(tags.collection).map((tag) => tag.alias);
  }

  if (resources.base) {
    release.photos = resources.base.map((resource) => resource.url);
  }

  // both lookups only need the entry ID, so they run side by side
  const [actors, trailer] = await Promise.all([
    fetchActors(entryId, site, tokens),
    fetchTrailerLocation(entryId, site),
  ]);

  release.actors = actors;

  if (trailer) {
    release.trailer = { src: trailer, quality: 1080 };
  }

  return release;
}
|
||||
|
||||
/**
 * Scrape every scene of a listing through `scrapeScene`, at most ten at a time.
 *
 * @param {object[]} scenes - scene entries from the API response
 * @param {object} site - entity the scenes belong to
 * @param {{token: string, time: string}} tokens - API tokens forwarded to each scrape
 * @returns {Promise<object[]>} the scraped releases
 */
function scrapeLatest(scenes, site, tokens) {
  const scrapeOne = async (scene) => scrapeScene(scene, site, tokens);
  const mapOptions = { concurrency: 10 };

  return Promise.map(scenes, scrapeOne, mapOptions);
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes for a site.
 *
 * Sites flagged with `parameters.useGamma` are delegated to `fetchApiLatest`;
 * all others are loaded from the site's own `content.load` endpoint in pages of 50.
 *
 * @param {object} site - entity to fetch updates for
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|null>} scraped releases, or null when the response
 *   is not a 200 with a truthy body `status`
 */
async function fetchLatest(site, page = 1) {
  if (site.parameters?.useGamma) {
    return fetchApiLatest(site, page);
  }

  const { time, token } = await fetchToken(site);
  const offset = (page - 1) * 50;

  // transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
  const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${offset}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
  const res = await bhttp.get(url);

  const succeeded = res.statusCode === 200 && res.body.status;

  return succeeded
    ? scrapeLatest(res.body.response.collection, site, { time, token })
    : null;
}
|
||||
|
||||
/**
 * Fetch a single scene by its URL.
 *
 * Sites flagged with `parameters.useGamma` are delegated to the Gamma scraper's
 * `fetchScene`. Otherwise the entry ID is taken from the third `/`-separated
 * segment of the URL path and looked up through `content.load`.
 *
 * @param {string} url - scene URL
 * @param {object} site - entity the scene belongs to
 * @param {object} [release] - base release; its `meta.tokens` are reused when present
 * @returns {Promise<object|null>} the scraped release, or null when the response
 *   is not a 200 with a truthy body `status`
 */
async function fetchNetworkScene(url, site, release) {
  if (site.parameters?.useGamma) {
    return fetchScene(url, site, release);
  }

  // use attached tokens when deep fetching
  const attachedTokens = release?.meta.tokens;
  const { time, token } = attachedTokens || await fetchToken(site);

  const [, , entryId] = new URL(url).pathname.split('/');

  const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
  const res = await bhttp.get(apiUrl);

  if (!(res.statusCode === 200 && res.body.status)) {
    return null;
  }

  return scrapeScene(res.body.response.collection[0], site, { time, token });
}
|
||||
|
||||
// Public scraper interface; the scene fetcher is exposed under the name `fetchScene`.
module.exports = { fetchLatest, fetchScene: fetchNetworkScene };
|
|
@ -1,6 +1,41 @@
|
|||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');

// The logger module is invoked with the current filename to obtain a logger
// instance (the legacy JayRock scraper did the same); without the call,
// `logger.warn` would be read off the factory function and be undefined.
const logger = require('../logger')(__filename);
const { fetchApiLatest } = require('./gamma');
const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Look up the models attached to a content entry and return their stage names.
 *
 * @param {string|number} entryId - content ID passed as `transitParameters[contentId]`
 * @param {object} channel - entity providing the base `url`
 * @param {{token: string, time: string}} tokens - values interpolated into the API path
 * @returns {Promise<string[]>} stage names; empty unless the response is a 200
 *   whose body has `status === true`
 */
async function fetchActors(entryId, channel, { token, time }) {
  const url = `${channel.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
  const res = await http.get(url);

  const isUsable = res.statusCode === 200 && res.body.status === true;

  if (!isUsable) {
    return [];
  }

  const stageNames = [];

  for (const entry of Object.values(res.body.response.collection)) {
    // only the first model in each entry's nested collection is read
    const firstModel = Object.values(entry.modelId.collection)[0];

    stageNames.push(firstModel.stageName);
  }

  return stageNames;
}
|
||||
|
||||
/**
 * Find the trailer URL for an entry: the 1080p stream endpoint is requested with
 * redirects disabled, and the `location` header of a 302 response is returned.
 *
 * @param {string|number} entryId - content ID interpolated into the download path
 * @param {object} channel - entity providing `url` and `name`
 * @returns {Promise<string|null>} redirect target, or null when the response is
 *   not a 302 or the request throws (a warning is logged in the latter case)
 */
async function fetchTrailerLocation(entryId, channel) {
  const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
  const requestOptions = { followRedirects: false };

  try {
    const { statusCode, headers } = await http.get(url, null, requestOptions);

    if (statusCode === 302) {
      return headers.location;
    }
  } catch (error) {
    logger.warn(`${channel.name}: Unable to fetch trailer at '${url}': ${error.message}`);
  }

  return null;
}
|
||||
|
||||
function scrapeLatest(items, channel) {
|
||||
return items.map(({ query }) => {
|
||||
|
@ -79,7 +114,78 @@ function scrapeScene({ query, html }, url, channel) {
|
|||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
/**
 * Convert one scene object from the content API into a release.
 *
 * @param {object} scene - scene entry from the API response
 * @param {object} channel - entity providing the base `url` for the release URL
 * @param {{token: string, time: string}} tokens - API tokens, stored under `meta.tokens`
 * @param {boolean} deep - when truthy, additionally fetch actors and the trailer location
 * @returns {Promise<object>} the assembled release
 */
async function scrapeSceneApi(scene, channel, tokens, deep) {
  const { id: entryId, title, tags, _resources: resources } = scene;

  const release = {
    entryId,
    title,
    duration: scene.length,
    // attach tokens to reduce number of requests required for deep fetching
    meta: { tokens },
    url: `${channel.url}/scene/${entryId}/${slugify(title, { encode: true })}`,
    date: new Date(scene.sites.collection[scene.id].publishDate),
    poster: resources.primary[0].url,
  };

  if (tags) {
    release.tags = Object.values(tags.collection).map((tag) => tag.alias);
  }

  if (resources.base) {
    release.photos = resources.base.map((resource) => resource.url);
  }

  // don't make external requests during update scraping, as this would happen for every scene on the page
  if (!deep) {
    return release;
  }

  const [actors, trailer] = await Promise.all([
    fetchActors(entryId, channel, tokens),
    fetchTrailerLocation(entryId, channel),
  ]);

  release.actors = actors;

  if (trailer) {
    release.trailer = { src: trailer, quality: 1080 };
  }

  return release;
}
|
||||
|
||||
/**
 * Run every scene of a listing through `scrapeSceneApi` with deep fetching
 * disabled, at most ten at a time.
 *
 * @param {object[]} scenes - scene entries from the API response
 * @param {object} site - entity the scenes belong to
 * @param {{token: string, time: string}} tokens - API tokens forwarded to each scrape
 * @returns {Promise<object[]>} the scraped releases
 */
function scrapeLatestApi(scenes, site, tokens) {
  const toShallowRelease = async (scene) => scrapeSceneApi(scene, site, tokens, false);
  const mapOptions = { concurrency: 10 };

  return Promise.map(scenes, toShallowRelease, mapOptions);
}
|
||||
|
||||
/**
 * Read the time/token pair embedded in the channel's landing page.
 *
 * The page markup is searched for `"aet":<digits>` (returned as `time`) and
 * `"ah":"<hash>"`; the returned `token` is that hash with its characters reversed.
 *
 * @param {object} channel - entity whose `url` is requested
 * @returns {Promise<{time: string, token: string}>}
 * @throws {Error} when either value cannot be found in the page
 */
async function fetchToken(channel) {
  const res = await http.get(channel.url);
  const html = res.body.toString();

  const timeMatch = html.match(/"aet":(\d+)/);
  const hashMatch = html.match(/"ah":"([\w-]+)"/);

  // String#match returns null when nothing matches; fail with a message that
  // names the page instead of a bare "cannot read property of null"
  if (!timeMatch || !hashMatch) {
    throw new Error(`Unable to find API token values on ${channel.url}`);
  }

  const time = timeMatch[1];
  const token = hashMatch[1].split('').reverse().join('');

  return { time, token };
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes from the channel's own `content.load`
 * endpoint, in pages of 50.
 *
 * @param {object} channel - entity to fetch updates for
 * @param {number} [page=1] - 1-based page number
 * @returns {Promise<object[]|null>} scraped releases, or null when the response
 *   is not a 200 with a truthy body `status`
 */
async function fetchLatestApi(channel, page = 1) {
  const { time, token } = await fetchToken(channel);
  const offset = (page - 1) * 50;

  // transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
  const url = `${channel.url}/sapi/${token}/${time}/content.load?limit=50&offset=${offset}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
  const res = await http.get(url);

  const succeeded = res.statusCode === 200 && res.body.status;

  return succeeded
    ? scrapeLatestApi(res.body.response.collection, channel, { time, token })
    : null;
}
|
||||
|
||||
async function fetchLatest(channel, page = 1, options, preData) {
|
||||
if (channel.parameters?.useApi) {
|
||||
return fetchLatestApi(channel, page, options, preData);
|
||||
}
|
||||
|
||||
if (channel.parameters?.useGamma) {
|
||||
return fetchApiLatest(channel, page, preData, options, false);
|
||||
}
|
||||
|
||||
const res = await qu.getAll(`https://jayspov.net/jays-pov-updates.html?view=list&page=${page}`, '.item-grid-list-view > .grid-item');
|
||||
|
||||
if (res.ok) {
|
||||
|
@ -89,7 +195,26 @@ async function fetchLatest(channel, page = 1) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch a single scene through the channel's `content.load` endpoint and scrape
 * it with deep fetching enabled.
 *
 * The entry ID is taken from the third `/`-separated segment of the URL path.
 *
 * @param {string} url - scene URL
 * @param {object} channel - entity the scene belongs to
 * @param {object} [baseRelease] - release from an earlier scrape; its
 *   `meta.tokens` are reused when present
 * @returns {Promise<object|*>} the scraped release; otherwise the body `status`
 *   when the request itself was ok, or the response `status`
 */
async function fetchSceneApi(url, channel, baseRelease) {
  // use attached tokens when deep fetching; a base release without `meta`
  // falls back to fetching fresh tokens instead of throwing
  const { time, token } = baseRelease?.meta?.tokens || await fetchToken(channel);
  const { pathname } = new URL(url);
  const entryId = pathname.split('/')[2];

  const apiUrl = `${channel.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
  const res = await http.get(apiUrl);

  if (res.ok && res.body.status) {
    return scrapeSceneApi(res.body.response.collection[0], channel, { time, token }, true);
  }

  return res.ok ? res.body.status : res.status;
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
if (channel.parameters?.useApi) {
|
||||
return fetchSceneApi(url, channel);
|
||||
}
|
||||
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
|
|
|
@ -1,19 +1,32 @@
|
|||
'use strict';
|
||||
|
||||
/**
 * Build the include flags from an argument object.
 *
 * The result carries the `include*` flags plus legacy aliases with the same
 * values. Every media flag requires `argv.media`; teasers and trailers
 * additionally require `argv.videos`.
 *
 * @param {object} argv - argument object with `media`, `covers`, `photos`,
 *   `videos`, `posters`, `teasers`, `trailers` and `actorsScenes`
 * @returns {object} `include*` flags and their legacy aliases
 */
function include(argv) {
  const options = {
    includeCovers: argv.media && argv.covers,
    includeMedia: argv.media,
    includePhotos: argv.media && argv.photos,
    includeVideos: argv.media && argv.videos,
    includePosters: argv.media && argv.posters,
    includeTeasers: argv.media && argv.videos && argv.teasers,
    includeTrailers: argv.media && argv.videos && argv.trailers,
    includeActorScenes: argv.actorsScenes,
  };

  return {
    ...options,
    // legacy aliases; these must read from `options`, as `include` is this
    // function itself and has none of these properties
    covers: options.includeCovers,
    media: options.includeMedia,
    photos: options.includePhotos,
    videos: options.includeVideos,
    poster: options.includePosters,
    posters: options.includePosters,
    teaser: options.includeTeasers,
    teasers: options.includeTeasers,
    trailer: options.includeTrailers,
    trailers: options.includeTrailers,
    releases: options.includeActorScenes,
    scenes: options.includeActorScenes,
  };
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue