Added Diabolic and Cum Louder, added content type expect option to media sources to fix Vixen thumbnails.

This commit is contained in:
DebaucheryLibrarian
2021-08-09 10:31:12 +02:00
parent 65c3053b49
commit a848d6991b
259 changed files with 880 additions and 15825 deletions

109
src/scrapers/cumlouder.js Normal file
View File

@@ -0,0 +1,109 @@
'use strict';
const { decode } = require('html-entities');
const qu = require('../utils/qu');
/**
 * Build one release object per scene tile of a listing page.
 *
 * @param {Array<{ query: object }>} items - tiles to scrape; each `query` exposes the
 *   dateAgo/img/number/url/cnt/duration helpers used below
 * @param {object} _channel - unused here, accepted to keep the call signature
 * @returns {object[]} releases with entryId, url, date, datePrecision, title, duration and,
 *   when a thumbnail was found, a poster fallback list
 */
function scrapeAll(items, _channel) {
	return items.map(({ query }) => {
		const release = {};

		// fall back to an empty object so a tile without a readable date does not abort the whole page
		const { date, precision } = query.dateAgo('.fecha') || {};
		const poster = query.img('.thumb');

		release.entryId = query.number(null, /\d+/, 'onclick');
		release.url = query.url(null, 'href', { origin: 'https://www.cumlouder.com' });

		release.date = date;
		release.datePrecision = precision;

		release.title = query.cnt('h2');
		release.duration = query.duration('.minutos');

		// only derive poster fallbacks when the tile actually has a thumbnail
		if (poster) {
			release.poster = [
				// preferred source: same path with the file name swapped for previewhd.jpg
				poster.replace(/\/(\w+)\.jpg/, '/previewhd.jpg'),
				poster,
			];
		}

		return release;
	});
}
/**
 * Scrape a single scene page.
 *
 * @param {{ query: object }} item - page-scoped query helper
 * @param {object} channel - unused here, accepted to keep the call signature
 * @param {string} html - raw page source, used for values that only appear in inline scripts
 * @returns {object} the scraped release
 */
function scrapeScene({ query }, channel, html) {
	const release = {};
	// fall back to an empty object so a page without a readable date still yields a release
	const { date, precision } = query.dateAgo('.sub-video .added') || {};

	release.entryId = html.match(/cumlouder_(\d+)/)?.[1];

	release.title = query.cnt('.video-top h1');
	release.description = query.text('.sub-video p');

	release.date = date;
	release.datePrecision = precision;

	release.actors = query.all('.sub-video .pornstar-link').map(el => ({
		name: query.cnt(el, null),
		url: query.url(el, null, 'href', { origin: 'https://www.cumlouder.com' }),
	}));

	release.duration = query.duration('.video-top .duracion');
	release.tags = query.cnts('.video-top .tag-link');

	// lazy quantifier: stop at the first closing `';` so several assignments on one line are not swallowed
	release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*?)';/)?.[1];
	release.trailer = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*?)';/)?.[1]);

	// the shoot ID is taken from the poster path, or from the trailer path as a fallback
	release.shootId = release.poster?.match(/\/rc(\d+)/)?.[1] || release.trailer?.match(/\/episodio_(\d+)/)?.[1];

	return release;
}
/**
 * Scrape the biography text and avatar from an actor page.
 *
 * @param {{ query: object }} item - page-scoped query helper
 * @returns {{ description: *, avatar: * }} the scraped profile
 */
function scrapeProfile({ query }) {
	const profile = {};

	profile.description = query.cnt('.data-bio p:last-of-type');
	profile.avatar = query.img('.thumb-bio');

	return profile;
}
/**
 * Fetch one page of a channel's scene listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - channel whose listing is requested
 * @param {number} page - page number appended to the channel URL
 * @returns {Promise<object[]|number>} scraped releases, or the response status on failure
 */
async function fetchLatest(channel, page) {
	const listingUrl = `${channel.url}/${page}/`;
	const res = await qu.getAll(listingUrl, '.muestra-escena');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, channel);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene page URL
 * @param {object} channel - passed through to the scene scraper
 * @returns {Promise<object|number>} the scraped release, or the response status on failure
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url);

	return res.ok
		? scrapeScene(res.item, channel, res.html)
		: res.status;
}
/**
 * Fetch an actor page by slug and scrape the profile from it.
 *
 * @param {{ slug: string }} actor - actor whose slug forms the profile URL
 * @returns {Promise<object|number>} the scraped profile, or the response status on failure
 */
async function fetchProfile(actor) {
	const profileUrl = `https://www.cumlouder.com/girl/${actor.slug}/`;
	const res = await qu.get(profileUrl, '.listado-escenas');

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.item);
}
// Public scraper interface: listing, single scene and actor profile fetchers.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -12,6 +12,41 @@ const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
/**
 * Assemble the Algolia multi-query endpoint URL for an application ID and API key.
 *
 * @param {string} appId - Algolia application ID; lowercased for the host name
 * @param {string} apiKey - Algolia API key
 * @returns {{ appId: string, apiKey: string, userAgent: string, apiUrl: string }}
 */
function getApiUrl(appId, apiKey) {
	const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
	const host = `${appId.toLowerCase()}-dsn.algolia.net`;

	const queryString = [
		`x-algolia-agent=${userAgent}`,
		`x-algolia-application-id=${appId}`,
		`x-algolia-api-key=${apiKey}`,
	].join('&');

	return {
		appId,
		apiKey,
		userAgent,
		apiUrl: `https://${host}/1/indexes/*/queries?${queryString}`,
	};
}
/**
 * Resolve the Algolia credentials for a site.
 *
 * Credentials configured on `site.parameters` are used directly. Otherwise the referer page is
 * downloaded and the credentials are read from the JSON object on the first line mentioning `apiKey`.
 *
 * @param {string} referer - page to download when no credentials are configured
 * @param {object} [site] - site whose `parameters.appId` / `parameters.apiKey` take precedence
 * @returns {Promise<object>} the result of getApiUrl for the resolved credentials
 * @throws {Error} when the page contains no line mentioning `apiKey`
 */
async function fetchApiCredentials(referer, site) {
	const configured = site?.parameters;

	if (configured?.appId && configured?.apiKey) {
		return getApiUrl(configured.appId, configured.apiKey);
	}

	const res = await http.get(referer);
	const lines = res.body.toString().split('\n');
	const apiLine = lines.find(line => line.includes('apiKey'));

	if (!apiLine) {
		throw new Error(`No Gamma API key found for ${referer}`);
	}

	// cut the object literal out of the line: from the first `{` up to and including the `}` of `};`
	const start = apiLine.indexOf('{');
	const end = apiLine.indexOf('};') + 1;
	const apiData = JSON.parse(apiLine.slice(start, end));

	const { applicationID: appId, apiKey } = apiData.api.algolia;

	return getApiUrl(appId, apiKey);
}
function getAlbumUrl(albumPath, site) {
if (site.parameters?.photos) {
return /^http/.test(site.parameters.photos)
@@ -100,6 +135,58 @@ async function getPhotos(albumPath, site, includeThumbnails = true) {
}
}
/**
 * Request the photoset of an entry from the site's signPhotoset endpoint.
 *
 * @param {string|number} entryId - photoset ID appended to the endpoint path
 * @param {{ url: string }} site - site whose URL hosts the endpoint
 * @returns {Promise<Array>} the values of the response body, or an empty array on failure
 */
async function getFullPhotos(entryId, site) {
	const endpoint = `${site.url}/media/signPhotoset/${entryId}`;
	const requestOptions = {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	};

	const res = await http.get(endpoint, requestOptions);

	return res.ok ? Object.values(res.body) : [];
}
/**
 * Look up a photoset in the `all_photosets` index and build thumbnail URLs for its pictures.
 *
 * @param {string|number} entryId - photoset ID used in the `set_id` facet filter
 * @param {object} site - site (and optionally its parent) providing the default referer
 * @param {object} [parameters] - may carry `referer` or `networkReferer` overrides
 * @returns {Promise<string[][]>} one [transform CDN, image CDN] URL pair per picture,
 *   or an empty array when the lookup yields no pictures
 */
async function getThumbs(entryId, site, parameters) {
	let referer = parameters?.referer;

	if (!referer) {
		// network-wide referer uses the parent site, otherwise the site itself
		const refererBase = parameters?.networkReferer ? site.parent.url : site.url;

		referer = `${refererBase}/en/videos`;
	}

	const { apiUrl } = await fetchApiCredentials(referer, site);

	const payload = {
		requests: [
			{
				indexName: 'all_photosets',
				params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["set_id:${entryId}"]]`,
			},
		],
	};

	const res = await http.post(apiUrl, payload, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	const pictures = res.ok && res.body.results?.[0]?.hits[0]?.set_pictures;

	if (!pictures) {
		return [];
	}

	return pictures.map(({ thumb_path: thumbPath }) => [
		`https://transform.gammacdn.com/photo_set${thumbPath}`,
		`https://images-evilangel.gammacdn.com/photo_set${thumbPath}`,
	]);
}
/**
 * Fetch full photos and thumbnails concurrently and merge them into one list.
 *
 * Full photos come first; thumbnails only fill the positions beyond the number of full photos.
 *
 * @param {string|number} entryId - photoset ID
 * @param {object} site - site passed to both lookups
 * @param {object} [parameters] - passed to both lookups
 * @returns {Promise<Array>} the merged photo list
 */
async function getPhotosApi(entryId, site, parameters) {
	const pending = [
		getFullPhotos(entryId, site, parameters),
		getThumbs(entryId, site, parameters),
	];

	const [photos, thumbs] = await Promise.all(pending);
	const remainingThumbs = thumbs.slice(photos.length);

	return [...photos, ...remainingThumbs];
}
async function scrapeApiReleases(json, site) {
return json.map((scene) => {
if (site.parameters?.extract && scene.sitename !== site.parameters.extract) {
@@ -224,7 +311,7 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml, options) {
if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
else release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate;
else release.date = videoData?.playerOptions?.sceneInfos.sceneReleaseDate;
if (data) {
release.description = data.description;
@@ -260,7 +347,7 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml, options) {
if (channel) release.channel = slugify(channel, '');
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
if (videoData?.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
const photoLink = $('.picturesItem a').attr('href');
const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => $(photoEl).attr('src')).toArray() : [];
@@ -274,38 +361,41 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml, options) {
release.photos = mobilePhotos;
}
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
release.trailer = [
{
src: trailer.replace('hd', 'sm'),
quality: 240,
},
{
src: trailer.replace('hd', 'med'),
quality: 360,
},
{
src: trailer.replace('hd', 'big'),
quality: 480,
},
{
// probably 540p
src: trailer,
quality: parseInt(videoData.sizeOnLoad, 10),
},
{
src: trailer.replace('hd', '720p'),
quality: 720,
},
{
src: trailer.replace('hd', '1080p'),
quality: 1080,
},
{
src: trailer.replace('hd', '4k'),
quality: 2160,
},
];
const trailer = videoData && `${videoData.playerOptions.host}${videoData.url}`;
if (trailer) {
release.trailer = [
{
src: trailer.replace('hd', 'sm'),
quality: 240,
},
{
src: trailer.replace('hd', 'med'),
quality: 360,
},
{
src: trailer.replace('hd', 'big'),
quality: 480,
},
{
// probably 540p
src: trailer,
quality: parseInt(videoData.sizeOnLoad, 10),
},
{
src: trailer.replace('hd', '720p'),
quality: 720,
},
{
src: trailer.replace('hd', '1080p'),
quality: 1080,
},
{
src: trailer.replace('hd', '4k'),
quality: 2160,
},
];
}
const movie = $('.dvdLink');
const movieUrl = qu.prefixUrl(movie.attr('href'), site.url);
@@ -322,6 +412,59 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml, options) {
return release;
}
/**
 * Convert a scene record from the search API into a release.
 *
 * @param {object} data - scene record; the fields read are visible in the body below
 * @param {{ url: string }} site - site used as the base for relative scene, actor and movie URLs
 * @param {object} options - `includePhotos` enables the photoset lookup, and `parameters.actors`
 *   supplies an actor URL template (rendered with the module-level `format` helper, which is
 *   defined outside this block)
 * @returns {Promise<object>} the release
 */
async function scrapeSceneApi(data, site, options) {
	const release = {};

	release.entryId = data.clip_id;

	release.title = data.title;
	release.duration = data.length;

	// `new Date()` always returns a truthy object, even for an invalid timestamp,
	// so validate it explicitly before falling back to the release_date string
	const timestampDate = data.date ? new Date(data.date * 1000) : null;

	if (timestampDate && !Number.isNaN(timestampDate.getTime())) {
		release.date = timestampDate;
	} else {
		release.date = data.release_date ? qu.parseDate(data.release_date, 'YYYY-MM-DD') : null;
	}

	release.actors = data.actors.map(actor => ({
		entryId: actor.actor_id,
		name: actor.name,
		gender: actor.gender,
		url: options.parameters?.actors
			? format(options.parameters.actors, { id: actor.actor_id, slug: actor.url_name })
			// the ID belongs to the actor, not to the scene record
			: qu.prefixUrl(`/en/pornstar/${actor.url_name}/${actor.actor_id}`, site.url),
	}));

	release.tags = data.categories.map(category => category.name);

	if (data.pictures) {
		// ordered fallback list: full HD first, resized after, each on both CDN hosts
		release.poster = [
			`https://transform.gammacdn.com/movies${data.pictures['1920x1080']}`,
			`https://images-evilangel.gammacdn.com/movies${data.pictures['1920x1080']}`,
			`https://transform.gammacdn.com/movies${data.pictures.resized}`,
			`https://images-evilangel.gammacdn.com/movies${data.pictures.resized}`,
		];
	}

	if (data.photoset_id && options.includePhotos) {
		release.photos = await getPhotosApi(data.photoset_id, site, options.parameters);
	}

	if (data.trailers) {
		release.trailer = Object.entries(data.trailers).map(([quality, source]) => ({ src: source, quality }));
	}

	if (data.movie_id) {
		release.movie = {
			entryId: data.movie_id,
			title: data.movie_title,
			url: qu.prefixUrl(`/en/movie/${data.url_movie_title}/${data.movie_id}`, site.url),
		};
	}

	release.channel = data.sitename;
	release.qualities = data.download_sizes;

	return release;
}
async function fetchMovieTrailer(release) {
if (!release.entryId) {
return null;
@@ -469,42 +612,7 @@ function scrapeApiProfile(data, releases, siteSlug) {
return profile;
}
/**
 * Assemble the Algolia multi-query endpoint URL for an application ID and API key.
 *
 * @param {string} appId - Algolia application ID; lowercased for the host name
 * @param {string} apiKey - Algolia API key
 * @returns {{ appId: string, apiKey: string, userAgent: string, apiUrl: string }}
 */
function getApiUrl(appId, apiKey) {
	const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
	const host = `${appId.toLowerCase()}-dsn.algolia.net`;

	const queryString = [
		`x-algolia-agent=${userAgent}`,
		`x-algolia-application-id=${appId}`,
		`x-algolia-api-key=${apiKey}`,
	].join('&');

	return {
		appId,
		apiKey,
		userAgent,
		apiUrl: `https://${host}/1/indexes/*/queries?${queryString}`,
	};
}
/**
 * Resolve the Algolia credentials for a site.
 *
 * Credentials configured on `site.parameters` are used directly. Otherwise the referer page is
 * downloaded and the credentials are read from the JSON object on the first line mentioning `apiKey`.
 *
 * @param {string} referer - page to download when no credentials are configured
 * @param {object} [site] - site whose `parameters.appId` / `parameters.apiKey` take precedence
 * @returns {Promise<object>} the result of getApiUrl for the resolved credentials
 * @throws {Error} when the page contains no line mentioning `apiKey`
 */
async function fetchApiCredentials(referer, site) {
	const configured = site?.parameters;

	if (configured?.appId && configured?.apiKey) {
		return getApiUrl(configured.appId, configured.apiKey);
	}

	const res = await http.get(referer);
	const lines = res.body.toString().split('\n');
	const apiLine = lines.find(line => line.includes('apiKey'));

	if (!apiLine) {
		throw new Error(`No Gamma API key found for ${referer}`);
	}

	// cut the object literal out of the line: from the first `{` up to and including the `}` of `};`
	const start = apiLine.indexOf('{');
	const end = apiLine.indexOf('};') + 1;
	const apiData = JSON.parse(apiLine.slice(start, end));

	const { applicationID: appId, apiKey } = apiData.api.algolia;

	return getApiUrl(appId, apiKey);
}
async function fetchApiLatest(site, page = 1, preData, include, upcoming = false) {
async function fetchLatestApi(site, page = 1, preData, include, upcoming = false) {
const referer = site.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
const { apiUrl } = await fetchApiCredentials(referer, site);
@@ -530,8 +638,40 @@ async function fetchApiLatest(site, page = 1, preData, include, upcoming = false
return res.status;
}
async function fetchApiUpcoming(site, page = 1, preData, include) {
return fetchApiLatest(site, page, preData, include, true);
async function fetchUpcomingApi(site, page = 1, preData, include) {
return fetchLatestApi(site, page, preData, include, true);
}
/**
 * Fetch a single scene through the search API instead of scraping its page.
 *
 * The clip ID is taken from `baseRelease.path` or, failing that, from the URL path.
 *
 * @param {string} url - scene URL
 * @param {object} site - site providing credentials and the default referer
 * @param {object} [baseRelease] - previously scraped release; its `path` is preferred for the ID
 * @param {object} options - passed to the scene scraper; `parameters.referer` overrides the referer
 * @returns {Promise<object|number|null>} the release, `null` when no ID could be extracted or the
 *   API returned no matching scene, or the response status when the request failed
 */
async function fetchSceneApi(url, site, baseRelease, options) {
	const entryId = (baseRelease?.path || new URL(url).pathname).match(/\/(\d{2,})(\/|$)/)?.[1];

	if (!entryId) {
		// without an ID the facet filter would be `clip_id:undefined`; skip the pointless request
		return null;
	}

	const referer = options.parameters?.referer || `${site.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params: `query=&page=0&facets=[]&tagFilters=&facetFilters=[["clip_id:${entryId}"]]`,
			},
			{
				indexName: 'all_scenes',
				params: 'query=&page=0&hitsPerPage=1&attributesToRetrieve=[]&attributesToHighlight=[]&attributesToSnippet=[]&tagFilters=&analytics=false&clickAnalytics=false&facets=clip_id',
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	if (res.status !== 200) {
		return res.status;
	}

	// an empty hits array is truthy, so test the first hit itself before scraping it
	const scene = res.body.results?.[0]?.hits?.[0];

	return scene ? scrapeSceneApi(scene, site, options) : null;
}
function getLatestUrl(site, page) {
@@ -591,6 +731,8 @@ function getDeepUrl(url, site, baseRelease, mobile) {
const sceneId = baseRelease?.entryId || pathname.match(/\/(\d+)\//)?.[1];
console.log(pathname);
if (mobile && /%d/.test(mobile)) {
return util.format(mobile, sceneId);
}
@@ -739,19 +881,23 @@ async function fetchApiProfile({ name: actorName }, context, include) {
}
module.exports = {
fetchApiLatest,
fetchApiLatest: fetchLatestApi,
fetchApiProfile,
fetchApiUpcoming,
fetchApiUpcoming: fetchUpcomingApi,
fetchLatest,
fetchLatestApi,
fetchMovie,
fetchProfile,
fetchScene,
fetchSceneApi,
fetchUpcoming,
fetchUpcomingApi,
api: {
fetchLatest: fetchApiLatest,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchLatestApi,
fetchUpcoming: fetchUpcomingApi,
fetchProfile: fetchApiProfile,
fetchScene,
// fetchScene,
fetchScene: fetchSceneApi,
fetchMovie,
},
getPhotos,

View File

@@ -11,6 +11,7 @@ const bang = require('./bang');
const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const cherrypimps = require('./cherrypimps');
const cumlouder = require('./cumlouder');
const czechav = require('./czechav');
const ddfnetwork = require('./ddfnetwork');
const dogfart = require('./dogfart');
@@ -81,6 +82,7 @@ const scrapers = {
blowpass,
brazzers: mindgeek,
cherrypimps,
cumlouder,
czechav,
pornworld: ddfnetwork,
dogfart,
@@ -175,6 +177,7 @@ const scrapers = {
brazzers: mindgeek,
burningangel: gamma,
cherrypimps,
cumlouder,
deeper: vixen,
deeplush: nubiles,
devilsfilm: famedigital,

View File

@@ -22,7 +22,13 @@ function getPosterFallbacks(poster) {
// high DPI images for full HD source are huge, only prefer for smaller fallback sources
return image.height === 1080 ? sources : sources.reverse();
})
.flat();
.flat()
.map(src => ({
src,
expectType: {
'binary/octet-stream': 'image/jpeg',
},
}));
}
function getTeaserFallbacks(teaser) {
@@ -135,6 +141,26 @@ async function getTrailer(scene, channel, url) {
return null;
}
/**
 * Load a scene page with its scripts executed and read the photo carousel from the Apollo state.
 *
 * @param {string} url - scene page URL
 * @returns {Promise<string[]>} photo sources from every carousel item after the first; empty when
 *   the state, the referenced record or its carousel is unavailable
 */
async function getPhotos(url) {
	const htmlRes = await http.get(url, {
		extract: {
			// NOTE(review): presumably forwarded to the DOM emulation so the page's scripts populate the state - confirm
			runScripts: 'dangerously',
		},
	});

	const state = htmlRes?.window?.__APOLLO_STATE__;

	if (!state) {
		return [];
	}

	// the first ROOT_QUERY entry holding a reference points at the record carrying the carousel
	const key = Object.values(state.ROOT_QUERY ?? {}).find(query => query?.__ref)?.__ref;
	const data = state[key];

	if (!data?.carousel) {
		return [];
	}

	// items with a missing or empty `main` list are skipped instead of throwing
	return data.carousel.slice(1).map(photo => photo.main?.[0]?.src).filter(Boolean);
}
function scrapeAll(scenes, site, origin) {
return scenes.map((scene) => {
const release = {};
@@ -183,7 +209,7 @@ function scrapeUpcoming(scene, site) {
return [release];
}
async function scrapeScene(data, url, site, baseRelease) {
async function scrapeScene(data, url, site, baseRelease, options) {
const scene = data.video;
const release = {
@@ -206,7 +232,11 @@ async function scrapeScene(data, url, site, baseRelease) {
release.actors = baseRelease?.actors || scene.models;
release.poster = getPosterFallbacks(scene.images.poster);
release.photos = data.pictureset.map(photo => photo.main[0].src);
// release.photos = data.pictureset.map(photo => photo.main[0]?.src).filter(Boolean);
if (options.includePhotos) {
release.photos = await getPhotos(url);
}
release.teaser = getTeaserFallbacks(scene.previews.poster);
@@ -300,15 +330,19 @@ async function fetchUpcoming(site) {
return res.status;
}
async function fetchScene(url, site, baseRelease) {
async function fetchScene(url, site, baseRelease, options) {
const { origin, pathname } = new URL(url);
const apiUrl = `${origin}/api/${pathname.split('/').slice(-1)[0]}`;
const res = await http.get(apiUrl);
const res = await http.get(apiUrl, {
extract: {
runScripts: 'dangerously',
},
});
if (res.ok) {
if (res.body.data) {
return scrapeScene(res.body.data, url, site, baseRelease);
return scrapeScene(res.body.data, url, site, baseRelease, options);
}
return null;