Using new HTTP module with a dynamic rate limiter.

DebaucheryLibrarian
2020-11-22 04:07:09 +01:00
parent 5d0fe44130
commit b9b777c621
27 changed files with 358 additions and 175 deletions
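The pattern repeated across the files below: request headers move out of bhttp-style positional arguments into a single options object on the `../utils/http` wrapper, next to session and rate-limiter settings. A sketch of the resulting call shape, with option names taken from the hunks in this commit (the wrapper is project-internal and not part of this diff, so its exact semantics are assumptions):

```js
'use strict';

const http = require('../utils/http'); // project-internal wrapper, not shown in this diff

async function example(url, authKey) {
    const res = await http.post(url, { size: 50 }, {
        headers: { Authorization: `Basic ${authKey}` }, // was a bare positional headers argument
        session: http.session(), // was { useSession: session }
        encodeJSON: true,        // serialize the request body as JSON
        interval: 5000,          // rate limiter: presumably at least 5s between request starts
        concurrency: 1,          // rate limiter: presumably one request in flight at a time
    });

    return res.ok ? res.body : null;
}
```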

View File

@@ -1,8 +1,6 @@
'use strict';
-const bhttp = require('@thependulum/bhttp');
-const { post } = require('../utils/http');
+const http = require('../utils/http');
const { extractDate } = require('../utils/qu');
const { inchesToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
@@ -84,7 +82,7 @@ function scrapeAll(scenes) {
}
async function fetchActorReleases(actor) {
-const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
+const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
query: {
bool: {
@@ -179,7 +177,7 @@ async function scrapeProfile(actor, include) {
}
async function fetchLatest(site, page = 1) {
-const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
+const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
from: (page - 1) * 50,
query: {
@@ -269,7 +267,7 @@ async function fetchScene(url) {
const encodedId = new URL(url).pathname.split('/')[2];
const entryId = decodeId(encodedId);
-const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
+const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
headers: {
Authorization: `Basic ${authKey}`,
},
@@ -279,7 +277,7 @@ async function fetchScene(url) {
}
async function fetchProfile({ name: actorName }, context, include) {
-const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
+const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
size: 5,
sort: [{
_score: {
@@ -306,8 +304,11 @@ async function fetchProfile({ name: actorName }, context, include) {
},
},
}, {
-Authorization: `Basic ${authKey}`,
-}, { encodeJSON: true });
+headers: {
+Authorization: `Basic ${authKey}`,
+},
+encodeJSON: true,
+});
if (res.ok) {
const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase());

View File

@@ -41,7 +41,9 @@ function scrapeScene({ query }, url, channel) {
}));
release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];
-release.poster = query.sourceSet('.player img', 'data-srcset');
+const fallbackPoster = query.img('.player img');
+release.poster = query.sourceSet('.player img', 'data-srcset') || [fallbackPoster.replace('_crop', ''), fallbackPoster];
release.movie = {
title: query.cnt('.movie a'),

View File

@@ -7,7 +7,7 @@ const cheerio = require('cheerio');
const moment = require('moment');
const logger = require('../logger')(__filename);
-const { ex, get } = require('../utils/q');
+const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
@@ -318,7 +318,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
const url = getActorReleasesUrl(profilePath, page);
-const res = await get(url);
+const res = await qu.get(url);
if (!res.ok) return [];
@@ -333,14 +333,14 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
}
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
-const { q } = ex(html);
+const { query } = qu.extract(html);
-const avatar = q('img.actorPicture');
-const hair = q('.actorProfile .attribute_hair_color', true);
-const height = q('.actorProfile .attribute_height', true);
-const weight = q('.actorProfile .attribute_weight', true);
-const alias = q('.actorProfile .attribute_alternate_names', true);
-const nationality = q('.actorProfile .attribute_home', true);
+const avatar = query.el('img.actorPicture');
+const hair = query.cnt('.actorProfile .attribute_hair_color');
+const height = query.cnt('.actorProfile .attribute_height');
+const weight = query.cnt('.actorProfile .attribute_weight');
+const alias = query.cnt('.actorProfile .attribute_alternate_names');
+const nationality = query.cnt('.actorProfile .attribute_home');
const profile = {
name: actorName,
@@ -358,7 +358,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
profile.avatar = avatars;
}
-profile.description = q('.actorBio p:not(.bioTitle)', true);
+profile.description = query.cnt('.actorBio p:not(.bioTitle)');
if (hair) profile.hair = hair.split(':')[1].trim();
if (height) profile.height = Number(height.match(/\d+/)[0]);
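Alongside the HTTP change, this file migrates from the legacy `../utils/q` helpers to `../utils/qu`. A rough correspondence, inferred purely from the substitutions in this commit (method semantics are assumptions, since `utils/qu` itself is not shown):

```js
const qu = require('../utils/qu'); // project-internal query helpers, not shown in this diff

async function sketch(html, url) {
    const { query } = qu.extract(html); // was: const { q } = ex(html);

    const hair = query.cnt('.actorProfile .attribute_hair_color'); // was: q(sel, true) — text content
    const avatar = query.el('img.actorPicture');                   // was: q(sel) — the element itself

    const res = await qu.get(url);                         // was: get(url)
    const list = await qu.getAll(url, '.update_details');  // was: geta(url, '.update_details')

    return { hair, avatar, page: res.ok ? res.item : null, items: list.ok ? list.items : [] };
}
```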

View File

@@ -129,7 +129,9 @@ async function fetchProfile(baseActor, entity, include) {
const searchRes = await http.post('https://tour.hitzefrei.com/search-preview', {
q: baseActor.name,
}, {
-'Accept-Language': 'en-US',
+headers: {
+'Accept-Language': 'en-US',
+},
});
if (searchRes.ok) {

View File

@@ -115,7 +115,7 @@ async function scrapeSceneAlt({ query }, url, channel, session) {
release.trailer = query.video();
if (!release.trailer) {
-const trailerRes = await http.get(`${channel.url}/api/play-api.php`, null, { useSession: session });
+const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
if (trailerRes.ok) {
release.trailer = trailerRes.body;
@@ -153,7 +153,7 @@ async function fetchLatest(site, page = 1) {
async function fetchScene(url, site) {
const session = http.session();
-const res = await qu.get(url, null, null, { useSession: session });
+const res = await qu.get(url, null, null, { session });
if (res.ok) {
if (site.parameters?.scraper === 'alt') {
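Session handling changes shape the same way: `{ useSession: session }` becomes a plain `session` option. A minimal sketch of the pattern in the hunks above (whether the session object carries cookies or other state between requests is an assumption):

```js
const http = require('../utils/http');
const qu = require('../utils/qu');

async function fetchSceneSketch(url, channel) {
    const session = http.session();

    // Both requests pass the same session object, presumably sharing cookies/state
    const pageRes = await qu.get(url, null, null, { session });
    const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });

    return { page: pageRes, trailer: trailerRes.ok ? trailerRes.body : null };
}
```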

View File

@@ -23,7 +23,7 @@ async function fetchTrailerLocation(entryId, channel) {
const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
try {
-const res = await http.get(url, null, {
+const res = await http.get(url, {
followRedirects: false,
});

View File

@@ -7,7 +7,7 @@ const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
-const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
+const qu = require('../utils/qu');
const http = require('../utils/http');
const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
@@ -82,7 +82,7 @@ async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
async function getPhotos(entryId, site, type = 'highres', page = 1) {
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
-const res = await bhttp.get(albumUrl);
+const res = await http.get(albumUrl);
const html = res.body.toString();
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
@@ -135,25 +135,25 @@ function getEntryId(html) {
}
function scrapeAll(scenes, site, entryIdFromTitle) {
-return scenes.map(({ el, qu }) => {
+return scenes.map(({ el, query }) => {
const release = {};
-release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
-release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
-release.date = qu.date('.update_date', 'MM/DD/YYYY');
+release.url = query.url('.update_title a, .dvd_info > a, a ~ a');
+release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true);
+release.date = query.date('.update_date', 'MM/DD/YYYY');
-release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
+release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
-release.actors = qu.all('.update_models a', true);
+release.actors = query.all('.update_models a', true);
-const dvdPhotos = qu.imgs('.dvd_preview_thumb');
-const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;
+const dvdPhotos = query.imgs('.dvd_preview_thumb');
+const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
[release.poster, ...release.photos] = dvdPhotos.length
? dvdPhotos
: Array.from({ length: photoCount }).map((value, index) => {
-const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
-const prefixedSrc = prefixUrl(src, site.url);
+const src = query.img('a img.thumbs', `src${index}_1x`) || query.img('a img.thumbs', `src${index}`) || query.img('a img.thumbs');
+const prefixedSrc = qu.prefixUrl(src, site.url);
if (src) {
return [
@@ -183,7 +183,7 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
return null;
}).filter(Boolean);
-const teaserScript = qu.html('script');
+const teaserScript = query.html('script');
if (teaserScript) {
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
if (src) release.teaser = { src };
@@ -236,17 +236,17 @@ function scrapeUpcoming(html, site) {
});
}
-async function scrapeScene({ html, qu }, url, site, include) {
+async function scrapeScene({ html, query }, url, site, include) {
const release = { url, site };
release.entryId = getEntryId(html);
-release.title = qu.q('.title_bar_hilite', true);
-release.description = qu.q('.update_description', true);
+release.title = query.q('.title_bar_hilite', true);
+release.description = query.q('.update_description', true);
-release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
+release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
-release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
-release.tags = qu.all('.update_tags a', true);
+release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
+release.tags = query.all('.update_tags a', true);
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
@@ -280,14 +280,14 @@ async function scrapeScene({ html, qu }, url, site, include) {
if (include.photos) release.photos = await getPhotos(release.entryId, site);
-if (qu.exists('.update_dvds a')) {
+if (query.exists('.update_dvds a')) {
release.movie = {
-url: qu.url('.update_dvds a'),
-title: qu.q('.update_dvds a', true),
+url: query.url('.update_dvds a'),
+title: query.q('.update_dvds a', true),
};
}
-const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
+const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
if (stars) release.stars = stars;
return release;
@@ -302,7 +302,7 @@ function scrapeMovie({ el, query }, url, site) {
movie.channel = slugify(query.q('.update_date a', true), '');
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
-const sceneQus = ctxa(el, '.dvd_details');
+const sceneQus = qu.initAll(el, '.dvd_details');
const scenes = scrapeAll(sceneQus, site);
const curatedScenes = scenes
@@ -332,7 +332,7 @@ function scrapeProfile(html, url, actorName, entity) {
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
const measurementsString = bio.match(/\w+-\d+-\d+/);
-if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
+if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
if (ageString) profile.age = Number(ageString[1]);
if (heightString) profile.height = heightToCm(heightString[0]);
@@ -354,7 +354,7 @@ function scrapeProfile(html, url, actorName, entity) {
avatarEl.getAttribute('src'),
]
.filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
-.map(avatar => prefixUrl(avatar, entity.url));
+.map(avatar => qu.prefixUrl(avatar, entity.url));
if (avatarSources.length) profile.avatar = avatarSources;
}
@@ -370,7 +370,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
: `${site.url}/trial/categories/movies_${page}_d.html`;
// const res = await bhttp.get(url);
-const res = await geta(url, '.update_details');
+const res = await qu.getAll(url, '.update_details');
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
}
@@ -389,13 +389,13 @@ async function fetchUpcoming(site) {
}
async function fetchScene(url, site, baseRelease, include) {
-const res = await get(url);
+const res = await qu.get(url);
return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
async function fetchMovie(url, site) {
-const res = await get(url);
+const res = await qu.get(url);
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}

View File

@@ -97,8 +97,10 @@ async function scrapeScene({ query, html }, url, baseRelease) {
const token = query.meta('name=_token');
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
-'X-CSRF-Token': token,
-'X-Requested-With': 'XMLHttpRequest',
+headers: {
+'X-CSRF-Token': token,
+'X-Requested-With': 'XMLHttpRequest',
+},
});
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
@@ -136,7 +138,9 @@ function scrapeProfile({ query }) {
async function fetchLatest(channel, page = 1) {
const url = `${channel.url}/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
const res = await http.get(url, {
-'X-Requested-With': 'XMLHttpRequest',
+headers: {
+'X-Requested-With': 'XMLHttpRequest',
+},
});
if (res.ok && res.body.status === 'success') {
@@ -157,7 +161,9 @@ async function fetchScene(url, channel, baseRelease) {
async function fetchProfile({ name: actorName }) {
const actorSlug = slugify(actorName);
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
-'X-Requested-With': 'XMLHttpRequest',
+headers: {
+'X-Requested-With': 'XMLHttpRequest',
+},
});
if (res.ok) {

View File

@@ -6,7 +6,7 @@ const moment = require('moment');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
-const { ex, get } = require('../utils/q');
+const qu = require('../utils/q');
function titleExtractor(pathname) {
const components = pathname.split('/')[2].split('-');
@@ -102,24 +102,24 @@ function scrapeScene(html, url, site) {
}
async function fetchActorReleases(url) {
-const res = await get(url);
+const res = await qu.get(url);
return res.ok
-? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
+? res.item.query.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
: [];
}
async function scrapeProfile(html) {
-const { qu } = ex(html);
+const { query } = qu.extract(html);
const profile = {};
-profile.description = qu.q('.bio_about_text', true);
+profile.description = query.q('.bio_about_text', true);
-const avatar = qu.q('img.performer-pic', 'src');
+const avatar = query.q('img.performer-pic', 'src');
if (avatar) profile.avatar = `https:${avatar}`;
-const releases = qu.urls('.scene-item > a:first-child');
-const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
+const releases = query.urls('.scene-item > a:first-child');
+const otherPages = query.urls('.pagination a:not([rel=next]):not([rel=prev])');
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
profile.releases = releases.concat(olderReleases.flat());

View File

@@ -71,10 +71,10 @@ async function fetchLatest(channel, page = 1) {
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
for (let i = 0; i < page - 1; i += 1) {
-await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
+await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
}
-const res = await http.get(url, headers, { useSession: session });
+const res = await http.get(url, { headers, session });
if (res.ok) {
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');

View File

@@ -74,9 +74,14 @@ async function scrapeScene({ query }, url) {
release.photos = query.imgs('.detail-grabs img');
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
-host,
-referer: url,
-}, { queueMethod: '5s' });
+headers: {
+host,
+referer: url,
+},
+}, {
+interval: 5000,
+concurrency: 1,
+});
if (streamData.ok && streamData.body.status === 'success') {
release.trailer = {
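This hunk shows the "dynamic" part of the new rate limiter: instead of selecting a predefined queue by name (`queueMethod: '5s'`), callers pass the limits directly as `interval` and `concurrency`. A minimal limiter along these lines (hypothetical — the real implementation lives in the http wrapper, which is not part of this diff) could look like:

```js
// Hypothetical sketch: space task starts by `interval` ms and cap in-flight tasks
// at `concurrency`.
function createLimiter({ interval = 0, concurrency = Infinity } = {}) {
    let active = 0;
    let nextSlot = 0; // earliest timestamp the next task may start
    const queue = [];

    function next() {
        if (!queue.length || active >= concurrency) return;

        const task = queue.shift();
        const startAt = Math.max(Date.now(), nextSlot);

        nextSlot = startAt + interval; // reserve the slot before scheduling
        active += 1;

        setTimeout(() => task().finally(() => {
            active -= 1;
            next();
        }), startAt - Date.now());
    }

    return fn => new Promise((resolve, reject) => {
        queue.push(async () => {
            try {
                resolve(await fn());
            } catch (error) {
                reject(error);
            }
        });

        next();
    });
}

// Usage matching the options above: one request at a time, 5 seconds apart.
const limit = createLimiter({ interval: 5000, concurrency: 1 });
// const streamData = await limit(() => actualGet(url)); // inside an async context
```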

View File

@@ -4,7 +4,7 @@
const Promise = require('bluebird');
const moment = require('moment');
-const { get, post } = require('../utils/http');
+const http = require('../utils/http');
const slugify = require('../utils/slugify');
const genderMap = {
@@ -45,13 +45,15 @@ function getAvatarFallbacks(avatar) {
async function getTrailer(scene, site, url) {
const qualities = [360, 480, 720, 1080, 2160];
-const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
+const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, {
file: scene.previewVideoUrl1080P,
sizes: qualities.join('+'),
type: 'trailer',
}, {
-referer: url,
-origin: site.url,
+headers: {
+referer: url,
+origin: site.url,
+},
});
if (!tokenRes.ok) {
@@ -59,7 +61,7 @@ async function getTrailer(scene, site, url) {
}
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
-const trailersRes = await post(trailerUrl, null, { referer: url });
+const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } });
if (trailersRes.ok) {
return qualities.map(quality => (trailersRes.body[quality] ? {
@@ -155,7 +157,7 @@ async function scrapeScene(data, url, site, baseRelease) {
async function fetchActorReleases(pages, model, origin) {
const releasesPerPage = await Promise.map(pages, async (page) => {
const url = `${origin}/api${model.targetUrl}?page=${page}`;
-const res = await get(url);
+const res = await http.get(url);
if (res.code === 200) {
return scrapeAll(res.body.data.videos.videos, null, origin);
@@ -203,7 +205,7 @@ async function scrapeProfile(data, origin, withReleases) {
async function fetchLatest(site, page = 1) {
const url = `${site.url}/api/videos?page=${page}`;
-const res = await get(url);
+const res = await http.get(url);
if (res.code === 200) {
return scrapeAll(res.body.data.videos, site);
@@ -214,7 +216,7 @@ async function fetchLatest(site, page = 1) {
async function fetchUpcoming(site) {
const apiUrl = `${site.url}/api`;
-const res = await get(apiUrl);
+const res = await http.get(apiUrl);
if (res.code === 200) {
return scrapeUpcoming(res.body.data.nextScene, site);
@@ -227,7 +229,7 @@ async function fetchScene(url, site, baseRelease) {
const { origin, pathname } = new URL(url);
const apiUrl = `${origin}/api${pathname}`;
-const res = await get(apiUrl);
+const res = await http.get(apiUrl);
if (res.code === 200) {
return scrapeScene(res.body.data, url, site, baseRelease);
@@ -240,7 +242,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
const origin = site.url;
const actorSlug = slugify(actorName);
const url = `${origin}/api/${actorSlug}`;
-const res = await get(url);
+const res = await http.get(url);
if (res.code === 200) {
return scrapeProfile(res.body.data, origin, include.scenes);