Using new HTTP module with a dynamic rate limiter.

This commit is contained in:
DebaucheryLibrarian
2020-11-22 04:07:09 +01:00
parent 5d0fe44130
commit b9b777c621
27 changed files with 358 additions and 175 deletions
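For context: the new `../utils/http` module replaces direct `bhttp` calls throughout the scrapers, adding per-host rate limiting whose interval can be tuned at runtime. The module itself is not shown on this page; below is a minimal sketch of the idea, assuming a bhttp-based wrapper and an `interval` option (both names are illustrative, not the repository's actual API):

// Hypothetical sketch of a dynamically rate-limited HTTP wrapper.
const bhttp = require('bhttp');

const lastRequestByHost = new Map(); // timestamp of the latest request per host
const DEFAULT_INTERVAL = 1000; // ms between requests to the same host (assumed default)

function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

async function get(url, options = {}) {
  const { host } = new URL(url);
  // 'dynamic' here means the limit is decided per call rather than fixed globally
  const interval = options.interval || DEFAULT_INTERVAL;

  const wait = (lastRequestByHost.get(host) || 0) + interval - Date.now();
  if (wait > 0) await sleep(wait); // note: concurrent callers would need a real queue

  lastRequestByHost.set(host, Date.now());

  return bhttp.get(url, options);
}

module.exports = { get };

Call sites then swap `bhttp.get(url)` for `http.get(url)`, as the hunks below show.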

@@ -7,7 +7,7 @@ const cheerio = require('cheerio');
 const { JSDOM } = require('jsdom');
 const moment = require('moment');
-const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
+const qu = require('../utils/qu');
+const http = require('../utils/http');
 const { heightToCm } = require('../utils/convert');
 const slugify = require('../utils/slugify');
@@ -82,7 +82,7 @@ async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
 async function getPhotos(entryId, site, type = 'highres', page = 1) {
   const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
 
-  const res = await bhttp.get(albumUrl);
+  const res = await http.get(albumUrl);
   const html = res.body.toString();
   const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
@@ -135,25 +135,25 @@ function getEntryId(html) {
 }
 
 function scrapeAll(scenes, site, entryIdFromTitle) {
-  return scenes.map(({ el, qu }) => {
+  return scenes.map(({ el, query }) => {
     const release = {};
 
-    release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
-    release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
-    release.date = qu.date('.update_date', 'MM/DD/YYYY');
+    release.url = query.url('.update_title a, .dvd_info > a, a ~ a');
+    release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true);
+    release.date = query.date('.update_date', 'MM/DD/YYYY');
 
-    release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
+    release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
 
-    release.actors = qu.all('.update_models a', true);
+    release.actors = query.all('.update_models a', true);
 
-    const dvdPhotos = qu.imgs('.dvd_preview_thumb');
-    const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;
+    const dvdPhotos = query.imgs('.dvd_preview_thumb');
+    const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
 
     [release.poster, ...release.photos] = dvdPhotos.length
       ? dvdPhotos
       : Array.from({ length: photoCount }).map((value, index) => {
-        const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
-        const prefixedSrc = prefixUrl(src, site.url);
+        const src = query.img('a img.thumbs', `src${index}_1x`) || query.img('a img.thumbs', `src${index}`) || query.img('a img.thumbs');
+        const prefixedSrc = qu.prefixUrl(src, site.url);
 
         if (src) {
           return [
@@ -183,7 +183,7 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
         return null;
       }).filter(Boolean);
 
-    const teaserScript = qu.html('script');
+    const teaserScript = query.html('script');
     if (teaserScript) {
       const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
       if (src) release.teaser = { src };
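Aside: the context objects are renamed from `{ el, qu }` to `{ el, query }` because `qu` now refers to the imported module itself (see `qu.prefixUrl` above). A rough usage sketch, assuming `qu.initAll` wraps each matched element in such an element/context pair; the selectors and the `true`-returns-text behavior are inferred from this file, not documented API:

const { JSDOM } = require('jsdom');
const qu = require('../utils/qu'); // path as required at the top of this file

const { document } = new JSDOM(
  '<div class="update_details"><span class="update_title"><a href="/scene">Scene</a></span></div>',
).window;

// initAll presumably returns one { el, query } pair per matched element,
// with the query helpers pre-bound to that element.
qu.initAll(document.body, '.update_details').forEach(({ el, query }) => {
  console.log(el.className); // 'update_details'
  console.log(query.q('.update_title a', true)); // assumed: `true` returns text content, here 'Scene'
});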
@@ -236,17 +236,17 @@ function scrapeUpcoming(html, site) {
   });
 }
 
-async function scrapeScene({ html, qu }, url, site, include) {
+async function scrapeScene({ html, query }, url, site, include) {
   const release = { url, site };
 
   release.entryId = getEntryId(html);
 
-  release.title = qu.q('.title_bar_hilite', true);
-  release.description = qu.q('.update_description', true);
+  release.title = query.q('.title_bar_hilite', true);
+  release.description = query.q('.update_description', true);
 
-  release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
+  release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
 
-  release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
-  release.tags = qu.all('.update_tags a', true);
+  release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
+  release.tags = query.all('.update_tags a', true);
 
   const posterPath = html.match(/useimage = "(.*)"/)?.[1];
@@ -280,14 +280,14 @@ async function scrapeScene({ html, qu }, url, site, include) {
   if (include.photos) release.photos = await getPhotos(release.entryId, site);
 
-  if (qu.exists('.update_dvds a')) {
+  if (query.exists('.update_dvds a')) {
     release.movie = {
-      url: qu.url('.update_dvds a'),
-      title: qu.q('.update_dvds a', true),
+      url: query.url('.update_dvds a'),
+      title: query.q('.update_dvds a', true),
     };
   }
 
-  const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
+  const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
   if (stars) release.stars = stars;
 
   return release;
@@ -302,7 +302,7 @@ function scrapeMovie({ el, query }, url, site) {
   movie.channel = slugify(query.q('.update_date a', true), '');
 
   // movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
-  const sceneQus = ctxa(el, '.dvd_details');
+  const sceneQus = qu.initAll(el, '.dvd_details');
   const scenes = scrapeAll(sceneQus, site);
 
   const curatedScenes = scenes
@@ -332,7 +332,7 @@ function scrapeProfile(html, url, actorName, entity) {
   const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
   const measurementsString = bio.match(/\w+-\d+-\d+/);
 
-  if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
+  if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
 
   if (ageString) profile.age = Number(ageString[1]);
   if (heightString) profile.height = heightToCm(heightString[0]);
@@ -354,7 +354,7 @@ function scrapeProfile(html, url, actorName, entity) {
       avatarEl.getAttribute('src'),
     ]
       .filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
-      .map(avatar => prefixUrl(avatar, entity.url));
+      .map(avatar => qu.prefixUrl(avatar, entity.url));
 
     if (avatarSources.length) profile.avatar = avatarSources;
   }
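The `prefixUrl` helper now lives on the module as `qu.prefixUrl`. Its assumed behavior, inferred only from its use on scraped image paths in this file:

// Inferred behavior only; the real helper may handle more cases.
qu.prefixUrl('/photos/avatar.jpg', 'https://example.com'); // => 'https://example.com/photos/avatar.jpg'
qu.prefixUrl('https://cdn.example.com/avatar.jpg', 'https://example.com'); // absolute URLs presumably pass through unchanged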
@@ -370,7 +370,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
     : `${site.url}/trial/categories/movies_${page}_d.html`;
 
   // const res = await bhttp.get(url);
-  const res = await geta(url, '.update_details');
+  const res = await qu.getAll(url, '.update_details');
 
   return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
 }
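Note the `res.ok ? … : res.status` pattern here and in the fetchers below: `qu.get` and `qu.getAll` apparently resolve to a result envelope rather than throwing on HTTP errors. A sketch of how a caller consumes it, with the envelope shape inferred from this file rather than documented:

const qu = require('../utils/qu');

async function example(site) {
  // Assumed envelope: { ok, status, items } for getAll, { ok, status, item } for get.
  const res = await qu.getAll(`${site.url}/trial/categories/movies_1_d.html`, '.update_details');

  if (!res.ok) {
    return res.status; // HTTP status code is returned to the caller instead of an exception
  }

  // Each item is an { el, query } context ready for scrapeAll.
  return res.items.length;
}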
@@ -389,13 +389,13 @@ async function fetchUpcoming(site) {
 }
 
 async function fetchScene(url, site, baseRelease, include) {
-  const res = await get(url);
+  const res = await qu.get(url);
 
   return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
 }
 
 async function fetchMovie(url, site) {
-  const res = await get(url);
+  const res = await qu.get(url);
 
   return res.ok ? scrapeMovie(res.item, url, site) : res.status;
 }