Using new HTTP module with a dynamic rate limiter.

This commit is contained in:
DebaucheryLibrarian
2020-11-22 04:07:09 +01:00
parent 5d0fe44130
commit b9b777c621
27 changed files with 358 additions and 175 deletions

View File

@@ -6,7 +6,7 @@ const moment = require('moment');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
const { ex, get } = require('../utils/q');
const qu = require('../utils/q');
function titleExtractor(pathname) {
const components = pathname.split('/')[2].split('-');
@@ -102,24 +102,24 @@ function scrapeScene(html, url, site) {
}
/**
 * Request a single page and collect the scene links found on it.
 *
 * Resolves to whatever `.urls()` yields for the scene-item anchors when the
 * response is flagged `ok`, and to an empty array otherwise, so a failed page
 * request contributes no releases instead of throwing here.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so each
 * changed statement appears twice. Presumably the first of each pair is the
 * pre-commit line (`get`, `res.item.qu`) and the second its replacement
 * (`qu.get`, `res.item.query`); confirm against the repository before treating
 * either line as current.
 *
 * @param url - page to request; scrapeProfile passes each pagination link here
 *   (presumably a URL string, verify against the `urls()` helper).
 * @returns promise of the collected scene URLs, or `[]` when `res.ok` is falsy.
 */
async function fetchActorReleases(url) {
const res = await get(url);
const res = await qu.get(url);
return res.ok
? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
? res.item.query.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
: [];
}
async function scrapeProfile(html) {
const { qu } = ex(html);
const { query } = qu.extract(html);
const profile = {};
profile.description = qu.q('.bio_about_text', true);
profile.description = query.q('.bio_about_text', true);
const avatar = qu.q('img.performer-pic', 'src');
const avatar = query.q('img.performer-pic', 'src');
if (avatar) profile.avatar = `https:${avatar}`;
const releases = qu.urls('.scene-item > a:first-child');
const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
const releases = query.urls('.scene-item > a:first-child');
const otherPages = query.urls('.pagination a:not([rel=next]):not([rel=prev])');
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
profile.releases = releases.concat(olderReleases.flat());