Returning window.document instead of element as document from q. Fixed actor collisions when scrapers return same scene multiple times. Scraping all Score actor release pages. Fixed 21Sextury and PureTaboo photo scraping.

This commit is contained in:
2020-02-05 23:57:55 +01:00
parent 75dbe2548a
commit d4801bb240
6 changed files with 36 additions and 13 deletions

View File

@@ -2,7 +2,7 @@
const bhttp = require('bhttp');
const { ex, exa } = require('../utils/q');
const { ex, exa, get } = require('../utils/q');
const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
@@ -132,7 +132,24 @@ function scrapeModels(html, actorName) {
return model?.href || null;
}
function scrapeProfile(html) {
/**
 * Collect an actor's releases by following the '.next-pg' pagination link
 * from one listing page to the next.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @param {Array} [accReleases=[]] - Releases already gathered from earlier pages.
 * @returns {Promise<Array|null>} The accumulated releases, or null when any
 *   fetched page yields no document.
 */
async function fetchActorReleases(url, accReleases = []) {
  // NOTE(review): get() lives in ../utils/q and is not visible here; guard in
  // case it resolves to a nullish value so we fall back to null instead of
  // throwing on destructuring — confirm its failure contract.
  const page = await get(url);

  if (!page?.document) {
    return null;
  }

  const { document, qu } = page;
  const releases = accReleases.concat(scrapeAll(document.body.outerHTML));
  const nextPage = qu('.next-pg');

  if (!nextPage) {
    return releases;
  }

  // Resolve against the page we just fetched: the URL constructor throws on a
  // relative href when no base is given, while an absolute href ignores the base.
  const nextUrl = new URL(nextPage, url);

  // Only keep paginating when the link actually carries a 'page' query parameter.
  if (nextUrl.searchParams.has('page')) {
    return fetchActorReleases(nextUrl.href, releases);
  }

  return releases;
}
async function scrapeProfile(html) {
const { q, qa, qi } = ex(html, '#model-page');
const profile = { gender: 'female' };
@@ -170,8 +187,10 @@ function scrapeProfile(html) {
const avatar = qi('img');
if (avatar) profile.avatar = avatar;
const releases = ex(html, '#model-page + .container, #model-page + .container-fluid');
if (releases) profile.releases = scrapeAll(releases.document.outerHTML);
const { qu } = ex(html, '#model-page + .container, #model-page + .container-fluid');
const releasesPage = qu('.next-pg');
if (releasesPage) profile.releases = await fetchActorReleases(releasesPage);
return profile;
}