Returning window.document instead of element as document from q. Fixed actor collisions when scrapers return same scene multiple times. Scraping all Score actor release pages. Fixed 21Sextury and PureTaboo photo scraping.
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const { ex, exa } = require('../utils/q');
|
||||
const { ex, exa, get } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
@@ -132,7 +132,24 @@ function scrapeModels(html, actorName) {
|
||||
return model?.href || null;
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
/**
 * Collects an actor's releases by walking the paginated release listing.
 *
 * Fetches `url`, scrapes that page's releases with `scrapeAll`, and keeps
 * following the `.next-pg` link for as long as it carries a `page` query
 * parameter.
 *
 * @param {string} url - Absolute address of the release listing page to fetch.
 * @param {Array} [accReleases=[]] - Releases already gathered from earlier pages.
 * @returns {Promise<Array|null>} Every release gathered so far, or `null` when
 *   no page could be loaded and nothing had been gathered yet.
 */
async function fetchActorReleases(url, accReleases = []) {
	const { document, qu } = await get(url);

	if (!document) {
		// A page that fails to load must not throw away the releases that were
		// already scraped from the preceding pages.
		return accReleases.length > 0 ? accReleases : null;
	}

	const releases = accReleases.concat(scrapeAll(document.body.outerHTML));
	const nextPage = qu('.next-pg');

	if (nextPage) {
		// Resolve against the current page so that a relative href does not make
		// the URL constructor throw; an absolute href is left as it is.
		const nextUrl = new URL(nextPage, url);

		if (nextUrl.searchParams.has('page')) {
			return fetchActorReleases(nextUrl.href, releases);
		}
	}

	return releases;
}
|
||||
|
||||
async function scrapeProfile(html) {
|
||||
const { q, qa, qi } = ex(html, '#model-page');
|
||||
const profile = { gender: 'female' };
|
||||
|
||||
@@ -170,8 +187,10 @@ function scrapeProfile(html) {
|
||||
const avatar = qi('img');
|
||||
if (avatar) profile.avatar = avatar;
|
||||
|
||||
const releases = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
||||
if (releases) profile.releases = scrapeAll(releases.document.outerHTML);
|
||||
const { qu } = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
||||
const releasesPage = qu('.next-pg');
|
||||
|
||||
if (releasesPage) profile.releases = await fetchActorReleases(releasesPage);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user