Updated Score scraper to accept site and with-releases arguments.

This commit is contained in:
ThePendulum 2020-02-25 22:32:13 +01:00
parent 82e8ce432b
commit 646ff064a7
3 changed files with 11792 additions and 14511 deletions

26286
package-lock.json generated

File diff suppressed because it is too large. Load Diff

View File

@ -92,6 +92,7 @@
"longjohn": "^0.2.12",
"mime": "^2.4.4",
"moment": "^2.24.0",
"opencv4nodejs": "^5.5.0",
"opn": "^5.5.0",
"pg": "^7.18.1",
"postgraphile": "^4.5.5",

View File

@ -149,7 +149,7 @@ async function fetchActorReleases(url, accReleases = []) {
return null;
}
async function scrapeProfile(html, actorUrl) {
async function scrapeProfile(html, actorUrl, withReleases) {
const { q, qa, qi } = ex(html, '#model-page');
const profile = { gender: 'female' };
@ -187,8 +187,10 @@ async function scrapeProfile(html, actorUrl) {
const avatar = qi('img');
if (avatar) profile.avatar = avatar;
const { origin, pathname } = new URL(actorUrl);
profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
if (withReleases) {
const { origin, pathname } = new URL(actorUrl);
profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
}
return profile;
}
@ -215,7 +217,7 @@ async function fetchScene(url, site) {
return null;
}
async function fetchProfile(actorName, scraperSlug, page = 1, source = 0) {
async function fetchProfile(actorName, scraperSlug, site, withReleases, page = 1, source = 0) {
const letter = actorName.charAt(0).toUpperCase();
const sources = [
@ -236,17 +238,17 @@ async function fetchProfile(actorName, scraperSlug, page = 1, source = 0) {
const actorRes = await bhttp.get(actorUrl);
if (actorRes.statusCode === 200) {
return scrapeProfile(actorRes.body.toString(), actorUrl);
return scrapeProfile(actorRes.body.toString(), actorUrl, withReleases);
}
return null;
}
return fetchProfile(actorName, scraperSlug, page + 1, source);
return fetchProfile(actorName, scraperSlug, site, withReleases, page + 1, source);
}
if (sources[source + 1]) {
return fetchProfile(actorName, scraperSlug, 1, source + 1);
return fetchProfile(actorName, scraperSlug, site, withReleases, 1, source + 1);
}
return null;