Returning window.document instead of the element as document from q. Fixed actor collisions when scrapers return the same scene multiple times. Scraping all Score actor release pages. Fixed 21Sextury and PureTaboo photo scraping.
This commit is contained in:
parent
75dbe2548a
commit
d4801bb240
|
@ -456,7 +456,8 @@ async function associateActors(mappedActors, releases) {
|
|||
const actorEntry = existingActorEntries.find(actor => actor.slug === actorMap[actorName])
|
||||
|| await storeActor({ name: actorName });
|
||||
|
||||
return releaseIds
|
||||
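// if a scene is returned multiple times by a scraper, the Set of release IDs keeps it from being associated with the actor more than once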
|
||||
return Array.from(releaseIds)
|
||||
.map(releaseId => ({
|
||||
release_id: releaseId,
|
||||
actor_id: actorEntry.id,
|
||||
|
|
|
@ -292,8 +292,8 @@ function accumulateActors(releases) {
|
|||
release.actors.forEach((actor) => {
|
||||
const actorName = actor.name ? actor.name.trim() : actor.trim();
|
||||
|
||||
if (!acc[actorName]) acc[actorName] = [];
|
||||
acc[actorName].push(release.id);
|
||||
if (!acc[actorName]) acc[actorName] = new Set();
|
||||
acc[actorName].add(release.id);
|
||||
});
|
||||
|
||||
return acc;
|
||||
|
@ -372,10 +372,11 @@ async function storeRelease(release) {
|
|||
site_id: release.site.id,
|
||||
})
|
||||
.first();
|
||||
|
||||
const curatedRelease = await curateReleaseEntry(release);
|
||||
|
||||
if (existingRelease && !argv.redownload) {
|
||||
return existingRelease.id;
|
||||
return existingRelease;
|
||||
}
|
||||
|
||||
if (existingRelease && argv.redownload) {
|
||||
|
@ -394,7 +395,7 @@ async function storeRelease(release) {
|
|||
|
||||
await associateTags(release, existingRelease.id);
|
||||
|
||||
return existingRelease.id;
|
||||
return existingRelease;
|
||||
}
|
||||
|
||||
const [releaseEntry] = await knex('releases')
|
||||
|
|
|
@ -90,7 +90,7 @@ async function scrapeScene(html, url, site) {
|
|||
const poster = videoData.picPreview;
|
||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||
|
||||
const photos = await getPhotos($('.picturesItem a').attr('href'), '21sextury.com', site);
|
||||
const photos = await getPhotos($('.picturesItem a').attr('href'), site);
|
||||
|
||||
const tags = data.keywords.split(', ');
|
||||
const siteName = data.productionCompany ? data.productionCompany.name : $('#logoLink a').attr('title');
|
||||
|
|
|
@ -69,7 +69,7 @@ async function scrapeScene(html, url, site) {
|
|||
src: `${videoData.playerOptions.host}${videoData.url}`,
|
||||
};
|
||||
|
||||
release.photos = await getPhotos(q('.picturesItem a').href, 'puretaboo.com', site);
|
||||
release.photos = await getPhotos(q('.picturesItem a').href, site);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const { ex, exa } = require('../utils/q');
|
||||
const { ex, exa, get } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
|
@ -132,7 +132,24 @@ function scrapeModels(html, actorName) {
|
|||
return model?.href || null;
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
/**
 * Collect the releases listed on an actor's paginated release pages.
 *
 * Each page is fetched with `get`, its body markup is handed to `scrapeAll`,
 * and the `.next-pg` link is followed for as long as it carries a `page`
 * query parameter.
 *
 * @param {string} url - URL of the first release page to fetch.
 * @param {Array} [accReleases=[]] - Releases gathered so far; new results are appended to a copy.
 * @returns {Promise<Array|null>} All releases gathered, or `null` when any
 *   fetched page yields no document (releases accumulated up to that point
 *   are discarded in that case).
 */
async function fetchActorReleases(url, accReleases = []) {
	let pageUrl = url;
	let collected = accReleases;

	// Pages depend on one another (each supplies the next link), so they are fetched sequentially.
	for (;;) {
		const page = await get(pageUrl);

		if (!page.document) {
			return null;
		}

		const pageReleases = scrapeAll(page.document.body.outerHTML);
		collected = collected.concat(pageReleases);

		const nextPage = page.qu('.next-pg');

		// Only parse the link when one was found; a link without a `page` parameter ends the walk.
		const hasFurtherPage = Boolean(nextPage) && new URL(nextPage).searchParams.has('page');

		if (!hasFurtherPage) {
			return collected;
		}

		pageUrl = nextPage;
	}
}
|
||||
|
||||
async function scrapeProfile(html) {
|
||||
const { q, qa, qi } = ex(html, '#model-page');
|
||||
const profile = { gender: 'female' };
|
||||
|
||||
|
@ -170,8 +187,10 @@ function scrapeProfile(html) {
|
|||
const avatar = qi('img');
|
||||
if (avatar) profile.avatar = avatar;
|
||||
|
||||
const releases = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
||||
if (releases) profile.releases = scrapeAll(releases.document.outerHTML);
|
||||
const { qu } = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
||||
const releasesPage = qu('.next-pg');
|
||||
|
||||
if (releasesPage) profile.releases = await fetchActorReleases(releasesPage);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
|
|
@ -161,8 +161,10 @@ function init(element, window) {
|
|||
|
||||
return {
|
||||
element,
|
||||
document: element,
|
||||
...(window && { window }),
|
||||
...(window && {
|
||||
window,
|
||||
document: window.document,
|
||||
}),
|
||||
...contextFuncs,
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue