forked from DebaucheryLibrarian/traxxx
The q utility now returns window.document, rather than the queried element, as document. Fixed actor collisions when scrapers return the same scene multiple times. Score actor profiles now scrape all release pages. Fixed 21Sextury and PureTaboo photo scraping.
This commit is contained in:
parent
75dbe2548a
commit
d4801bb240
|
@ -456,7 +456,8 @@ async function associateActors(mappedActors, releases) {
|
||||||
const actorEntry = existingActorEntries.find(actor => actor.slug === actorMap[actorName])
|
const actorEntry = existingActorEntries.find(actor => actor.slug === actorMap[actorName])
|
||||||
|| await storeActor({ name: actorName });
|
|| await storeActor({ name: actorName });
|
||||||
|
|
||||||
return releaseIds
|
// if a scene
|
||||||
|
return Array.from(releaseIds)
|
||||||
.map(releaseId => ({
|
.map(releaseId => ({
|
||||||
release_id: releaseId,
|
release_id: releaseId,
|
||||||
actor_id: actorEntry.id,
|
actor_id: actorEntry.id,
|
||||||
|
|
|
@ -292,8 +292,8 @@ function accumulateActors(releases) {
|
||||||
release.actors.forEach((actor) => {
|
release.actors.forEach((actor) => {
|
||||||
const actorName = actor.name ? actor.name.trim() : actor.trim();
|
const actorName = actor.name ? actor.name.trim() : actor.trim();
|
||||||
|
|
||||||
if (!acc[actorName]) acc[actorName] = [];
|
if (!acc[actorName]) acc[actorName] = new Set();
|
||||||
acc[actorName].push(release.id);
|
acc[actorName].add(release.id);
|
||||||
});
|
});
|
||||||
|
|
||||||
return acc;
|
return acc;
|
||||||
|
@ -372,10 +372,11 @@ async function storeRelease(release) {
|
||||||
site_id: release.site.id,
|
site_id: release.site.id,
|
||||||
})
|
})
|
||||||
.first();
|
.first();
|
||||||
|
|
||||||
const curatedRelease = await curateReleaseEntry(release);
|
const curatedRelease = await curateReleaseEntry(release);
|
||||||
|
|
||||||
if (existingRelease && !argv.redownload) {
|
if (existingRelease && !argv.redownload) {
|
||||||
return existingRelease.id;
|
return existingRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (existingRelease && argv.redownload) {
|
if (existingRelease && argv.redownload) {
|
||||||
|
@ -394,7 +395,7 @@ async function storeRelease(release) {
|
||||||
|
|
||||||
await associateTags(release, existingRelease.id);
|
await associateTags(release, existingRelease.id);
|
||||||
|
|
||||||
return existingRelease.id;
|
return existingRelease;
|
||||||
}
|
}
|
||||||
|
|
||||||
const [releaseEntry] = await knex('releases')
|
const [releaseEntry] = await knex('releases')
|
||||||
|
|
|
@ -90,7 +90,7 @@ async function scrapeScene(html, url, site) {
|
||||||
const poster = videoData.picPreview;
|
const poster = videoData.picPreview;
|
||||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||||
|
|
||||||
const photos = await getPhotos($('.picturesItem a').attr('href'), '21sextury.com', site);
|
const photos = await getPhotos($('.picturesItem a').attr('href'), site);
|
||||||
|
|
||||||
const tags = data.keywords.split(', ');
|
const tags = data.keywords.split(', ');
|
||||||
const siteName = data.productionCompany ? data.productionCompany.name : $('#logoLink a').attr('title');
|
const siteName = data.productionCompany ? data.productionCompany.name : $('#logoLink a').attr('title');
|
||||||
|
|
|
@ -69,7 +69,7 @@ async function scrapeScene(html, url, site) {
|
||||||
src: `${videoData.playerOptions.host}${videoData.url}`,
|
src: `${videoData.playerOptions.host}${videoData.url}`,
|
||||||
};
|
};
|
||||||
|
|
||||||
release.photos = await getPhotos(q('.picturesItem a').href, 'puretaboo.com', site);
|
release.photos = await getPhotos(q('.picturesItem a').href, site);
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
const bhttp = require('bhttp');
|
||||||
|
|
||||||
const { ex, exa } = require('../utils/q');
|
const { ex, exa, get } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||||
|
|
||||||
|
@ -132,7 +132,24 @@ function scrapeModels(html, actorName) {
|
||||||
return model?.href || null;
|
return model?.href || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile(html) {
|
/**
 * Fetch a release listing page and keep following its `.next-pg` link,
 * accumulating whatever `scrapeAll` extracts from each page.
 *
 * @param {string} url - Absolute URL of the listing page to fetch.
 * @param {Array} [accReleases=[]] - Releases collected from previously fetched pages.
 * @returns {Promise<Array|null>} The releases of every page that could be fetched,
 *   or null when no page yielded a document and nothing was accumulated.
 */
async function fetchActorReleases(url, accReleases = []) {
	const { document, qu } = await get(url);

	if (!document) {
		// A page failing halfway through should not throw away what earlier pages yielded.
		return accReleases.length > 0 ? accReleases : null;
	}

	const releases = accReleases.concat(scrapeAll(document.body.outerHTML));

	// NOTE(review): qu presumably returns the URL of the matched link, or a falsy value when absent — confirm against utils/q.
	const nextPage = qu('.next-pg');

	if (!nextPage) {
		return releases;
	}

	// Resolve against the current page so a relative href does not make the URL constructor throw.
	const nextUrl = new URL(nextPage, url);

	// Only links that carry a `page` query parameter are followed.
	if (nextUrl.searchParams.has('page')) {
		return fetchActorReleases(nextUrl.href, releases);
	}

	return releases;
}
|
||||||
|
|
||||||
|
async function scrapeProfile(html) {
|
||||||
const { q, qa, qi } = ex(html, '#model-page');
|
const { q, qa, qi } = ex(html, '#model-page');
|
||||||
const profile = { gender: 'female' };
|
const profile = { gender: 'female' };
|
||||||
|
|
||||||
|
@ -170,8 +187,10 @@ function scrapeProfile(html) {
|
||||||
const avatar = qi('img');
|
const avatar = qi('img');
|
||||||
if (avatar) profile.avatar = avatar;
|
if (avatar) profile.avatar = avatar;
|
||||||
|
|
||||||
const releases = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
const { qu } = ex(html, '#model-page + .container, #model-page + .container-fluid');
|
||||||
if (releases) profile.releases = scrapeAll(releases.document.outerHTML);
|
const releasesPage = qu('.next-pg');
|
||||||
|
|
||||||
|
if (releasesPage) profile.releases = await fetchActorReleases(releasesPage);
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
|
@ -161,8 +161,10 @@ function init(element, window) {
|
||||||
|
|
||||||
return {
|
return {
|
||||||
element,
|
element,
|
||||||
document: element,
|
...(window && {
|
||||||
...(window && { window }),
|
window,
|
||||||
|
document: window.document,
|
||||||
|
}),
|
||||||
...contextFuncs,
|
...contextFuncs,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue