forked from DebaucheryLibrarian/traxxx
Added mobile album support to Gamma scraper.
This commit is contained in:
@@ -31,13 +31,15 @@ async function fetchPhotos(url) {
|
||||
return res.body.toString();
|
||||
}
|
||||
|
||||
function scrapePhotos(html) {
|
||||
function scrapePhotos(html, includeThumbnails = true) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
|
||||
const url = $(linkEl).attr('href');
|
||||
|
||||
if (/\/join|\/createaccount/.test(url)) {
|
||||
if (!includeThumbnails) return null;
|
||||
|
||||
// URL links to join page instead of full photo, extract thumbnail
|
||||
// /createaccount is used by e.g. Tricky Spa native site
|
||||
const src = $(linkEl).find('img').attr('src');
|
||||
@@ -57,10 +59,10 @@ function scrapePhotos(html) {
|
||||
|
||||
// URL links to full photo
|
||||
return url;
|
||||
});
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
async function getPhotos(albumPath, site) {
|
||||
async function getPhotos(albumPath, site, includeThumbnails = true) {
|
||||
const albumUrl = getAlbumUrl(albumPath, site);
|
||||
|
||||
if (!albumUrl) {
|
||||
@@ -70,7 +72,7 @@ async function getPhotos(albumPath, site) {
|
||||
try {
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const photos = scrapePhotos(html, site);
|
||||
const photos = scrapePhotos(html, includeThumbnails);
|
||||
|
||||
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
|
||||
|
||||
@@ -81,7 +83,7 @@ async function getPhotos(albumPath, site) {
|
||||
const pageUrl = `${albumUrl}/${page}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml, site);
|
||||
return scrapePhotos(pageHtml, includeThumbnails);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
@@ -184,9 +186,10 @@ function scrapeAll(html, site, networkUrl, hasTeaser = true) {
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site, scrapedRelease) {
|
||||
async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = { $ };
|
||||
const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
|
||||
const release = { $, url };
|
||||
|
||||
const json = $('script[type="application/ld+json"]').html();
|
||||
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
||||
@@ -235,7 +238,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
|
||||
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
||||
|
||||
const photoLink = $('.picturesItem a').attr('href');
|
||||
if (photoLink) release.photos = await getPhotos(photoLink, site);
|
||||
const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => $(photoEl).attr('src')).toArray() : [];
|
||||
|
||||
if (photoLink) {
|
||||
const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available
|
||||
release.photos = [...photos, ...mobilePhotos];
|
||||
} else {
|
||||
release.photos = mobilePhotos;
|
||||
}
|
||||
|
||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||
release.trailer = [
|
||||
@@ -457,9 +467,13 @@ async function fetchUpcoming(site) {
|
||||
return scrapeAll(res.body.toString(), site, null, false);
|
||||
}
|
||||
|
||||
function getDeepUrl(url, site, release) {
|
||||
function getDeepUrl(url, site, release, mobile = false) {
|
||||
const pathname = release?.path || new URL(url).pathname;
|
||||
|
||||
if (mobile) {
|
||||
return `${site.parameters.mobile}${pathname}`;
|
||||
}
|
||||
|
||||
if (site.parameters?.deep === 'network') {
|
||||
return `${site.network.url}${pathname}`;
|
||||
}
|
||||
@@ -477,13 +491,24 @@ async function fetchScene(url, site, release) {
|
||||
}
|
||||
|
||||
const deepUrl = getDeepUrl(url, site, release);
|
||||
const mobileUrl = site.parameters?.mobile && getDeepUrl(url, site, release, true);
|
||||
|
||||
console.log(mobileUrl);
|
||||
|
||||
if (deepUrl) {
|
||||
const res = await bhttp.get(deepUrl);
|
||||
const [res, mobileRes] = await Promise.all([
|
||||
bhttp.get(deepUrl),
|
||||
mobileUrl && bhttp.get(mobileUrl, {
|
||||
headers: {
|
||||
// don't redirect to main site
|
||||
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
|
||||
},
|
||||
}),
|
||||
]);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const scene = await scrapeScene(res.body.toString(), url, site, release);
|
||||
|
||||
const mobileBody = mobileRes.statusCode === 200 ? mobileRes.body.toString() : null;
|
||||
const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody);
|
||||
return { ...scene, deepUrl };
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user