forked from DebaucheryLibrarian/traxxx
Added mobile album support to Gamma scraper.
This commit is contained in:
parent
3c14bb26c2
commit
90172ea19a
|
@ -156,6 +156,7 @@ const sites = [
|
|||
network: '21sextury',
|
||||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
mobile: 'https://m.analteenangels.com/',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -164,7 +165,10 @@ const sites = [
|
|||
url: 'https://www.assholefever.com',
|
||||
description: 'Welcome to AssholeFever, the most hardcore anal site on the net. Watch your favorite pornstars and anal sluts from all over the world in big booty hardcore porn, anal gape, beads, anal creampie and more! Look inside if you dare!',
|
||||
network: '21sextury',
|
||||
parameters: { networkReferer: true },
|
||||
parameters: {
|
||||
networkReferer: true,
|
||||
mobile: 'https://m.assholefever.com/',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'buttplays',
|
||||
|
@ -193,6 +197,7 @@ const sites = [
|
|||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
scene: 'https://www.21sextury.com/en/video',
|
||||
mobile: 'https://m.dpfanatics.com',
|
||||
photos: 'https://www.21sextury.com/en/photo',
|
||||
},
|
||||
},
|
||||
|
@ -204,6 +209,7 @@ const sites = [
|
|||
network: '21sextury',
|
||||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
mobile: 'https://m.dpfanatics.com/',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -215,6 +221,7 @@ const sites = [
|
|||
parameters: {
|
||||
referer: 'https://www.21sextury.com',
|
||||
scene: 'https://www.21sextury.com/en/video',
|
||||
mobile: 'https://m.footsiebabes.com',
|
||||
photos: 'https://www.21sextury.com/en/photo',
|
||||
},
|
||||
},
|
||||
|
|
|
@ -31,13 +31,15 @@ async function fetchPhotos(url) {
|
|||
return res.body.toString();
|
||||
}
|
||||
|
||||
function scrapePhotos(html) {
|
||||
function scrapePhotos(html, includeThumbnails = true) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
|
||||
const url = $(linkEl).attr('href');
|
||||
|
||||
if (/\/join|\/createaccount/.test(url)) {
|
||||
if (!includeThumbnails) return null;
|
||||
|
||||
// URL links to the join page instead of the full photo, so extract the thumbnail instead
|
||||
// /createaccount is used by e.g. Tricky Spa native site
|
||||
const src = $(linkEl).find('img').attr('src');
|
||||
|
@ -57,10 +59,10 @@ function scrapePhotos(html) {
|
|||
|
||||
// URL links to full photo
|
||||
return url;
|
||||
});
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
async function getPhotos(albumPath, site) {
|
||||
async function getPhotos(albumPath, site, includeThumbnails = true) {
|
||||
const albumUrl = getAlbumUrl(albumPath, site);
|
||||
|
||||
if (!albumUrl) {
|
||||
|
@ -70,7 +72,7 @@ async function getPhotos(albumPath, site) {
|
|||
try {
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const photos = scrapePhotos(html, site);
|
||||
const photos = scrapePhotos(html, includeThumbnails);
|
||||
|
||||
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
|
||||
|
||||
|
@ -81,7 +83,7 @@ async function getPhotos(albumPath, site) {
|
|||
const pageUrl = `${albumUrl}/${page}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml, site);
|
||||
return scrapePhotos(pageHtml, includeThumbnails);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
|
@ -184,9 +186,10 @@ function scrapeAll(html, site, networkUrl, hasTeaser = true) {
|
|||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site, scrapedRelease) {
|
||||
async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = { $ };
|
||||
const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
|
||||
const release = { $, url };
|
||||
|
||||
const json = $('script[type="application/ld+json"]').html();
|
||||
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
||||
|
@ -235,7 +238,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
|
|||
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
||||
|
||||
const photoLink = $('.picturesItem a').attr('href');
|
||||
if (photoLink) release.photos = await getPhotos(photoLink, site);
|
||||
const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => $(photoEl).attr('src')).toArray() : [];
|
||||
|
||||
if (photoLink) {
|
||||
const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available
|
||||
release.photos = [...photos, ...mobilePhotos];
|
||||
} else {
|
||||
release.photos = mobilePhotos;
|
||||
}
|
||||
|
||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||
release.trailer = [
|
||||
|
@ -457,9 +467,13 @@ async function fetchUpcoming(site) {
|
|||
return scrapeAll(res.body.toString(), site, null, false);
|
||||
}
|
||||
|
||||
function getDeepUrl(url, site, release) {
|
||||
function getDeepUrl(url, site, release, mobile = false) {
|
||||
const pathname = release?.path || new URL(url).pathname;
|
||||
|
||||
if (mobile) {
|
||||
return `${site.parameters.mobile}${pathname}`;
|
||||
}
|
||||
|
||||
if (site.parameters?.deep === 'network') {
|
||||
return `${site.network.url}${pathname}`;
|
||||
}
|
||||
|
@ -477,13 +491,24 @@ async function fetchScene(url, site, release) {
|
|||
}
|
||||
|
||||
const deepUrl = getDeepUrl(url, site, release);
|
||||
const mobileUrl = site.parameters?.mobile && getDeepUrl(url, site, release, true);
|
||||
|
||||
console.log(mobileUrl);
|
||||
|
||||
if (deepUrl) {
|
||||
const res = await bhttp.get(deepUrl);
|
||||
const [res, mobileRes] = await Promise.all([
|
||||
bhttp.get(deepUrl),
|
||||
mobileUrl && bhttp.get(mobileUrl, {
|
||||
headers: {
|
||||
// send a mobile user agent so the mobile site doesn't redirect to the main site
|
||||
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
|
||||
},
|
||||
}),
|
||||
]);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const scene = await scrapeScene(res.body.toString(), url, site, release);
|
||||
|
||||
const mobileBody = mobileRes.statusCode === 200 ? mobileRes.body.toString() : null;
|
||||
const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody);
|
||||
return { ...scene, deepUrl };
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue