Added mobile album support to Gamma scraper.

This commit is contained in:
ThePendulum 2020-03-06 04:28:01 +01:00
parent 3c14bb26c2
commit 90172ea19a
2 changed files with 45 additions and 13 deletions

View File

@ -156,6 +156,7 @@ const sites = [
network: '21sextury',
parameters: {
referer: 'https://www.21sextury.com',
mobile: 'https://m.analteenangels.com/',
},
},
{
@ -164,7 +165,10 @@ const sites = [
url: 'https://www.assholefever.com',
description: 'Welcome to AssholeFever, the most hardcore anal site on the net. Watch your favorite pornstars and anal sluts from all over the world in big booty hardcore porn, anal gape, beads, anal creampie and more! Look inside if you dare!',
network: '21sextury',
parameters: { networkReferer: true },
parameters: {
networkReferer: true,
mobile: 'https://m.assholefever.com/',
},
},
{
slug: 'buttplays',
@ -193,6 +197,7 @@ const sites = [
parameters: {
referer: 'https://www.21sextury.com',
scene: 'https://www.21sextury.com/en/video',
mobile: 'https://m.dpfanatics.com',
photos: 'https://www.21sextury.com/en/photo',
},
},
@ -204,6 +209,7 @@ const sites = [
network: '21sextury',
parameters: {
referer: 'https://www.21sextury.com',
mobile: 'https://m.dpfanatics.com/',
},
},
{
@ -215,6 +221,7 @@ const sites = [
parameters: {
referer: 'https://www.21sextury.com',
scene: 'https://www.21sextury.com/en/video',
mobile: 'https://m.footsiebabes.com',
photos: 'https://www.21sextury.com/en/photo',
},
},

View File

@ -31,13 +31,15 @@ async function fetchPhotos(url) {
return res.body.toString();
}
function scrapePhotos(html) {
function scrapePhotos(html, includeThumbnails = true) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
const url = $(linkEl).attr('href');
if (/\/join|\/createaccount/.test(url)) {
if (!includeThumbnails) return null;
// URL links to join page instead of full photo, extract thumbnail
// /createaccount is used by e.g. Tricky Spa native site
const src = $(linkEl).find('img').attr('src');
@ -57,10 +59,10 @@ function scrapePhotos(html) {
// URL links to full photo
return url;
});
}).filter(Boolean);
}
async function getPhotos(albumPath, site) {
async function getPhotos(albumPath, site, includeThumbnails = true) {
const albumUrl = getAlbumUrl(albumPath, site);
if (!albumUrl) {
@ -70,7 +72,7 @@ async function getPhotos(albumPath, site) {
try {
const html = await fetchPhotos(albumUrl);
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = scrapePhotos(html, site);
const photos = scrapePhotos(html, includeThumbnails);
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
@ -81,7 +83,7 @@ async function getPhotos(albumPath, site) {
const pageUrl = `${albumUrl}/${page}`;
const pageHtml = await fetchPhotos(pageUrl);
return scrapePhotos(pageHtml, site);
return scrapePhotos(pageHtml, includeThumbnails);
}, {
concurrency: 2,
});
@ -184,9 +186,10 @@ function scrapeAll(html, site, networkUrl, hasTeaser = true) {
});
}
async function scrapeScene(html, url, site, scrapedRelease) {
async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const release = { $ };
const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
const release = { $, url };
const json = $('script[type="application/ld+json"]').html();
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
@ -235,7 +238,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
const photoLink = $('.picturesItem a').attr('href');
if (photoLink) release.photos = await getPhotos(photoLink, site);
const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => $(photoEl).attr('src')).toArray() : [];
if (photoLink) {
const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available
release.photos = [...photos, ...mobilePhotos];
} else {
release.photos = mobilePhotos;
}
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
release.trailer = [
@ -457,9 +467,13 @@ async function fetchUpcoming(site) {
return scrapeAll(res.body.toString(), site, null, false);
}
function getDeepUrl(url, site, release) {
function getDeepUrl(url, site, release, mobile = false) {
const pathname = release?.path || new URL(url).pathname;
if (mobile) {
return `${site.parameters.mobile}${pathname}`;
}
if (site.parameters?.deep === 'network') {
return `${site.network.url}${pathname}`;
}
@ -477,13 +491,24 @@ async function fetchScene(url, site, release) {
}
const deepUrl = getDeepUrl(url, site, release);
const mobileUrl = site.parameters?.mobile && getDeepUrl(url, site, release, true);
console.log(mobileUrl);
if (deepUrl) {
const res = await bhttp.get(deepUrl);
const [res, mobileRes] = await Promise.all([
bhttp.get(deepUrl),
mobileUrl && bhttp.get(mobileUrl, {
headers: {
// don't redirect to main site
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
},
}),
]);
if (res.statusCode === 200) {
const scene = await scrapeScene(res.body.toString(), url, site, release);
const mobileBody = mobileRes.statusCode === 200 ? mobileRes.body.toString() : null;
const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody);
return { ...scene, deepUrl };
}
}