Added mobile album support to Gamma scraper.
This commit is contained in:
parent
3c14bb26c2
commit
90172ea19a
|
@ -156,6 +156,7 @@ const sites = [
|
||||||
network: '21sextury',
|
network: '21sextury',
|
||||||
parameters: {
|
parameters: {
|
||||||
referer: 'https://www.21sextury.com',
|
referer: 'https://www.21sextury.com',
|
||||||
|
mobile: 'https://m.analteenangels.com/',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -164,7 +165,10 @@ const sites = [
|
||||||
url: 'https://www.assholefever.com',
|
url: 'https://www.assholefever.com',
|
||||||
description: 'Welcome to AssholeFever, the most hardcore anal site on the net. Watch your favorite pornstars and anal sluts from all over the world in big booty hardcore porn, anal gape, beads, anal creampie and more! Look inside if you dare!',
|
description: 'Welcome to AssholeFever, the most hardcore anal site on the net. Watch your favorite pornstars and anal sluts from all over the world in big booty hardcore porn, anal gape, beads, anal creampie and more! Look inside if you dare!',
|
||||||
network: '21sextury',
|
network: '21sextury',
|
||||||
parameters: { networkReferer: true },
|
parameters: {
|
||||||
|
networkReferer: true,
|
||||||
|
mobile: 'https://m.assholefever.com/',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'buttplays',
|
slug: 'buttplays',
|
||||||
|
@ -193,6 +197,7 @@ const sites = [
|
||||||
parameters: {
|
parameters: {
|
||||||
referer: 'https://www.21sextury.com',
|
referer: 'https://www.21sextury.com',
|
||||||
scene: 'https://www.21sextury.com/en/video',
|
scene: 'https://www.21sextury.com/en/video',
|
||||||
|
mobile: 'https://m.dpfanatics.com',
|
||||||
photos: 'https://www.21sextury.com/en/photo',
|
photos: 'https://www.21sextury.com/en/photo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -204,6 +209,7 @@ const sites = [
|
||||||
network: '21sextury',
|
network: '21sextury',
|
||||||
parameters: {
|
parameters: {
|
||||||
referer: 'https://www.21sextury.com',
|
referer: 'https://www.21sextury.com',
|
||||||
|
mobile: 'https://m.dpfanatics.com/',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -215,6 +221,7 @@ const sites = [
|
||||||
parameters: {
|
parameters: {
|
||||||
referer: 'https://www.21sextury.com',
|
referer: 'https://www.21sextury.com',
|
||||||
scene: 'https://www.21sextury.com/en/video',
|
scene: 'https://www.21sextury.com/en/video',
|
||||||
|
mobile: 'https://m.footsiebabes.com',
|
||||||
photos: 'https://www.21sextury.com/en/photo',
|
photos: 'https://www.21sextury.com/en/photo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -31,13 +31,15 @@ async function fetchPhotos(url) {
|
||||||
return res.body.toString();
|
return res.body.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapePhotos(html) {
|
function scrapePhotos(html, includeThumbnails = true) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
|
return $('.preview .imgLink, .pgFooterThumb a').toArray().map((linkEl) => {
|
||||||
const url = $(linkEl).attr('href');
|
const url = $(linkEl).attr('href');
|
||||||
|
|
||||||
if (/\/join|\/createaccount/.test(url)) {
|
if (/\/join|\/createaccount/.test(url)) {
|
||||||
|
if (!includeThumbnails) return null;
|
||||||
|
|
||||||
// URL links to join page instead of full photo, extract thumbnail
|
// URL links to join page instead of full photo, extract thumbnail
|
||||||
// /createaccount is used by e.g. Tricky Spa native site
|
// /createaccount is used by e.g. Tricky Spa native site
|
||||||
const src = $(linkEl).find('img').attr('src');
|
const src = $(linkEl).find('img').attr('src');
|
||||||
|
@ -57,10 +59,10 @@ function scrapePhotos(html) {
|
||||||
|
|
||||||
// URL links to full photo
|
// URL links to full photo
|
||||||
return url;
|
return url;
|
||||||
});
|
}).filter(Boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getPhotos(albumPath, site) {
|
async function getPhotos(albumPath, site, includeThumbnails = true) {
|
||||||
const albumUrl = getAlbumUrl(albumPath, site);
|
const albumUrl = getAlbumUrl(albumPath, site);
|
||||||
|
|
||||||
if (!albumUrl) {
|
if (!albumUrl) {
|
||||||
|
@ -70,7 +72,7 @@ async function getPhotos(albumPath, site) {
|
||||||
try {
|
try {
|
||||||
const html = await fetchPhotos(albumUrl);
|
const html = await fetchPhotos(albumUrl);
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
const photos = scrapePhotos(html, site);
|
const photos = scrapePhotos(html, includeThumbnails);
|
||||||
|
|
||||||
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
|
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
|
||||||
|
|
||||||
|
@ -81,7 +83,7 @@ async function getPhotos(albumPath, site) {
|
||||||
const pageUrl = `${albumUrl}/${page}`;
|
const pageUrl = `${albumUrl}/${page}`;
|
||||||
const pageHtml = await fetchPhotos(pageUrl);
|
const pageHtml = await fetchPhotos(pageUrl);
|
||||||
|
|
||||||
return scrapePhotos(pageHtml, site);
|
return scrapePhotos(pageHtml, includeThumbnails);
|
||||||
}, {
|
}, {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
});
|
});
|
||||||
|
@ -184,9 +186,10 @@ function scrapeAll(html, site, networkUrl, hasTeaser = true) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url, site, scrapedRelease) {
|
async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
const release = { $ };
|
const m$ = mobileHtml && cheerio.load(mobileHtml, { normalizeWhitespace: true });
|
||||||
|
const release = { $, url };
|
||||||
|
|
||||||
const json = $('script[type="application/ld+json"]').html();
|
const json = $('script[type="application/ld+json"]').html();
|
||||||
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
||||||
|
@ -235,7 +238,14 @@ async function scrapeScene(html, url, site, scrapedRelease) {
|
||||||
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
||||||
|
|
||||||
const photoLink = $('.picturesItem a').attr('href');
|
const photoLink = $('.picturesItem a').attr('href');
|
||||||
if (photoLink) release.photos = await getPhotos(photoLink, site);
|
// Collect photo sources from the mobile page, if one was fetched; elements are wrapped with the mobile document's own cheerio instance (m$).
const mobilePhotos = m$ ? m$('.preview-displayer a img').map((photoIndex, photoEl) => m$(photoEl).attr('src')).toArray() : [];
|
||||||
|
|
||||||
|
if (photoLink) {
|
||||||
|
const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available
|
||||||
|
release.photos = [...photos, ...mobilePhotos];
|
||||||
|
} else {
|
||||||
|
release.photos = mobilePhotos;
|
||||||
|
}
|
||||||
|
|
||||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||||
release.trailer = [
|
release.trailer = [
|
||||||
|
@ -457,9 +467,13 @@ async function fetchUpcoming(site) {
|
||||||
return scrapeAll(res.body.toString(), site, null, false);
|
return scrapeAll(res.body.toString(), site, null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
function getDeepUrl(url, site, release) {
|
function getDeepUrl(url, site, release, mobile = false) {
|
||||||
const pathname = release?.path || new URL(url).pathname;
|
const pathname = release?.path || new URL(url).pathname;
|
||||||
|
|
||||||
|
if (mobile) {
|
||||||
|
return `${site.parameters.mobile}${pathname}`;
|
||||||
|
}
|
||||||
|
|
||||||
if (site.parameters?.deep === 'network') {
|
if (site.parameters?.deep === 'network') {
|
||||||
return `${site.network.url}${pathname}`;
|
return `${site.network.url}${pathname}`;
|
||||||
}
|
}
|
||||||
|
@ -477,13 +491,24 @@ async function fetchScene(url, site, release) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const deepUrl = getDeepUrl(url, site, release);
|
const deepUrl = getDeepUrl(url, site, release);
|
||||||
|
const mobileUrl = site.parameters?.mobile && getDeepUrl(url, site, release, true);
|
||||||
|
|
||||||
|
console.log(mobileUrl);
|
||||||
|
|
||||||
if (deepUrl) {
|
if (deepUrl) {
|
||||||
const res = await bhttp.get(deepUrl);
|
const [res, mobileRes] = await Promise.all([
|
||||||
|
bhttp.get(deepUrl),
|
||||||
|
mobileUrl && bhttp.get(mobileUrl, {
|
||||||
|
headers: {
|
||||||
|
// don't redirect to main site
|
||||||
|
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Mobile Safari/537.36',
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
const scene = await scrapeScene(res.body.toString(), url, site, release);
|
// mobileRes is undefined when the site has no mobile URL configured (Promise.all passes the falsy placeholder through), so guard the access.
const mobileBody = mobileRes?.statusCode === 200 ? mobileRes.body.toString() : null;
|
||||||
|
const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody);
|
||||||
return { ...scene, deepUrl };
|
return { ...scene, deepUrl };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue