Added mobile album scraping to Blowpass, improved wrapper.

This commit is contained in:
ThePendulum 2020-03-07 02:35:13 +01:00
parent 4773a388ac
commit ff3e956fc7
5 changed files with 56 additions and 38 deletions

View File

@ -94,6 +94,9 @@ const networks = [
name: 'Blowpass', name: 'Blowpass',
url: 'https://www.blowpass.com', url: 'https://www.blowpass.com',
description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!', description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
parameters: {
mobile: 'https://m.blowpass.com/en/video/v/%d', // v can be any string, %d will be scene ID
},
parent: 'gamma', parent: 'gamma',
}, },
{ {

View File

@ -1011,8 +1011,8 @@ const sites = [
description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!', description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!',
network: 'blowpass', network: 'blowpass',
parameters: { parameters: {
latest: '/en/videos/latest/All-Categories/0/All-Pornstars/0/', latest: '/en/scenes/updates/%d/Category/0/Pornstar/0',
upcoming: '/en/videos/upcoming', upcoming: '/en/scenes/upcoming',
}, },
}, },
{ {
@ -1022,8 +1022,8 @@ const sites = [
description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside', description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside',
network: 'blowpass', network: 'blowpass',
parameters: { parameters: {
latest: '/en/videos/latest/All-Categories/0/All-Pornstars/0/', latest: '/en/videos/All-Categories/0/All-Pornstars/0/All/0/',
upcoming: '/en/videos/upcoming', upcoming: '/en/videos/All-Categories/0/All-Pornstars/0/All/0/1/upcoming',
}, },
}, },
{ {
@ -1033,8 +1033,8 @@ const sites = [
description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!', description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!',
network: 'blowpass', network: 'blowpass',
parameters: { parameters: {
latest: '/en/videos/latest/All-Categories/0/All-Pornstars/0/', latest: '/en/scenes/updates/0/Category/0/Actor/',
upcoming: '/en/videos/upcoming', upcoming: '/en/scenes/upcoming',
}, },
}, },
{ {
@ -1044,8 +1044,8 @@ const sites = [
description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!', description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!',
network: 'blowpass', network: 'blowpass',
parameters: { parameters: {
latest: '/en/videos/latest/All-Categories/0/All-Pornstars/0/', latest: '/en/scenes/updates/0/Category/0/Actor/',
upcoming: '/en/videos/upcoming', upcoming: '/en/scenes/upcoming',
}, },
}, },
{ {

View File

@ -1,22 +1,22 @@
'use strict'; 'use strict';
const bhttp = require('bhttp'); // const bhttp = require('bhttp');
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma'); const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
async function fetchScene(url, site) { async function fetchSceneWrapper(url, site, baseRelease) {
// const res = await bhttp.get(url); const release = await fetchScene(url, site, baseRelease);
const res = await bhttp.get(`https://www.blowpass.com/en/video/${site.id}/${new URL(url).pathname.split('/').slice(-2).join('/')}`);
const release = await scrapeScene(res.body.toString(), url, site); if (site.isFallback && release.channel) {
release.channel = release.$('.siteNameSpan') const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
.text()
.trim()
.toLowerCase()
.replace('.com', '');
if (['onlyteenblowjobs.com', 'mommyblowsbest.com'].includes(release.channel)) release.url = url.replace(/video\/\w+\//, 'scene/'); if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
else release.url = url.replace(/video\/\w+\//, 'video/'); release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
return release;
}
release.url = channelUrl.replace(/video\/\w+\//, 'video/');
}
return release; return release;
} }
@ -33,5 +33,5 @@ module.exports = {
fetchLatest, fetchLatest,
fetchProfile: networkFetchProfile, fetchProfile: networkFetchProfile,
fetchUpcoming, fetchUpcoming,
fetchScene, fetchScene: fetchSceneWrapper,
}; };

View File

@ -1,6 +1,7 @@
'use strict'; 'use strict';
const Promise = require('bluebird'); const Promise = require('bluebird');
const util = require('util');
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
@ -38,8 +39,6 @@ function scrapePhotos(html, includeThumbnails = true) {
const url = $(linkEl).attr('href'); const url = $(linkEl).attr('href');
if (/\/join|\/createaccount/.test(url)) { if (/\/join|\/createaccount/.test(url)) {
if (!includeThumbnails) return null;
// URL links to join page instead of full photo, extract thumbnail // URL links to join page instead of full photo, extract thumbnail
// /createaccount is used by e.g. Tricky Spa native site // /createaccount is used by e.g. Tricky Spa native site
const src = $(linkEl).find('img').attr('src'); const src = $(linkEl).find('img').attr('src');
@ -54,6 +53,8 @@ function scrapePhotos(html, includeThumbnails = true) {
return [highRes, src]; return [highRes, src];
} }
if (!includeThumbnails) return null;
return src; return src;
} }
@ -232,7 +233,7 @@ async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || []; const rawTags = data?.keywords?.split(', ') || data2?.keywords?.split(', ') || [];
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim(); const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim() || $('.siteNameSpan').text()?.trim().toLowerCase().replace('.com', '');
if (channel) release.channel = slugify(channel, { delimiter: '' }); if (channel) release.channel = slugify(channel, { delimiter: '' });
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/ if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
@ -242,7 +243,9 @@ async function scrapeScene(html, url, site, scrapedRelease, mobileHtml) {
if (photoLink) { if (photoLink) {
const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available const photos = await getPhotos(photoLink, site, mobilePhotos.length < 3); // only get thumbnails when less than 3 mobile photos are available
release.photos = [...photos, ...mobilePhotos];
if (photos.length < 7) release.photos = [...photos, ...mobilePhotos]; // probably only teaser photos available, supplement with mobile album
else release.photos = photos;
} else { } else {
release.photos = mobilePhotos; release.photos = mobilePhotos;
} }
@ -435,9 +438,15 @@ async function fetchApiUpcoming(site) {
function getLatestUrl(site, page) { function getLatestUrl(site, page) {
if (site.parameters?.latest) { if (site.parameters?.latest) {
return /^http/.test(site.parameters.latest) if (/^http/.test(site.parameters.latest)) {
? `${site.parameters.latest}${page}` return /%d/.test(site.parameters.latest)
: `${site.url}${site.parameters.latest}${page}`; ? util.format(site.parameters.latest, page)
: `${site.parameters.latest}${page}`;
}
return /%d/.test(site.parameters.latest)
? util.format(`${site.url}${site.parameters.latest}`, page)
: `${site.url}${site.parameters.latest}${page}`;
} }
return `${site.url}/en/videos/AllCategories/0/${page}`; return `${site.url}/en/videos/AllCategories/0/${page}`;
@ -467,14 +476,20 @@ async function fetchUpcoming(site) {
return scrapeAll(res.body.toString(), site, null, false); return scrapeAll(res.body.toString(), site, null, false);
} }
function getDeepUrl(url, site, release, mobile) { function getDeepUrl(url, site, baseRelease, mobile) {
const filter = new Set(['en', 'video', 'scene', site.slug, site.network.slug]); const filter = new Set(['en', 'video', 'scene', site.slug, site.network.slug]);
const pathname = release?.path || new URL(url).pathname const pathname = baseRelease?.path || new URL(url).pathname
.split('/') .split('/')
.filter(component => !filter.has(component)) .filter(component => !filter.has(component))
.join('/'); // reduce to scene ID and title slug .join('/'); // reduce to scene ID and title slug
if (mobile) { const sceneId = baseRelease?.entryId || pathname.match(/\/(\d+)\//)?.[1];
if (mobile && /%d/.test(mobile)) {
return util.format(mobile, sceneId);
}
if (mobile && sceneId) {
return `${mobile}${pathname}`; return `${mobile}${pathname}`;
} }
@ -485,13 +500,13 @@ function getDeepUrl(url, site, release, mobile) {
return url; return url;
} }
async function fetchScene(url, site, release) { async function fetchScene(url, site, baseRelease) {
if (site.parameters?.deep === false) { if (site.parameters?.deep === false) {
return release; return baseRelease;
} }
const deepUrl = getDeepUrl(url, site, release); const deepUrl = getDeepUrl(url, site, baseRelease);
const mobileUrl = getDeepUrl(url, site, release, site.parameters?.mobile || site.network.parameters?.mobile); const mobileUrl = getDeepUrl(url, site, baseRelease, site.parameters?.mobile || site.network.parameters?.mobile);
if (deepUrl) { if (deepUrl) {
const [res, mobileRes] = await Promise.all([ const [res, mobileRes] = await Promise.all([
@ -506,7 +521,7 @@ async function fetchScene(url, site, release) {
if (res.statusCode === 200) { if (res.statusCode === 200) {
const mobileBody = mobileRes?.statusCode === 200 ? mobileRes.body.toString() : null; const mobileBody = mobileRes?.statusCode === 200 ? mobileRes.body.toString() : null;
const scene = await scrapeScene(res.body.toString(), url, site, release, mobileBody); const scene = await scrapeScene(res.body.toString(), url, site, baseRelease, mobileBody);
return { ...scene, deepUrl }; return { ...scene, deepUrl };
} }
} }

View File

@ -62,7 +62,7 @@ async function matchTags(rawTags) {
async function associateTags(release, releaseId) { async function associateTags(release, releaseId) {
const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || []; const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];
const rawReleaseTags = release.tags.filter(Boolean) || []; const rawReleaseTags = release.tags?.filter(Boolean) || [];
const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string') const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')
? await matchTags(release.tags) // scraper returned raw tags ? await matchTags(release.tags) // scraper returned raw tags
: rawReleaseTags; // tags already matched by (outdated) scraper : rawReleaseTags; // tags already matched by (outdated) scraper