Added Porn Pros sites and scraper.

This commit is contained in:
ThePendulum 2020-01-14 21:45:30 +01:00
parent 2ddb21958e
commit 75c53d338a
23 changed files with 196 additions and 4 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -2119,6 +2119,103 @@ function getSites(networksMap) {
network_id: networksMap.pervcity,
parameters: JSON.stringify({ tourId: 9 }),
},
// PORN PROS
{
name: 'Real Ex Girlfriends',
slug: 'realexgirlfriends',
url: 'https://pornpros.com/site/realexgirlfriends',
network_id: networksMap.pornpros,
},
{
name: '18 Years Old',
slug: 'eighteenyearsold',
url: 'https://pornpros.com/site/18yearsold',
network_id: networksMap.pornpros,
},
{
name: 'Massage Creep',
slug: 'massagecreep',
url: 'https://pornpros.com/site/massagecreep',
network_id: networksMap.pornpros,
},
{
name: 'Deep Throat Love',
slug: 'deepthroatlove',
url: 'https://pornpros.com/site/deepthroatlove',
network_id: networksMap.pornpros,
},
{
name: 'Teen BFF',
slug: 'teenbff',
url: 'https://pornpros.com/site/teenbff',
network_id: networksMap.pornpros,
},
{
name: 'Shady P.I.',
slug: 'shadypi',
url: 'https://pornpros.com/site/shadypi',
network_id: networksMap.pornpros,
},
{
name: 'Cruelty Party',
slug: 'crueltyparty',
url: 'https://pornpros.com/site/crueltyparty',
network_id: networksMap.pornpros,
},
{
name: 'Disgraced 18',
slug: 'disgracedeighteen',
url: 'https://pornpros.com/site/disgraced18',
network_id: networksMap.pornpros,
},
{
name: 'Cumshot Surprise',
slug: 'cumshotsurprise',
url: 'https://pornpros.com/site/cumshotsurprise',
network_id: networksMap.pornpros,
},
{
name: '40oz Bounce',
slug: 'fortyozbounce',
url: 'https://pornpros.com/site/40ozbounce',
network_id: networksMap.pornpros,
},
{
name: 'Jurassic Cock',
slug: 'jurassiccock',
url: 'https://pornpros.com/site/jurassiccock',
network_id: networksMap.pornpros,
},
{
name: 'Freaks Of Cock',
slug: 'freaksofcock',
url: 'https://pornpros.com/site/freaksofcock',
network_id: networksMap.pornpros,
},
{
name: 'Euro Humpers',
slug: 'eurohumpers',
url: 'https://pornpros.com/site/eurohumpers',
network_id: networksMap.pornpros,
},
{
name: 'Freaks Of Boobs',
slug: 'freaksofboobs',
url: 'https://pornpros.com/site/freaksofboobs',
network_id: networksMap.pornpros,
},
{
name: 'Cock Competition',
slug: 'cockcompetition',
url: 'https://pornpros.com/site/cockcompetition',
network_id: networksMap.pornpros,
},
{
name: 'Pimp Parade',
slug: 'pimpparade',
url: 'https://pornpros.com/site/pimpparade',
network_id: networksMap.pornpros,
},
// PRIVATE
{
slug: 'analintroductions',

View File

@ -564,6 +564,11 @@ function getTags(groupsMap) {
alias_for: null,
group_id: groupsMap.roleplay,
},
{
name: 'massage',
slug: 'massage',
alias_for: null,
},
{
name: 'masturbation',
slug: 'masturbation',
@ -1573,15 +1578,19 @@ function getTagAliases(tagsMap) {
function getSiteTags() {
return {
allanal: ['anal', 'mff'],
teenallanal: ['anal', 'mff'],
boundgods: ['gay'],
buttmachineboys: ['gay'],
deepthroatlove: ['blowjob', 'deepthroat'],
divinebitches: ['femdom'],
familystrokes: ['family'],
menonedge: ['gay'],
dpparodies: ['parody'],
eighteenyearsold: ['teen'],
familystrokes: ['family'],
massagecreep: ['massage'],
menonedge: ['gay'],
submissived: ['bdsm'],
swallowed: ['blowjob', 'deepthroat', 'facefucking'],
teenbff: ['mff'],
trueanal: ['anal'],
tspussyhunters: ['transsexual'],
};

View File

@ -318,7 +318,8 @@ async function storeReleaseAssets(releases) {
const subpath = `${release.site.network.slug}/${release.site.slug}/${release.id}/`;
const identifier = `"${release.title}" (${release.id})`;
await createMediaDirectory('releases', subpath);
try {
await createMediaDirectory('releases', subpath);
// don't use Promise.all to prevent concurrency issues with duplicate detection
if (release.poster) {
@ -345,6 +346,9 @@ async function storeReleaseAssets(releases) {
targetId: release.id,
subpath,
}, identifier);
} catch (error) {
logger.error(error.message);
}
}, {
concurrency: 10,
});

80
src/scrapers/pornpros.js Normal file
View File

@ -0,0 +1,80 @@
'use strict';
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Parse a listing page into release objects, one per `.site-list .card` element.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site record; attached as-is to every release.
 * @returns {Object[]} Releases in document order.
 */
function scrapeLatest(html, site) {
  const dom = new JSDOM(html);
  const cards = dom.window.document.querySelectorAll('.site-list .card');

  return Array.from(cards).map((card) => {
    // Properties are evaluated top to bottom, so a missing element fails at the
    // same lookup it always did.
    const release = {
      site,
      // the link href is appended to the pornpros.com origin
      url: `https://pornpros.com${card.querySelector(':scope > a').href}`,
      entryId: card.dataset.videoId,
      title: card.querySelector('.card-title').textContent,
      date: moment.utc(card.dataset.date, 'MMMM DD, YYYY').toDate(),
      actors: Array.from(card.querySelectorAll('.actors a')).map(actorEl => actorEl.textContent),
      // image sources get an explicit scheme (presumably protocol-relative in the markup — TODO confirm)
      poster: `https:${card.querySelector('.single-image').src}`,
      photos: Array.from(card.querySelectorAll('.rollover-thumbs img')).map(thumbEl => `https:${thumbEl.dataset.src}`),
    };

    // the trailer is optional: only set when the card contains a <source> element
    const trailerSource = card.querySelector('source');

    if (trailerSource) {
      release.trailer = { src: trailerSource.dataset.src };
    }

    return release;
  });
}
/**
 * Parse a single scene page into a release object.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site record; attached as-is to the release.
 * @returns {Object} Release with title, description, actors and photos, plus
 *   duration, poster and trailer when the page provides them.
 * @throws {TypeError} When the `#t2019-2col` container, the title or the
 *   description element is missing from the page.
 */
function scrapeScene(html, site) {
  const { document } = new JSDOM(html).window;
  const release = { site };

  const scene = document.querySelector('#t2019-2col');

  release.title = scene.querySelector('.t2019-stitle').textContent.trim();
  release.description = scene.querySelector('#t2019-description').textContent.trim();
  release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);

  // The duration element may be present without any digits in it; only set
  // the duration when a number is actually found instead of throwing on a
  // null match. The first number is multiplied by 60 (presumably minutes to
  // seconds — TODO confirm against the page).
  const durationEl = scene.querySelector('#t2019-stime');
  const durationMatch = durationEl && durationEl.textContent.match(/\d+/);

  if (durationMatch) {
    release.duration = Number(durationMatch[0]) * 60;
  }

  release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);

  // Prefer the static placeholder image; otherwise fall back to the poster
  // attribute of the video player, but only if it is non-empty, so a bare
  // 'https:' is never stored as a poster URL.
  const posterEl = scene.querySelector('#no-player-image');
  const videoEl = scene.querySelector('video');

  if (posterEl) {
    release.poster = `https:${posterEl.src}`;
  } else if (videoEl && videoEl.poster) {
    release.poster = `https:${videoEl.poster}`;
  }

  const trailerEl = scene.querySelector('#t2019-video source');

  if (trailerEl) {
    release.trailer = { src: trailerEl.src };
  }

  return release;
}
/**
 * Fetch one page of a site's listing and scrape its releases.
 *
 * The listing address is taken from `site.url`, because a site's slug does
 * not always match its path on pornpros.com (e.g. slug `eighteenyearsold`
 * lives at `/site/18yearsold`). The slug is only used as a fallback when the
 * site record carries no URL.
 *
 * @param {Object} site - Site record with `url` and/or `slug`.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the
 *   response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const baseUrl = site.url || `https://pornpros.com/site/${site.slug}`;
  const res = await bhttp.get(`${baseUrl}?page=${page}`);

  if (res.statusCode === 200) {
    return scrapeLatest(res.body.toString(), site);
  }

  return null;
}
/**
 * Download a scene page and scrape it into a release.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site record passed through to the scraper.
 * @returns {Promise<Object|null>} Scraped release, or null when the response
 *   status is not 200.
 */
async function fetchScene(url, site) {
  const response = await bhttp.get(url);

  // anything other than a plain 200 is treated as "no scene"
  if (response.statusCode !== 200) {
    return null;
  }

  const html = response.body.toString();

  return scrapeScene(html, site);
}
// Exported interface of this scraper module: the listing fetcher and the
// single-scene fetcher. The scrape* parsers are not exported.
module.exports = {
fetchLatest,
fetchScene,
};

View File

@ -15,6 +15,7 @@ const kink = require('./kink');
const mikeadriano = require('./mikeadriano');
const mofos = require('./mofos');
const pervcity = require('./pervcity');
const pornpros = require('./pornpros');
const privateNetwork = require('./private'); // reserved keyword
const naughtyamerica = require('./naughtyamerica');
const realitykings = require('./realitykings');
@ -56,6 +57,7 @@ module.exports = {
mikeadriano,
mofos,
pervcity,
pornpros,
private: privateNetwork,
naughtyamerica,
realitykings,