Refactored MOFOS scraper to use generic MindGeek scraper. Added Digital Playground and Fake Hub.

This commit is contained in:
ThePendulum 2020-01-14 19:56:46 +01:00
parent b41deaae36
commit 9255467091
38 changed files with 160 additions and 142 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 885 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 923 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -42,6 +42,12 @@ const networks = [
url: 'https://ddfnetwork.com',
description: 'European porn videos hub with exclusive VR, 4K and full HD XXX videos and hot sex photos of Europes finest porn star babes.',
},
{
slug: 'digitalplayground',
name: 'Digital Playground',
url: 'https://www.digitalplayground.com',
description: 'DigitalPlayground.com is the leader in high quality adult blockbuster movies and award winning sex parodies that feature the most exclusive pornstars online! Adult Film Database of adult movies.',
},
{
slug: 'dogfartnetwork',
name: 'Dogfart Network',
@ -55,6 +61,12 @@ const networks = [
url: 'https://evilangel.com',
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
},
{
slug: 'fakehub',
name: 'Fake Hub',
url: 'https://www.fakehub.com',
description: 'Wherever they go, there is porn. Hospital, Taxis, Casting… Maybe fucking to a fake cop, fake agent or fake taxi driver. And we record it all.',
},
{
slug: 'jayrock',
name: 'JayRock Productions',

View File

@ -1008,6 +1008,49 @@ function getSites(networksMap) {
description: 'Fantasy Blowjobs & POV Cock Sucking Videos and Photos Produced in VR, 4K and full HD featuring Sexy European Pornstars',
network_id: networksMap.ddfnetwork,
},
// DIGITAL PLAYGROUND
{
slug: 'episodes',
name: 'Episodes',
url: 'https://www.digitalplayground.com/scenes?site=206',
description: '',
network_id: networksMap.digitalplayground,
},
{
slug: 'flixxx',
name: 'Flixxx',
url: 'https://www.digitalplayground.com/scenes?site=207',
description: '',
network_id: networksMap.digitalplayground,
},
{
slug: 'rawcuts',
name: 'Raw Cuts',
url: 'https://www.digitalplayground.com/scenes?site=208',
description: '',
network_id: networksMap.digitalplayground,
},
{
slug: 'dpstarepisodes',
name: 'DP Star Episodes',
url: 'https://www.digitalplayground.com/scenes?site=209',
description: '',
network_id: networksMap.digitalplayground,
},
{
slug: 'blockbuster',
name: 'Blockbuster',
url: 'https://www.digitalplayground.com/scenes?site=211',
description: '',
network_id: networksMap.digitalplayground,
},
{
slug: 'dpparodies',
name: 'DP Parodies',
url: 'https://www.digitalplayground.com/scenes?site=212',
description: '',
network_id: networksMap.digitalplayground,
},
// DOGFART NETWORK
{
slug: 'blacksonblondes',
@ -1179,6 +1222,84 @@ function getSites(networksMap) {
parameters: JSON.stringify({ independent: true }),
network_id: networksMap.evilangel,
},
// FAKE HUB
{
slug: 'fakeagentuk',
name: 'Fake Agent UK',
url: 'https://www.fakehub.com/scenes?site=277',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakecop',
name: 'Fake Cop',
url: 'https://www.fakehub.com/scenes?site=278',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakehospital',
name: 'Fake Hospital',
url: 'https://www.fakehub.com/scenes?site=279',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakeagent',
name: 'Fake Agent',
url: 'https://www.fakehub.com/scenes?site=280',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'faketaxi',
name: 'Fake Taxi',
url: 'https://www.fakehub.com/scenes?site=281',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'publicagent',
name: 'Public Agent',
url: 'https://www.fakehub.com/scenes?site=282',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'femaleagent',
name: 'Female Agent',
url: 'https://www.fakehub.com/scenes?site=283',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'femalefaketaxi',
name: 'Female Fake Taxi',
url: 'https://www.fakehub.com/scenes?site=284',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakedrivingschool',
name: 'Fake Driving School',
url: 'https://www.fakehub.com/scenes?site=285',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakehuboriginals',
name: 'Fake Hub Originals',
url: 'https://www.fakehub.com/scenes?site=287',
description: '',
network_id: networksMap.fakehub,
},
{
slug: 'fakehostel',
name: 'Fake Hostel',
url: 'https://www.fakehub.com/scenes?site=288',
description: '',
network_id: networksMap.fakehub,
},
// JAYS POV
{
slug: 'jayspov',

View File

@ -637,6 +637,11 @@ function getTags(groupsMap) {
slug: 'pain',
alias_for: null,
},
{
name: 'parody',
slug: 'parody',
alias_for: null,
},
{
name: 'pegging',
slug: 'pegging',
@ -1574,6 +1579,7 @@ function getSiteTags() {
divinebitches: ['femdom'],
familystrokes: ['family'],
menonedge: ['gay'],
dpparodies: ['parody'],
submissived: ['bdsm'],
swallowed: ['blowjob', 'deepthroat', 'facefucking'],
trueanal: ['anal'],

View File

@ -0,0 +1,8 @@
'use strict';
const { fetchScene, fetchLatest } = require('./mindgeek');
module.exports = {
fetchLatest,
fetchScene,
};

8
src/scrapers/fakehub.js Normal file
View File

@ -0,0 +1,8 @@
'use strict';
const { fetchScene, fetchLatest } = require('./mindgeek');
module.exports = {
fetchLatest,
fetchScene,
};

View File

@ -1,147 +1,6 @@
'use strict';
const Promise = require('bluebird');
const bhttp = require('bhttp');
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
const moment = require('moment');
const { fetchSites } = require('../sites');
const { cookieToData } = require('../utils/cookies');
const { matchTags } = require('../tags');
/**
 * Collects the XL image URLs of a scene.
 *
 * Poster images take priority and are returned untouched. Otherwise the main
 * card images (followed by the secondary card images, when present) are used,
 * with the first '.thumb' occurrence stripped from each URL.
 *
 * @param {Object} scene - Release object exposing an `images` property.
 * @returns {string[]} Image URLs, or an empty array when neither source exists.
 */
function getThumbs(scene) {
  const {
    poster,
    card_main_rect: mainCards,
    card_secondary_rect: secondaryCards,
  } = scene.images;

  if (poster) {
    return poster.map(({ xl }) => xl.url);
  }

  if (!mainCards) {
    return [];
  }

  const cards = mainCards.concat(secondaryCards || []);

  return cards.map(({ xl }) => xl.url.replace('.thumb', ''));
}
/* eslint-disable newline-per-chained-call */
/**
 * Converts release objects from the releases API into scraped scene entries.
 *
 * @param {Object[]} items - Release objects (the `result` array of the API response).
 * @param {Object} site - Site the releases were fetched for; attached to every entry as-is.
 * @returns {Promise<Object[]>} One entry per release. `trailer` is only present
 *   when the release has a 720p or 320p mediabook file.
 */
async function scrapeLatest(items, site) {
  return Promise.all(items.map(async (data) => {
    const { id: entryId, title, description } = data;
    const url = `https://www.mofos.com/scene/${entryId}/`;

    const date = new Date(data.dateReleased);
    const actors = data.actors.map(actor => actor.name);

    const rawTags = data.tags.map(tag => tag.name);
    const tags = await matchTags(rawTags);

    const [poster, ...photos] = getThumbs(data);

    // Prefer the 720p trailer and fall back to 320p; undefined when the release has neither.
    const trailer = data.videos.mediabook && (data.videos.mediabook.files['720p'] || data.videos.mediabook.files['320p']);

    const release = {
      url,
      entryId,
      title,
      description,
      actors,
      tags,
      poster,
      photos,
      date,
      site,
    };

    // A release without a trailer must not throw: one bad item would reject
    // the surrounding Promise.all and discard the entire page of results.
    if (trailer) {
      release.trailer = {
        src: trailer.urls.view,
        quality: parseInt(trailer.format, 10),
      };
    }

    return release;
  }));
}
/**
 * Converts a single release object from the releases API into a scraped scene.
 *
 * @param {Object} data - Release object (the `result` of the API response).
 * @param {string} url - Scene URL the release was requested for; returned unchanged.
 * @param {Object} site - Site the scene was requested through. When `site.isFallback`
 *   is set, the release's first collection is looked up through `fetchSites`
 *   and used as the scene's site instead.
 * @returns {Promise<Object>} The scraped scene. `trailer` is only present when
 *   the release has a 720p or 320p mediabook file.
 */
async function scrapeScene(data, url, site) {
  const { id: entryId, title, description } = data;

  const date = new Date(data.dateReleased);
  const actors = data.actors.map(actor => actor.name);
  const rawTags = data.tags.map(tag => tag.name);

  const [poster, ...photos] = getThumbs(data);

  // Prefer the 720p trailer and fall back to 320p; undefined when the release has neither.
  const trailer = data.videos.mediabook && (data.videos.mediabook.files['720p'] || data.videos.mediabook.files['320p']);

  // The collection is only needed to resolve a fallback site, so a release
  // without collections no longer crashes when the site is already known.
  const [collection] = data.collections || [];
  const resolveChannel = site.isFallback && collection;

  const [[channelSite], tags] = await Promise.all([
    resolveChannel
      ? fetchSites({
        slug: collection.name.replace(/\s+/g, '').toLowerCase(),
        name: collection.name,
        url: `https://www.mofos.com/scenes?site=${collection.id}`,
      })
      : [site],
    matchTags(rawTags),
  ]);

  const release = {
    url,
    entryId,
    title,
    description,
    actors,
    tags,
    poster,
    photos,
    date,
    site: channelSite,
  };

  // Only attach a trailer when one exists instead of throwing on `trailer.urls`.
  if (trailer) {
    release.trailer = {
      src: trailer.urls.view,
      quality: parseInt(trailer.format, 10),
    };
  }

  return release;
}
/**
 * Fetches one page of the most recent scenes of a site from the releases API.
 *
 * @param {Object} site - Site whose `url` carries the collection ID in its `site` query parameter.
 * @param {number} [page=1] - 1-based page number; each page holds 10 releases.
 * @returns {Promise<Object[]>} The scraped releases, as produced by `scrapeLatest`.
 */
async function fetchLatest(site, page = 1) {
  const collectionId = new URL(site.url).searchParams.get('site');

  const jar = new CookieJar();
  const session = bhttp.session({ cookieJar: jar });

  // Load the site first; the instance token is read back from the cookies this request sets.
  await session.get(site.url);

  const cookies = await jar.getCookieStringAsync(site.url);
  const { instance_token: instanceToken } = cookieToData(cookies);

  // Ask for everything released before tomorrow, newest first.
  const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
  const pageSize = 10;
  const offset = pageSize * (page - 1);
  const apiUrl = `https://site-api.project1service.com/v2/releases?collectionId=${collectionId}&dateReleased=<${beforeDate}&limit=${pageSize}&offset=${offset}&orderBy=-dateReleased&type=scene`;

  const { body } = await session.get(apiUrl, {
    headers: { Instance: instanceToken },
  });

  return scrapeLatest(body.result, site);
}
/**
 * Fetches a single scene from the releases API and scrapes it.
 *
 * @param {string} url - Scene URL; its first run of digits is used as the release ID.
 * @param {Object} site - Site the scene is requested through, passed on to `scrapeScene`.
 * @returns {Promise<Object>} The scraped scene, as produced by `scrapeScene`.
 */
async function fetchScene(url, site) {
  const entryId = url.match(/\d+/)[0];

  const jar = new CookieJar();
  const session = bhttp.session({ cookieJar: jar });

  // Load the scene page first; the instance token is read back from the cookies this request sets.
  await session.get(url);

  const cookies = await jar.getCookieStringAsync(url);
  const { instance_token: instanceToken } = cookieToData(cookies);

  const { body } = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
    headers: { Instance: instanceToken },
  });

  return scrapeScene(body.result, url, site);
}
const { fetchScene, fetchLatest } = require('./mindgeek');
module.exports = {
fetchLatest,

View File

@ -7,7 +7,9 @@ const bang = require('./bang');
const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const dogfart = require('./dogfart');
const digitalplayground = require('./digitalplayground');
const evilangel = require('./evilangel');
const fakehub = require('./fakehub');
const jayrock = require('./jayrock');
const kink = require('./kink');
const mikeadriano = require('./mikeadriano');
@ -41,9 +43,11 @@ module.exports = {
blowpass,
brazzers,
ddfnetwork,
digitalplayground,
dogfart,
dogfartnetwork: dogfart,
evilangel,
fakehub,
jayrock,
julesjordan,
kellymadison,