Added 21Sextreme and 21Naturals networks. Scraping all actor release pages for Brazzers.

This commit is contained in:
ThePendulum 2020-02-09 02:01:39 +01:00
parent 885f51943a
commit e61ed2bb5f
24 changed files with 2664 additions and 59 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

View File

@ -8,6 +8,18 @@ const networks = [
url: 'https://www.21sextury.com',
description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.',
},
{
slug: '21sextreme',
name: '21Sextreme',
url: 'https://www.21sextreme.com',
description: 'Welcome to 21Sextreme.com, your portal to fisting porn, old and young lesbians, horny grannies & extreme BDSM featuring the best Euro & American Pornstars',
},
{
slug: '21naturals',
name: '21Naturals',
url: 'https://www.21naturals.com',
description: 'Welcome to 21Naturals.com, the porn network featuring the hottest pornstars from all over the world in all natural porn and erotic sex videos. Watch thousands of girls with natural tits',
},
{
slug: 'adulttime',
name: 'Adult Time',

View File

@ -2,7 +2,74 @@ const upsert = require('../src/utils/upsert');
/* eslint-disable max-len */
const sites = [
// 21Sextury
// 21SEXTREME
{
slug: 'grandpasfuckteens',
name: 'Grandpas Fuck Teens',
url: 'https://grandpasfuckteens.21sextreme.com',
network: '21sextreme',
},
{
slug: 'oldyounglesbianlove',
name: 'Old Young Lesbian Love',
url: 'https://oldyounglesbianlove.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
{
slug: 'lustygrandmas',
name: 'Lusty Grandmas',
url: 'https://lustygrandmas.21sextreme.com',
network: '21sextreme',
},
{
slug: 'teachmefisting',
name: 'Teach Me Fisting',
url: 'https://teachmefisting.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
{
slug: 'zoliboy',
name: 'Zoliboy',
url: 'https://zoliboy.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
{
slug: 'mightymistress',
name: 'Mighty Mistress',
url: 'https://mightymistress.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
{
slug: 'dominatedgirls',
name: 'Dominated Girls',
url: 'https://dominatedgirls.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
{
slug: 'homepornreality',
name: 'Home Porn Reality',
url: 'https://homepornreality.21sextreme.com',
network: '21sextreme',
parameters: {
scene: 'https://21sextreme.com/en/video',
},
},
// 21SEXTURY
{
slug: 'analteenangels',
name: 'Anal Teen Angels',

View File

@ -12,6 +12,14 @@ const { scrapeRelease } = require('./scrape-releases');
const { storeReleases } = require('./releases');
function getAfterDate() {
if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
// using date
return moment
.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY'])
.toDate();
}
// using time distance (e.g. "1 month")
return moment
.utc()
.subtract(...argv.after.split(' '))
@ -83,11 +91,17 @@ async function deepFetchReleases(baseReleases) {
try {
const fullRelease = await scrapeRelease(release.url, release, 'scene');
return {
...release,
...fullRelease,
deep: true,
};
if (fullRelease) {
return {
...release,
...fullRelease,
deep: true,
};
}
logger.warn(`Release scraper returned empty result for ${release.url}`);
return release;
} catch (error) {
logger.error(error.stack);

View File

@ -0,0 +1,10 @@
'use strict';
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@ -0,0 +1,10 @@
'use strict';
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@ -6,6 +6,8 @@ const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { get, ex } = require('../utils/q');
const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
const hairMap = {
@ -15,7 +17,7 @@ const hairMap = {
Redhead: 'red',
};
function scrape(html, site, upcoming) {
function scrapeAll(html, site, upcoming) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.release-card.scene').toArray();
@ -41,6 +43,8 @@ function scrape(html, site, upcoming) {
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
return acc.concat({
url,
entryId,
@ -53,66 +57,55 @@ function scrape(html, site, upcoming) {
likes,
dislikes,
},
channel,
site,
});
}, []);
}
async function scrapeScene(html, url, site) {
async function scrapeScene(html, url, _site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const release = {};
const videoJson = $('script:contains("window.videoUiOptions")').html();
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));
const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1);
const videoData = JSON.parse(videoString);
const entryId = url.split('/').slice(-3, -2)[0];
const title = $('.scene-title[itemprop="name"]').text();
[release.entryId] = url.split('/').slice(-3, -2);
release.title = $('.scene-title[itemprop="name"]').text();
const description = $('#scene-description p[itemprop="description"]')
release.description = $('#scene-description p[itemprop="description"]')
.contents()
.first()
.text()
.trim();
const date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
const actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
const likes = Number($('.label-rating .like').text());
const dislikes = Number($('.label-rating .dislike').text());
release.likes = Number($('.label-rating .like').text());
release.dislikes = Number($('.label-rating .dislike').text());
const siteElement = $('.niche-site-logo');
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
const siteName = siteElement.attr('title');
const channel = siteName.replace(/\s+/g, '').toLowerCase();
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const poster = `https:${videoData.poster}`;
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
if (posterPath) release.poster = `https:${posterPath}`;
const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
src: `https:${path}`,
quality: Number(quality.match(/\d{3,}/)[0]),
}));
if (videoData) {
release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
src: `https:${path}`,
quality: Number(quality.match(/\d{3,}/)[0]),
}));
}
return {
url,
entryId,
title,
description,
actors,
date,
poster,
photos,
trailer,
duration,
rating: {
likes,
dislikes,
},
tags,
site,
channel,
};
return release;
}
function scrapeActorSearch(html, url, actorName) {
@ -122,13 +115,26 @@ function scrapeActorSearch(html, url, actorName) {
return actorLink ? actorLink.href : null;
}
function scrapeProfile(html, url, actorName) {
const { document } = new JSDOM(html).window;
async function fetchActorReleases({ qu, html }, accReleases = []) {
const releases = scrapeAll(html);
const next = qu('.pagination .next a');
const avatarEl = document.querySelector('.big-pic-model-container img');
const descriptionEl = document.querySelector('.model-profile-specs p');
const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
if (next) {
const url = `https://www.brazzers.com${next}`;
const qNext = await get(url);
return fetchActorReleases(qNext, accReleases.concat(releases));
}
return accReleases.concat(releases);
}
async function scrapeProfile(html, url, actorName) {
const qProfile = ex(html);
const { q, qa } = qProfile;
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
@ -136,6 +142,8 @@ function scrapeProfile(html, url, actorName) {
name: actorName,
};
profile.description = q('.model-profile-specs p', true);
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
@ -152,10 +160,10 @@ function scrapeProfile(html, url, actorName) {
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
const avatarEl = q('.big-pic-model-container img');
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);
profile.releases = await fetchActorReleases(qProfile);
return profile;
}
@ -163,13 +171,13 @@ function scrapeProfile(html, url, actorName) {
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/page/${page}/`);
return scrape(res.body.toString(), site, false);
return scrapeAll(res.body.toString(), site, false);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/`);
return scrape(res.body.toString(), site, true);
return scrapeAll(res.body.toString(), site, true);
}
async function fetchScene(url, site) {

View File

@ -186,7 +186,7 @@ async function scrapeScene(html, url, site) {
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
const dateString = $('.updatedDate').first().text().trim();
const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];
if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();

View File

@ -16,6 +16,7 @@ const famedigital = require('./famedigital');
const fantasymassage = require('./fantasymassage');
const freeones = require('./freeones');
const freeonesLegacy = require('./freeones_legacy');
const girlsway = require('./girlsway');
const iconmale = require('./iconmale');
const jayrock = require('./jayrock');
const julesjordan = require('./julesjordan');
@ -28,7 +29,7 @@ const mikeadriano = require('./mikeadriano');
const milehighmedia = require('./milehighmedia');
const mindgeek = require('./mindgeek');
const mofos = require('./mofos');
const girlsway = require('./girlsway');
const naturals = require('./21naturals');
const naughtyamerica = require('./naughtyamerica');
const perfectgonzo = require('./perfectgonzo');
const pervcity = require('./pervcity');
@ -38,9 +39,10 @@ const privateNetwork = require('./private'); // reserved keyword
const puretaboo = require('./puretaboo');
const realitykings = require('./realitykings');
const score = require('./score');
const sextreme = require('./21sextreme');
const sextury = require('./21sextury');
const teamskeet = require('./teamskeet');
const transangels = require('./transangels');
const twentyonesextury = require('./21sextury');
const twistys = require('./twistys');
const vixen = require('./vixen');
const vogov = require('./vogov');
@ -50,7 +52,9 @@ const xempire = require('./xempire');
module.exports = {
releases: {
adulttime,
'21sextury': twentyonesextury,
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
babes,
bang,
bangbros,
@ -93,7 +97,7 @@ module.exports = {
},
actors: {
// ordered by data priority
'21sextury': twentyonesextury,
'21sextury': sextury,
babes,
bangbros,
blowpass,