Added 21Sextreme and 21Naturals networks. Scraping all actor release pages for Brazzers.

This commit is contained in:
2020-02-09 02:01:39 +01:00
parent 885f51943a
commit e61ed2bb5f
24 changed files with 2664 additions and 59 deletions

View File

@@ -0,0 +1,10 @@
'use strict';
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -0,0 +1,10 @@
'use strict';
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -6,6 +6,8 @@ const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { get, ex } = require('../utils/q');
const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
const hairMap = {
@@ -15,7 +17,7 @@ const hairMap = {
Redhead: 'red',
};
function scrape(html, site, upcoming) {
function scrapeAll(html, site, upcoming) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.release-card.scene').toArray();
@@ -41,6 +43,8 @@ function scrape(html, site, upcoming) {
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
return acc.concat({
url,
entryId,
@@ -53,66 +57,55 @@ function scrape(html, site, upcoming) {
likes,
dislikes,
},
channel,
site,
});
}, []);
}
async function scrapeScene(html, url, site) {
async function scrapeScene(html, url, _site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const release = {};
const videoJson = $('script:contains("window.videoUiOptions")').html();
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));
const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1);
const videoData = JSON.parse(videoString);
const entryId = url.split('/').slice(-3, -2)[0];
const title = $('.scene-title[itemprop="name"]').text();
[release.entryId] = url.split('/').slice(-3, -2);
release.title = $('.scene-title[itemprop="name"]').text();
const description = $('#scene-description p[itemprop="description"]')
release.description = $('#scene-description p[itemprop="description"]')
.contents()
.first()
.text()
.trim();
const date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
const actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
const likes = Number($('.label-rating .like').text());
const dislikes = Number($('.label-rating .dislike').text());
release.likes = Number($('.label-rating .like').text());
release.dislikes = Number($('.label-rating .dislike').text());
const siteElement = $('.niche-site-logo');
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
const siteName = siteElement.attr('title');
const channel = siteName.replace(/\s+/g, '').toLowerCase();
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const poster = `https:${videoData.poster}`;
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
if (posterPath) release.poster = `https:${posterPath}`;
const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
src: `https:${path}`,
quality: Number(quality.match(/\d{3,}/)[0]),
}));
if (videoData) {
release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
src: `https:${path}`,
quality: Number(quality.match(/\d{3,}/)[0]),
}));
}
return {
url,
entryId,
title,
description,
actors,
date,
poster,
photos,
trailer,
duration,
rating: {
likes,
dislikes,
},
tags,
site,
channel,
};
return release;
}
function scrapeActorSearch(html, url, actorName) {
@@ -122,13 +115,26 @@ function scrapeActorSearch(html, url, actorName) {
return actorLink ? actorLink.href : null;
}
function scrapeProfile(html, url, actorName) {
const { document } = new JSDOM(html).window;
async function fetchActorReleases({ qu, html }, accReleases = []) {
const releases = scrapeAll(html);
const next = qu('.pagination .next a');
const avatarEl = document.querySelector('.big-pic-model-container img');
const descriptionEl = document.querySelector('.model-profile-specs p');
const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
if (next) {
const url = `https://www.brazzers.com${next}`;
const qNext = await get(url);
return fetchActorReleases(qNext, accReleases.concat(releases));
}
return accReleases.concat(releases);
}
async function scrapeProfile(html, url, actorName) {
const qProfile = ex(html);
const { q, qa } = qProfile;
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
@@ -136,6 +142,8 @@ function scrapeProfile(html, url, actorName) {
name: actorName,
};
profile.description = q('.model-profile-specs p', true);
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
@@ -152,10 +160,10 @@ function scrapeProfile(html, url, actorName) {
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
const avatarEl = q('.big-pic-model-container img');
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);
profile.releases = await fetchActorReleases(qProfile);
return profile;
}
@@ -163,13 +171,13 @@ function scrapeProfile(html, url, actorName) {
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/page/${page}/`);
return scrape(res.body.toString(), site, false);
return scrapeAll(res.body.toString(), site, false);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/`);
return scrape(res.body.toString(), site, true);
return scrapeAll(res.body.toString(), site, true);
}
async function fetchScene(url, site) {

View File

@@ -186,7 +186,7 @@ async function scrapeScene(html, url, site) {
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
const dateString = $('.updatedDate').first().text().trim();
const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];
if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();

View File

@@ -16,6 +16,7 @@ const famedigital = require('./famedigital');
const fantasymassage = require('./fantasymassage');
const freeones = require('./freeones');
const freeonesLegacy = require('./freeones_legacy');
const girlsway = require('./girlsway');
const iconmale = require('./iconmale');
const jayrock = require('./jayrock');
const julesjordan = require('./julesjordan');
@@ -28,7 +29,7 @@ const mikeadriano = require('./mikeadriano');
const milehighmedia = require('./milehighmedia');
const mindgeek = require('./mindgeek');
const mofos = require('./mofos');
const girlsway = require('./girlsway');
const naturals = require('./21naturals');
const naughtyamerica = require('./naughtyamerica');
const perfectgonzo = require('./perfectgonzo');
const pervcity = require('./pervcity');
@@ -38,9 +39,10 @@ const privateNetwork = require('./private'); // reserved keyword
const puretaboo = require('./puretaboo');
const realitykings = require('./realitykings');
const score = require('./score');
const sextreme = require('./21sextreme');
const sextury = require('./21sextury');
const teamskeet = require('./teamskeet');
const transangels = require('./transangels');
const twentyonesextury = require('./21sextury');
const twistys = require('./twistys');
const vixen = require('./vixen');
const vogov = require('./vogov');
@@ -50,7 +52,9 @@ const xempire = require('./xempire');
module.exports = {
releases: {
adulttime,
'21sextury': twentyonesextury,
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
babes,
bang,
bangbros,
@@ -93,7 +97,7 @@ module.exports = {
},
actors: {
// ordered by data priority
'21sextury': twentyonesextury,
'21sextury': sextury,
babes,
bangbros,
blowpass,