forked from DebaucheryLibrarian/traxxx
Added 21Sextreme and 21Naturals networks. Scraping all actor release pages for Brazzers.
This commit is contained in:
10
src/scrapers/21naturals.js
Normal file
10
src/scrapers/21naturals.js
Normal file
@@ -0,0 +1,10 @@
|
||||
'use strict';
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
// Public interface of the 21Naturals scraper. Nothing is implemented in this
// file: every entry is a function imported from the shared ./gamma module.
// gamma's fetchApiLatest / fetchApiProfile / fetchApiUpcoming are exposed under
// the un-prefixed names fetchLatest / fetchProfile / fetchUpcoming, while
// fetchScene is passed through under its own name.
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
10
src/scrapers/21sextreme.js
Normal file
10
src/scrapers/21sextreme.js
Normal file
@@ -0,0 +1,10 @@
|
||||
'use strict';
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
// Public interface of the 21Sextreme scraper. Nothing is implemented in this
// file: every entry is a function imported from the shared ./gamma module.
// gamma's fetchApiLatest / fetchApiProfile / fetchApiUpcoming are exposed under
// the un-prefixed names fetchLatest / fetchProfile / fetchUpcoming, while
// fetchScene is passed through under its own name.
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
@@ -6,6 +6,8 @@ const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, ex } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
const hairMap = {
|
||||
@@ -15,7 +17,7 @@ const hairMap = {
|
||||
Redhead: 'red',
|
||||
};
|
||||
|
||||
function scrape(html, site, upcoming) {
|
||||
function scrapeAll(html, site, upcoming) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
|
||||
@@ -41,6 +43,8 @@ function scrape(html, site, upcoming) {
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
|
||||
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
|
||||
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
@@ -53,66 +57,55 @@ function scrape(html, site, upcoming) {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
async function scrapeScene(html, url, _site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const release = {};
|
||||
|
||||
const videoJson = $('script:contains("window.videoUiOptions")').html();
|
||||
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));
|
||||
const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1);
|
||||
const videoData = JSON.parse(videoString);
|
||||
|
||||
const entryId = url.split('/').slice(-3, -2)[0];
|
||||
const title = $('.scene-title[itemprop="name"]').text();
|
||||
[release.entryId] = url.split('/').slice(-3, -2);
|
||||
release.title = $('.scene-title[itemprop="name"]').text();
|
||||
|
||||
const description = $('#scene-description p[itemprop="description"]')
|
||||
release.description = $('#scene-description p[itemprop="description"]')
|
||||
.contents()
|
||||
.first()
|
||||
.text()
|
||||
.trim();
|
||||
|
||||
const date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||
const actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||
release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||
|
||||
const likes = Number($('.label-rating .like').text());
|
||||
const dislikes = Number($('.label-rating .dislike').text());
|
||||
release.likes = Number($('.label-rating .like').text());
|
||||
release.dislikes = Number($('.label-rating .dislike').text());
|
||||
|
||||
const siteElement = $('.niche-site-logo');
|
||||
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
||||
const siteName = siteElement.attr('title');
|
||||
const channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
|
||||
const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||
|
||||
const poster = `https:${videoData.poster}`;
|
||||
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||
const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
|
||||
if (posterPath) release.poster = `https:${posterPath}`;
|
||||
|
||||
const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
|
||||
src: `https:${path}`,
|
||||
quality: Number(quality.match(/\d{3,}/)[0]),
|
||||
}));
|
||||
if (videoData) {
|
||||
release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
|
||||
src: `https:${path}`,
|
||||
quality: Number(quality.match(/\d{3,}/)[0]),
|
||||
}));
|
||||
}
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
description,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
trailer,
|
||||
duration,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeActorSearch(html, url, actorName) {
|
||||
@@ -122,13 +115,26 @@ function scrapeActorSearch(html, url, actorName) {
|
||||
return actorLink ? actorLink.href : null;
|
||||
}
|
||||
|
||||
function scrapeProfile(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
async function fetchActorReleases({ qu, html }, accReleases = []) {
|
||||
const releases = scrapeAll(html);
|
||||
const next = qu('.pagination .next a');
|
||||
|
||||
const avatarEl = document.querySelector('.big-pic-model-container img');
|
||||
const descriptionEl = document.querySelector('.model-profile-specs p');
|
||||
const bioKeys = Array.from(document.querySelectorAll('.profile-spec-list label'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioValues = Array.from(document.querySelectorAll('.profile-spec-list var'), el => el.textContent.replace(/\n+|\s{2,}/g, '').trim());
|
||||
if (next) {
|
||||
const url = `https://www.brazzers.com${next}`;
|
||||
const qNext = await get(url);
|
||||
|
||||
return fetchActorReleases(qNext, accReleases.concat(releases));
|
||||
}
|
||||
|
||||
return accReleases.concat(releases);
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, url, actorName) {
|
||||
const qProfile = ex(html);
|
||||
const { q, qa } = qProfile;
|
||||
|
||||
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
|
||||
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
|
||||
|
||||
@@ -136,6 +142,8 @@ function scrapeProfile(html, url, actorName) {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
profile.description = q('.model-profile-specs p', true);
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
|
||||
@@ -152,10 +160,10 @@ function scrapeProfile(html, url, actorName) {
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
|
||||
|
||||
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
|
||||
const avatarEl = q('.big-pic-model-container img');
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
@@ -163,13 +171,13 @@ function scrapeProfile(html, url, actorName) {
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/page/${page}/`);
|
||||
|
||||
return scrape(res.body.toString(), site, false);
|
||||
return scrapeAll(res.body.toString(), site, false);
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
const res = await bhttp.get(`${site.url}/`);
|
||||
|
||||
return scrape(res.body.toString(), site, true);
|
||||
return scrapeAll(res.body.toString(), site, true);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
|
||||
@@ -186,7 +186,7 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
|
||||
const dateString = $('.updatedDate').first().text().trim();
|
||||
const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
|
||||
const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];
|
||||
|
||||
if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
|
||||
else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
|
||||
|
||||
@@ -16,6 +16,7 @@ const famedigital = require('./famedigital');
|
||||
const fantasymassage = require('./fantasymassage');
|
||||
const freeones = require('./freeones');
|
||||
const freeonesLegacy = require('./freeones_legacy');
|
||||
const girlsway = require('./girlsway');
|
||||
const iconmale = require('./iconmale');
|
||||
const jayrock = require('./jayrock');
|
||||
const julesjordan = require('./julesjordan');
|
||||
@@ -28,7 +29,7 @@ const mikeadriano = require('./mikeadriano');
|
||||
const milehighmedia = require('./milehighmedia');
|
||||
const mindgeek = require('./mindgeek');
|
||||
const mofos = require('./mofos');
|
||||
const girlsway = require('./girlsway');
|
||||
const naturals = require('./21naturals');
|
||||
const naughtyamerica = require('./naughtyamerica');
|
||||
const perfectgonzo = require('./perfectgonzo');
|
||||
const pervcity = require('./pervcity');
|
||||
@@ -38,9 +39,10 @@ const privateNetwork = require('./private'); // reserved keyword
|
||||
const puretaboo = require('./puretaboo');
|
||||
const realitykings = require('./realitykings');
|
||||
const score = require('./score');
|
||||
const sextreme = require('./21sextreme');
|
||||
const sextury = require('./21sextury');
|
||||
const teamskeet = require('./teamskeet');
|
||||
const transangels = require('./transangels');
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const twistys = require('./twistys');
|
||||
const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
@@ -50,7 +52,9 @@ const xempire = require('./xempire');
|
||||
module.exports = {
|
||||
releases: {
|
||||
adulttime,
|
||||
'21sextury': twentyonesextury,
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
babes,
|
||||
bang,
|
||||
bangbros,
|
||||
@@ -93,7 +97,7 @@ module.exports = {
|
||||
},
|
||||
actors: {
|
||||
// ordered by data priority
|
||||
'21sextury': twentyonesextury,
|
||||
'21sextury': sextury,
|
||||
babes,
|
||||
bangbros,
|
||||
blowpass,
|
||||
|
||||
Reference in New Issue
Block a user