Added 21Sextreme and 21Naturals networks. Scraping all actor release pages for Brazzers.
After Width: | Height: | Size: 1.5 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 2.6 KiB |
After Width: | Height: | Size: 8.4 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 189 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 25 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 55 KiB |
|
@ -8,6 +8,18 @@ const networks = [
|
|||
url: 'https://www.21sextury.com',
|
||||
description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.',
|
||||
},
|
||||
{
|
||||
slug: '21sextreme',
|
||||
name: '21Sextreme',
|
||||
url: 'https://www.21sextreme.com',
|
||||
description: 'Welcome to 21Sextreme.com, your portal to fisting porn, old and young lesbians, horny grannies & extreme BDSM featuring the best Euro & American Pornstars',
|
||||
},
|
||||
{
|
||||
slug: '21naturals',
|
||||
name: '21Naturals',
|
||||
url: 'https://www.21naturals.com',
|
||||
description: 'Welcome to 21Naturals.com, the porn network featuring the hottest pornstars from all over the world in all natural porn and erotic sex videos. Watch thousands of girls with natural tits',
|
||||
},
|
||||
{
|
||||
slug: 'adulttime',
|
||||
name: 'Adult Time',
|
||||
|
|
|
@ -2,7 +2,74 @@ const upsert = require('../src/utils/upsert');
|
|||
|
||||
/* eslint-disable max-len */
|
||||
const sites = [
|
||||
// 21Sextury
|
||||
// 21SEXTREME
|
||||
{
|
||||
slug: 'grandpasfuckteens',
|
||||
name: 'Grandpas Fuck Teens',
|
||||
url: 'https://grandpasfuckteens.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
},
|
||||
{
|
||||
slug: 'oldyounglesbianlove',
|
||||
name: 'Old Young Lesbian Love',
|
||||
url: 'https://oldyounglesbianlove.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'lustygrandmas',
|
||||
name: 'Lusty Grandmas',
|
||||
url: 'https://lustygrandmas.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
},
|
||||
{
|
||||
slug: 'teachmefisting',
|
||||
name: 'Teach Me Fisting',
|
||||
url: 'https://teachmefisting.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'zoliboy',
|
||||
name: 'Zoliboy',
|
||||
url: 'https://zoliboy.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'mightymistress',
|
||||
name: 'Mighty Mistress',
|
||||
url: 'https://mightymistress.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'dominatedgirls',
|
||||
name: 'Dominated Girls',
|
||||
url: 'https://dominatedgirls.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'homepornreality',
|
||||
name: 'Home Porn Reality',
|
||||
url: 'https://homepornreality.21sextreme.com',
|
||||
network: '21sextreme',
|
||||
parameters: {
|
||||
scene: 'https://21sextreme.com/en/video',
|
||||
},
|
||||
},
|
||||
// 21SEXTURY
|
||||
{
|
||||
slug: 'analteenangels',
|
||||
name: 'Anal Teen Angels',
|
||||
|
|
|
@ -12,6 +12,14 @@ const { scrapeRelease } = require('./scrape-releases');
|
|||
const { storeReleases } = require('./releases');
|
||||
|
||||
function getAfterDate() {
|
||||
if (/\d{2,4}-\d{2}-\d{2,4}/.test(argv.after)) {
|
||||
// using date
|
||||
return moment
|
||||
.utc(argv.after, ['YYYY-MM-DD', 'DD-MM-YYYY'])
|
||||
.toDate();
|
||||
}
|
||||
|
||||
// using time distance (e.g. "1 month")
|
||||
return moment
|
||||
.utc()
|
||||
.subtract(...argv.after.split(' '))
|
||||
|
@ -83,11 +91,17 @@ async function deepFetchReleases(baseReleases) {
|
|||
try {
|
||||
const fullRelease = await scrapeRelease(release.url, release, 'scene');
|
||||
|
||||
return {
|
||||
...release,
|
||||
...fullRelease,
|
||||
deep: true,
|
||||
};
|
||||
if (fullRelease) {
|
||||
return {
|
||||
...release,
|
||||
...fullRelease,
|
||||
deep: true,
|
||||
};
|
||||
}
|
||||
|
||||
logger.warn(`Release scraper returned empty result for ${release.url}`);
|
||||
|
||||
return release;
|
||||
} catch (error) {
|
||||
logger.error(error.stack);
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
|
@ -0,0 +1,10 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
|
@ -6,6 +6,8 @@ const cheerio = require('cheerio');
|
|||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, ex } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
const hairMap = {
|
||||
|
@ -15,7 +17,7 @@ const hairMap = {
|
|||
Redhead: 'red',
|
||||
};
|
||||
|
||||
function scrape(html, site, upcoming) {
|
||||
function scrapeAll(html, site, upcoming) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
|
||||
|
@ -41,6 +43,8 @@ function scrape(html, site, upcoming) {
|
|||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
|
||||
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
|
||||
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
|
@ -53,66 +57,55 @@ function scrape(html, site, upcoming) {
|
|||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
}
|
||||
|
||||
/**
 * Pull the player configuration object out of the inline script that sets
 * `window.videoUiOptions`.
 *
 * @param {?string} videoJson - source of the inline script, or null when the page has none
 * @returns {?Object} the parsed configuration, or null when it is absent or not valid JSON
 */
function parseVideoData(videoJson) {
  if (!videoJson) return null;

  const start = videoJson.indexOf('{"stream_info":');
  const end = videoJson.lastIndexOf('},');

  if (start === -1 || end < start) return null;

  try {
    return JSON.parse(videoJson.slice(start, end + 1));
  } catch {
    // Deliberately best-effort: the poster has markup fallbacks and the
    // trailer is optional, so a broken script must not fail the whole scene.
    return null;
  }
}

/**
 * Scrape a single scene page into a release object.
 *
 * Only properties that could be read from the page are set; poster, trailer,
 * duration and channel are omitted when their source element is missing.
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - scene URL; the third-to-last path segment is used as entry ID
 * @param {Object} [_site] - unused, kept so the signature matches the other scrapers
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(html, url, _site) {
  const $ = cheerio.load(html, { normalizeWhitespace: true });
  const release = {};

  const videoData = parseVideoData($('script:contains("window.videoUiOptions")').html());

  [release.entryId] = url.split('/').slice(-3, -2);
  release.title = $('.scene-title[itemprop="name"]').text();

  release.description = $('#scene-description p[itemprop="description"]')
    .contents()
    .first()
    .text()
    .trim();

  release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
  release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

  // The attribute wraps the minute count in one leading and one trailing character.
  const durationAttr = $('.scene-length[itemprop="duration"]').attr('content');
  if (durationAttr) release.duration = Number(durationAttr.slice(1, -1)) * 60;

  release.likes = Number($('.label-rating .like').text());
  release.dislikes = Number($('.label-rating .dislike').text());

  const siteName = $('.niche-site-logo').attr('title');
  if (siteName) release.channel = siteName.replace(/\s+/g, '').toLowerCase();

  release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
  release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();

  // Prefer the poster from the player configuration, then fall back to page markup.
  const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
  if (posterPath) release.poster = `https:${posterPath}`;

  const trailerPaths = videoData?.stream_info?.http?.paths;

  if (trailerPaths) {
    release.trailer = Object.entries(trailerPaths).map(([quality, path]) => ({
      src: `https:${path}`,
      quality: Number(quality.match(/\d{3,}/)[0]),
    }));
  }

  return release;
}
|
||||
|
||||
function scrapeActorSearch(html, url, actorName) {
|
||||
|
@ -122,13 +115,26 @@ function scrapeActorSearch(html, url, actorName) {
|
|||
return actorLink ? actorLink.href : null;
|
||||
}
|
||||
|
||||
function scrapeProfile(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
/**
 * Gather the releases listed on an actor page and on every pagination page
 * that follows it.
 *
 * @param {Object} page - query context of the current page
 * @param {Function} page.qu - looks up the "next page" link; a falsy result ends pagination
 * @param {string} page.html - markup of the current page, handed to scrapeAll
 * @param {Array} [accReleases=[]] - releases collected from earlier pages
 * @returns {Promise<Array>} releases of all visited pages, in page order
 */
async function fetchActorReleases({ qu, html }, accReleases = []) {
  // scrapeAll is called without a site here, so it receives `undefined` for it.
  const releases = accReleases.concat(scrapeAll(html));
  const next = qu('.pagination .next a');

  if (!next) return releases;

  // NOTE(review): assumes `next` is a site-relative path — confirm against the q utility.
  const qNext = await get(`https://www.brazzers.com${next}`);

  return fetchActorReleases(qNext, releases);
}
|
||||
|
||||
async function scrapeProfile(html, url, actorName) {
|
||||
const qProfile = ex(html);
|
||||
const { q, qa } = qProfile;
|
||||
|
||||
const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
|
||||
const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());
|
||||
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});
|
||||
|
||||
|
@ -136,6 +142,8 @@ function scrapeProfile(html, url, actorName) {
|
|||
name: actorName,
|
||||
};
|
||||
|
||||
profile.description = q('.model-profile-specs p', true);
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
|
||||
|
@ -152,10 +160,10 @@ function scrapeProfile(html, url, actorName) {
|
|||
if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
|
||||
if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;
|
||||
|
||||
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
|
||||
const avatarEl = q('.big-pic-model-container img');
|
||||
if (avatarEl) profile.avatar = `https:${avatarEl.src}`;
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.release-card-container .scene-card-title a'), el => `https://brazzers.com${el.href}`);
|
||||
profile.releases = await fetchActorReleases(qProfile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
@ -163,13 +171,13 @@ function scrapeProfile(html, url, actorName) {
|
|||
/**
 * Fetch one page of a site's release listing and scrape the releases on it.
 *
 * @param {Object} site - site record; `site.url` is the base of the listing URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<Array>} the releases scrapeAll found, not flagged as upcoming
 */
async function fetchLatest(site, page = 1) {
  const res = await bhttp.get(`${site.url}/page/${page}/`);

  return scrapeAll(res.body.toString(), site, false);
}
|
||||
|
||||
/**
 * Fetch a site's front page and scrape the releases on it as upcoming.
 *
 * @param {Object} site - site record; `site.url` is requested with a trailing slash
 * @returns {Promise<Array>} the releases scrapeAll found, flagged as upcoming
 */
async function fetchUpcoming(site) {
  const res = await bhttp.get(`${site.url}/`);

  return scrapeAll(res.body.toString(), site, true);
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
|
|
|
@ -186,7 +186,7 @@ async function scrapeScene(html, url, site) {
|
|||
|
||||
// date in data object is not the release date of the scene, but the date the entry was added; only use as fallback
|
||||
const dateString = $('.updatedDate').first().text().trim();
|
||||
const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
|
||||
const dateMatch = dateString.match(/\d{2,4}[-/]\d{2}[-/]\d{2,4}/)?.[0];
|
||||
|
||||
if (dateMatch) release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
|
||||
else if (data?.dateCreated) release.date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate();
|
||||
|
|
|
@ -16,6 +16,7 @@ const famedigital = require('./famedigital');
|
|||
const fantasymassage = require('./fantasymassage');
|
||||
const freeones = require('./freeones');
|
||||
const freeonesLegacy = require('./freeones_legacy');
|
||||
const girlsway = require('./girlsway');
|
||||
const iconmale = require('./iconmale');
|
||||
const jayrock = require('./jayrock');
|
||||
const julesjordan = require('./julesjordan');
|
||||
|
@ -28,7 +29,7 @@ const mikeadriano = require('./mikeadriano');
|
|||
const milehighmedia = require('./milehighmedia');
|
||||
const mindgeek = require('./mindgeek');
|
||||
const mofos = require('./mofos');
|
||||
const girlsway = require('./girlsway');
|
||||
const naturals = require('./21naturals');
|
||||
const naughtyamerica = require('./naughtyamerica');
|
||||
const perfectgonzo = require('./perfectgonzo');
|
||||
const pervcity = require('./pervcity');
|
||||
|
@ -38,9 +39,10 @@ const privateNetwork = require('./private'); // reserved keyword
|
|||
const puretaboo = require('./puretaboo');
|
||||
const realitykings = require('./realitykings');
|
||||
const score = require('./score');
|
||||
const sextreme = require('./21sextreme');
|
||||
const sextury = require('./21sextury');
|
||||
const teamskeet = require('./teamskeet');
|
||||
const transangels = require('./transangels');
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const twistys = require('./twistys');
|
||||
const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
|
@ -50,7 +52,9 @@ const xempire = require('./xempire');
|
|||
module.exports = {
|
||||
releases: {
|
||||
adulttime,
|
||||
'21sextury': twentyonesextury,
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
babes,
|
||||
bang,
|
||||
bangbros,
|
||||
|
@ -93,7 +97,7 @@ module.exports = {
|
|||
},
|
||||
actors: {
|
||||
// ordered by data priority
|
||||
'21sextury': twentyonesextury,
|
||||
'21sextury': sextury,
|
||||
babes,
|
||||
bangbros,
|
||||
blowpass,
|
||||
|
|