Moved all of ExploitedX and Nebraska Coeds into generic ElevatedX scraper.

This commit is contained in:
DebaucheryLibrarian 2021-02-14 01:53:03 +01:00
parent f79505f3f6
commit 372b5da704
34 changed files with 251 additions and 252 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

View File

Before

Width:  |  Height:  |  Size: 33 KiB

After

Width:  |  Height:  |  Size: 33 KiB

View File

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

Before

Width:  |  Height:  |  Size: 33 KiB

After

Width:  |  Height:  |  Size: 33 KiB

View File

Before

Width:  |  Height:  |  Size: 33 KiB

After

Width:  |  Height:  |  Size: 33 KiB

View File

Before

Width:  |  Height:  |  Size: 94 KiB

After

Width:  |  Height:  |  Size: 94 KiB

View File

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 68 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

Before

Width:  |  Height:  |  Size: 80 KiB

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

View File

Before

Width:  |  Height:  |  Size: 170 KiB

After

Width:  |  Height:  |  Size: 170 KiB

View File

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -173,9 +173,22 @@ const networks = [
name: 'Marc Dorcel',
url: 'https://www.dorcel.com',
},
{
slug: 'evilangel',
name: 'Evil Angel',
url: 'https://www.evilangel.com',
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
parent: 'gamma',
parameters: {
layout: 'api',
},
},
{
slug: 'exploitedx',
name: 'ExpoitedX',
parameters: {
layout: 'tubular',
},
},
{
slug: 'freeones',
@ -210,16 +223,6 @@ const networks = [
url: 'https://dogfartnetwork.com',
description: 'The world famous Dogfart Interracial series. Online since 1996, we have the largest collection of Interracial videos, pictures and content on the web.',
},
{
slug: 'evilangel',
name: 'Evil Angel',
url: 'https://www.evilangel.com',
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
parent: 'gamma',
parameters: {
layout: 'api',
},
},
{
slug: 'fantasymassage',
name: 'Fantasy Massage',
@ -411,6 +414,14 @@ const networks = [
url: 'https://www.naughtyamerica.com',
description: 'The best porn movies daily at Naughty America! Experience the most seductive porn stars in stunning virtual reality, 4K and HD porn videos!',
},
{
slug: 'nebraskacoeds',
name: 'Nebraska Coeds',
url: 'https://nebraskacoeds.com',
parameters: {
layout: 'classic',
},
},
{
slug: 'newsensations',
name: 'New Sensations',

View File

@ -2762,8 +2762,8 @@ const sites = [
url: 'https://exploitedcollegegirls.com',
parent: 'exploitedx',
parameters: {
layout: 'blog',
latest: 'https://www.exploitedcollegegirls.com/site/categories/movies_{page}_d.html',
latest: 'https://exploitedcollegegirls.com/site/categories/movies_{page}_d.html',
profile: 'https://exploitedcollegegirls.com/site/models/{actorSlug}.html',
},
},
{
@ -2772,8 +2772,8 @@ const sites = [
url: 'https://backroomcastingcouch.com',
parent: 'exploitedx',
parameters: {
layout: 'blog',
latest: 'https://www.backroomcastingcouch.com/site/categories/movies_{page}_d.html',
latest: 'https://backroomcastingcouch.com/site/categories/movies_{page}_d.html',
profile: 'https://backroomcastingcouch.com/site/models/{actorSlug}.html',
},
},
{
@ -2782,39 +2782,6 @@ const sites = [
alias: ['interracial', 'bbc'],
url: 'https://blackambush.com',
parent: 'exploitedx',
parameters: {
layout: 'blog',
},
},
{
name: 'Nebraska Coeds',
slug: 'nebraskacoeds',
url: 'https://nebraskacoeds.com',
parent: 'exploitedx',
},
{
name: 'South Beach Coeds',
slug: 'southbeachcoeds',
url: 'https://southbeachcoeds.com',
parent: 'exploitedx',
},
{
name: 'Spring Break Life',
slug: 'springbreaklife',
url: 'https://springbreaklife.com',
parent: 'exploitedx',
},
{
name: 'Euro Coeds',
slug: 'eurocoeds',
url: 'https://eurocoeds.com',
parent: 'exploitedx',
},
{
name: 'After Hours Exposed',
slug: 'afterhoursexposed',
url: 'https://afterhoursexposed.com',
parent: 'exploitedx',
},
// FIRST ANAL QUEST
{
@ -5478,6 +5445,37 @@ const sites = [
url: 'https://www.naughtyamerica.com/site/tonight-s-fuck',
parent: 'naughtyamerica',
},
// NEBRASKA COEDS
{
name: 'Nebraska Coeds',
slug: 'nebraskacoeds',
url: 'https://nebraskacoeds.com',
parent: 'nebraskacoeds',
},
{
name: 'South Beach Coeds',
slug: 'southbeachcoeds',
url: 'https://southbeachcoeds.com',
parent: 'nebraskacoeds',
},
{
name: 'Spring Break Life',
slug: 'springbreaklife',
url: 'https://springbreaklife.com',
parent: 'nebraskacoeds',
},
{
name: 'Euro Coeds',
slug: 'eurocoeds',
url: 'https://eurocoeds.com',
parent: 'nebraskacoeds',
},
{
name: 'After Hours Exposed',
slug: 'afterhoursexposed',
url: 'https://afterhoursexposed.com',
parent: 'nebraskacoeds',
},
// NEW SENSATIONS
{
slug: 'hotwifexxx',

View File

@ -376,7 +376,7 @@ async function curateProfile(profile, actor) {
curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
// combined measurement value
const measurements = profile.measurements?.match(/(\d+)(\w+)-(\d+)-(\d+)/);
const measurements = profile.measurements?.match(/(\d+)(\w+)[-x](\d+)[-x](\d+)/); // ExCoGi uses x
if (measurements) {
curatedProfile.bust = Number(measurements[1]);

View File

@ -4,6 +4,7 @@ const format = require('template-format');
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
function deriveEntryId(release) {
if (release.date && release.url) {
@ -19,6 +20,42 @@ function deriveEntryId(release) {
return null;
}
/**
 * Work out the poster sources and the extra photos for a release.
 *
 * @param {?string} posterPath - poster path found on the page, if any
 * @param {object} channel - entity whose `parameters.media` (preferred) or `url` prefixes the path
 * @param {object} [baseRelease] - previously scraped release whose poster serves as fallback
 * @returns {Array} `[poster, photos]`; poster is a list of sources, a single value, or null
 */
function extractPoster(posterPath, channel, baseRelease) {
  // no path, or a path matching 400.jpg (presumably a low-resolution thumbnail - confirm): keep the base poster
  if (!posterPath || /400.jpg/.test(posterPath)) {
    return [baseRelease?.poster || null, []];
  }

  const baseUrl = channel.parameters?.media || channel.url;
  const standard = qu.prefixUrl(posterPath, baseUrl);

  // upscaled variants of the same image
  const upscaled = ['-2x', '-3x'].map((suffix) => standard.replace('-1x', suffix));

  // the overview poster, when there is one, is kept as an additional photo
  const photos = baseRelease?.poster ? [baseRelease.poster] : [];

  return [[standard, ...upscaled], photos];
}
/**
 * Collect the image sources of an element from largest to smallest.
 *
 * @param {Function} q - query function; called as `q(el, selector, attribute)` when `el` is given,
 *   otherwise as `q(selector, attribute)`
 * @param {string} selector - selector of the image element
 * @param {object} site - entity whose `parameters.media` (preferred) or `url` is prepended to each source
 * @param {*} [el] - optional context element to query within
 * @returns {string[]} prefixed sources for the attributes that were present
 */
function getImageWithFallbacks(q, selector, site, el) {
  const attributes = ['src0_3x', 'src0_2x', 'src0_1x'];

  const readAttribute = el
    ? (attribute) => q(el, selector, attribute)
    : (attribute) => q(selector, attribute);

  return attributes
    .map((attribute) => readAttribute(attribute))
    .filter(Boolean)
    .map((src) => `${site.parameters?.media || site.url}${src}`);
}
function scrapeAllClassic(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
@ -79,6 +116,106 @@ function scrapeAllTubular(scenes, channel, accNetworkReleases) {
});
}
/**
 * Scrape a scene page that uses the classic layout.
 *
 * @param {{ query: object, html: string }} page - query helpers for the scene page and its raw HTML
 * @param {string} url - scene page URL (not used by this scraper)
 * @param {object} channel - entity whose `url` is prepended to the trailer path
 * @returns {object} release with title, poster, entryId and, when an mp4 source is found, trailer
 */
function scrapeSceneClassic({ query, html }, url, channel) {
  const release = {};

  release.title = query.q('.updatesBlock h2', true);
  release.poster = query.meta('property="og:image"');

  // a page without an og:image must not abort the scrape; the entry ID is then left undefined
  release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

  const trailer = html.match(/src="(.+\.mp4)"/)?.[1];

  if (trailer) {
    release.trailer = {
      src: `${channel.url}${trailer}`,
    };
  }

  return release;
}
/**
 * Scrape a scene page that uses the tubular layout.
 *
 * @param {{ query: object, html: string }} page - query helpers for the scene page and its raw HTML
 * @param {object} entity - channel or network being scraped; `parameters.media` or `url` prefixes media paths
 * @param {string} url - scene page URL, attached to the trailer as referer
 * @param {object} [baseRelease] - release from the overview page, passed on to extractPoster
 * @returns {object} the scraped release
 */
function scrapeSceneTubular({ query, html }, entity, url, baseRelease) {
  const release = {};

  release.title = query.q('.trailer-section-head .section-title, .title-block .section-title', true);
  release.description = query.text('.row .update-info-block');

  release.date = query.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
  release.duration = query.dur('.update-info-row:nth-child(2)');

  release.actors = query.all('.models-list-thumbs a').map(el => ({
    name: query.cnt(el, 'span'),
    avatar: getImageWithFallbacks(query.q, 'img', entity, el),
    url: query.url(el, null),
  }));

  release.tags = query.all('.tags a', true);

  const posterPath = query.q('.player-thumb img', 'src0_1x');
  const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];

  [release.poster, release.photos] = extractPoster(posterPath, entity, baseRelease);

  if (trailer) {
    release.trailer = { src: qu.prefixUrl(trailer, entity.parameters?.media || entity.url), referer: url };
  }

  // the rating element is optional; its absence must not abort the whole scrape
  const stars = query.q('.update-rating', true)?.match(/\d.\d/)?.[0];

  if (stars) release.stars = Number(stars);

  // Without children there is nothing to match against; an empty child list would
  // also produce an empty pattern that matches every tag.
  if (entity.type === 'network' && entity.children?.length > 0) {
    const channelRegExp = new RegExp(entity.children.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
    const channel = release.tags.find(tag => channelRegExp.test(tag));

    if (channel) {
      release.channel = slugify(channel, '');
    }
  }

  release.entryId = deriveEntryId(release);

  return release;
}
/**
 * Scrape an actor profile page.
 *
 * @param {{ query: object }} page - query helpers for the profile page
 * @param {object} entity - entity the profile belongs to; its `url` prefixes avatar paths
 * @param {object} [parameters] - when `layout` is 'classic' or 'tubular', the scenes listed on
 *   the profile page are scraped with the matching list scraper
 * @returns {Promise<object>} profile with age, dateOfBirth, height, measurements, aliases and,
 *   when found, avatar and scenes
 */
async function scrapeProfile({ query }, entity, parameters) {
  const profile = {};

  const bio = query.cnt('.model_bio, .detail-div');
  const avatarEl = query.q('.model_bio_pic img, .model_bio_thumb');

  profile.age = Number(bio?.match(/Age:\s*(\d{2})/)?.[1]) || null;

  // hand over only the captured date; the full match still carries the 'Age:' label
  profile.dateOfBirth = qu.parseDate(bio?.match(/Age:\s*(\w+ \d{1,2}, \d{4})/)?.[1], 'MMMM D, YYYY');

  profile.height = convert(bio?.match(/\d+\s*(feet|')\s*\d+\s*(inches|"|$)/)?.[0], 'cm');
  profile.measurements = bio?.match(/\w+[-x]\d+[-x]\d+/)?.[0] || null;

  // Capture the whole comma-separated list in one group; a repeated capture group
  // would only retain its last iteration, i.e. the last alias.
  const aliasList = bio?.match(/also known as:\s*([\w\s]+(?:,[\w\s]+)*)/i)?.[1];

  profile.aliases = aliasList
    ? aliasList.split(/,\s*/).map(alias => alias.trim()).filter(Boolean)
    : [];

  if (avatarEl) {
    const avatarSources = ['src0_3x', 'src0_2x', 'src0_1x', 'src0', 'src']
      .map(attribute => avatarEl.getAttribute(attribute))
      .filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
      .map(avatar => qu.prefixUrl(avatar, entity.url));

    if (avatarSources.length) profile.avatar = avatarSources;
  }

  if (parameters?.layout === 'classic') {
    profile.scenes = scrapeAllClassic(qu.initAll(query.all('.bodyArea .updateItem')), entity);
  }

  if (parameters?.layout === 'tubular') {
    profile.scenes = scrapeAllTubular(qu.initAll(query.all('.modelfeature, .item-video')), entity);
  }

  return profile;
}
async function fetchLatest(site, page = 1, options, preData, allScraper) {
const url = (site.parameters?.latest && format(site.parameters.latest, { page }))
|| `${site.url}/categories/movies_${page}_d.html`;
@ -92,6 +229,16 @@ async function fetchLatest(site, page = 1, options, preData, allScraper) {
return allScraper(res.items, site, preData?.uniqueReleases);
}
/**
 * Fetch the upcoming scenes listed on a classic-layout channel's front page.
 *
 * @param {object} channel - channel whose `url` is requested
 * @returns {Promise<*>} the scraped upcoming releases, or the response status when the request failed
 */
async function fetchUpcomingClassic(channel) {
  const { ok, items, status } = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

  return ok ? scrapeAllClassic(items, channel) : status;
}
/**
 * Fetch a page of latest releases, scraping the result with the classic-layout list scraper.
 *
 * All arguments are forwarded unchanged to `fetchLatest`.
 */
async function fetchLatestClassic(channel, page, options, preData) {
  const releases = await fetchLatest(channel, page, options, preData, scrapeAllClassic);

  return releases;
}
@ -100,13 +247,53 @@ async function fetchLatestTubular(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllTubular);
}
/**
 * Look up an actor profile, trying each candidate URL in turn until one yields a profile.
 *
 * Candidates are the actor's own `url` followed by two URLs derived from the actor name,
 * slugified without and with dashes. The derived URLs use `entity.parameters.profile` as a
 * template when present, and `<entity.url>/models/<slug>.html` otherwise.
 *
 * @param {{ name: string, url: ?string }} actor - actor to look up
 * @param {{ entity: object, parameters: ?object }} context - entity to search on and the
 *   parameters handed through to scrapeProfile
 * @returns {Promise<?object>} the scraped profile, or null when no candidate succeeded
 */
async function fetchProfile({ name: actorName, url }, { entity, parameters }) {
  const profileTemplate = entity.parameters?.profile;

  const derivedUrls = [slugify(actorName, ''), slugify(actorName, '-')].map((actorSlug) => {
    if (profileTemplate) {
      return format(profileTemplate, { actorSlug });
    }

    // without a site URL there is nothing to build on; null entries are skipped below
    return entity.url ? `${entity.url}/models/${actorSlug}.html` : null;
  });

  const urls = Array.from(new Set([url, ...derivedUrls]));

  return urls.reduce(async (chain, profileUrl) => {
    const profile = await chain;

    if (profile) {
      return profile;
    }

    if (!profileUrl) {
      return null;
    }

    const res = await qu.get(profileUrl);

    // NOTE(review): other fetchers in this file read `ok`/`status` from the same helper module,
    // so accept the `ok` flag as well as a raw 200 status code - confirm which one qu.get sets
    if (res.ok || res.statusCode === 200) {
      return scrapeProfile(res.item, entity, parameters);
    }

    return null;
  }, Promise.resolve());
}
// Scraper entry points grouped per layout; presumably selected through the `layout`
// parameter ('classic' / 'tubular') of a network or site - confirm against the caller.
module.exports = {
// classic layout: the only one of the two that exposes upcoming scenes
classic: {
fetchLatest: fetchLatestClassic,
fetchUpcoming: fetchUpcomingClassic,
fetchProfile,
scrapeAll: scrapeAllClassic,
scrapeScene: scrapeSceneClassic,
},
// tubular layout: shares fetchProfile with the classic layout
tubular: {
fetchLatest: fetchLatestTubular,
fetchProfile,
scrapeAll: scrapeAllTubular,
scrapeScene: scrapeSceneTubular,
},
};

View File

@ -1,201 +0,0 @@
'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
const elevatedx = require('./elevatedx');
/**
 * Scrape the scene teasers of a blog-layout overview page.
 *
 * @param {Array<{ query: object }>} scenes - query helpers, one per teaser element
 * @param {object} channel - channel whose `url` serves as origin for relative links and images
 * @returns {object[]} one release per teaser
 */
function scrapeLatestBlog(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

    // a link or thumbnail that does not follow the expected pattern leaves the entry ID undefined
    if (release.url) {
      release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');
    } else {
      release.entryId = query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];
    }

    // fall back to the plain heading when the '<date> - <title>' pattern is absent
    release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
    release.description = query.text('p');

    release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

    if (release.title) {
      // remove common patterns so only the name is left
      const curatedTitle = release.title.replace(/\b(part \d|\banal|bts)\b/gi, '').trim();

      // Plain substring check: the title is scraped text, so building a RegExp from it
      // would throw or mis-match on characters such as '(' or '.'.
      if (!/\band\b/.test(curatedTitle) && release.description?.includes(curatedTitle)) {
        // scene title is probably the actor name
        release.actors = [release.title];
      }
    }

    release.poster = query.img('.bigthumb', null, { origin: channel.url });
    release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

    release.tags = query.all('a[href*="/keywords"]', true);

    return release;
  });
}
/**
 * Scrape the scene items of a classic overview page.
 *
 * @param {Array<{ query: object }>} scenes - query helpers, one per scene item
 * @param {object} channel - channel whose `url` is prepended to the trailer path
 * @returns {object[]} one release per scene item
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {
      url: query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])'),
      entryId: query.url('.updateThumb img', 'alt'),
      title: query.q('.updateInfo h5 a', true),
      actors: query.all('.tour_update_models a', true),
      date: query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY'),
      poster: query.img('.updateThumb img'),
    };

    // the trailer path is the quoted argument inside the title link's onclick handler
    const trailerPath = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1];

    if (trailerPath) {
      release.trailer = { src: `${channel.url}${trailerPath}` };
    }

    return release;
  });
}
/**
 * Scrape a scene page that uses the blog layout.
 *
 * @param {{ query: object }} page - query helpers for the scene page
 * @param {string} url - scene URL; its path must contain `/scene/<id>/<id>`, which forms the entry ID
 * @param {object} channel - channel whose `url` serves as origin for image paths
 * @returns {object} release with entryId, title, description, actors, tags, poster and photos
 */
function scrapeSceneBlog({ query }, url, channel) {
  const release = {};

  const [, firstId, secondId] = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/);

  release.entryId = `${firstId}-${secondId}`;
  release.title = query.text('h4 strong, .videos h3');
  release.description = query.q('#about p, .videos p', true);

  const linkedActors = query
    .urls('a[href*="/girl/"]')
    .map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1])
    .filter(Boolean);

  // without actor links, the title is used as the single actor name
  release.actors = linkedActors.length > 0 ? linkedActors : [release.title];

  release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);
  release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });

  const preview = query.img('.previewmed img', null, { origin: channel.url });
  const thumbs = query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url });

  release.photos = [preview].concat(thumbs).filter(photo => photo);

  return release;
}
/**
 * Scrape a scene page of the default (non-blog) layout.
 *
 * @param {{ query: object, html: string }} page - query helpers for the scene page and its raw HTML
 * @param {string} url - scene page URL (not used by this scraper)
 * @param {object} channel - channel whose `url` is prepended to the trailer path
 * @returns {object} release with title, poster, entryId and, when an mp4 source is found, trailer
 */
function scrapeScene({ query, html }, url, channel) {
  const release = {};

  release.title = query.q('.updatesBlock h2', true);
  release.poster = query.meta('property="og:image"');

  // a page without an og:image must not abort the scrape; the entry ID is then left undefined
  release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

  const trailer = html.match(/src="(.+\.mp4)"/)?.[1];

  if (trailer) {
    release.trailer = {
      src: `${channel.url}${trailer}`,
    };
  }

  return release;
}
/**
 * Scrape an actor profile page made up of `key: value` paragraphs.
 *
 * @param {{ query: object }} page - query helpers for the profile page
 * @param {object} entity - entity whose `url` serves as origin for the avatar path
 * @returns {object} profile with age, height, eyes, avatar and, when a measurements string
 *   of the form `34C-24-34` is present, bust, cup, waist and hip
 */
function scrapeProfile({ query }, entity) {
  const profile = {};

  const bio = query.cnts('.info p').reduce((acc, info) => {
    const pair = info.match(/(\w+):\s*(.*)/);

    // paragraphs that are not a 'key: value' pair carry no bio field; skip them
    if (!pair) {
      return acc;
    }

    const [, key, value] = pair;

    return { ...acc, [slugify(key, '_')]: value };
  }, {});

  // null rather than NaN when the age is missing or not numeric
  profile.age = Number(bio.age) || null;
  profile.height = feetInchesToCm(bio.height);
  profile.eyes = bio.eyes || bio.eyecolor;

  // free-form values that do not follow the bust-waist-hip pattern are ignored
  const measurements = (bio.figure || bio.measurements)?.match(/(\d+)(\w+)-(\d+)-(\d+)/);

  if (measurements) {
    const [, bust, cup, waist, hip] = measurements;

    profile.bust = Number(bust);
    profile.cup = cup;
    profile.waist = Number(waist);
    profile.hip = Number(hip);
  }

  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });

  return profile;
}
/**
 * Fetch a page of latest scenes for a blog-layout channel through the tubular ElevatedX scraper.
 *
 * @param {object} channel - channel to fetch the scenes for
 * @param {number} page - overview page number
 * @returns {Promise<*>} whatever the tubular scraper's fetchLatest resolves to
 */
async function fetchLatestBlog(channel, page) {
  // hand the result back to the caller instead of only printing it
  const scenes = await elevatedx.tubular.fetchLatest(channel, page);

  return scenes;
}
/**
 * Fetch a page of latest scenes through the classic ElevatedX scraper.
 *
 * @param {object} channel - channel to fetch the scenes for
 * @param {number} [page=1] - overview page number
 * @returns {Promise<*>} whatever the classic scraper's fetchLatest resolves to
 */
async function fetchLatest(channel, page = 1) {
  return elevatedx.classic.fetchLatest(channel, page);
}
/**
 * Fetch the upcoming scenes listed on a channel's front page.
 *
 * @param {object} channel - channel whose `url` is requested
 * @returns {Promise<*>} the scraped upcoming releases, or the response status when the request failed
 */
async function fetchUpcoming(channel) {
  const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, channel);
}
/**
 * Look up an actor in the alphabetical model listing and scrape every exact name match.
 *
 * @param {{ name: string }} baseActor - actor to look up; the first character of the name selects the listing page
 * @param {object} entity - entity whose `url` hosts the listing and serves as origin for profile links
 * @returns {Promise<*>} one entry per matching model (a scraped profile, or the response status
 *   when that profile page failed), or the listing's response status when the listing failed
 */
async function fetchProfile(baseActor, entity) {
  const listing = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');

  if (!listing.ok) {
    return listing.status;
  }

  const namesakes = listing.items.filter(({ query }) => query.cnt('strong') === baseActor.name);

  return Promise.all(namesakes.map(async ({ query }) => {
    const profilePage = await qu.get(query.url('a', 'href', { origin: entity.url }));

    return profilePage.ok
      ? scrapeProfile(profilePage.item, entity)
      : profilePage.status;
  }));
}
// Default handlers: listing is delegated to the classic ElevatedX scraper, while upcoming
// scenes, profiles and scene pages are handled by the functions of this module.
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
fetchUpcoming,
fetchProfile,
scrapeScene,
// blog layout: listing is delegated to the tubular ElevatedX scraper
blog: {
fetchLatest: elevatedx.tubular.fetchLatest,
scrapeScene: scrapeSceneBlog,
},
};

View File

@ -18,7 +18,7 @@ const dorcel = require('./dorcel');
const elegantangel = require('./elegantangel');
const famedigital = require('./famedigital');
const firstanalquest = require('./firstanalquest');
const exploitedx = require('./exploitedx');
const elevatedx = require('./elevatedx');
const fullpornnetwork = require('./fullpornnetwork');
const gamma = require('./gamma');
const hitzefrei = require('./hitzefrei');
@ -87,7 +87,7 @@ const scrapers = {
dorcel,
elegantangel,
famedigital,
exploitedx,
exploitedx: elevatedx,
firstanalquest,
forbondage: porndoe,
fullpornnetwork,
@ -116,6 +116,7 @@ const scrapers = {
mikeadriano,
mindgeek,
naughtyamerica,
nebraskacoeds: elevatedx,
newsensations,
nubiles,
pascalssubsluts,
@ -156,6 +157,7 @@ const scrapers = {
aziani,
babes: mindgeek,
babevr: badoink,
backroomcastingcouch: elevatedx,
baddaddypov: fullpornnetwork,
badoinkvr: badoink,
bamvisions,
@ -163,6 +165,7 @@ const scrapers = {
bangbros,
blacked: vixen,
blackedraw: vixen,
blackambush: elevatedx,
blowpass,
boobpedia,
brattysis: nubiles,
@ -179,7 +182,7 @@ const scrapers = {
dtfsluts: fullpornnetwork,
elegantangel,
evilangel: gamma,
exploitedcollegegirls: exploitedx,
exploitedcollegegirls: elevatedx,
eyeontheguy: hush,
fakehub: mindgeek,
firstanalquest,
@ -218,6 +221,7 @@ const scrapers = {
mofos: mindgeek,
mugfucked: fullpornnetwork,
naughtyamerica,
nebraskacoeds: elevatedx,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,