Moved all of ExploitedX and Nebraska Coeds into generic ElevatedX scraper.

This commit is contained in:
DebaucheryLibrarian
2021-02-14 01:53:03 +01:00
parent f79505f3f6
commit 372b5da704
34 changed files with 251 additions and 252 deletions

View File

@@ -376,7 +376,7 @@ async function curateProfile(profile, actor) {
curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
// combined measurement value
const measurements = profile.measurements?.match(/(\d+)(\w+)-(\d+)-(\d+)/);
const measurements = profile.measurements?.match(/(\d+)(\w+)[-x](\d+)[-x](\d+)/); // ExCoGi uses x
if (measurements) {
curatedProfile.bust = Number(measurements[1]);

View File

@@ -4,6 +4,7 @@ const format = require('template-format');
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
function deriveEntryId(release) {
if (release.date && release.url) {
@@ -19,6 +20,42 @@ function deriveEntryId(release) {
return null;
}
/**
 * Build the poster and photo source lists for a release.
 *
 * @param {?string} posterPath - poster path found on the page
 * @param {Object} channel - entity whose `parameters.media` or `url` is used as URL prefix
 * @param {Object} [baseRelease] - earlier scraped release; its poster is used as fallback
 * @returns {Array} `[poster, photos]`: with a usable poster path, `poster` is a list of
 *   source URLs and `photos` holds the base release poster (if any); otherwise `poster`
 *   is the base release poster or null and `photos` is empty
 */
function extractPoster(posterPath, channel, baseRelease) {
  // paths containing "400.jpg" are not used as poster; the dot is escaped so only a literal match counts
  if (!posterPath || /400\.jpg/.test(posterPath)) {
    return [baseRelease?.poster || null, []];
  }

  const poster = qu.prefixUrl(posterPath, channel.parameters?.media || channel.url);

  // the -2x and -3x variants are upscaled versions; without a "-1x" marker in the path
  // all three entries would be identical, so duplicates are dropped
  const posterSources = [...new Set([
    poster,
    poster.replace('-1x', '-2x'),
    poster.replace('-1x', '-3x'),
  ])];

  return [posterSources, baseRelease?.poster ? [baseRelease.poster] : []];
}
/**
 * Collect the available image sources of an element, largest variant first.
 *
 * @param {Function} q - query function, called as `q(el, selector, attribute)` when `el`
 *   is given and as `q(selector, attribute)` otherwise
 * @param {string} selector - selector of the image element
 * @param {Object} site - entity whose `parameters.media` or `url` is prepended to each source
 * @param {*} [el] - optional element to query within
 * @returns {string[]} prefixed URLs for the `src0_3x`, `src0_2x` and `src0_1x` attributes that are set
 */
function getImageWithFallbacks(q, selector, site, el) {
  const readAttribute = el
    ? attribute => q(el, selector, attribute)
    : attribute => q(selector, attribute);

  const found = ['src0_3x', 'src0_2x', 'src0_1x']
    .map(attribute => readAttribute(attribute))
    .filter(Boolean);

  return found.map(src => `${site.parameters?.media || site.url}${src}`);
}
function scrapeAllClassic(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
@@ -79,6 +116,106 @@ function scrapeAllTubular(scenes, channel, accNetworkReleases) {
});
}
/**
 * Scrape a scene page of the classic layout.
 *
 * @param {Object} page - page context with a `query` helper and the raw `html`
 * @param {string} url - scene URL (unused here)
 * @param {Object} channel - entity whose `url` is prepended to the trailer path
 * @returns {Object} release with title, poster, entryId and, when found, trailer
 */
function scrapeSceneClassic({ query, html }, url, channel) {
  const release = {};

  release.title = query.q('.updatesBlock h2', true);
  release.poster = query.meta('property="og:image"');

  // the entry ID is the path segment after /content/ in the poster URL;
  // a page without og:image leaves it undefined instead of throwing
  release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

  const trailer = html?.match(/src="(.+\.mp4)"/)?.[1];

  if (trailer) {
    release.trailer = {
      src: `${channel.url}${trailer}`,
    };
  }

  return release;
}
/**
 * Scrape a scene page of the tubular layout.
 *
 * @param {Object} page - page context with a `query` helper and the raw `html`
 * @param {Object} entity - channel or network the scene was fetched for
 * @param {string} url - scene URL, used as referer for the trailer
 * @param {Object} [baseRelease] - earlier scraped release, passed on to extractPoster
 * @returns {Object} the scraped release
 */
function scrapeSceneTubular({ query, html }, entity, url, baseRelease) {
  const release = {};

  release.title = query.q('.trailer-section-head .section-title, .title-block .section-title', true);
  release.description = query.text('.row .update-info-block');

  release.date = query.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
  release.duration = query.dur('.update-info-row:nth-child(2)');

  release.actors = query.all('.models-list-thumbs a').map(el => ({
    name: query.cnt(el, 'span'),
    avatar: getImageWithFallbacks(query.q, 'img', entity, el),
    url: query.url(el, null),
  }));

  release.tags = query.all('.tags a', true);

  const posterPath = query.q('.player-thumb img', 'src0_1x');
  const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];

  [release.poster, release.photos] = extractPoster(posterPath, entity, baseRelease);

  if (trailer) {
    release.trailer = { src: qu.prefixUrl(trailer, entity.parameters?.media || entity.url), referer: url };
  }

  // pages without a rating element no longer throw here
  const stars = query.q('.update-rating', true)?.match(/\d.\d/)?.[0];

  if (stars) {
    release.stars = Number(stars);
  }

  // on a network, the channel is derived from the first tag matching a child channel's
  // `parameters.match` pattern or name; an empty pattern list is skipped, as an empty
  // RegExp would match every tag
  const channelPatterns = entity.type === 'network'
    ? (entity.children || []).map(channel => channel.parameters?.match || channel.name).filter(Boolean)
    : [];

  if (channelPatterns.length > 0) {
    const channelRegExp = new RegExp(channelPatterns.join('|'), 'i');
    const channel = release.tags.find(tag => channelRegExp.test(tag));

    if (channel) {
      release.channel = slugify(channel, '');
    }
  }

  release.entryId = deriveEntryId(release);

  return release;
}
/**
 * Scrape an actor profile page.
 *
 * @param {Object} page - page context with a `query` helper
 * @param {Object} entity - entity the profile belongs to; its `url` prefixes avatar paths
 * @param {Object} [parameters] - scraper parameters; `layout` ('classic' or 'tubular')
 *   selects how the scenes listed on the profile are scraped
 * @returns {Promise<Object>} the scraped profile
 */
async function scrapeProfile({ query }, entity, parameters) {
  const profile = {};

  const bio = query.cnt('.model_bio, .detail-div');
  const avatarEl = query.q('.model_bio_pic img, .model_bio_thumb');

  profile.age = Number(bio?.match(/Age:\s*(\d{2})/)?.[1]) || null;
  // use the captured date rather than the whole match, which includes the "Age:" label
  profile.dateOfBirth = qu.parseDate(bio?.match(/Age:\s*(\w+ \d{1,2}, \d{4})/)?.[1], 'MMMM D, YYYY');

  profile.height = convert(bio?.match(/\d+\s*(feet|')\s*\d+\s*(inches|"|$)/)?.[0], 'cm');
  profile.measurements = bio?.match(/\w+[-x]\d+[-x]\d+/)?.[0] || null;

  // capture the entire comma-separated list; a capture group inside the repetition
  // would only retain the last alias
  const aliasList = bio?.match(/also known as:\s*((?:[\w\s]+(?:,\s*)?)+)/i)?.[1];

  profile.aliases = aliasList
    ? aliasList.split(/,\s*/).map(alias => alias.trim()).filter(Boolean)
    : [];

  if (avatarEl) {
    const avatarSources = [
      avatarEl.getAttribute('src0_3x'),
      avatarEl.getAttribute('src0_2x'),
      avatarEl.getAttribute('src0_1x'),
      avatarEl.getAttribute('src0'),
      avatarEl.getAttribute('src'),
    ]
      .filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
      .map(avatar => qu.prefixUrl(avatar, entity.url));

    if (avatarSources.length) {
      profile.avatar = avatarSources;
    }
  }

  if (parameters?.layout === 'classic') {
    profile.scenes = scrapeAllClassic(qu.initAll(query.all('.bodyArea .updateItem')), entity);
  }

  if (parameters?.layout === 'tubular') {
    profile.scenes = scrapeAllTubular(qu.initAll(query.all('.modelfeature, .item-video')), entity);
  }

  return profile;
}
async function fetchLatest(site, page = 1, options, preData, allScraper) {
const url = (site.parameters?.latest && format(site.parameters.latest, { page }))
|| `${site.url}/categories/movies_${page}_d.html`;
@@ -92,6 +229,16 @@ async function fetchLatest(site, page = 1, options, preData, allScraper) {
return allScraper(res.items, site, preData?.uniqueReleases);
}
/**
 * Fetch the upcoming scenes shown on a classic layout channel's front page.
 *
 * @param {Object} channel - channel whose `url` is fetched
 * @returns {Promise<*>} the scraped releases, or the response status when the request failed
 */
async function fetchUpcomingClassic(channel) {
  const { ok, items, status } = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

  return ok ? scrapeAllClassic(items, channel) : status;
}
/**
 * Fetch the latest releases of a classic layout channel.
 * Delegates to fetchLatest with scrapeAllClassic as the list scraper.
 */
async function fetchLatestClassic(channel, page, options, preData) {
  const releases = await fetchLatest(channel, page, options, preData, scrapeAllClassic);

  return releases;
}
@@ -100,13 +247,53 @@ async function fetchLatestTubular(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllTubular);
}
/**
 * Fetch and scrape an actor profile, trying candidate URLs in order until one succeeds.
 *
 * Candidates are the known profile URL (if any), followed by URLs built from the actor
 * name slugged without separator and with dashes, using the entity's
 * `parameters.profile` template or `<entity.url>/models/<slug>.html`.
 *
 * @param {Object} actor - actor with `name` and optional profile `url`
 * @param {Object} context - context holding the `entity` and scraper `parameters`
 * @returns {Promise<?Object>} the scraped profile, or null when no candidate succeeded
 */
async function fetchProfile({ name: actorName, url }, { entity, parameters }) {
  if (!url && !parameters?.profile && !entity.url) {
    return null;
  }

  const template = entity.parameters?.profile;
  const candidates = new Set([url]);

  // only build slug URLs when there is something to build them from; otherwise an
  // entity without URL would yield requests to "undefined/models/..."
  if (template || entity.url) {
    [slugify(actorName, ''), slugify(actorName, '-')].forEach((actorSlug) => {
      candidates.add(template
        ? format(template, { actorSlug })
        : `${entity.url}/models/${actorSlug}.html`);
    });
  }

  // candidates are tried one at a time, stopping at the first success
  for (const profileUrl of candidates) {
    if (profileUrl) {
      const res = await qu.get(profileUrl); // eslint-disable-line no-await-in-loop

      // NOTE(review): other callers in this file check `res.ok` on qu results, so both
      // that and the original `statusCode` check are accepted; confirm the response shape
      if (res.ok || res.statusCode === 200) {
        return scrapeProfile(res.item, entity, parameters);
      }
    }
  }

  return null;
}
// Scraper methods are exported per layout; both layouts share fetchProfile.
module.exports = {
// handlers for the classic layout
classic: {
fetchLatest: fetchLatestClassic,
fetchUpcoming: fetchUpcomingClassic,
fetchProfile,
scrapeAll: scrapeAllClassic,
scrapeScene: scrapeSceneClassic,
},
// handlers for the tubular layout; no fetchUpcoming is exposed for it
tubular: {
fetchLatest: fetchLatestTubular,
fetchProfile,
scrapeAll: scrapeAllTubular,
scrapeScene: scrapeSceneTubular,
},
};

View File

@@ -1,201 +0,0 @@
'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
const elevatedx = require('./elevatedx');
/**
 * Scrape the releases listed on a blog-style updates page.
 *
 * @param {Object[]} scenes - list item contexts, each with a `query` helper
 * @param {Object} channel - channel whose `url` is used as origin for relative URLs
 * @returns {Object[]} the scraped releases
 */
function scrapeLatestBlog(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url });

    // prefer the two numeric IDs from the scene URL; fall back to the thumbnail path
    // when there is no URL or it has an unexpected shape, instead of throwing
    const urlIds = release.url
      ? new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/)
      : null;

    release.entryId = urlIds
      ? urlIds.slice(1, 3).join('-')
      : query.img('.bigthumb')?.match(/\/scenes\/(\w+)/)?.[1];

    release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)?.[1] || query.text('.videos h3');
    release.description = query.text('p');

    release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/);

    // remove common patterns so only the name is left
    const curatedTitle = release.title?.replace(/\b(part \d|\banal|bts)\b/gi, '').trim();

    // the title is matched literally; building a RegExp from page text can throw on
    // special characters, and an empty title would match any description
    if (curatedTitle && !/\band\b/.test(curatedTitle) && release.description?.includes(curatedTitle)) {
      // scene title is probably the actor name
      release.actors = [release.title];
    }

    release.poster = query.img('.bigthumb', null, { origin: channel.url });
    release.photos = query.imgs('.smallthumb', null, { origin: channel.url });

    release.tags = query.all('a[href*="/keywords"]', true);

    return release;
  });
}
/**
 * Scrape the releases from a list of update items.
 *
 * @param {Object[]} scenes - list item contexts, each with a `query` helper
 * @param {Object} channel - channel whose `url` is prepended to the trailer path
 * @returns {Object[]} releases with url, entryId, title, actors, date, poster and,
 *   when the title link's onclick attribute holds a quoted path, trailer
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {
      url: query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])'),
      entryId: query.url('.updateThumb img', 'alt'),
      title: query.q('.updateInfo h5 a', true),
      actors: query.all('.tour_update_models a', true),
      date: query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY'),
      poster: query.img('.updateThumb img'),
    };

    const onclick = query.q('.updateInfo h5 a', 'onclick');
    const trailerPath = onclick?.match(/'(.+)'/)?.[1];

    if (trailerPath) {
      release.trailer = { src: `${channel.url}${trailerPath}` };
    }

    return release;
  });
}
/**
 * Scrape a scene page of the blog-style layout.
 *
 * @param {Object} page - page context with a `query` helper
 * @param {string} url - absolute scene URL; the entry ID is taken from its path
 * @param {Object} channel - channel whose `url` is used as origin for image URLs
 * @returns {Object} the scraped release
 */
function scrapeSceneBlog({ query }, url, channel) {
  const release = {};

  // two numeric path segments form the entry ID; left undefined for other URL shapes
  release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/)?.slice(1, 3).join('-');

  release.title = query.text('h4 strong, .videos h3');
  release.description = query.q('#about p, .videos p', true);

  const actors = query.urls('a[href*="/girl/"]')
    .map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1])
    .filter(Boolean);

  if (actors.length > 0) {
    release.actors = actors;
  } else {
    // without actor links, the title is used as the actor name, if there is one
    release.actors = release.title ? [release.title] : [];
  }

  release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true);

  release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url });
  release.photos = [
    query.img('.previewmed img', null, { origin: channel.url }),
    ...query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url }),
  ].filter(Boolean);

  return release;
}
/**
 * Scrape a scene page.
 *
 * @param {Object} page - page context with a `query` helper and the raw `html`
 * @param {string} url - scene URL (unused here)
 * @param {Object} channel - channel whose `url` is prepended to the trailer path
 * @returns {Object} release with title, poster, entryId and, when found, trailer
 */
function scrapeScene({ query, html }, url, channel) {
  const release = {};

  release.title = query.q('.updatesBlock h2', true);
  release.poster = query.meta('property="og:image"');

  // the entry ID is the path segment after /content/ in the poster URL;
  // a page without og:image leaves it undefined instead of throwing
  release.entryId = release.poster?.match(/\/content\/(.*)\//)?.[1];

  const trailer = html?.match(/src="(.+\.mp4)"/)?.[1];

  if (trailer) {
    release.trailer = {
      src: `${channel.url}${trailer}`,
    };
  }

  return release;
}
/**
 * Scrape an actor profile page.
 *
 * The `.info p` entries are read as "Key: value" pairs, keyed by the slugified key.
 *
 * @param {Object} page - page context with a `query` helper
 * @param {Object} entity - entity whose `url` is used as origin for the avatar
 * @returns {Object} the scraped profile
 */
function scrapeProfile({ query }, entity) {
  const profile = {};

  const bio = query.cnts('.info p').reduce((acc, info) => {
    const pair = info.match(/(\w+):\s*(.*)/);

    // entries that are not "Key: value" pairs are skipped instead of throwing
    if (!pair) {
      return acc;
    }

    const [, key, value] = pair;

    return { ...acc, [slugify(key, '_')]: value };
  }, {});

  // null rather than NaN when the age is absent or not numeric
  profile.age = Number(bio.age) || null;
  profile.height = feetInchesToCm(bio.height);
  profile.eyes = bio.eyes || bio.eyecolor;

  // only values in the bust/cup-waist-hip form (e.g. 34C-24-36) are split up
  const measurements = (bio.figure || bio.measurements)?.match(/(\d+)(\w+)-(\d+)-(\d+)/);

  if (measurements) {
    const [, bust, cup, waist, hip] = measurements;

    profile.bust = Number(bust);
    profile.cup = cup;
    profile.waist = Number(waist);
    profile.hip = Number(hip);
  }

  profile.avatar = query.img('img.main-preview', 'src', { origin: entity.url });

  return profile;
}
/**
 * Fetch the latest releases of a blog-style channel through the shared elevatedx
 * tubular scraper.
 *
 * @param {Object} channel - channel to fetch releases for
 * @param {number} page - page number
 * @returns {Promise<*>} whatever the elevatedx tubular fetchLatest resolves to
 */
async function fetchLatestBlog(channel, page) {
  // the result is returned to the caller; it used to be logged and discarded
  return elevatedx.tubular.fetchLatest(channel, page);
}
/**
 * Fetch the latest releases of a channel through the shared elevatedx classic scraper.
 *
 * @param {Object} channel - channel to fetch releases for
 * @param {number} [page=1] - page number
 * @returns {Promise<*>} whatever the elevatedx classic fetchLatest resolves to
 */
async function fetchLatest(channel, page = 1) {
  return elevatedx.classic.fetchLatest(channel, page);
}
/**
 * Fetch the upcoming scenes shown on the channel's front page.
 *
 * @param {Object} channel - channel whose `url` is fetched
 * @returns {Promise<*>} the scraped releases, or the response status when the request failed
 */
async function fetchUpcoming(channel) {
  const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');

  if (res.ok) {
    return scrapeAll(res.items, channel);
  }

  return res.status;
}
/**
 * Look up an actor in the alphabetical model index and scrape every exactly matching profile.
 *
 * @param {Object} baseActor - actor with a `name`; its first character selects the index page
 * @param {Object} entity - entity whose `url` hosts the model index
 * @returns {Promise<*>} one entry per model whose name equals the actor name (the scraped
 *   profile, or the response status of a failed profile request), or the response
 *   status when the index request failed
 */
async function fetchProfile(baseActor, entity) {
  const initial = baseActor.name.slice(0, 1);
  const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${initial}`, '.model');

  if (!modelsRes.ok) {
    return modelsRes.status;
  }

  const matches = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);

  const pending = matches.map(async ({ query }) => {
    const modelRes = await qu.get(query.url('a', 'href', { origin: entity.url }));

    return modelRes.ok
      ? scrapeProfile(modelRes.item, entity)
      : modelRes.status;
  });

  return Promise.all(pending);
}
// Latest releases are served directly by the shared elevatedx scraper; the local
// fetchLatest and fetchLatestBlog wrappers are not exported.
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
fetchUpcoming,
fetchProfile,
scrapeScene,
// handlers for the blog-style layout
blog: {
fetchLatest: elevatedx.tubular.fetchLatest,
scrapeScene: scrapeSceneBlog,
},
};

View File

@@ -18,7 +18,7 @@ const dorcel = require('./dorcel');
const elegantangel = require('./elegantangel');
const famedigital = require('./famedigital');
const firstanalquest = require('./firstanalquest');
const exploitedx = require('./exploitedx');
const elevatedx = require('./elevatedx');
const fullpornnetwork = require('./fullpornnetwork');
const gamma = require('./gamma');
const hitzefrei = require('./hitzefrei');
@@ -87,7 +87,7 @@ const scrapers = {
dorcel,
elegantangel,
famedigital,
exploitedx,
exploitedx: elevatedx,
firstanalquest,
forbondage: porndoe,
fullpornnetwork,
@@ -116,6 +116,7 @@ const scrapers = {
mikeadriano,
mindgeek,
naughtyamerica,
nebraskacoeds: elevatedx,
newsensations,
nubiles,
pascalssubsluts,
@@ -156,6 +157,7 @@ const scrapers = {
aziani,
babes: mindgeek,
babevr: badoink,
backroomcastingcouch: elevatedx,
baddaddypov: fullpornnetwork,
badoinkvr: badoink,
bamvisions,
@@ -163,6 +165,7 @@ const scrapers = {
bangbros,
blacked: vixen,
blackedraw: vixen,
blackambush: elevatedx,
blowpass,
boobpedia,
brattysis: nubiles,
@@ -179,7 +182,7 @@ const scrapers = {
dtfsluts: fullpornnetwork,
elegantangel,
evilangel: gamma,
exploitedcollegegirls: exploitedx,
exploitedcollegegirls: elevatedx,
eyeontheguy: hush,
fakehub: mindgeek,
firstanalquest,
@@ -218,6 +221,7 @@ const scrapers = {
mofos: mindgeek,
mugfucked: fullpornnetwork,
naughtyamerica,
nebraskacoeds: elevatedx,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,