From 7cb41c7c5d2e61c0f83c522d6eebb334cbec6b54 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 8 Sep 2024 05:41:05 +0200 Subject: [PATCH] Added dedicated ExploitedX scraper. --- seeds/02_sites.js | 29 ++++---- src/scrapers/exploitedx.js | 131 +++++++++++++++++++++++++++++++++++++ src/scrapers/scrapers.js | 7 +- src/scrapers/template.js | 19 +++--- 4 files changed, 161 insertions(+), 25 deletions(-) create mode 100755 src/scrapers/exploitedx.js diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 8f6c6993..9e5fc3bc 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -3885,26 +3885,33 @@ const sites = [ alias: ['excogi', 'ecg'], url: 'https://exploitedcollegegirls.com', parent: 'exploitedx', - parameters: { - latest: 'https://exploitedcollegegirls.com/categories/movies_{page}_d.html', - profile: 'https://exploitedcollegegirls.com/models/{actorSlug}.html', - }, }, { name: 'Backroom Casting Couch', slug: 'backroomcastingcouch', url: 'https://backroomcastingcouch.com', parent: 'exploitedx', - parameters: { - latest: 'https://backroomcastingcouch.com/categories/movies_{page}_d.html', - profile: 'https://backroomcastingcouch.com/models/{actorSlug}.html', - }, }, { - name: 'Black Ambush', - slug: 'blackambush', + name: 'BBC Surprise', + slug: 'bbcsurprise', + rename: 'blackambush', tags: ['bbc'], - url: 'https://blackambush.com', + url: 'https://bbcsurprise.com', + parent: 'exploitedx', + }, + { + name: 'ExCoGi Girls', + slug: 'excogigirls', + tags: ['lesbian'], + url: 'https://excogigirls.com', + parent: 'exploitedx', + }, + { + name: 'Hot MILFs Fuck', + slug: 'hotmilfsfuck', + tags: ['milf'], + url: 'https://hotmilfsfuck.com', + parent: 'exploitedx', }, // FILTHY KINGS diff --git a/src/scrapers/exploitedx.js b/src/scrapers/exploitedx.js new file mode 100755 index 00000000..5945c5ff --- /dev/null +++ b/src/scrapers/exploitedx.js @@ -0,0 +1,131 @@ +'use strict'; + +const unprint = require('unprint'); + +const slugify = require('../utils/slugify'); 
+const { convert } = require('../utils/convert'); + +function scrapeAll(scenes) { + return scenes.map(({ query }) => { + const release = {}; + + release.url = query.url('.img-div a[href*="/trailers"], .content-div h4 a[href*="/trailers"]'); // empty anchor in markup for some reason + release.entryId = new URL(release.url).pathname.match(/\/trailers\/(.*)\.html/)[1].toLowerCase(); + + release.title = query.content('.content-div h4 a[href]'); + + release.date = query.date('.more-info-div', 'MMM DD, YYYY'); + release.duration = query.duration('.more-info-div'); + + release.photoCount = query.number('.more-info-div', { match: /(\d+) photos/i, matchIndex: 1 }) + || query.number('//i[contains(@class, "fa-camera")]//following-sibling::text()[1]'); + + const poster = query.img('.video_placeholder') || query.poster(); + + if (poster) { + release.poster = [ + poster.replace('-1x', '-2x'), + poster.replace('-1x', '-3x'), + poster, + poster.replace('-1x', '-4x'), // too big, only use as fallback + ]; + } + + release.teaser = query.video(); + + return release; + }); +} + +function scrapeScene({ query }, { url }) { + const release = {}; + + release.entryId = new URL(url).pathname.match(/\/trailers\/(.*)\.html/)[1].toLowerCase(); + + // ExGoGiGirls deviates most from the other sites + release.title = query.content('.video-player .section-title, #scene-info h1') || query.content('.bio-article .section-title'); // model-name class not on all sites + release.description = (query.content('.descriptionFull') || query.content('.description'))?.replace(/(read more)|(read less)/i, '').trim(); // querying text nodes breaks a lot of descriptions + + release.date = query.date('//*[strong[contains(text(), "Released")]]', 'MMMM D, YYYY'); + release.duration = query.duration('//*[strong[contains(text(), "Runtime")]]'); + release.photoCount = query.number('//*[strong[contains(text(), "Runtime")]]', { match: /(\d+) photos/i, matchIndex: 1 }); + + release.actors = query.all('.models-list-thumbs 
li, [id="model bio"] .card').map((actorEl) => { // not all actors have links + const actorUrl = unprint.query.url(actorEl); + + return { + name: unprint.query.content(actorEl, 'span, .model-name'), + url: actorUrl, + entryId: actorUrl && new URL(actorUrl).pathname.match(/\/models\/(.*)\.html/)?.[1].toLowerCase(), + avatar: [ + unprint.query.img(actorEl, 'img', { attribute: 'src0_2x' }), + unprint.query.img(actorEl, 'img', { attribute: 'src0_1x' }), + unprint.query.img(actorEl, 'img', { attribute: 'src0_3x' }), // too big + ], + }; + }); + + release.tags = query.contents('.tags a[href]'); + + release.poster = query.img('.update_thumb', { attribute: 'src0_1x' }); + + return release; +} + +function scrapeProfile({ query }, _entity) { + const profile = {}; + + const bio = Object.fromEntries(query.all('.detail-div p').map((detailEl) => [ + slugify(unprint.query.content(detailEl, 'strong'), '_'), + unprint.query.text(detailEl), + ])); + + profile.age = Number(bio.age) || null; + profile.height = convert(bio.height, 'cm'); + profile.measurements = bio.measurements; + + profile.description = [ + bio.favorite_position && `Favorite position: ${bio.favorite_position}`, + bio.likes && `Likes: ${bio.likes}`, + ].filter(Boolean).join('\n'); + + profile.avatar = [ + query.img('.model_bio_thumb', { attribute: 'src0_2x' }), + query.img('.model_bio_thumb', { attribute: 'src0_1x' }), + query.img('.model_bio_thumb', { attribute: 'src0_3x' }), // too big + ]; + + return profile; +} + +async function fetchLatest(channel, page = 1) { + const url = `${channel.url}/categories/movies_${page}_d.html`; + const res = await unprint.get(url, { selectAll: '.main-article .item-update' }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + +async function fetchProfile({ url }, entity) { + if (!url) { + // ExploitedX has loads of performers with the same name, don't search for the name, only use known URLs + return null; + } + + const res = await 
unprint.get(url); + + if (res.ok) { + return scrapeProfile(res.context, entity); + } + + return res.status; +} + +module.exports = { + fetchLatest, + fetchProfile, + scrapeScene, +}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index e6bc2666..4466ffb8 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -21,6 +21,7 @@ const fabulouscash = require('./fabulouscash'); const famedigital = require('./famedigital'); const firstanalquest = require('./firstanalquest'); const elevatedx = require('./elevatedx'); +const exploitedx = require('./exploitedx'); const fullpornnetwork = require('./fullpornnetwork'); const gamma = require('./gamma'); const hitzefrei = require('./hitzefrei'); @@ -107,7 +108,7 @@ const scrapers = { dorcel, elegantangel: adultempire, famedigital, - exploitedx: elevatedx, + exploitedx, fabulouscash, firstanalquest, forbondage: porndoe, @@ -198,7 +199,6 @@ const scrapers = { asiam: modelmedia, babes: aylo, babevr: badoink, - backroomcastingcouch: elevatedx, baddaddypov: fullpornnetwork, badoinkvr: badoink, bamvisions, @@ -207,7 +207,6 @@ const scrapers = { bjraw: radical, blacked: vixen, blackedraw: vixen, - blackambush: elevatedx, bluedonkeymedia, delphine: modelmedia, meidenvanholland: bluedonkeymedia, @@ -228,7 +227,7 @@ const scrapers = { doubleviewcasting: firstanalquest, dtfsluts: fullpornnetwork, evilangel: gamma, - exploitedcollegegirls: elevatedx, + exploitedx, // only from known URL that will specify site eyeontheguy: hush, fakehub: aylo, firstanalquest, diff --git a/src/scrapers/template.js b/src/scrapers/template.js index e47c65f4..33c57c36 100755 --- a/src/scrapers/template.js +++ b/src/scrapers/template.js @@ -24,10 +24,8 @@ function scrapeAll(scenes) { release.poster = query.img('img.poster'); release.teaser = query.video('.teaser video'); - release.stars = query.number('.rating'); - release.likes = query.number('.likes'); - console.log(release); + return release; }); } @@ -40,14 +38,18 @@ function 
scrapeScene({ query }, { url }) { release.title = query.content('h3.title'); release.description = query.content('p.description'); + release.date = query.date('.date', 'MMMM D, YYYY'); + release.duration = query.duration('.duration'); + [release.poster, ...release.photos] = query.imgs('.preview-thumb'); release.trailer = query.video('.trailer video'); console.log(release); + return release; } -function scrapeProfile({ query }, actorName, entity, include) { +function scrapeProfile({ query }) { const profile = {}; profile.description = query.content('.bio-text'); @@ -55,11 +57,8 @@ function scrapeProfile({ query }, actorName, entity, include) { profile.avatar = query.img('.actor-photo img'); - if (include.releases) { - return scrapeAll(unprint.initAll(query.all('.scene'))); - } - console.log(profile); + return profile; } @@ -74,12 +73,12 @@ async function fetchLatest(channel, page = 1) { return res.status; } -async function fetchProfile({ name: actorName }, entity, include) { +async function fetchProfile({ name: actorName }, entity) { const url = `${entity.url}/actors/${slugify(actorName, '_')}`; const res = await unprint.get(url); if (res.ok) { - return scrapeProfile(res.context, actorName, entity, include); + return scrapeProfile(res.context, entity); } return res.status;