Added dedicated ExploitedX scraper.

2024-09-08 05:41:05 +02:00 · 2024-09-08 05:41:05 +02:00 · 7cb41c7c5d
parent 6b1fec4b30
commit 7cb41c7c5d
4 changed files with 161 additions and 25 deletions
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@ -3885,26 +3885,33 @@ const sites = [
 		alias: ['excogi', 'ecg'],
 		url: 'https://exploitedcollegegirls.com',
 		parent: 'exploitedx',
-		parameters: {
-			latest: 'https://exploitedcollegegirls.com/categories/movies_{page}_d.html',
-			profile: 'https://exploitedcollegegirls.com/models/{actorSlug}.html',
-		},
 	},
 	{
 		name: 'Backroom Casting Couch',
 		slug: 'backroomcastingcouch',
 		url: 'https://backroomcastingcouch.com',
 		parent: 'exploitedx',
-		parameters: {
-			latest: 'https://backroomcastingcouch.com/categories/movies_{page}_d.html',
-			profile: 'https://backroomcastingcouch.com/models/{actorSlug}.html',
-		},
 	},
 	{
-		name: 'Black Ambush',
-		slug: 'blackambush',
+		name: 'BBC Surprise',
+		slug: 'bbcsurprise',
+		rename: 'blackambush',
 		tags: ['bbc'],
-		url: 'https://blackambush.com',
+		url: 'https://bbcsurprise.com',
+		parent: 'exploitedx',
+	},
+	{
+		name: 'ExCoGi Girls',
+		slug: 'excogigirls',
+		tags: ['lesbian'],
+		url: 'https://excogigirls.com',
+		parent: 'exploitedx',
+	},
+	{
+		name: 'Hot MILFs Fuck',
+		slug: 'hotmilfsfuck',
+		tags: ['milf'],
+		url: 'https://hotmilfsfuck.com',
 		parent: 'exploitedx',
 	},
 	// FILTHY KINGS
--- a/src/scrapers/exploitedx.js
+++ b/src/scrapers/exploitedx.js
@ -0,0 +1,131 @@
+'use strict';
+
+const unprint = require('unprint');
+
+const slugify = require('../utils/slugify');
+const { convert } = require('../utils/convert');
+
+function scrapeAll(scenes) { // builds one release object per listing item; each item must expose a `query` helper
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.url = query.url('.img-div a[href*="/trailers"], .content-div h4 a[href*="/trailers"]'); // empty anchor in markup for some reason
+		release.entryId = new URL(release.url).pathname.match(/\/trailers\/(.*)\.html/)[1].toLowerCase(); // lower-cased <name> from /trailers/<name>.html; NOTE(review): throws when the URL is absent or does not match — confirm every item links to a trailer page
+
+		release.title = query.content('.content-div h4 a[href]');
+
+		release.date = query.date('.more-info-div', 'MMM DD, YYYY');
+		release.duration = query.duration('.more-info-div');
+
+		release.photoCount = query.number('.more-info-div', { match: /(\d+) photos/i, matchIndex: 1 }) // first try the "<n> photos" text
+			|| query.number('//i[contains(@class, "fa-camera")]//following-sibling::text()[1]'); // otherwise the first text node after the camera icon
+
+		const poster = query.img('.video_placeholder') || query.poster();
+
+		if (poster) {
+			release.poster = [ // candidate URLs made by swapping the '-1x' marker; presumably tried in array order downstream — TODO confirm
+				poster.replace('-1x', '-2x'),
+				poster.replace('-1x', '-3x'),
+				poster,
+				poster.replace('-1x', '-4x'), // too big, only use as fallback
+			];
+		}
+
+		release.teaser = query.video();
+
+		return release;
+	});
+}
+
+function scrapeScene({ query }, { url }) { // builds a release object from a scene page; `url` is only used to derive entryId
+	const release = {};
+
+	release.entryId = new URL(url).pathname.match(/\/trailers\/(.*)\.html/)[1].toLowerCase(); // lower-cased <name> from /trailers/<name>.html; NOTE(review): throws when the path does not match
+
+	// ExCoGi Girls deviates most from the other sites
+	release.title = query.content('.video-player .section-title, #scene-info h1') || query.content('.bio-article .section-title'); // model-name class not on all sites
+	release.description = (query.content('.descriptionFull') || query.content('.description'))?.replace(/(read more)|(read less)/i, '').trim(); // querying text nodes breaks a lot of descriptions
+
+	release.date = query.date('//*[strong[contains(text(), "Released")]]', 'MMMM D, YYYY'); // element whose <strong> child contains "Released"
+	release.duration = query.duration('//*[strong[contains(text(), "Runtime")]]'); // element whose <strong> child contains "Runtime"
+	release.photoCount = query.number('//*[strong[contains(text(), "Runtime")]]', { match: /(\d+) photos/i, matchIndex: 1 }); // "<n> photos" is read from the same "Runtime" element
+
+	release.actors = query.all('.models-list-thumbs li, [id="model bio"] .card').map((actorEl) => { // not all actors have links
+		const actorUrl = unprint.query.url(actorEl);
+
+		return {
+			name: unprint.query.content(actorEl, 'span, .model-name'),
+			url: actorUrl,
+			entryId: actorUrl && new URL(actorUrl).pathname.match(/\/models\/(.*)\.html/)?.[1].toLowerCase(), // falsy without a link; undefined when the path is not /models/<name>.html
+			avatar: [ // 2x listed first, then 1x, 3x last
+				unprint.query.img(actorEl, 'img', { attribute: 'src0_2x' }),
+				unprint.query.img(actorEl, 'img', { attribute: 'src0_1x' }),
+				unprint.query.img(actorEl, 'img', { attribute: 'src0_3x' }), // too big
+			],
+		};
+	});
+
+	release.tags = query.contents('.tags a[href]');
+
+	release.poster = query.img('.update_thumb', { attribute: 'src0_1x' }); // image URL is read from the src0_1x attribute
+
+	return release;
+}
+
+function scrapeProfile({ query }, _entity) { // builds a profile object from a performer page; _entity is not used in the body
+	const profile = {};
+
+	const bio = Object.fromEntries(query.all('.detail-div p').map((detailEl) => [ // one entry per '.detail-div p' row
+		slugify(unprint.query.content(detailEl, 'strong'), '_'), // key: the row's <strong> label passed through slugify with '_'
+		unprint.query.text(detailEl), // value: whatever unprint.query.text returns for the row
+	]));
+
+	profile.age = Number(bio.age) || null; // null when the age is missing, non-numeric or 0
+	profile.height = convert(bio.height, 'cm'); // presumably converts the height text to centimetres — confirm against utils/convert
+	profile.measurements = bio.measurements;
+
+	profile.description = [ // only the fields present in bio are kept, one per line; empty string when neither exists
+		bio.favorite_position && `Favorite position: ${bio.favorite_position}`,
+		bio.likes && `Likes: ${bio.likes}`,
+	].filter(Boolean).join('\n');
+
+	profile.avatar = [ // 2x listed first, then 1x, 3x last
+		query.img('.model_bio_thumb', { attribute: 'src0_2x' }),
+		query.img('.model_bio_thumb', { attribute: 'src0_1x' }),
+		query.img('.model_bio_thumb', { attribute: 'src0_3x' }), // too big
+	];
+
+	return profile;
+}
+
+async function fetchLatest(channel, page = 1) { // fetches one page of the channel's movie listing and scrapes it
+	const url = `${channel.url}/categories/movies_${page}_d.html`; // listing URL is built from channel.url and the page number
+	const res = await unprint.get(url, { selectAll: '.main-article .item-update' });
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel); // scrapeAll declares only (scenes); the channel argument is ignored
+	}
+
+	return res.status; // non-OK response: the status is returned instead of releases
+}
+
+async function fetchProfile({ url }, entity) { // fetches and scrapes a performer page, but only when a profile URL is supplied
+	if (!url) {
+		// ExploitedX has loads of performers with the same name, don't search for the name, only use known URLs
+		return null;
+	}
+
+	const res = await unprint.get(url);
+
+	if (res.ok) {
+		return scrapeProfile(res.context, entity);
+	}
+
+	return res.status; // non-OK response: the status is returned instead of a profile
+}
+
+module.exports = { // scrapeAll and scrapeProfile are not exported; they are called by the fetchers in this file
+	fetchLatest,
+	fetchProfile,
+	scrapeScene,
+};
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@ -21,6 +21,7 @@ const fabulouscash = require('./fabulouscash');
 const famedigital = require('./famedigital');
 const firstanalquest = require('./firstanalquest');
 const elevatedx = require('./elevatedx');
+const exploitedx = require('./exploitedx');
 const fullpornnetwork = require('./fullpornnetwork');
 const gamma = require('./gamma');
 const hitzefrei = require('./hitzefrei');
@ -107,7 +108,7 @@ const scrapers = {
 		dorcel,
 		elegantangel: adultempire,
 		famedigital,
-		exploitedx: elevatedx,
+		exploitedx,
 		fabulouscash,
 		firstanalquest,
 		forbondage: porndoe,
@ -198,7 +199,6 @@ const scrapers = {
 		asiam: modelmedia,
 		babes: aylo,
 		babevr: badoink,
-		backroomcastingcouch: elevatedx,
 		baddaddypov: fullpornnetwork,
 		badoinkvr: badoink,
 		bamvisions,
@ -207,7 +207,6 @@ const scrapers = {
 		bjraw: radical,
 		blacked: vixen,
 		blackedraw: vixen,
-		blackambush: elevatedx,
 		bluedonkeymedia,
 		delphine: modelmedia,
 		meidenvanholland: bluedonkeymedia,
@ -228,7 +227,7 @@ const scrapers = {
 		doubleviewcasting: firstanalquest,
 		dtfsluts: fullpornnetwork,
 		evilangel: gamma,
-		exploitedcollegegirls: elevatedx,
+		exploitedx, // only from known URL that will specify site
 		eyeontheguy: hush,
 		fakehub: aylo,
 		firstanalquest,
--- a/src/scrapers/template.js
+++ b/src/scrapers/template.js
@ -24,10 +24,8 @@ function scrapeAll(scenes) {
 		release.poster = query.img('img.poster');
 		release.teaser = query.video('.teaser video');

-		release.stars = query.number('.rating');
-		release.likes = query.number('.likes');
-
 		console.log(release);
+
 		return release;
 	});
 }
@ -40,14 +38,18 @@ function scrapeScene({ query }, { url }) {
 	release.title = query.content('h3.title');
 	release.description = query.content('p.description');

+	release.date = query.date('.date', 'MMMM D, YYYY');
+	release.duration = query.duration('.duration');
+
 	[release.poster, ...release.photos] = query.imgs('.preview-thumb');
 	release.trailer = query.video('.trailer video');

 	console.log(release);
+
 	return release;
 }

-function scrapeProfile({ query }, actorName, entity, include) {
+function scrapeProfile({ query }) {
 	const profile = {};

 	profile.description = query.content('.bio-text');
@ -55,11 +57,8 @@ function scrapeProfile({ query }, actorName, entity, include) {

 	profile.avatar = query.img('.actor-photo img');

-	if (include.releases) {
-		return scrapeAll(unprint.initAll(query.all('.scene')));
-	}
-
 	console.log(profile);
+
 	return profile;
 }

@ -74,12 +73,12 @@ async function fetchLatest(channel, page = 1) {
 	return res.status;
 }

-async function fetchProfile({ name: actorName }, entity, include) {
+async function fetchProfile({ name: actorName }, entity) {
 	const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
 	const res = await unprint.get(url);

 	if (res.ok) {
-		return scrapeProfile(res.context, actorName, entity, include);
+		return scrapeProfile(res.context, entity);
 	}

 	return res.status;