Added generic ElevatedX scraper. Changed FCUK to ExploitedX network. Testing ElevatedX scraper with ExploitedX network.

DebaucheryLibrarian
2021-02-13 04:49:00 +01:00
parent 4c306effb7
commit f79505f3f6
74 changed files with 158 additions and 44 deletions


@@ -435,6 +435,10 @@ async function storeFile(media, options) {
return storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options);
}
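// posters, photos and covers must be images; any other detected mimetype is rejected here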
if (['posters', 'photos', 'covers'].includes(media.role)) {
throw new Error(`Media for '${media.role}' must be an image, but '${media.meta.mimetype}' was detected`);
}
const [stat] = await Promise.all([
fsPromises.stat(media.file.path),
fsPromises.mkdir(path.join(config.media.path, filedir), { recursive: true }),

src/scrapers/elevatedx.js (new file, 112 lines)

@@ -0,0 +1,112 @@
'use strict';
const format = require('template-format');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
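// derives a stable entry ID from the release date plus either the trailer URL slug or the title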
function deriveEntryId(release) {
if (release.date && release.url) {
const slug = new URL(release.url).pathname.match(/\/trailers\/(.*)\.html/)?.[1];
// fall back to the title-based ID below if the URL does not contain a trailer slug
if (slug) {
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`;
}
}
if (release.date && release.title) {
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
}
return null;
}
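// scrapes listing items from the 'classic' ElevatedX tour layout (.updateInfo/.updateThumb markup)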
function scrapeAllClassic(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])');
release.entryId = query.url('.updateThumb img', 'alt');
release.title = query.cnt('.updateInfo h5 a');
release.actors = query.cnts('.tour_update_models a');
release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY');
release.poster = query.img('.updateThumb img');
const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1];
if (trailer) {
release.trailer = `${channel.url}${trailer}`;
}
return release;
});
}
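// scrapes listing items from the 'tubular' ElevatedX layout; accNetworkReleases lets aggregator channels skip scenes already picked up from categorized sites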
function scrapeAllTubular(scenes, channel, accNetworkReleases) {
return scenes.map(({ query }) => {
const release = {};
release.title = query.q('h4 a', 'title') || query.q('h4 a', true);
release.url = query.url('h4 a');
release.date = query.date('.more-info-div', 'MMM D, YYYY');
release.duration = query.dur('.more-info-div');
const posterPath = query.q('.img-div img', 'src0_1x') || query.img('img.video_placeholder');
if (posterPath) {
const poster = /^http/.test(posterPath) ? posterPath : `${channel.parameters?.media || channel.url}${posterPath}`;
release.poster = [
poster.replace('-1x', '-3x'),
poster.replace('-1x', '-2x'),
poster,
];
}
release.teaser = query.video();
// release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1];
release.entryId = deriveEntryId(release);
if (channel.parameters?.accFilter && accNetworkReleases?.map(accRelease => accRelease.entryId).includes(release.entryId)) {
// filter out releases that were already scraped from a categorized site; requires sequential site scraping
return null;
}
return release;
});
}
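// generic latest-page fetcher; the layout-specific scrapeAll function is passed in by the wrappers below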
async function fetchLatest(site, page = 1, options, preData, allScraper) {
const url = (site.parameters?.latest && format(site.parameters.latest, { page }))
|| `${site.url}/categories/movies_${page}_d.html`;
const res = await qu.getAll(url, '.modelfeature, .item-video, .bodyArea .updateItem');
if (!res.ok) {
return res.status;
}
return allScraper(res.items, site, preData?.uniqueReleases);
}
async function fetchLatestClassic(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllClassic);
}
async function fetchLatestTubular(channel, page, options, preData) {
return fetchLatest(channel, page, options, preData, scrapeAllTubular);
}
module.exports = {
classic: {
fetchLatest: fetchLatestClassic,
scrapeAll: scrapeAllClassic,
},
tubular: {
fetchLatest: fetchLatestTubular,
scrapeAll: scrapeAllTubular,
},
};
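For reference, other ElevatedX-based site scrapers can delegate to this module by re-exporting the variant that matches their tour layout. A minimal sketch, assuming a hypothetical channel on the classic layout (the exploitedx scraper below wires up both variants the same way):

'use strict';
// hypothetical channel scraper that simply re-exports the generic ElevatedX classic scrapers
const elevatedx = require('./elevatedx');
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
scrapeAll: elevatedx.classic.scrapeAll,
};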


@@ -4,6 +4,8 @@ const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
const elevatedx = require('./elevatedx');
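// generic ElevatedX scrapers; used below for both the classic tour pages and the blog/tubular layout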
function scrapeLatestBlog(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
@@ -134,52 +136,41 @@ function scrapeProfile({ query }, entity) {
}
async function fetchLatestBlog(channel, page) {
/*
const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`;
const res = await qu.getAll(url, '.videos');
const url = `${channel.parameters?.latest || channel.url}/movies_${page}_d.html`;
const res = await qu.getAll(url, '.item-update');
*/
const scenes = await elevatedx.tubular.fetchLatest(channel, page);
console.log(scenes);
// return res.ok ? scrapeLatestBlog(res.items, channel) : res.status;
}
async function fetchLatest(channel, page = 1) {
if (channel.parameters?.blog) {
return fetchLatestBlog(channel, page);
}
/*
const url = `${channel.url}/categories/Movies_${page}_d.html`;
const res = await qu.getAll(url, '.bodyArea .updateItem');
return res.ok ? scrapeAll(res.items, channel) : res.status;
*/
const scenes = await elevatedx.classic.fetchLatest(channel, page);
return scenes;
}
async function fetchUpcoming(channel) {
if (channel.parameters?.blog) {
return [];
}
const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem');
return res.ok ? scrapeAll(res.items, channel) : res.status;
}
async function fetchScene(url, channel) {
const res = await qu.get(url);
if (res.ok) {
if (channel.parameters?.blog) {
return scrapeSceneBlog(res.item, url, channel);
}
return scrapeScene(res.item, url, channel);
}
return res.status;
}
async function fetchProfile(baseActor, entity) {
const modelsRes = await qu.getAll(`${entity.url}/free/girls.php?alpha=${baseActor.name.slice(0, 1)}`, '.model');
console.log(baseActor);
if (modelsRes.ok) {
const models = modelsRes.items.filter(({ query }) => query.cnt('strong') === baseActor.name);
@@ -199,8 +190,12 @@ async function fetchProfile(baseActor, entity) {
}
module.exports = {
fetchLatest: elevatedx.classic.fetchLatest,
fetchScene,
fetchUpcoming,
fetchProfile,
scrapeScene,
blog: {
fetchLatest: elevatedx.tubular.fetchLatest,
scrapeScene: scrapeSceneBlog,
},
};


@@ -368,4 +368,5 @@ module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeAllT1,
};


@@ -18,7 +18,7 @@ const dorcel = require('./dorcel');
const elegantangel = require('./elegantangel');
const famedigital = require('./famedigital');
const firstanalquest = require('./firstanalquest');
const fcuk = require('./fcuk');
const exploitedx = require('./exploitedx');
const fullpornnetwork = require('./fullpornnetwork');
const gamma = require('./gamma');
const hitzefrei = require('./hitzefrei');
@@ -87,7 +87,7 @@ const scrapers = {
dorcel,
elegantangel,
famedigital,
fcuk,
exploitedx,
firstanalquest,
forbondage: porndoe,
fullpornnetwork,
@@ -179,7 +179,7 @@ const scrapers = {
dtfsluts: fullpornnetwork,
elegantangel,
evilangel: gamma,
exploitedcollegegirls: fcuk,
exploitedcollegegirls: exploitedx,
eyeontheguy: hush,
fakehub: mindgeek,
firstanalquest,