Separated Filthy Kings into its channels, upgraded Gamma scraper to accommodate.

2023-07-09 04:35:30 +02:00
parent c51577098a
commit 88a56794aa
58 changed files with 152 additions and 120 deletions
--- a/src/scrapers/gamma.js
+++ b/src/scrapers/gamma.js
@@ -143,8 +143,8 @@ async function getPhotos(albumPath, site, includeThumbnails = true) {
 	}
 }

-async function getFullPhotos(entryId, site) {
-	const res = await http.get(`${site.url}/media/signPhotoset/${entryId}`, {
+async function getFullPhotos(entryId, site, parameters) {
+	const res = await http.get(`${parameters.album || site.url}/media/signPhotoset/${entryId}`, {
 		headers: {
 			'X-Requested-With': 'XMLHttpRequest',
 		},
@@ -193,8 +193,8 @@ async function getThumbs(entryId, site, parameters) {

 async function getPhotosApi(entryId, site, parameters) {
 	const [photos, thumbs] = await Promise.all([
-		getFullPhotos(entryId, site, parameters),
-		getThumbs(entryId, site, parameters),
+		getFullPhotos(entryId, site, parameters).catch(() => { logger.error(`Gamma scraper failed to fetch photos for ${entryId}`); return []; }),
+		getThumbs(entryId, site, parameters).catch(() => { logger.error(`Gamma scraper failed to fetch photos for ${entryId}`); return []; }),
 	]);

 	return photos.concat(thumbs.slice(photos.length));
@@ -217,14 +217,14 @@ function curateTitle(title, channel) {
 	return title.replace(new RegExp(`^\\s*${channel.name}\\s*[:|-]\\s`, 'i'), '');
 }

-async function scrapeApiReleases(json, site) {
-	return json.map((scene) => {
-		if (site.parameters?.extract && scene.sitename !== site.parameters.extract) {
-			return null;
+async function scrapeApiReleases(json, site, options) {
+	return json.reduce((acc, scene) => {
+		if (options.parameters?.extract && scene.sitename !== options.parameters.extract) {
+			return acc;
 		}

-		if (site.parameters?.filterExclusive && scene.availableOnSite.length > 1) {
-			return null;
+		if (options.parameters?.filterExclusive && scene.availableOnSite.length > 1) {
+			return acc;
 		}

 		const release = {
@@ -238,8 +238,11 @@ async function scrapeApiReleases(json, site) {
 		release.title = curateTitle(scene.title, site);
 		release.path = `/${scene.url_title}/${release.entryId}`;

-		if (site.parameters?.scene) release.url = `${site.parameters.scene}${release.path}`;
-		else if (site.url && site.parameters?.scene !== false) release.url = `${site.url}/en/video${release.path}`;
+		if (options.parameters?.scene) {
+			release.url = `${options.parameters.scene}${release.path}`;
+		} else if (site.url && options.parameters?.scene !== false) {
+			release.url = `${site.url}/en/video${release.path}`;
+		}

 		release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
 		release.director = scene.directors[0]?.name || null;
@@ -276,10 +279,24 @@ async function scrapeApiReleases(json, site) {
 			];
 		}

+		if (options.parameters.filterNetwork && scene.mainChannel) {
+			return {
+				...acc,
+				unextracted: acc.unextracted.concat(release),
+			};
+		}
+
+		release.channel = slugify(scene.mainChannel?.id || scene.sitename, ''); // remove -
 		// release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`;

-		return release;
-	}).filter(Boolean);
+		return {
+			...acc,
+			scenes: acc.scenes.concat(release),
+		};
+	}, {
+		scenes: [],
+		unextracted: [],
+	});
 }

 function scrapeAll(scenes, site, networkUrl, hasTeaser = true) {
@@ -476,7 +493,7 @@ async function scrapeReleaseApi(data, site, options, movieScenes) {
 		release.scenes = await Promise.all(movieScenes.map((movieScene) => scrapeReleaseApi(movieScene, site, options)));
 	}

-	release.channel = data.sitename;
+	release.channel = slugify(data.mainChannel?.id || data.sitename, ''); // remove -
 	release.qualities = data.download_sizes;

 	return release;
@@ -638,11 +655,15 @@ async function fetchLatestApi(site, page = 1, options, preData, upcoming = false
 	const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
 	const { apiUrl } = await fetchApiCredentials(referer, site);

+	const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]${options.parameters.queryChannel
+		? `&filters=channels.id:${options.parameters.queryChannel === true ? site.slug : options.parameters.queryChannel}`
+		: `&filters=sitename:${site.slug}`}`;
+
 	const res = await http.post(apiUrl, {
 		requests: [
 			{
 				indexName: 'all_scenes',
-				params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug}`, // OR channels.id:${site.slug}`,
+				params,
 			},
 		],
 	}, {
@@ -654,7 +675,7 @@ async function fetchLatestApi(site, page = 1, options, preData, upcoming = false
 	});

 	if (res.status === 200 && res.body.results?.[0]?.hits) {
-		return scrapeApiReleases(res.body.results[0].hits, site);
+		return scrapeApiReleases(res.body.results[0].hits, site, options);
 	}

 	return res.status;
--- a/src/scrapers/julesjordan.js
+++ b/src/scrapers/julesjordan.js
@@ -2,12 +2,10 @@

 const util = require('util');
 const Promise = require('bluebird');
-const cheerio = require('cheerio');
-const moment = require('moment');
 const unprint = require('unprint');

+const argv = require('../argv');
 const qu = require('../utils/qu');
-const http = require('../utils/http');
 const { heightToCm } = require('../utils/convert');
 const slugify = require('../utils/slugify');

@@ -90,44 +88,23 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
 	});
 }

-function scrapeUpcoming(html, site) {
-	const $ = cheerio.load(html, { normalizeWhitespace: true });
-	const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
-
-	return scenesElements.map((element) => {
+function scrapeUpcoming(scenes, channel) {
+	return scenes.map(({ query, html }) => {
 		const release = {};

-		release.entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
+		release.title = query.text('.overlay-text', { join: false })?.[0];
+		release.date = query.date('.overlay-text', 'MM/DD/YYYY');

-		const details = $(element).find('.update_details_comingsoon')
-			.eq(1)
-			.children()
-			.remove();
+		release.actors = query.all('.update_models a').map((actorEl) => ({
+			name: unprint.query.content(actorEl),
+			url: unprint.query.url(actorEl, null),
+		}));

-		release.title = details
-			.end()
-			.text()
-			.trim();
+		release.poster = query.img('img') || query.img('img', { attribute: 'src0_1x' });

-		release.actors = details
-			.text()
-			.trim()
-			.split(', ');
+		release.entryId = channel.parameters?.entryIdFromTitle ? slugify(release.title) : getEntryId(html);

-		release.date = moment
-			.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
-			.toDate();
-
-		const photoElement = $(element).find('a img.thumbs');
-		const posterPath = photoElement.attr('src');
-		release.poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
-
-		const videoClass = $(element).find('.update_thumbnail div').attr('class');
-		const videoScript = $(element).find(`script:contains(${videoClass})`).html();
-
-		if (videoScript) {
-			release.teaser = videoScript.slice(videoScript.indexOf('https://'), videoScript.indexOf('.mp4') + 4);
-		}
+		// TODO: teaser

 		return release;
 	});
@@ -230,8 +207,11 @@ async function scrapeScene({ html, query }, context) {
 	}

 	// release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site
-	// release.photos = query.imgs('#images img');
-	release.photos = getPhotos(query, release, context);
+	if (argv.jjFullPhotos) {
+		release.photos = getPhotos(query, release, context);
+	} else {
+		release.photos = query.imgs('#images img');
+	}

 	if (query.exists('.update_dvds a')) {
 		release.movie = {
@@ -271,55 +251,6 @@ function scrapeMovie({ el, query }, url, site) {
 	};
 }

-/*
-function scrapeProfile(html, url, actorName, entity) {
-	const bio = document.querySelector('.model_bio').textContent;
-	const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb');
-
-	const profile = {
-		name: actorName,
-	};
-
-	const heightString = bio.match(/\d+ feet \d+ inches/);
-	const ageString = bio.match(/Age:\s*(\d{2})/);
-	const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
-	const measurementsString = bio.match(/\w+-\d+-\d+/);
-
-	if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
-	if (ageString) profile.age = Number(ageString[1]);
-
-	if (heightString) profile.height = heightToCm(heightString[0]);
-
-	if (measurementsString) {
-		const [bust, waist, hip] = measurementsString[0].split('-');
-
-		if (bust) profile.bust = bust;
-		if (waist) profile.waist = Number(waist);
-		if (hip) profile.hip = Number(hip);
-	}
-
-	if (avatarEl) {
-		const avatarSources = [
-			avatarEl.getAttribute('src0_3x'),
-			avatarEl.getAttribute('src0_2x'),
-			avatarEl.getAttribute('src0_1x'),
-			avatarEl.getAttribute('src0'),
-			avatarEl.getAttribute('src'),
-		]
-			.filter((avatar) => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
-			.map((avatar) => qu.prefixUrl(avatar, entity.url));
-
-		if (avatarSources.length) profile.avatar = avatarSources;
-	}
-
-	profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), (el) => el.href);
-
-	console.log(profile);
-
-	return profile;
-}
-*/
-
 function scrapeProfile({ query }, url, name, entity) {
 	const profile = { url };

@@ -368,13 +299,13 @@ async function fetchUpcoming(site) {
 	if (site.parameters?.upcoming === false) return null;

 	const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
-	const res = await http.get(url);
+	const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/parent::div' });

-	if (res.statusCode === 200) {
-		return scrapeUpcoming(res.body.toString(), site);
+	if (res.ok) {
+		return scrapeUpcoming(res.context, site);
 	}

-	return res.statusCode;
+	return res.status;
 }

 async function fetchMovie(url, site) {