Added Sex Like Real. Added separate missing date limit for upcoming updates.

2024-09-04 02:40:52 +02:00 · 2024-09-04 02:40:52 +02:00 · 51bd1bff14
parent 6cc872821c
commit 51bd1bff14
7 changed files with 224 additions and 1 deletions
--- a/config/default.js
+++ b/config/default.js
@ -380,6 +380,7 @@ module.exports = {
 	},
 	fetchAfter: [1, 'week'],
 	missingDateLimit: 3,
+	upcomingMissingDateLimit: 20,
 	memorySampling: {
 		enabled: false,
 		sampleDuration: 300000, // 5 minutes
--- a/seeds/00_tags.js
+++ b/seeds/00_tags.js
@ -1269,6 +1269,26 @@ const tags = [
 		implies: ['anal'],
 		description: 'Ass is the new pussy! Zero vaginal sex, anal only, guaranteed.',
 	},
+	{
+		name: 'fisheye',
+		slug: 'fisheye',
+	},
+	{
+		name: 'passthrough',
+		slug: 'passthrough',
+		description: 'Passthrough is a feature on VR headsets that allows you to see your real environment behind the VR content.',
+	},
+	{
+		name: 'AI passthrough',
+		slug: 'ai-passthrough',
+		description: '[Passthrough](/tag/passthrough) VR that is filmed with a regular background that is removed by AI, rather than traditional chroma keying (green screen).',
+		implies: ['passthrough'],
+	},
+	{
+		name: 'scripts',
+		slug: 'scripts',
+		description: 'Scripts for haptic sex toys.',
+	},
 ];

 const aliases = [
@ -1411,6 +1431,14 @@ const aliases = [
 		name: 'boob fucking',
 		for: 'titty-fucking',
 	},
+	{
+		name: 'tits fucking',
+		for: 'titty-fucking',
+	},
+	{
+		name: 'titfucking',
+		for: 'titty-fucking',
+	},
 	{
 		name: 'bts',
 		for: 'bts',
@ -2603,6 +2631,14 @@ const aliases = [
 		name: 'anal only',
 		for: 'only-anal',
 	},
+	{
+		name: '200°',
+		for: 'fisheye',
+	},
+	{
+		name: 'sex toy scripts',
+		for: 'scripts',
+	},
 ];

 const priorities = [ // higher index is higher priority
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@ -10651,6 +10651,13 @@ const sites = [
 		url: 'https://www.milfbundle.com/yourwifemymeat',
 		parent: 'score',
 	},
+	// SEX LIKE REAL
+	{
+		name: 'Sex Like Real',
+		slug: 'sexlikereal',
+		url: 'https://www.sexlikereal.com',
+		tags: ['vr'],
+	},
 	// SEXY HUB
 	{
 		slug: 'danejones',
--- a/src/argv.js
+++ b/src/argv.js
@ -168,6 +168,12 @@ const { argv } = yargs
 		default: config.missingDateLimit,
 		alias: ['null-date-limit'],
 	})
+	.option('upcoming-missing-date-limit', {
+		describe: 'Limit amount of scenes when dates are missing.',
+		type: 'number',
+		default: config.upcomingMissingDateLimit,
+		alias: ['upcoming-null-date-limit'],
+	})
 	.option('page', {
 		describe: 'Page to start scraping at',
 		type: 'number',
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@ -60,6 +60,7 @@ const privateNetwork = require('./private'); // reserved keyword
 const purgatoryx = require('./purgatoryx');
 const radical = require('./radical');
 const rickysroom = require('./rickysroom');
+const sexlikereal = require('./sexlikereal');
 const score = require('./score');
 const spizoo = require('./spizoo');
 const teamskeet = require('./teamskeet');
@ -159,6 +160,7 @@ const scrapers = {
 		rickysroom,
 		sayuncle: teamskeet,
 		score,
+		sexlikereal,
 		sexyhub: aylo,
 		spizoo,
 		swallowsalon: julesjordan,
@ -304,6 +306,7 @@ const scrapers = {
 		sayuncle: teamskeet,
 		score,
 		seehimfuck: hush,
+		sexlikereal,
 		sexyhub: aylo,
 		silverstonedvd: famedigital,
 		silviasaint: famedigital,
--- a/src/scrapers/sexlikereal.js
+++ b/src/scrapers/sexlikereal.js
@ -0,0 +1,170 @@
+'use strict';
+
+const unprint = require('unprint');
+
+/**
+ * Turn the scene tiles of a listing page into release objects.
+ *
+ * Each tile is read from its embedded JSON-LD first, falling back to the
+ * tile's HTML for the URL and title.
+ *
+ * @param {Array<{ query: object }>} scenes - One query context per scene tile.
+ * @param {object} channel - Channel being scraped; `channel.url` prefixes the scene URL.
+ * @returns {object[]} One release per tile.
+ */
+function scrapeAll(scenes, channel) {
+	// badge selector on the tile => tag slug it stands for
+	const badgeTags = [
+		['.c-grid-badge--fisheye-bg', 'fisheye'],
+		['.c-grid-badge--fleshlight-badge-multi', 'scripts'],
+		['.c-grid-badge--passthrough', 'passthrough'],
+		['.c-grid-badge--passthrough-ai', 'ai-passthrough'],
+	];
+
+	return scenes.map(({ query }) => {
+		const data = query.json('script[type="application/ld+json"]');
+
+		const release = {
+			url: unprint.prefixUrl(data?.url, channel.url) || query.url('article a[href*="/scenes"]'),
+			entryId: query.attribute(null, 'data-scene-id'),
+			title: data?.name || query.content('.c-grid-item-footer-title'),
+			description: data?.description,
+			date: unprint.extractDate(data?.datePublished, 'YYYY-MM-DD'),
+			duration: query.duration('.c-grid-ratio-bottom'),
+			tags: badgeTags
+				.map(([selector, tag]) => query.exists(selector) && tag)
+				.filter(Boolean),
+		};
+
+		const thumbnail = query.img('img[data-qa="grid-item-photo-img"]', { attribute: 'data-srcset' });
+
+		if (thumbnail) {
+			// try the desktop rendition first, keep the listed one as fallback
+			release.poster = [thumbnail.replace('-app.', '-desktop.'), thumbnail];
+		}
+
+		release.teaser = query.video('img[data-qa="grid-item-photo-img"]', { attribute: 'data-videosrc' });
+
+		return release;
+	});
+}
+
+/**
+ * Scrape a single scene page into a release object.
+ *
+ * Values are combined from three sources: the player settings read from
+ * `window.vrPlayerSettings`, the `VideoObject` JSON-LD block, and the page HTML.
+ *
+ * @param {object} context - Page context; `query` is the query helper, `window` the page's window object.
+ * @param {object} options - `url` of the scene page and the `entity` being scraped (`entity.url` is the origin for actor links).
+ * @returns {object} The scraped release.
+ */
+function scrapeScene({ query, window }, { url, entity }) {
+	const release = {};
+	const data = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "VideoObject")]');
+	const videoData = window.vrPlayerSettings?.videoData;
+
+	// the scene ID is the last dash-separated segment of the scene URL
+	release.entryId = videoData?.id || (data?.url || new URL(url).pathname).split('-').at(-1);
+
+	release.title = videoData?.title || data?.name || query.content('h1[data-qa="scene-title"]');
+	release.description = query.content('div[data-qa="scene-about-tab-text"]'); // data text is cut off
+
+	release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('time[data-qa="page-scene-studio-date"]', 'YYYY-MM-DD', { attribute: 'datetime' });
+	// release.duration = unprint.extractTimestamp(data?.duration); // video duration data seems to be missing hours, regularly leading to wrong numbers; rely on front page duration
+
+	const htmlActors = query.all('div[data-qa="scene-model-list-item"]').map((actorEl) => {
+		const avatar = unprint.query.content(actorEl, 'img[data-qa="scene-model-list-item-photo-img"]', { attribute: 'data-src' });
+
+		return {
+			name: unprint.query.content(actorEl, 'a[data-qa="scene-model-list-item-name"]'),
+			url: unprint.query.url(actorEl, 'a[data-qa="scene-model-list-item-photo-link-to-profile"], a[data-qa="scene-model-list-item-name"]', { origin: entity.url }),
+			avatar: [
+				avatar?.replace('-small.', '.'),
+				avatar,
+			],
+		};
+	});
+
+	// JSON-LD may hold a single actor object, a list, or nothing at all
+	const dataActors = [data?.actor ?? []].flat();
+
+	// prefer html actors for url and avatar; an empty array is truthy, so the
+	// fallback has to be chosen on length rather than with ||
+	release.actors = htmlActors.length > 0
+		? htmlActors
+		: dataActors.map((actor) => actor.name);
+
+	release.tags = query.contents('a[data-qa="scene-tags-list-item-link"]');
+
+	const fallbackPoster = data?.thumbnail || query.img(`link[rel="preload"][as="image"][href*="images/${release.entryId}"]`);
+
+	release.poster = [
+		videoData?.posterURL,
+		fallbackPoster?.replace('-app.', '-desktop.'),
+		fallbackPoster,
+	];
+
+	release.photos = query.imgs('.mediabox-img', { attribute: 'data-srcset' });
+
+	// guard src itself: filter() always returns an array, so the optional
+	// link belongs before it rather than after it
+	release.trailer = videoData?.src
+		?.filter((src) => src.encoding === 'h264')
+		.map((src) => ({
+			src: src.url,
+			type: src.mimeType,
+			quality: parseInt(src.quality, 10),
+			expectType: {
+				'binary/octet-stream': 'video/mp4',
+			},
+		}));
+
+	release.chapters = videoData?.timeStamps?.map((chapter) => ({
+		time: chapter.ts,
+		tags: [chapter.name],
+	}));
+
+	release.qualities = release.trailer?.map((trailer) => trailer.quality);
+
+	return release;
+}
+
+/**
+ * Build an actor profile from the `Person` JSON-LD block of a profile page.
+ *
+ * @param {object} context - Page context; only `query` is used.
+ * @param {object} entity - Entity being scraped; `entity.url` prefixes the profile URL.
+ * @returns {object|null} The profile, or null when the page has no `Person` JSON-LD.
+ */
+function scrapeProfile({ query }, entity) {
+	const person = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "Person")]');
+
+	if (!person) {
+		return null;
+	}
+
+	const { image } = person;
+
+	return {
+		url: unprint.prefixUrl(person.url, entity.url),
+		dateOfBirth: unprint.extractDate(person.birthDate, 'MMMM DD, YYYY'),
+		birthPlace: person.nationality, // origin country rather than nationality
+		// height and weight are provided in both cm and lbs, but this seems to be a manual conversion; the format isn't always the same
+		height: unprint.extractNumber(person.height, { match: /(\d+)\s*cm/, matchIndex: 1 }),
+		weight: unprint.extractNumber(person.weight, { match: /(\d+)\s*kg/, matchIndex: 1 }),
+		description: person.description,
+		// try the image URL without the '-small' suffix first, then the URL as given
+		avatar: [image?.replace('-small.', '.'), image],
+	};
+}
+
+/**
+ * Fetch one page of the most recent SLR Originals scenes.
+ *
+ * @param {object} channel - Channel being scraped, passed through to scrapeAll.
+ * @param {number} [page=1] - Listing page to request.
+ * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request was not ok.
+ */
+async function fetchLatest(channel, page = 1) {
+	const res = await unprint.get(
+		`https://www.sexlikereal.com/studios/slr-originals?sort=most_recent&page=${page}`,
+		{ selectAll: '.c-grid-item--scene' },
+	);
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeAll(res.context, channel);
+}
+
+/**
+ * Fetch the upcoming SLR Originals scenes.
+ *
+ * @param {object} channel - Channel being scraped, passed through to scrapeAll.
+ * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request was not ok.
+ */
+async function fetchUpcoming(channel) {
+	const res = await unprint.get(
+		'https://www.sexlikereal.com/studios/slr-originals?type=upcoming',
+		{ selectAll: '.c-grid-item--scene' },
+	);
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeAll(res.context, channel);
+}
+
+/**
+ * Fetch and scrape an actor's profile page.
+ *
+ * @param {object} actor - Only `slug` is used, as the last path segment of the profile URL.
+ * @param {object} entity - Entity being scraped; `entity.url` is the base of the profile URL.
+ * @returns {Promise<object|null|number>} Result of scrapeProfile, or the response status when the request was not ok.
+ */
+async function fetchProfile({ slug }, entity) {
+	const res = await unprint.get(`${entity.url}/pornstars/${slug}`);
+
+	if (!res.ok) {
+		return res.status;
+	}
+
+	return scrapeProfile(res.context, entity);
+}
+
+// Public scraper interface: listing fetchers, profile fetcher and the scene scraper.
+module.exports = {
+	fetchLatest,
+	fetchUpcoming,
+	fetchProfile,
+	// scrapeScene is exported together with parser options instead of as a bare function
+	scrapeScene: {
+		scraper: scrapeScene,
+		parser: {
+			// NOTE(review): presumably forwarded to the HTML parser so that page scripts are executed;
+			// scrapeScene reads window.vrPlayerSettings, which would need that - confirm against the caller
+			runScripts: 'dangerously',
+		},
+	},
+};
--- a/src/updates.js
+++ b/src/updates.js
@ -151,7 +151,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {

 	const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0)))
 		|| (hasDates && releases.filter((release) => moment(release.date).isAfter(argv.after)))
-		|| releases.slice(0, Math.max(argv.missingDateLimit, 0));
+		|| releases.slice(0, Math.max(isUpcoming ? argv.upcomingMissingDateLimit : argv.missingDateLimit, 0));

 	const { uniqueReleases, duplicateReleases } = argv.force
 		? { uniqueReleases: limitedReleases, duplicateReleases: [] }