diff --git a/config/default.js b/config/default.js
index e9af034f..5335dbb3 100755
--- a/config/default.js
+++ b/config/default.js
@@ -380,6 +380,7 @@ module.exports = {
 	},
 	fetchAfter: [1, 'week'],
 	missingDateLimit: 3,
+	upcomingMissingDateLimit: 20,
 	memorySampling: {
 		enabled: false,
 		sampleDuration: 300000, // 5 minutes
diff --git a/seeds/00_tags.js b/seeds/00_tags.js
index ab74ee15..85d1a389 100755
--- a/seeds/00_tags.js
+++ b/seeds/00_tags.js
@@ -1269,6 +1269,26 @@ const tags = [
 		implies: ['anal'],
 		description: 'Ass is the new pussy! Zero vaginal sex, anal only, guaranteed.',
 	},
+	{
+		name: 'fisheye',
+		slug: 'fisheye',
+	},
+	{
+		name: 'passthrough',
+		slug: 'passthrough',
+		description: 'Passthrough is a feature on VR headsets that allows you to see your real environment behind the VR content.',
+	},
+	{
+		name: 'AI passthrough',
+		slug: 'ai-passthrough',
+		description: '[Passthrough](/tag/passthrough) VR that is filmed with a regular background that is removed by AI, rather than traditional chroma keying (green screen).',
+		implies: ['passthrough'],
+	},
+	{
+		name: 'scripts',
+		slug: 'scripts',
+		description: 'Scripts for haptic sex toys.',
+	},
 ];
 
 const aliases = [
@@ -1411,6 +1431,14 @@ const aliases = [
 		name: 'boob fucking',
 		for: 'titty-fucking',
 	},
+	{
+		name: 'tits fucking',
+		for: 'titty-fucking',
+	},
+	{
+		name: 'titfucking',
+		for: 'titty-fucking',
+	},
 	{
 		name: 'bts',
 		for: 'bts',
@@ -2603,6 +2631,14 @@ const aliases = [
 		name: 'anal only',
 		for: 'only-anal',
 	},
+	{
+		name: '200°',
+		for: 'fisheye',
+	},
+	{
+		name: 'sex toy scripts',
+		for: 'scripts',
+	},
 ];
 
 const priorities = [ // higher index is higher priority
diff --git a/seeds/02_sites.js b/seeds/02_sites.js
index e5fec448..4a013eea 100755
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -10651,6 +10651,13 @@ const sites = [
 		url: 'https://www.milfbundle.com/yourwifemymeat',
 		parent: 'score',
 	},
+	// SEX LIKE REAL
+	{
+		name: 'Sex Like Real',
+		slug: 'sexlikereal',
+		url: 'https://www.sexlikereal.com',
+		tags: ['vr'],
+	},
 	// SEXY HUB
 	{
 		slug: 'danejones',
diff --git a/src/argv.js b/src/argv.js
index d7411a21..4bff43c0 100755
--- a/src/argv.js
+++ b/src/argv.js
@@ -168,6 +168,12 @@ const { argv } = yargs
 		default: config.missingDateLimit,
 		alias: ['null-date-limit'],
 	})
+	.option('upcoming-missing-date-limit', {
+		describe: 'Limit amount of upcoming scenes when dates are missing.',
+		type: 'number',
+		default: config.upcomingMissingDateLimit,
+		alias: ['upcoming-null-date-limit'],
+	})
 	.option('page', {
 		describe: 'Page to start scraping at',
 		type: 'number',
diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js
index 1b7db161..e6bc2666 100755
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -60,6 +60,7 @@ const privateNetwork = require('./private'); // reserved keyword
 const purgatoryx = require('./purgatoryx');
 const radical = require('./radical');
 const rickysroom = require('./rickysroom');
+const sexlikereal = require('./sexlikereal');
 const score = require('./score');
 const spizoo = require('./spizoo');
 const teamskeet = require('./teamskeet');
@@ -159,6 +160,7 @@ const scrapers = {
 		rickysroom,
 		sayuncle: teamskeet,
 		score,
+		sexlikereal,
 		sexyhub: aylo,
 		spizoo,
 		swallowsalon: julesjordan,
@@ -304,6 +306,7 @@ const scrapers = {
 		sayuncle: teamskeet,
 		score,
 		seehimfuck: hush,
+		sexlikereal,
 		sexyhub: aylo,
 		silverstonedvd: famedigital,
 		silviasaint: famedigital,
diff --git a/src/scrapers/sexlikereal.js b/src/scrapers/sexlikereal.js
new file mode 100755
index 00000000..7c89e052
--- /dev/null
+++ b/src/scrapers/sexlikereal.js
@@ -0,0 +1,183 @@
+'use strict';
+
+const unprint = require('unprint');
+
+// Builds a release for every scene tile of an overview page; the tile's JSON-LD is preferred for url and title, with the markup as fallback.
+function scrapeAll(scenes, channel) {
+	return scenes.map(({ query }) => {
+		const release = {};
+		const data = query.json('script[type="application/ld+json"]');
+
+		release.url = unprint.prefixUrl(data?.url, channel.url) || query.url('article a[href*="/scenes"]');
+		release.entryId = query.attribute(null, 'data-scene-id');
+
+		release.title = data?.name || query.content('.c-grid-item-footer-title');
+		release.description = data?.description;
+
+		release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD');
+		release.duration = query.duration('.c-grid-ratio-bottom');
+
+		release.tags = [
+			query.exists('.c-grid-badge--fisheye-bg') && 'fisheye',
+			query.exists('.c-grid-badge--fleshlight-badge-multi') && 'scripts',
+			query.exists('.c-grid-badge--passthrough') && 'passthrough',
+			query.exists('.c-grid-badge--passthrough-ai') && 'ai-passthrough',
+		].filter(Boolean);
+
+		const poster = query.img('img[data-qa="grid-item-photo-img"]', { attribute: 'data-srcset' });
+
+		if (poster) {
+			release.poster = [
+				poster.replace('-app.', '-desktop.'),
+				poster,
+			];
+		}
+
+		release.teaser = query.video('img[data-qa="grid-item-photo-img"]', { attribute: 'data-videosrc' });
+
+		return release;
+	});
+}
+
+// Builds a release from a scene page, combining window.vrPlayerSettings.videoData, the VideoObject JSON-LD and the page markup.
+// window.vrPlayerSettings is presumably set by the page's own scripts, hence runScripts in the exported parser options; TODO confirm
+function scrapeScene({ query, window }, { url, entity }) {
+	const release = {};
+	const data = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "VideoObject")]');
+	const videoData = window.vrPlayerSettings?.videoData;
+
+	release.entryId = videoData?.id || (data?.url || new URL(url).pathname).split('-').at(-1);
+
+	release.title = videoData?.title || data?.name || query.content('h1[data-qa="scene-title"]');
+	release.description = query.content('div[data-qa="scene-about-tab-text"]'); // data text is cut off
+
+	release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('time[data-qa="page-scene-studio-date"]', 'YYYY-MM-DD', { attribute: 'datetime' });
+	// release.duration = unprint.extractTimestamp(data?.duration); // video duration data seems to be missing hours, regularly leading to wrong numbers; rely on front page duration
+
+	// prefer html actors for url and avatar
+	const htmlActors = query.all('div[data-qa="scene-model-list-item"]').map((actorEl) => {
+		const avatar = unprint.query.content(actorEl, 'img[data-qa="scene-model-list-item-photo-img"]', { attribute: 'data-src' });
+
+		return {
+			name: unprint.query.content(actorEl, 'a[data-qa="scene-model-list-item-name"]'),
+			url: unprint.query.url(actorEl, 'a[data-qa="scene-model-list-item-photo-link-to-profile"], a[data-qa="scene-model-list-item-name"]', { origin: entity.url }),
+			avatar: [
+				avatar?.replace('-small.', '.'),
+				avatar,
+			],
+		};
+	});
+
+	// an empty array is truthy, so select the JSON-LD names by length instead of ||; actor is normalised because it may be absent or a single object
+	release.actors = htmlActors.length > 0
+		? htmlActors
+		: [data?.actor].flat().filter(Boolean).map((actor) => actor.name);
+
+	release.tags = query.contents('a[data-qa="scene-tags-list-item-link"]');
+
+	const fallbackPoster = data?.thumbnail || query.img(`link[rel="preload"][as="image"][href*="images/${release.entryId}"]`);
+
+	release.poster = [
+		videoData?.posterURL,
+		fallbackPoster?.replace('-app.', '-desktop.'),
+		fallbackPoster,
+	];
+
+	release.photos = query.imgs('.mediabox-img', { attribute: 'data-srcset' });
+
+	release.trailer = videoData?.src
+		?.filter((src) => src.encoding === 'h264')
+		.map((src) => ({
+			src: src.url,
+			type: src.mimeType,
+			quality: parseInt(src.quality, 10),
+			expectType: {
+				'binary/octet-stream': 'video/mp4',
+			},
+		}));
+
+	release.chapters = videoData?.timeStamps?.map((chapter) => ({
+		time: chapter.ts,
+		tags: [chapter.name],
+	}));
+
+	release.qualities = release.trailer?.map((trailer) => trailer.quality);
+
+	return release;
+}
+
+// Builds a profile from the Person JSON-LD of a model page; returns null when that data is missing.
+function scrapeProfile({ query }, entity) {
+	const profile = {};
+	const data = query.json('//script[contains(@type, "application/ld+json") and contains(text(), "Person")]');
+
+	if (!data) {
+		return null;
+	}
+
+	profile.url = unprint.prefixUrl(data.url, entity.url);
+
+	profile.dateOfBirth = unprint.extractDate(data.birthDate, 'MMMM DD, YYYY');
+	profile.birthPlace = data.nationality; // origin country rather than nationality
+
+	// height and weight are provided in both cm and lbs, but this seems to be a manual conversion; the format isn't always the same
+	profile.height = unprint.extractNumber(data.height, { match: /(\d+)\s*cm/, matchIndex: 1 });
+	profile.weight = unprint.extractNumber(data.weight, { match: /(\d+)\s*kg/, matchIndex: 1 });
+
+	profile.description = data.description;
+
+	profile.avatar = [
+		data.image?.replace('-small.', '.'),
+		data.image,
+	];
+
+	return profile;
+}
+
+// Fetches one page of the most recent SLR Originals scenes; returns the scraped releases, or the response status when the request is not ok.
+async function fetchLatest(channel, page = 1) {
+	const url = `https://www.sexlikereal.com/studios/slr-originals?sort=most_recent&page=${page}`;
+	const res = await unprint.get(url, { selectAll: '.c-grid-item--scene' });
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// Fetches the upcoming SLR Originals scenes; returns the scraped releases, or the response status when the request is not ok.
+async function fetchUpcoming(channel) {
+	const url = 'https://www.sexlikereal.com/studios/slr-originals?type=upcoming';
+	const res = await unprint.get(url, { selectAll: '.c-grid-item--scene' });
+
+	if (res.ok) {
+		return scrapeAll(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// Fetches the /pornstars/<slug> page below the entity URL; returns the scraped profile, or the response status when the request is not ok.
+async function fetchProfile({ slug }, entity) {
+	const url = `${entity.url}/pornstars/${slug}`;
+	const res = await unprint.get(url);
+
+	if (res.ok) {
+		return scrapeProfile(res.context, entity);
+	}
+
+	return res.status;
+}
+
+module.exports = {
+	fetchLatest,
+	fetchUpcoming,
+	fetchProfile,
+	scrapeScene: {
+		scraper: scrapeScene,
+		parser: {
+			runScripts: 'dangerously',
+		},
+	},
+};
diff --git a/src/updates.js b/src/updates.js
index 78e0c29e..d5c2b8bc 100755
--- a/src/updates.js
+++ b/src/updates.js
@@ -151,7 +151,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
 
 	const limitedReleases = (argv.last && releases.slice(0, Math.max(argv.last, 0)))
 		|| (hasDates && releases.filter((release) => moment(release.date).isAfter(argv.after)))
-		|| releases.slice(0, Math.max(argv.missingDateLimit, 0));
+		|| releases.slice(0, Math.max(isUpcoming ? argv.upcomingMissingDateLimit : argv.missingDateLimit, 0));
 
 	const { uniqueReleases, duplicateReleases } = argv.force
 		? { uniqueReleases: limitedReleases, duplicateReleases: [] }