From 1950dd2e6285917bf5f791ea2f760c84cb56580a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 16 Oct 2024 02:39:11 +0200 Subject: [PATCH] Added Snow Valley (Sperm Mania) scraper. --- config/default.js | 1 + migrations/20241016020256_profile_details.js | 35 + package-lock.json | 116 ++- package.json | 3 +- seeds/00_tags.js | 54 +- seeds/01_networks.js | 5 + seeds/02_sites.js | 121 +++ src/actors.js | 16 +- src/deep.js | 2 +- src/media.js | 2 + src/scrapers/julesjordan.js | 62 +- src/scrapers/scrapers.js | 11 + src/scrapers/snowvalley.js | 867 +++++++++++++++++++ src/scrapers/spizoo.js | 5 + src/store-releases.js | 12 +- src/utils/http.js | 1 + 16 files changed, 1234 insertions(+), 79 deletions(-) create mode 100644 migrations/20241016020256_profile_details.js create mode 100755 src/scrapers/snowvalley.js diff --git a/config/default.js b/config/default.js index d538735b..b7998fb7 100755 --- a/config/default.js +++ b/config/default.js @@ -221,6 +221,7 @@ module.exports = { 'vrcosplayx', 'teamskeet', 'mylf', + 'spermmania', [ 'letsdoeit', 'mamacitaz', diff --git a/migrations/20241016020256_profile_details.js b/migrations/20241016020256_profile_details.js new file mode 100644 index 00000000..76e7fa6e --- /dev/null +++ b/migrations/20241016020256_profile_details.js @@ -0,0 +1,35 @@ +exports.up = async (knex) => { + await knex.schema.alterTable('actors', (table) => { + table.integer('leg'); + table.integer('foot'); + table.integer('thigh'); + }); + + await knex.schema.alterTable('actors_profiles', (table) => { + table.integer('leg'); + table.integer('foot'); + table.integer('thigh'); + }); + + await knex.schema.alterTable('releases', (table) => { + table.integer('video_count'); + }); +}; + +exports.down = async (knex) => { + await knex.schema.alterTable('actors', (table) => { + table.dropColumn('leg'); + table.dropColumn('foot'); + table.dropColumn('thigh'); + }); + + await knex.schema.alterTable('actors_profiles', (table) => { + table.dropColumn('leg'); + 
table.dropColumn('foot'); + table.dropColumn('thigh'); + }); + + await knex.schema.alterTable('releases', (table) => { + table.dropColumn('video_count'); + }); +}; diff --git a/package-lock.json b/package-lock.json index c01f7adf..9b32b08b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -53,6 +53,7 @@ "graphile-utils": "^4.14.0", "graphql": "^15.8.0", "html-entities": "^2.4.0", + "https-proxy-agent": "^7.0.5", "iconv-lite": "^0.6.3", "inquirer": "^8.2.6", "inspector-api": "^1.4.8", @@ -88,7 +89,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.11.9", + "unprint": "^0.11.13", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -3861,6 +3862,18 @@ "node-pre-gyp": "bin/node-pre-gyp" } }, + "node_modules/@mapbox/node-pre-gyp/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/@mapbox/node-pre-gyp/node_modules/lru-cache": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", @@ -10868,15 +10881,26 @@ } }, "node_modules/https-proxy-agent": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", - "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz", + "integrity": "sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==", "dependencies": { - "agent-base": "6", + "agent-base": "^7.0.2", "debug": "4" }, "engines": { - "node": ">= 6" + "node": ">= 14" + } + }, + 
"node_modules/https-proxy-agent/node_modules/agent-base": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", + "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", + "dependencies": { + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" } }, "node_modules/human-signals": { @@ -12100,18 +12124,6 @@ "node": ">= 14" } }, - "node_modules/jsdom/node_modules/https-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz", - "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/jsdom/node_modules/whatwg-mimetype": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", @@ -12806,6 +12818,19 @@ "node": "^12.13.0 || ^14.15.0 || >=16.0.0" } }, + "node_modules/make-fetch-happen/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dev": true, + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/make-fetch-happen/node_modules/lru-cache": { "version": "7.18.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", @@ -13613,6 +13638,19 @@ "node": ">= 6" } }, + "node_modules/node-gyp/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dev": true, + "dependencies": { + 
"agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/node-gyp/node_modules/lru-cache": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", @@ -14411,18 +14449,6 @@ "node": ">= 14" } }, - "node_modules/pac-proxy-agent/node_modules/https-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz", - "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/pac-proxy-agent/node_modules/socks-proxy-agent": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.2.tgz", @@ -15354,18 +15380,6 @@ "node": ">= 14" } }, - "node_modules/proxy-agent/node_modules/https-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz", - "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, "node_modules/proxy-agent/node_modules/lru-cache": { "version": "7.18.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", @@ -18298,9 +18312,9 @@ } }, "node_modules/unprint": { - "version": "0.11.9", - "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.9.tgz", - "integrity": "sha512-ROb7d1o4w0ATTgMW970/z3xURbslfc2D/AmYTzT5RoXsaSbQZTXa5lSCQ/iZGGyzrTX1UGVqot0+AQIYf2c3IQ==", + "version": "0.11.13", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.13.tgz", + "integrity": "sha512-dEa3zdaXtK2TmRVWf4APunTUXZfnYb0Yv4RlddpFVA8fgYf0ER/m0JN/ZcbEfqg3x5YPiJEHpgLGH9pMv5lbqA==", "dependencies": { "axios": "^0.27.2", "bottleneck": "^2.19.5", @@ -18424,6 
+18438,18 @@ "node": ">= 6" } }, + "node_modules/unprint/node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/unprint/node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", diff --git a/package.json b/package.json index 4a46c1bd..df6eeb16 100755 --- a/package.json +++ b/package.json @@ -112,6 +112,7 @@ "graphile-utils": "^4.14.0", "graphql": "^15.8.0", "html-entities": "^2.4.0", + "https-proxy-agent": "^7.0.5", "iconv-lite": "^0.6.3", "inquirer": "^8.2.6", "inspector-api": "^1.4.8", @@ -147,7 +148,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.11.9", + "unprint": "^0.11.13", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/seeds/00_tags.js b/seeds/00_tags.js index 85d1a389..20b9a408 100755 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -328,6 +328,11 @@ const tags = [ name: 'corporal punishment', slug: 'corporal-punishment', }, + { + name: 'cosplay', + slug: 'cosplay', + group: 'roleplay', + }, { name: 'couples', slug: 'couples', @@ -355,6 +360,14 @@ const tags = [ name: 'cum licking', slug: 'cum-licking', }, + { + name: 'cum fetish', + slug: 'cum-fetish', + }, + { + name: 'cum play', + slug: 'cum-play', + }, { name: 'cum on butt', slug: 'cum-on-butt', @@ -825,7 +838,12 @@ const tags = [ { name: 'cum in mouth', slug: 'cum-in-mouth', - description: 'A guy ejaculating in someone\'s mouth. If they keep their lips wrapped around his cock, it is an [oral creampie](/tag/oral-creampie). They may not be able to resist [swallowing](/tag/swallowing) the cum.', + description: 'A cock ejaculating in your mouth. 
If you keep your lips wrapped around the cock, it is an [oral creampie](/tag/oral-creampie). You may not be able to resist [swallowing](/tag/swallowing) the cum.', + group: 'finish', + }, + { + name: 'cum in panty', + slug: 'cum-in-panty', group: 'finish', }, { @@ -996,6 +1014,12 @@ const tags = [ { name: 'solo', slug: 'solo', + description: 'You don\'t need a man... or a woman! No one does it better than yourself.', + }, + { + name: 'solo foreplay', + slug: 'solo-foreplay', + description: 'Getting yourself all nice and wet before a good fucking.', }, { name: 'skinny', @@ -1289,6 +1313,18 @@ const tags = [ slug: 'scripts', description: 'Scripts for haptic sex toys.', }, + { + name: 'cat ears', + slug: 'cat-ears', + }, + { + name: 'neko', + slug: 'neko', + }, + { + name: 'ahegao', + slug: 'ahegao', + }, ]; const aliases = [ @@ -1411,6 +1447,10 @@ const aliases = [ name: 'bald pussy', for: 'shaved', }, + { + name: 'hairless pussy', + for: 'shaved', + }, { name: 'ball gag', for: 'gag', @@ -2313,6 +2353,10 @@ const aliases = [ for: 'titty-fucking', secondary: true, }, + { + name: 'titjob', + for: 'titty-fucking', + }, { name: 'tp', for: 'triple-penetration', @@ -2639,6 +2683,14 @@ const aliases = [ name: 'sex toy scripts', for: 'scripts', }, + { + name: 'mouth cumshot', + for: 'cum-in-mouth', + }, + { + name: 'oral cumshot', + for: 'cum-in-mouth', + }, ]; const priorities = [ // higher index is higher priority diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 846eabbe..e5776da2 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -653,6 +653,11 @@ const networks = [ parentSession: false, }, }, + { + slug: 'snowvalley', + name: 'Snow Valley Group', + hasLogo: false, + }, { slug: 'spizoo', name: 'Spizoo', diff --git a/seeds/02_sites.js b/seeds/02_sites.js index c8663f86..e04ec9c6 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -10720,6 +10720,121 @@ const sites = [ tags: ['lesbian'], parent: 'sexyhub', }, + // SNOW VALLEY GROUP + { + 
slug: 'spermmania', + name: 'Sperm Mania', + url: 'https://www.spermmania.com', + tags: ['cum-fetish'], + independent: true, + parent: 'snowvalley', + }, + { + slug: 'cospuri', + name: 'Cospuri', + url: 'https://www.cospuri.com', + tags: ['cosplay'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'cospuri', + actors: 'https://www.cospuri.com/model', + }, + }, + { + slug: 'cutebutts', + name: 'Cute Butts', + url: 'https://www.cutebutts.com', + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'cospuri', + actors: 'https://www.cutebutts.com/model', + }, + }, + { + slug: 'fellatiojapan', + name: 'Fellatio Japan', + url: 'https://www.fellatiojapan.com', + tags: ['blowjob', 'jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'fellatio', + actors: 'https://www.fellatiojapan.com/en/girl', + }, + }, + { + slug: 'handjobjapan', + name: 'Handjob Japan', + url: 'https://www.handjobjapan.com', + tags: ['handjob', 'jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'handjob', + actors: 'https://www.handjobjapan.com/en/models', + }, + }, + { + slug: 'legsjapan', + name: 'Legs Japan', + url: 'https://www.legsjapan.com', + tags: ['jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'legs', + actors: 'https://www.legsjapan.com/en/girl', + }, + }, + { + slug: 'uralesbian', + name: 'Ura Lesbian', + url: 'https://www.uralesbian.com', + tags: ['lesbian', 'jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'lesbian', + actors: 'https://www.uralesbian.com/en/model', + }, + }, + { + slug: 'tokyofacefuck', + name: 'Tokyo Facefuck', + url: 'https://www.tokyofacefuck.com', + tags: ['facefucking', 'blowjob', 'jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'facefuck', + }, + }, + { + slug: 'cumbuffet', + name: 'Cum Buffet', + url: 'https://www.cumbuffet.com', + tags: ['swallowing'], + independent: true, + parent: 
'snowvalley', + parameters: { + layout: 'buffet', + actors: 'https://www.cumbuffet.com/girl', + }, + }, + { + slug: 'transexjapan', + name: 'Transex Japan', + url: 'https://www.transexjapan.com', + tags: ['transsexual', 'jav'], + independent: true, + parent: 'snowvalley', + parameters: { + layout: 'trans', + actors: 'https://www.transexjapan.com/model', + }, + }, // SPIZOO { slug: 'spizoo', @@ -10734,6 +10849,12 @@ const sites = [ tags: ['stripper'], parent: 'spizoo', }, + { + slug: 'creamher', + name: 'Goth Girlfriends', + url: 'https://www.creamher.com', + parent: 'spizoo', + }, { slug: 'gothgirlfriends', name: 'Goth Girlfriends', diff --git a/src/actors.js b/src/actors.js index 13327fff..d50308b7 100755 --- a/src/actors.js +++ b/src/actors.js @@ -267,6 +267,9 @@ function curateActor(actor, withDetails = false, isProfile = false) { bust: actor.bust, waist: actor.waist, hip: actor.hip, + foot: actor.foot, + leg: actor.leg, + thigh: actor.thigh, naturalBoobs: actor.natural_boobs, penisLength: actor.penis_length, penisGirth: actor.penis_girth, @@ -359,6 +362,9 @@ function curateProfileEntry(profile) { cup: profile.cup, bust: profile.bust, waist: profile.waist, + leg: profile.leg, + thigh: profile.thigh, + foot: profile.foot, hip: profile.hip, penis_length: profile.penisLength, penis_girth: profile.penisGirth, @@ -442,8 +448,13 @@ async function curateProfile(profile, actor) { curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null; curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null; + curatedProfile.leg = Number(profile.leg) || profile.leg?.match?.(/\d+/)?.[0] || null; + curatedProfile.thigh = Number(profile.thigh) || profile.thigh?.match?.(/\d+/)?.[0] || null; + curatedProfile.foot = Number(profile.foot) || profile.foot?.match?.(/\d+/)?.[0] || null; + // combined measurement value - const measurements = profile.measurements?.match(/(\d+)(\w+)(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/); // ExCoGi uses x, 
Jules Jordan has spaces between the dashes + // ExCoGi uses x, Jules Jordan has spaces between the dashes, SpermMenia/Cum Buffet sometimes misses cup + const measurements = profile.measurements?.match(/(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i); if (measurements) { curatedProfile.bust = Number(measurements[1]) || null; @@ -589,6 +600,9 @@ async function interpolateProfiles(actorIdsOrNames) { 'bust', 'waist', 'hip', + 'leg', + 'thigh', + 'foot', 'shoe_size', 'penis_length', 'penis_girth', diff --git a/src/deep.js b/src/deep.js index 8acaf59d..e1d75b17 100755 --- a/src/deep.js +++ b/src/deep.js @@ -130,7 +130,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { return baseRelease; } - if ((!baseRelease.url && !baseRelease.path) || !argv.deep) { + if ((!baseRelease.url && !baseRelease.path && !baseRelease.forceDeep) || !argv.deep) { return { ...baseRelease, entity, diff --git a/src/media.js b/src/media.js index 90494a15..e242cbdf 100755 --- a/src/media.js +++ b/src/media.js @@ -132,6 +132,7 @@ function toBaseSource(rawSource) { if (rawSource.extract) baseSource.extract = rawSource.extract; if (rawSource.expectType) baseSource.expectType = rawSource.expectType; + if (typeof rawSource.followRedirects === 'boolean') baseSource.followRedirects = rawSource.followRedirects; if (rawSource.stream) { baseSource.src = rawSource.stream; @@ -623,6 +624,7 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) { ...(source.host && { host: source.host }), }, stream: true, // sources are fetched in parallel, don't gobble up memory + followRedirects: source.followRedirects, transforms: [hashStream], destination: tempFileTarget, ...(source.interval && { interval: source.interval }), diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 29bd3b78..0353175f 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -28,7 +28,7 @@ function getEntryId(html) { function 
getEntryIdFromTitle(release) { // return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page // return slugify(release.title); - return slugify([release.title, ...(release.actors?.map((actor) => actor.name).toSorted() || [])]); + return slugify([release.title, ...(release.actors?.map((actor) => actor.name || actor).toSorted() || [])]); } function scrapeAll(scenes, site, entryIdFromTitle) { @@ -226,13 +226,13 @@ async function scrapeScene({ html, query }, context) { }))); } - if (query.exists('.update_dvds a')) { + if (query.exists('.player-scene-description a[href*="/dvd"]')) { release.movie = { - url: query.url('.update_dvds a'), - title: query.cnt('.update_dvds a'), + url: query.url('.player-scene-description a[href*="/dvd"]'), + title: query.content('.player-scene-description a[href*="/dvd"]'), }; - release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', ''); + release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase(); } release.stars = query.number('.avg_rating'); @@ -244,28 +244,40 @@ async function scrapeScene({ html, query }, context) { return release; } -function scrapeMovie({ el, query }, url, site) { - const movie = { url, site }; +function scrapeMovie({ query }, { url }) { + const movie = {}; movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase(); - movie.title = query.cnt('.title_bar span'); - movie.covers = query.urls('#dvd-cover-flip > a'); - movie.channel = slugify(query.q('.update_date a', true), ''); + movie.title = query.attribute('meta[property="og:title"]', 'content'); - // movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href); - const sceneQus = qu.initAll(el, '.dvd_details'); - const scenes = scrapeAll(sceneQus, site); + movie.covers = [query.img('img.dvd_box')]; // -2x etc is likely 
upscaled - const curatedScenes = scenes - ?.map((scene) => ({ ...scene, movie })) - .sort((sceneA, sceneB) => sceneA.date - sceneB.date); + const sceneTitles = query.contents('.title-heading-content-black-dvd'); - movie.date = curatedScenes?.[0]?.date; + const scenes = query.all('.grid-container-scene').map((sceneEl, index) => { + const scene = {}; - return { - ...movie, - ...(curatedScenes && { scenes: curatedScenes }), - }; + scene.url = unprint.query.url(sceneEl, 'a[href*="/scenes"]'); + scene.title = sceneTitles[index]; + + scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY'); + scene.actors = unprint.query.contents(sceneEl, '.update_models a'); + + scene.entryId = getEntryIdFromTitle(scene); + + console.log(scene); + + return scene; + }); + + movie.scenes = scenes?.sort((sceneA, sceneB) => sceneA.date - sceneB.date); + + movie.date = movie.scenes?.[0]?.date; + movie.datePrecision = 'month'; + + console.log('jj movie', movie); + + return movie; } function scrapeProfile({ query }, url, name, entity) { @@ -325,12 +337,6 @@ async function fetchUpcoming(site) { return res.status; } -async function fetchMovie(url, site) { - const res = await qu.get(url); - - return res.ok ? 
scrapeMovie(res.item, url, site) : res.status; -} - async function fetchProfile({ name: actorName, url }, entity) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName, '-'); @@ -364,8 +370,8 @@ async function fetchProfile({ name: actorName, url }, entity) { module.exports = { fetchLatest, - fetchMovie, fetchProfile, fetchUpcoming, scrapeScene, + scrapeMovie, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 4466ffb8..f76820b9 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -63,6 +63,7 @@ const radical = require('./radical'); const rickysroom = require('./rickysroom'); const sexlikereal = require('./sexlikereal'); const score = require('./score'); +const snowvalley = require('./snowvalley'); const spizoo = require('./spizoo'); const teamskeet = require('./teamskeet'); const teencoreclub = require('./teencoreclub'); @@ -163,6 +164,7 @@ const scrapers = { score, sexlikereal, sexyhub: aylo, + snowvalley, spizoo, swallowsalon: julesjordan, theflourish: archangel, @@ -309,6 +311,15 @@ const scrapers = { sexyhub: aylo, silverstonedvd: famedigital, silviasaint: famedigital, + spermmania: snowvalley, + handjobjapan: snowvalley, + fellatiojapan: snowvalley, + legsjapan: snowvalley, + cumbuffet: snowvalley, + cospuri: snowvalley, + cutebutts: snowvalley, + transexjapan: snowvalley, + uralesbian: snowvalley, spizoo, swallowed: mikeadriano, milfcandy: archangel, diff --git a/src/scrapers/snowvalley.js b/src/scrapers/snowvalley.js new file mode 100755 index 00000000..1158c6ea --- /dev/null +++ b/src/scrapers/snowvalley.js @@ -0,0 +1,867 @@ +'use strict'; + +const unprint = require('unprint'); + +const slugify = require('../utils/slugify'); + +const tagsMap = { + 'body bukkake': ['bukkake'], + 'creampie gangbang': ['gangbang', 'creampie'], + 'cum handjob': ['handjob'], + 'facial bukkake': ['facial', 'bukkake'], + 'massive creampie': ['creampie'], + 'massive cum handjob': ['handjob'], + 'panty cum': 
['cum-in-panty'], + 'pussy bukkake': ['cum-on-pussy'], +}; + +function entryIdFromMedia(release) { + return [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1].toLowerCase(); +} + +function scrapeAll(scenes, tilesByEntryId, channel) { + return scenes.map(({ query }) => { + const release = {}; + + // release.url = query.url('.title a'); + + release.title = query.content('.sample-title'); + + // release.date = query.date('.date', 'MMM DD, YYYY'); + release.duration = query.duration('//div[contains(text(), "Runtime")]'); + + release.actors = query.all('a[href*="actress/"]').map((actorEl) => ({ // actors can be only in title or dedicated field + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), + })); + + release.tags = tagsMap[query.content('a[href*="type/"]')?.toLowerCase()]; + + const posterBackground = query.style('.player'); + + if (posterBackground?.background) { + release.poster = posterBackground.background.match(/url\((.*)\)/)?.[1]?.trim(); + } + + release.photos = query.all('.sample-thumbs .thumb a').map((linkEl) => [ + unprint.query.url(linkEl, null), + unprint.query.img(linkEl, 'img'), + ].filter((src) => !src.includes('join'))); + + release.trailer = query.video('.player source'); + + release.photoCount = query.number('//div[contains(text(), "Photos")]'); + release.cumshots = query.number('//div[contains(text(), "Cumshots")]'); + + release.entryId = entryIdFromMedia(release); + + const tile = tilesByEntryId[release.entryId]; + + if (tile) { + Object.entries(tile).forEach(([key, value]) => { + if (!Object.hasOwn(release, key)) { + release[key] = value; + } + }); + } + + return release; + }); +} + +// page has no container divs, select all following siblings until the 'join' link indicating the end of the block +function composeBlock(element, init = true, acc = '') { + const newAcc = 
`${acc}${element.outerHTML}`; + + // image albums also contain a join link, make sure not to select that one + if (element.nextElementSibling.className.includes('join') + || !!element.nextElementSibling.querySelector('.item-join, .join-link') + || !!element.nextElementSibling.querySelector('h2 a[href*="join"]') + ) { + if (init) { + return unprint.init(newAcc); + } + + return newAcc; + } + + return composeBlock(element.nextElementSibling, init, newAcc); +} + +// used for both SpermMania and Fellation Japan, but different layouts +function scrapeAllTiles(tiles, channel) { + return tiles.map(({ query }) => { + const release = {}; + const sceneString = query.content(); + + const originalEntryId = query.attribute('.scene-hover', 'data-path'); + release.entryId = originalEntryId?.toLowerCase(); + + release.title = query.content('.scene-title'); + + release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD'); + release.duration = query.duration('.data.orange') || unprint.extractDuration(sceneString.match(/([\d:]+)\s*min/)?.[1]); + + release.actors = query.all('a[href*="actress/"], .sGirl a').map((actorEl) => ({ // actors can be only in title or dedicated field + name: unprint.query.content(actorEl), + url: channel.slug === 'fellatiojapan' + ? 
`${channel.url}/en/girl/${unprint.query.url(actorEl, null)}` + : unprint.query.element(actorEl, null, { origin: channel.url }), + })); + + release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean); + + const posterBackground = query.style('.scene-img'); + const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim(); + + if (posterUrl) { + release.poster = [ + posterUrl + .replace('-sm', '-lg') + .replace('-med', '-lg'), + posterUrl.replace('-sm', '-med'), + posterUrl, + ]; + } + + release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`; + + release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null; + release.cumshots = Number(sceneString.match(/(\d+) cumshots/)?.[1]) || null; + + return release; + }); +} + +// Sperm Mania +async function fetchLatestTiles(channel) { + const res = await unprint.get(`${channel.url}/tour`, { selectAll: '.scene' }); + + if (res.ok) { + const tiles = scrapeAllTiles(res.context, channel); + + return Object.fromEntries(tiles.map((tile) => [tile.entryId, tile])); + } + + return res.status; +} + +// SpermMania, sample feed with limited info +async function fetchLatest(channel, page = 1) { + const url = `${channel.url}/samples?page=${page}`; + + const [res, tilesByEntryId] = await Promise.all([ + unprint.get(url, { selectAll: '.sample-title, .item-title' }), + fetchLatestTiles(channel), + ]); + + if (res.ok) { + const expandedContext = res.context.map(({ element }) => composeBlock(element)); + + return scrapeAll(expandedContext, tilesByEntryId, channel); + } + + return res.status; +} + +function scrapeAllCospuri(scenes, channel) { + return scenes.map(({ query }) => { + const release = {}; + + release.url = query.url('.scene-thumb a'); + release.entryId = new URL(release.url).searchParams.get('id') + || new URL(release.url).pathname.match(/\/sample\/(.*)\//)[1]; + + 
release.title = query.content('.title'); + + release.date = query.date('.date', 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ }); + release.duration = query.duration('.length'); + release.photoCount = query.number('.photos'); + + release.actors = query.all('.model a[href*="/model"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), + })); + + release.tags = [...query.contents('.tags .tag, .tag-box .tag'), query.content('.model .channel')].filter(Boolean); + + const posterBackground = query.style('.scene-thumb'); + const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim(); + + if (posterUrl) { + release.poster = [ + posterUrl + .replace('-med', '-lg') + .replace('-sm', '-lg'), + posterUrl.replace('-sm', '-med'), + posterUrl, + ]; + } + + release.teaser = query.video('.scene-hover', { attribute: 'data-path' }); + + return release; + }); +} + +// Cospuri, Cute Butts, paginated sample tiles with full info +async function fetchLatestCospuri(channel, page) { + const url = `${channel.url}/samples?page=${page}`; + + const res = await unprint.get(url, { selectAll: '.scene' }); + + if (res.ok) { + return scrapeAllCospuri(res.context, channel); + } + + return res.status; +} + +function curatePhotos(sources) { + return sources + .filter(Boolean).map((src) => [ + src.replace(/(\d+)s.jpg/, (match, photoIndex) => `${photoIndex}.jpg`), + src, + ].map((url) => ({ + src: url, + followRedirects: false, + }))); +} + +function scrapeAllFellatio(scenes, channel) { + return scenes.map(({ query }) => { + const release = {}; + + release.duration = query.duration('.tour-data'); + release.photoCount = query.number('.tour-data', { match: /(\d+) photos/, matchIndex: 1 }); + + release.actors = query.all('.tour-data a[href*="girl/"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`, + })); + + release.tags = 
query.contents('.tour-data a[href*="tag/"]'); + + const posterBackground = query.style('.player'); + const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim(); + + if (posterUrl) { + release.poster = posterUrl; + } + + release.photos = curatePhotos(query.imgs('.tour-thumb img')); + release.trailer = query.video(); + + release.entryId = entryIdFromMedia(release); + release.path = release.actors[0]?.url; + + return release; + }); +} + +// Fellatio Japan +async function fetchLatestFellatio(channel, page) { + const url = `${channel.url}/en/samples/?page=${page}`; + const res = await unprint.get(url, { selectAll: '.tour-data' }); + + if (res.ok) { + const expandedContext = res.context.map(({ element }) => composeBlock(element)); + + return scrapeAllFellatio(expandedContext, channel); + } + + return res.status; +} + +function scrapeAllHandjob(scenes, _channel) { + return scenes.map(({ query }) => { + const release = {}; + + release.title = query.content('.blurb'); + + release.duration = query.duration('.item-rtitle'); + release.photoCount = query.number('//h3[contains(text(), "Scene Photos")]/strong'); + + release.actors = query.text('.item-ltitle h1')?.split(/,\s*/).map((actor) => actor.trim()); + + const posterBackground = query.style('.player'); + const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim(); + + if (posterUrl) { + release.poster = posterUrl; + } + + release.photos = curatePhotos(query.imgs('img.thumb, img.rthumb')); + release.trailer = query.video(); + + release.entryId = entryIdFromMedia(release); + + return release; + }); +} + +// Handjob Japan +async function fetchLatestHandjob(channel, page) { + const url = `${channel.url}/en/samples/?page=${page}`; + const res = await unprint.get(url, { selectAll: '.item-title' }); + + if (res.ok) { + const expandedContext = res.context.map(({ element }) => composeBlock(element)); + + return scrapeAllHandjob(expandedContext, channel); + } + + return res.status; +} + 
+// scrapes every sample block on a Legs Japan page into a release
+function scrapeAllLegs(scenes, channel) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.title = query.content('.tContent h3 strong');
+
+		release.duration = query.duration('//h3[contains(text(), "length")]/strong');
+		release.photoCount = query.number('//h3[contains(text(), "photos")]/strong');
+
+		release.actors = query.all('.tContent a[href*="girl/"]').map((actorEl) => ({
+			name: unprint.query.content(actorEl),
+			url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
+		}));
+
+		release.tags = query.contents('a[href*="tag/"]');
+
+		const posterBackground = query.style('.player');
+		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+		if (posterUrl) {
+			release.poster = posterUrl;
+		}
+
+		release.photos = curatePhotos(query.imgs('.tThumbs img'));
+		release.trailer = query.video();
+
+		release.entryId = entryIdFromMedia(release);
+
+		return release;
+	});
+}
+
+// Legs Japan
+// returns the scraped releases, or res.status when the request did not succeed
+async function fetchLatestLegs(channel, page) {
+	const url = `${channel.url}/en/samples/?page=${page}`;
+	const res = await unprint.get(url, { selectAll: '.player' });
+
+	if (res.ok) {
+		const expandedContext = res.context.map(({ element }) => composeBlock(element));
+
+		return scrapeAllLegs(expandedContext, channel);
+	}
+
+	return res.status;
+}
+
+// scrapes every girl box on the Tokyo Facefuck front page into a release
+function scrapeAllFacefuck(scenes) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.description = query.content('.infotxt');
+		// a box without heading must not abort the whole page, fall back to no actors
+		release.actors = query.content('.info h1')?.split(',').map((actor) => actor.trim()).filter(Boolean) || [];
+
+		const posterBackground = query.style('.player');
+		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+		if (posterUrl) {
+			release.poster = posterUrl;
+		}
+
+		release.photos = curatePhotos(query.imgs('.thumb img'));
+		release.trailer = query.video();
+
+		release.entryId = entryIdFromMedia(release);
+
+		return release;
+	});
+}
+
+// Tokyo Facefuck
+// returns the scraped releases, or res.status when the request did not succeed
+async function fetchLatestFacefuck(channel, page) {
+	const url = `${channel.url}/en/?page=${page}`;
+	const res = await unprint.get(url, { selectAll: '.girl.box' });
+
+	if (res.ok) {
+		return scrapeAllFacefuck(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// scrapes Trans Sex Japan samples; every scene is a [videoBlock, albumBlock] pair
+function scrapeAllTrans(scenes) {
+	return scenes.map(([{ query }, album]) => {
+		// blocks are paired by index, so the album block is missing when the page has fewer albums than videos
+		const albumQuery = album?.query;
+		const release = {};
+
+		release.title = query.content('.sample-info h1');
+		release.actors = query.content('.sample-info a strong')?.split(',').map((actor) => actor.trim()).filter(Boolean) || [];
+
+		release.description = query.content('.sample-desc')?.replace('""', '') || null; // usually empty, but let's try it just in case
+
+		release.duration = query.duration('.sample-info');
+		release.photoCount = albumQuery?.number('.sample-info', { match: /(\d+) photos/i, matchIndex: 1 }) ?? null;
+
+		const posterBackground = query.style('.player');
+		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+		if (posterUrl) {
+			release.poster = [
+				posterUrl,
+				posterUrl.replace(/-\d.jpg/, '-2.jpg'),
+				posterUrl.replace(/-\d.jpg/, '-1.jpg'),
+			];
+		}
+
+		const albumStyles = albumQuery?.styles('.sample-lg, .sample-thumb') || [];
+
+		release.photos = curatePhotos(albumStyles.map((style) => style['background-image']?.match(/url\((.*)\)/)?.[1]));
+		release.trailer = query.video();
+
+		release.entryId = entryIdFromMedia(release);
+
+		return release;
+	});
+}
+
+// Trans Sex Japan
+// returns the scraped releases, or res.status when the request did not succeed
+async function fetchLatestTrans(channel, page) {
+	const url = `${channel.url}/samples?page=${page}`;
+	const res = await unprint.get(url, { select: '.stage' });
+
+	// bail out before touching res.context, it is only read after res.ok everywhere else in this file
+	if (!res.ok) {
+		return res.status;
+	}
+
+	// videos and photo albums are separate blocks on the page, collect both and pair them up by position
+	const videoHeads = unprint.initAll(res.context.element, '//div[contains(@class, "col-1") and .//div[contains(@class, "player")]]');
+	const albumHeads = unprint.initAll(res.context.element, '//div[div[contains(@class, "sample-thumbs")]]');
+
+	const videoBlocks = videoHeads.map(({ element }) => composeBlock(element));
+	const albumBlocks = albumHeads.map(({ element }) => composeBlock(element));
+
+	const mergedContext = videoBlocks.map((context, index) => [context, albumBlocks[index]]);
+
+	return scrapeAllTrans(mergedContext, channel);
+}
+
+// scrapes the Uralesbian update tiles; photo tiles and tiles without entry ID are dropped
+function scrapeAllLesbianTiles(scenes, channel) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.entryId = query.attribute('.scene-hover', 'data-path');
+
+		// supplementary data, filter items without entry ID
+		if (!release.entryId || query.content('.content-overlay')?.includes('photo')) {
+			return null;
+		}
+
+		release.title = query.content('.content-title');
+		release.duration = query.duration('.content-size-model');
+
+		release.actors = query.all('.content-size-model a').map((actorEl) => ({
+			name: unprint.query.content(actorEl),
+			url: unprint.query.url(actorEl, null, { origin: channel.url }),
+		}));
+
+		release.tags = query.contents('.content-tags a');
+
+		const posterBackground = query.style('.vidthumb');
+		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+		if (posterUrl) {
+			release.poster = [
+				posterUrl
+					.replace('-sm', '-lg')
+					.replace('-med', '-lg'),
+				posterUrl.replace('-sm', '-med'),
+				posterUrl,
+			];
+		}
+
+		release.teaser = `${channel.url}/content/${release.entryId}/hover.mp4`;
+
+		return release;
+	}).filter(Boolean);
+}
+
+// scrapes the Uralesbian sample sets and, where the match is unambiguous, attaches the update tiles as chapters
+// tiles defaults to an empty list so a failed tile fetch only costs the chapters, not the releases
+function scrapeAllLesbian(scenes, channel, tiles = []) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		// blocks that link to the samples page are skipped
+		if (query.exists('a[href*="samples"]')) {
+			return null;
+		}
+
+		release.actors = query.all('a[href*="model/"]').map((actorEl) => ({
+			name: unprint.query.content(actorEl),
+			url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
+		}));
+
+		// the last word of the first datum is the runtime, presumably in minutes given the appended 'M' - confirm
+		const runtime = query.content('.tour-datum')?.split(' ').at(-1);
+
+		release.duration = runtime ? unprint.extractTimestamp(`${runtime}M`) : null;
+		release.videoCount = query.number('.tour-datum', { match: /(\d+) hd scenes/i, matchIndex: 1 });
+		release.photoCount = query.number('//div[text()[contains(., "Photos")]]', { match: /(\d+) photos/i, matchIndex: 1 });
+
+		const posterBackground = query.style('.player');
+		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+		if (posterUrl) {
+			release.poster = posterUrl;
+		}
+
+		release.trailer = query.video();
+		release.photos = curatePhotos(query.imgs('.tour-thumb img'));
+
+		release.entryId = slugify([entryIdFromMedia(release), ...release.actors.map((actor) => actor.name)]);
+
+		// tiles featuring exactly the same cast as this set
+		const relatedTiles = tiles.filter((tile) => tile.actors.length === release.actors.length && tile.actors.every((tileActor) => release.actors.some((releaseActor) => tileActor.name === releaseActor.name)));
+
+		// if we found the same number of tiles as videos in this set, we can be pretty sure they relate to this set
+		// if there are more, we have no way of determining which of the videos belong to this set
+		if (relatedTiles.length === release.videoCount) {
+			const sortedTiles = relatedTiles.toSorted((tileA, tileB) => tileA.entryId.localeCompare(tileB.entryId)); // entry IDs appear chronological
+
+			// several tiles usually share tags, keep each tag once
+			release.tags = [...new Set(relatedTiles.flatMap((tile) => tile.tags))];
+
+			release.chapters = sortedTiles.map((tile, index, array) => {
+				// a chapter starts where the preceding chapters end; tiles without duration count as zero
+				const time = array.slice(0, index).reduce((acc, relatedTile) => acc + (relatedTile.duration || 0), 0);
+
+				return {
+					title: tile.title,
+					time,
+					duration: tile.duration,
+					tags: tile.tags,
+					poster: tile.poster,
+				};
+			});
+		}
+
+		return release;
+	}).filter(Boolean);
+}
+
+// Uralesbian
+// returns the scraped tiles, or res.status when the request did not succeed
+async function fetchLatestLesbianTiles(channel, _page) {
+	// each sample on the samples page represents multiple videos, so for this site we start with the update tiles instead
+	// l=0 language, 0 = English, 1 = Japanese
+	// s=1 unclear, seems to be some sort of set, s=1 is everything, s=4 is front page
+	// c=5000 limit, only seems to apply to 'everything' set, seemingly unlimited by default but apply for good measure
+	// no known pagination parameter at this moment, so we try to get everything
+	// NOTE(review): the URL below does not send the s parameter described above - confirm the default set is 'everything'
+	const url = `${channel.url}/getdata.php?l=0&c=5000`;
+	const res = await unprint.get(url, { selectAll: '.content-obj' });
+
+	if (res.ok) {
+		return scrapeAllLesbianTiles(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// Uralesbian
+// returns the scraped releases, or res.status when the samples request did not succeed
+async function fetchLatestLesbian(channel, page) {
+	const url = `${channel.url}/en/samples?page=${page}`;
+
+	const [res, tiles] = await Promise.all([
+		unprint.get(url, { selectAll: '.tour-obj' }),
+		fetchLatestLesbianTiles(channel),
+	]);
+
+	if (res.ok) {
+		// the tile fetch returns a status code instead of a list on failure; tiles only add chapters, so continue without them
+		return scrapeAllLesbian(res.context, channel, Array.isArray(tiles) ? tiles : []);
+	}
+
+	return res.status;
+}
+
+// scrapes the video tiles on the 'buffet' layout samples page; tiles without a recognizable sample link are dropped
+function scrapeAllBuffet(scenes, channel) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.url = query.url('.video-link');
+
+		// resolve against the channel URL so a relative or missing link can't throw and abort the whole page
+		const pathname = release.url ? new URL(release.url, channel.url).pathname : null;
+
+		release.entryId = pathname?.match(/sample\/(\w+)\//)?.[1];
+
+		if (!release.entryId) {
+			return null;
+		}
+
+		release.title = query.content('.video-link');
+		release.date = query.date('.date', 'MMM D, YYYY');
+
+		release.actors = query.all('.model-name a').map((actorEl) => ({
+			name: unprint.query.content(actorEl),
+			url: unprint.query.url(actorEl, null, { origin: channel.url }),
+		}));
+
+		const posterUrl = query.img('.thumb');
+
+		if (posterUrl) {
+			release.poster = [
+				posterUrl
+					.replace('-sm', '-lg')
+					.replace('-med', '-lg'),
+				posterUrl.replace('-sm', '-med'),
+				posterUrl,
+			];
+		}
+
+		return release;
+	}).filter(Boolean);
+}
+
+// Uralesbian
+// NOTE(review): the header above says Uralesbian, but this function is exported under the 'buffet' key - confirm the site name
+async function fetchLatestBuffet(channel, _page) {
+	const url = `${channel.url}/samples`; // no pagination
+	const res = await unprint.get(url, { selectAll: '.videos .video' });
+
+	if (res.ok) {
+		return scrapeAllBuffet(res.context, channel);
+	}
+
+	return res.status;
+}
+
+// scrapes a scene page of the 'buffet' layout; the entry ID is taken from the /sample/<id>/ part of the scene URL
+function scrapeSceneBuffet({ query }, { url, entity }) {
+	const release = {};
+
+	release.entryId = new URL(url).pathname.match(/sample\/(\w+)\//)[1];
+
+	release.title = query.text('.pg-nav h2');
+
+	release.actors = query.all('.tags a[href*="girl/"]').map((actorEl) => ({
+		name: unprint.query.content(actorEl),
+		url: unprint.query.url(actorEl, null, { origin: entity.url }),
+	}));
+
+	release.tags = query.contents('.tag-list a');
+
+	const posterBackground = query.style('.player');
+	const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+	if (posterUrl) {
+		release.poster = [
+			posterUrl.replace('-sm', '-lg'), // should already be -lg, but just in case
+			posterUrl.replace('-lg', '-sm'),
+		];
+	}
+
+	release.trailer = query.video('.player source');
+	release.photos = query.imgs('.photos .photo', { attribute: 'href' });
+
+	return release;
+}
+
+// scrapes a Cospuri or Cute Butts scene page; where the layouts differ, both selectors are passed
+function scrapeSceneCospuri({ query }, { url, entity }) {
+	const release = {};
+	const sceneUrl = new URL(url); // parse once, used for both the query and the path variant
+
+	// the ID is either the ?id= parameter or the /sample/<id>/ path segment
+	release.entryId = sceneUrl.searchParams.get('id')
+		|| sceneUrl.pathname.match(/\/sample\/(.*)\//)[1];
+
+	release.description = query.content('.detail-box .description');
+
+	release.date = query.date([
+		'.detail-box .date', // cospuri
+		'//div[contains(@class, "details")]//span[strong[contains(text(), "Date")]]', // cute butts
+	], 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ });
+
+	release.duration = query.duration([
+		'.detail-box .length',
+		'//div[contains(@class, "details")]//span[strong[contains(text(), "Runtime")]]', // cute butts
+	]);
+
+	release.photoCount = query.number([
+		'.detail-box .photos',
+		'//div[contains(@class, "details")]//span[strong[contains(text(), "Photos")]]', // cute butts
+	]);
+
+	release.actors = query.all('.sample-model a, .model a').map((actorEl) => ({
+		name: unprint.query.content(actorEl),
+		url: unprint.query.url(actorEl, null, { origin: entity.url }),
+	}));
+
+	release.tags = [...query.contents('.tag'), query.content('.sample-channel')].filter(Boolean);
+
+	const posterBackground = query.style('.player');
+	const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
+
+	if (posterUrl) {
+		release.poster = posterUrl;
+	}
+
+	// media URLs are derived from the entity slug and entry ID; per photo the plain name is tried before the 's' suffixed one
+	release.photos = query.attributes('.thumb a', 'data-asset').map((photoIndex) => [
+		`https://img.${entity.slug}.com/preview/${release.entryId}/${photoIndex}.jpg`,
+		`https://img.${entity.slug}.com/preview/${release.entryId}/${photoIndex}s.jpg`,
+	]);
+
+	release.trailer = `https://img.${entity.slug}.com/preview/${release.entryId}/sample.mp4`;
+
+	if (query.exists('.detail-box .fourK')) {
+		release.qualities = [2160];
+	}
+
+	return release;
+}
+
+// Fellatio Japan
+// returns the matching tile from the actor page, null when it can't be determined, or res.status on a failed request
+async function fetchSceneFellatio(url, channel, baseRelease) {
+	// without a base release from the latest page there is no entry ID or actor page to look up
+	if (!baseRelease?.entryId || !baseRelease.path) {
+		return null;
+	}
+
+	// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
+	// can't use front page like on Sperm Mania because dates are missing
+	const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' });
+
+	if (res.ok) {
+		const tiles = scrapeAllTiles(res.context, channel);
+
+		return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
+	}
+
+	return res.status;
+}
+
+// parses a measurements string with B/W/H prefixed numbers, e.g. 'B85-C W58 H88' yields cup C, bust 85, waist 58, hip 88
+function extractSizes(sizes) {
+	return {
+		cup: sizes.match(/b\d+-(\w+)/i)?.[1],
+		bust: unprint.extractNumber(sizes.match(/b(\d+)/i)?.[1]),
+		waist: unprint.extractNumber(sizes.match(/w(\d+)/i)?.[1]),
+		hip: unprint.extractNumber(sizes.match(/h(\d+)/i)?.[1]),
+	};
+}
+
+// SpermMania, Handjob Japan
+// builds a profile from the bio rows on an actor page; several layouts are covered by the combined selectors
+function scrapeProfile({ query }, channel, url) {
+	const profile = { url };
+
+	// every bio row becomes a [slugified_label, value] entry
+	const bio = Object.fromEntries(query.all('.actr-item, .profile tr, #profile tr, .profile-info li, .model-detail .item, .model-item').map((bioEl) => [
+		slugify(unprint.query.content(bioEl, 'td, b, .model-item-title') || unprint.query.text(bioEl), '_'),
+		unprint.query.url(bioEl) || unprint.query.content(bioEl, 'strong, td:last-child, span, .model-item-contents') || unprint.query.text(bioEl), // ensure social links have priority over text
+	]));
+
+	profile.birthPlace = bio.from || bio.country;
+
+	profile.description = [
+		bio.hobbies && `Hobbies: ${bio.hobbies}`,
+		bio.skills && `Skills: ${bio.skills}`,
+		bio.fun_fact,
+		query.content('h2 + p'),
+	].filter(Boolean).join('. ') || null;
+
+	profile.age = unprint.extractNumber(bio.age);
+	profile.height = unprint.extractNumber(bio.height);
+
+	const sizes = bio.sizes || bio.measurements;
+
+	if (/b\d+/i.test(sizes)) {
+		const measurements = extractSizes(sizes);
+
+		profile.cup = measurements.cup;
+		profile.bust = measurements.bust;
+		profile.waist = measurements.waist;
+		profile.hip = measurements.hip;
+	} else {
+		// keep the raw value whichever row it came from, so an unprefixed 'sizes' row is not silently dropped
+		profile.measurements = sizes;
+	}
+
+	profile.foot = unprint.extractNumber(bio.foot_size);
+	profile.leg = unprint.extractNumber(bio.leg_length);
+	profile.thigh = unprint.extractNumber(bio.thigh_width);
+
+	profile.social = [bio.homepage, bio.twitter].filter(Boolean);
+
+	const avatar = query.img('.scene-array img[src*="/actress"], img.portrait, .profile-img img')
+		|| query.img('.costume-bg', { attribute: 'data-img' })
+		|| query.style('.model-profile, #profile, .carousel-item')?.['background-image']?.match(/url\((.*)\)/)?.[1];
+
+	if (avatar) {
+		profile.avatar = [
+			avatar.replace('-header.jpg', '.jpg'), // Transex Japan, prefer avatar over header banner
+			avatar,
+		];
+	}
+
+	// the first costume image may already serve as avatar; a header banner is kept as an extra photo
+	profile.photos = [
+		...query.imgs('.costume-bg', { attribute: 'data-img' }).slice(1),
+		avatar?.includes('-header.jpg') && avatar,
+	].filter(Boolean);
+
+	return profile;
+}
+
+// Uralesbian
+// builds a profile from the text nodes following the labelled <strong> elements
+function scrapeProfileLesbian({ query, html }, channel, url) {
+	const profile = { url };
+
+	profile.age = query.number('//strong[contains(text(), "Age")]/following-sibling::text()[1]');
+	profile.height = query.number('//strong[contains(text(), "Height")]/following-sibling::text()[1]');
+	profile.birthPlace = query.content('//img[contains(@src, "from")]/following-sibling::text()[1]')?.replace(/^from/i, '').trim() || null;
+
+	const sizes = query.content('//strong[contains(text(), "Measurements")]/following-sibling::text()[1]');
+
+	if (/b\d+/i.test(sizes)) {
+		const measurements = extractSizes(sizes);
+
+		profile.cup = measurements.cup;
+		profile.bust = measurements.bust;
+		profile.waist = measurements.waist;
+		profile.hip = measurements.hip;
+	}
+
+	// the avatar URL is picked out of the raw HTML; dots are escaped so only the literal host name matches
+	profile.avatar = html.match(/https:\/\/img\.uralesbian\.com\/models\/\d+\.jpg/)?.[0];
+
+	return profile;
+}
+
+// fetches an actor page and scrapes it with the layout-specific profile scraper
+// uses the known actor URL when available, otherwise the configured actors path, otherwise <entity url>/actress/<slug>
+// returns the profile, or res.status when the request did not succeed
+async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) {
+	// parameters are optional, an entity without any must not crash the lookup
+	const url = actorUrl || (parameters?.actors
+		? `${parameters.actors}/${slug}`
+		: `${entity.url}/actress/${slug}`);
+
+	const res = await unprint.get(url);
+
+	if (res.ok) {
+		if (parameters?.layout === 'lesbian') {
+			return scrapeProfileLesbian(res.context, entity, url);
+		}
+
+		return scrapeProfile(res.context, entity, url);
+	}
+
+	return res.status;
+}
+
+// default scraper methods at the top level, layout-specific overrides keyed by layout name
+module.exports = {
+	fetchLatest,
+	fetchProfile,
+	cospuri: {
+		fetchLatest: fetchLatestCospuri,
+		scrapeScene: scrapeSceneCospuri,
+		fetchProfile,
+	},
+	fellatio: {
+		fetchLatest: fetchLatestFellatio,
+		fetchScene: fetchSceneFellatio,
+		fetchProfile,
+	},
+	handjob: {
+		fetchLatest: fetchLatestHandjob,
+		fetchProfile,
+	},
+	legs: {
+		fetchLatest: fetchLatestLegs,
+		fetchProfile,
+	},
+	facefuck: {
+		fetchLatest: fetchLatestFacefuck,
+	},
+	trans: {
+		fetchLatest: fetchLatestTrans,
+		fetchProfile,
+	},
+	lesbian: {
+		fetchLatest: fetchLatestLesbian,
+		fetchProfile,
+	},
+	buffet: {
+		fetchLatest: fetchLatestBuffet,
+		scrapeScene: scrapeSceneBuffet,
+		fetchProfile,
+	},
+};
diff --git a/src/scrapers/spizoo.js b/src/scrapers/spizoo.js index aab0988c..8929af79 100755 --- a/src/scrapers/spizoo.js +++ b/src/scrapers/spizoo.js @@ -1,7 +1,9 @@ 'use strict'; +const config = require('config'); const unprint = require('unprint'); const format = require('template-format'); +const { HttpsProxyAgent } = require('https-proxy-agent'); const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); @@ -137,11 +139,14 @@ function scrapeProfile({ query, el }) { return profile; } +const agent = new HttpsProxyAgent(`http://${config.proxy.host}:${config.proxy.port}`); + async function fetchLatest(channel, page) { // const res = await 
qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail'); const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, { selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail', + httpsAgent: agent, }); if (res.ok) { diff --git a/src/store-releases.js b/src/store-releases.js index 3684c7f8..d5f6c6ce 100755 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -349,7 +349,7 @@ async function storeMovies(movies, useBatchId) { return []; } - const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies'); + const { uniqueReleases, duplicateReleaseEntries } = await filterDuplicateReleases(movies, 'movies'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie'))); @@ -362,7 +362,15 @@ async function storeMovies(movies, useBatchId) { await associateReleaseMedia(moviesWithId, 'movie'); - return moviesWithId; + return [...moviesWithId, ...duplicateReleaseEntries.map((entry) => ({ + // used to map new movie scenes to existing movie entries + id: entry.id, + entryId: entry.entry_id, + entityId: entry.entity_id, + entity: { + id: entry.entity_id, + }, + }))]; } async function storeSeries(series, useBatchId) { diff --git a/src/utils/http.js b/src/utils/http.js index 85eec706..62bef48f 100755 --- a/src/utils/http.js +++ b/src/utils/http.js @@ -483,4 +483,5 @@ module.exports = { getCookieJar, destroyBypassSessions, destroyBrowserSessions, + proxyAgent, };