Added Snow Valley (Sperm Mania) scraper.

This commit is contained in:
DebaucheryLibrarian 2024-10-16 02:39:11 +02:00
parent 91e31e8ce7
commit 1950dd2e62
16 changed files with 1234 additions and 79 deletions

View File

@ -221,6 +221,7 @@ module.exports = {
'vrcosplayx', 'vrcosplayx',
'teamskeet', 'teamskeet',
'mylf', 'mylf',
'spermmania',
[ [
'letsdoeit', 'letsdoeit',
'mamacitaz', 'mamacitaz',

View File

@ -0,0 +1,35 @@
exports.up = async (knex) => {
await knex.schema.alterTable('actors', (table) => {
table.integer('leg');
table.integer('foot');
table.integer('thigh');
});
await knex.schema.alterTable('actors_profiles', (table) => {
table.integer('leg');
table.integer('foot');
table.integer('thigh');
});
await knex.schema.alterTable('releases', (table) => {
table.integer('video_count');
});
};
exports.down = async (knex) => {
await knex.schema.alterTable('actors', (table) => {
table.dropColumn('leg');
table.dropColumn('foot');
table.dropColumn('thigh');
});
await knex.schema.alterTable('actors_profiles', (table) => {
table.dropColumn('leg');
table.dropColumn('foot');
table.dropColumn('thigh');
});
await knex.schema.alterTable('releases', (table) => {
table.dropColumn('video_count');
});
};

116
package-lock.json generated
View File

@ -53,6 +53,7 @@
"graphile-utils": "^4.14.0", "graphile-utils": "^4.14.0",
"graphql": "^15.8.0", "graphql": "^15.8.0",
"html-entities": "^2.4.0", "html-entities": "^2.4.0",
"https-proxy-agent": "^7.0.5",
"iconv-lite": "^0.6.3", "iconv-lite": "^0.6.3",
"inquirer": "^8.2.6", "inquirer": "^8.2.6",
"inspector-api": "^1.4.8", "inspector-api": "^1.4.8",
@ -88,7 +89,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.37", "ua-parser-js": "^1.0.37",
"undici": "^5.28.1", "undici": "^5.28.1",
"unprint": "^0.11.9", "unprint": "^0.11.13",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3", "v-tooltip": "^2.1.3",
"video.js": "^8.6.1", "video.js": "^8.6.1",
@ -3861,6 +3862,18 @@
"node-pre-gyp": "bin/node-pre-gyp" "node-pre-gyp": "bin/node-pre-gyp"
} }
}, },
"node_modules/@mapbox/node-pre-gyp/node_modules/https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==",
"dependencies": {
"agent-base": "6",
"debug": "4"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/@mapbox/node-pre-gyp/node_modules/lru-cache": { "node_modules/@mapbox/node-pre-gyp/node_modules/lru-cache": {
"version": "6.0.0", "version": "6.0.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
@ -10868,15 +10881,26 @@
} }
}, },
"node_modules/https-proxy-agent": { "node_modules/https-proxy-agent": {
"version": "5.0.1", "version": "7.0.5",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", "integrity": "sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==",
"dependencies": { "dependencies": {
"agent-base": "6", "agent-base": "^7.0.2",
"debug": "4" "debug": "4"
}, },
"engines": { "engines": {
"node": ">= 6" "node": ">= 14"
}
},
"node_modules/https-proxy-agent/node_modules/agent-base": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
"integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
"dependencies": {
"debug": "^4.3.4"
},
"engines": {
"node": ">= 14"
} }
}, },
"node_modules/human-signals": { "node_modules/human-signals": {
@ -12100,18 +12124,6 @@
"node": ">= 14" "node": ">= 14"
} }
}, },
"node_modules/jsdom/node_modules/https-proxy-agent": {
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz",
"integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==",
"dependencies": {
"agent-base": "^7.0.2",
"debug": "4"
},
"engines": {
"node": ">= 14"
}
},
"node_modules/jsdom/node_modules/whatwg-mimetype": { "node_modules/jsdom/node_modules/whatwg-mimetype": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz",
@ -12806,6 +12818,19 @@
"node": "^12.13.0 || ^14.15.0 || >=16.0.0" "node": "^12.13.0 || ^14.15.0 || >=16.0.0"
} }
}, },
"node_modules/make-fetch-happen/node_modules/https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==",
"dev": true,
"dependencies": {
"agent-base": "6",
"debug": "4"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/make-fetch-happen/node_modules/lru-cache": { "node_modules/make-fetch-happen/node_modules/lru-cache": {
"version": "7.18.3", "version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -13613,6 +13638,19 @@
"node": ">= 6" "node": ">= 6"
} }
}, },
"node_modules/node-gyp/node_modules/https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==",
"dev": true,
"dependencies": {
"agent-base": "6",
"debug": "4"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/node-gyp/node_modules/lru-cache": { "node_modules/node-gyp/node_modules/lru-cache": {
"version": "6.0.0", "version": "6.0.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
@ -14411,18 +14449,6 @@
"node": ">= 14" "node": ">= 14"
} }
}, },
"node_modules/pac-proxy-agent/node_modules/https-proxy-agent": {
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz",
"integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==",
"dependencies": {
"agent-base": "^7.0.2",
"debug": "4"
},
"engines": {
"node": ">= 14"
}
},
"node_modules/pac-proxy-agent/node_modules/socks-proxy-agent": { "node_modules/pac-proxy-agent/node_modules/socks-proxy-agent": {
"version": "8.0.2", "version": "8.0.2",
"resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.2.tgz", "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.2.tgz",
@ -15354,18 +15380,6 @@
"node": ">= 14" "node": ">= 14"
} }
}, },
"node_modules/proxy-agent/node_modules/https-proxy-agent": {
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz",
"integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==",
"dependencies": {
"agent-base": "^7.0.2",
"debug": "4"
},
"engines": {
"node": ">= 14"
}
},
"node_modules/proxy-agent/node_modules/lru-cache": { "node_modules/proxy-agent/node_modules/lru-cache": {
"version": "7.18.3", "version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -18298,9 +18312,9 @@
} }
}, },
"node_modules/unprint": { "node_modules/unprint": {
"version": "0.11.9", "version": "0.11.13",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.9.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.13.tgz",
"integrity": "sha512-ROb7d1o4w0ATTgMW970/z3xURbslfc2D/AmYTzT5RoXsaSbQZTXa5lSCQ/iZGGyzrTX1UGVqot0+AQIYf2c3IQ==", "integrity": "sha512-dEa3zdaXtK2TmRVWf4APunTUXZfnYb0Yv4RlddpFVA8fgYf0ER/m0JN/ZcbEfqg3x5YPiJEHpgLGH9pMv5lbqA==",
"dependencies": { "dependencies": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",
@ -18424,6 +18438,18 @@
"node": ">= 6" "node": ">= 6"
} }
}, },
"node_modules/unprint/node_modules/https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
"integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==",
"dependencies": {
"agent-base": "6",
"debug": "4"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/unprint/node_modules/iconv-lite": { "node_modules/unprint/node_modules/iconv-lite": {
"version": "0.4.24", "version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",

View File

@ -112,6 +112,7 @@
"graphile-utils": "^4.14.0", "graphile-utils": "^4.14.0",
"graphql": "^15.8.0", "graphql": "^15.8.0",
"html-entities": "^2.4.0", "html-entities": "^2.4.0",
"https-proxy-agent": "^7.0.5",
"iconv-lite": "^0.6.3", "iconv-lite": "^0.6.3",
"inquirer": "^8.2.6", "inquirer": "^8.2.6",
"inspector-api": "^1.4.8", "inspector-api": "^1.4.8",
@ -147,7 +148,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.37", "ua-parser-js": "^1.0.37",
"undici": "^5.28.1", "undici": "^5.28.1",
"unprint": "^0.11.9", "unprint": "^0.11.13",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3", "v-tooltip": "^2.1.3",
"video.js": "^8.6.1", "video.js": "^8.6.1",

View File

@ -328,6 +328,11 @@ const tags = [
name: 'corporal punishment', name: 'corporal punishment',
slug: 'corporal-punishment', slug: 'corporal-punishment',
}, },
{
name: 'cosplay',
slug: 'cosplay',
group: 'roleplay',
},
{ {
name: 'couples', name: 'couples',
slug: 'couples', slug: 'couples',
@ -355,6 +360,14 @@ const tags = [
name: 'cum licking', name: 'cum licking',
slug: 'cum-licking', slug: 'cum-licking',
}, },
{
name: 'cum fetish',
slug: 'cum-fetish',
},
{
name: 'cum play',
slug: 'cum-play',
},
{ {
name: 'cum on butt', name: 'cum on butt',
slug: 'cum-on-butt', slug: 'cum-on-butt',
@ -825,7 +838,12 @@ const tags = [
{ {
name: 'cum in mouth', name: 'cum in mouth',
slug: 'cum-in-mouth', slug: 'cum-in-mouth',
description: 'A guy ejaculating in someone\'s mouth. If they keep their lips wrapped around his cock, it is an [oral creampie](/tag/oral-creampie). They may not be able to resist [swallowing](/tag/swallowing) the cum.', description: 'A cock ejaculating in your mouth. If you keep your lips wrapped around the cock, it is an [oral creampie](/tag/oral-creampie). You may not be able to resist [swallowing](/tag/swallowing) the cum.',
group: 'finish',
},
{
name: 'cum in panty',
slug: 'cum-in-panty',
group: 'finish', group: 'finish',
}, },
{ {
@ -996,6 +1014,12 @@ const tags = [
{ {
name: 'solo', name: 'solo',
slug: 'solo', slug: 'solo',
description: 'You don\'t need a man... or a woman! No one does it better than yourself.',
},
{
name: 'solo foreplay',
slug: 'solo-foreplay',
description: 'Getting yourself all nice and wet before a good fucking.',
}, },
{ {
name: 'skinny', name: 'skinny',
@ -1289,6 +1313,18 @@ const tags = [
slug: 'scripts', slug: 'scripts',
description: 'Scripts for haptic sex toys.', description: 'Scripts for haptic sex toys.',
}, },
{
name: 'cat ears',
slug: 'cat-ears',
},
{
name: 'neko',
slug: 'neko',
},
{
name: 'ahegao',
slug: 'ahegao',
},
]; ];
const aliases = [ const aliases = [
@ -1411,6 +1447,10 @@ const aliases = [
name: 'bald pussy', name: 'bald pussy',
for: 'shaved', for: 'shaved',
}, },
{
name: 'hairless pussy',
for: 'shaved',
},
{ {
name: 'ball gag', name: 'ball gag',
for: 'gag', for: 'gag',
@ -2313,6 +2353,10 @@ const aliases = [
for: 'titty-fucking', for: 'titty-fucking',
secondary: true, secondary: true,
}, },
{
name: 'titjob',
for: 'titty-fucking',
},
{ {
name: 'tp', name: 'tp',
for: 'triple-penetration', for: 'triple-penetration',
@ -2639,6 +2683,14 @@ const aliases = [
name: 'sex toy scripts', name: 'sex toy scripts',
for: 'scripts', for: 'scripts',
}, },
{
name: 'mouth cumshot',
for: 'cum-in-mouth',
},
{
name: 'oral cumshot',
for: 'cum-in-mouth',
},
]; ];
const priorities = [ // higher index is higher priority const priorities = [ // higher index is higher priority

View File

@ -653,6 +653,11 @@ const networks = [
parentSession: false, parentSession: false,
}, },
}, },
{
slug: 'snowvalley',
name: 'Snow Valley Group',
hasLogo: false,
},
{ {
slug: 'spizoo', slug: 'spizoo',
name: 'Spizoo', name: 'Spizoo',

View File

@ -10720,6 +10720,121 @@ const sites = [
tags: ['lesbian'], tags: ['lesbian'],
parent: 'sexyhub', parent: 'sexyhub',
}, },
// SNOW VALLEY GROUP
{
slug: 'spermmania',
name: 'Sperm Mania',
url: 'https://www.spermmania.com',
tags: ['cum-fetish'],
independent: true,
parent: 'snowvalley',
},
{
slug: 'cospuri',
name: 'Cospuri',
url: 'https://www.cospuri.com',
tags: ['cosplay'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'cospuri',
actors: 'https://www.cospuri.com/model',
},
},
{
slug: 'cutebutts',
name: 'Cute Butts',
url: 'https://www.cutebutts.com',
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'cospuri',
actors: 'https://www.cutebutts.com/model',
},
},
{
slug: 'fellatiojapan',
name: 'Fellatio Japan',
url: 'https://www.fellatiojapan.com',
tags: ['blowjob', 'jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'fellatio',
actors: 'https://www.fellatiojapan.com/en/girl',
},
},
{
slug: 'handjobjapan',
name: 'Handjob Japan',
url: 'https://www.handjobjapan.com',
tags: ['handjob', 'jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'handjob',
actors: 'https://www.handjobjapan.com/en/models',
},
},
{
slug: 'legsjapan',
name: 'Legs Japan',
url: 'https://www.legsjapan.com',
tags: ['jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'legs',
actors: 'https://www.legsjapan.com/en/girl',
},
},
{
slug: 'uralesbian',
name: 'Ura Lesbian',
url: 'https://www.uralesbian.com',
tags: ['lesbian', 'jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'lesbian',
actors: 'https://www.uralesbian.com/en/model',
},
},
{
slug: 'tokyofacefuck',
name: 'Tokyo Facefuck',
url: 'https://www.tokyofacefuck.com',
tags: ['facefucking', 'blowjob', 'jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'facefuck',
},
},
{
slug: 'cumbuffet',
name: 'Cum Buffet',
url: 'https://www.cumbuffet.com',
tags: ['swallowing'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'buffet',
actors: 'https://www.cumbuffet.com/girl',
},
},
{
slug: 'transexjapan',
name: 'Transex Japan',
url: 'https://www.transexjapan.com',
tags: ['transsexual', 'jav'],
independent: true,
parent: 'snowvalley',
parameters: {
layout: 'trans',
actors: 'https://www.transexjapan.com/model',
},
},
// SPIZOO // SPIZOO
{ {
slug: 'spizoo', slug: 'spizoo',
@ -10734,6 +10849,12 @@ const sites = [
tags: ['stripper'], tags: ['stripper'],
parent: 'spizoo', parent: 'spizoo',
}, },
{
slug: 'creamher',
name: 'Goth Girlfriends',
url: 'https://www.creamher.com',
parent: 'spizoo',
},
{ {
slug: 'gothgirlfriends', slug: 'gothgirlfriends',
name: 'Goth Girlfriends', name: 'Goth Girlfriends',

View File

@ -267,6 +267,9 @@ function curateActor(actor, withDetails = false, isProfile = false) {
bust: actor.bust, bust: actor.bust,
waist: actor.waist, waist: actor.waist,
hip: actor.hip, hip: actor.hip,
foot: actor.foot,
leg: actor.leg,
thigh: actor.thigh,
naturalBoobs: actor.natural_boobs, naturalBoobs: actor.natural_boobs,
penisLength: actor.penis_length, penisLength: actor.penis_length,
penisGirth: actor.penis_girth, penisGirth: actor.penis_girth,
@ -359,6 +362,9 @@ function curateProfileEntry(profile) {
cup: profile.cup, cup: profile.cup,
bust: profile.bust, bust: profile.bust,
waist: profile.waist, waist: profile.waist,
leg: profile.leg,
thigh: profile.thigh,
foot: profile.foot,
hip: profile.hip, hip: profile.hip,
penis_length: profile.penisLength, penis_length: profile.penisLength,
penis_girth: profile.penisGirth, penis_girth: profile.penisGirth,
@ -442,8 +448,13 @@ async function curateProfile(profile, actor) {
curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null; curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null;
curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null; curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
curatedProfile.leg = Number(profile.leg) || profile.leg?.match?.(/\d+/)?.[0] || null;
curatedProfile.thigh = Number(profile.thigh) || profile.thigh?.match?.(/\d+/)?.[0] || null;
curatedProfile.foot = Number(profile.foot) || profile.foot?.match?.(/\d+/)?.[0] || null;
// combined measurement value // combined measurement value
const measurements = profile.measurements?.match(/(\d+)(\w+)(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/); // ExCoGi uses x, Jules Jordan has spaces between the dashes // ExCoGi uses x, Jules Jordan has spaces between the dashes, SpermMenia/Cum Buffet sometimes misses cup
const measurements = profile.measurements?.match(/(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i);
if (measurements) { if (measurements) {
curatedProfile.bust = Number(measurements[1]) || null; curatedProfile.bust = Number(measurements[1]) || null;
@ -589,6 +600,9 @@ async function interpolateProfiles(actorIdsOrNames) {
'bust', 'bust',
'waist', 'waist',
'hip', 'hip',
'leg',
'thigh',
'foot',
'shoe_size', 'shoe_size',
'penis_length', 'penis_length',
'penis_girth', 'penis_girth',

View File

@ -130,7 +130,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
return baseRelease; return baseRelease;
} }
if ((!baseRelease.url && !baseRelease.path) || !argv.deep) { if ((!baseRelease.url && !baseRelease.path && !baseRelease.forceDeep) || !argv.deep) {
return { return {
...baseRelease, ...baseRelease,
entity, entity,

View File

@ -132,6 +132,7 @@ function toBaseSource(rawSource) {
if (rawSource.extract) baseSource.extract = rawSource.extract; if (rawSource.extract) baseSource.extract = rawSource.extract;
if (rawSource.expectType) baseSource.expectType = rawSource.expectType; if (rawSource.expectType) baseSource.expectType = rawSource.expectType;
if (typeof rawSource.followRedirects === 'boolean') baseSource.followRedirects = rawSource.followRedirects;
if (rawSource.stream) { if (rawSource.stream) {
baseSource.src = rawSource.stream; baseSource.src = rawSource.stream;
@ -623,6 +624,7 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
...(source.host && { host: source.host }), ...(source.host && { host: source.host }),
}, },
stream: true, // sources are fetched in parallel, don't gobble up memory stream: true, // sources are fetched in parallel, don't gobble up memory
followRedirects: source.followRedirects,
transforms: [hashStream], transforms: [hashStream],
destination: tempFileTarget, destination: tempFileTarget,
...(source.interval && { interval: source.interval }), ...(source.interval && { interval: source.interval }),

View File

@ -28,7 +28,7 @@ function getEntryId(html) {
function getEntryIdFromTitle(release) { function getEntryIdFromTitle(release) {
// return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page // return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page
// return slugify(release.title); // return slugify(release.title);
return slugify([release.title, ...(release.actors?.map((actor) => actor.name).toSorted() || [])]); return slugify([release.title, ...(release.actors?.map((actor) => actor.name || actor).toSorted() || [])]);
} }
function scrapeAll(scenes, site, entryIdFromTitle) { function scrapeAll(scenes, site, entryIdFromTitle) {
@ -226,13 +226,13 @@ async function scrapeScene({ html, query }, context) {
}))); })));
} }
if (query.exists('.update_dvds a')) { if (query.exists('.player-scene-description a[href*="/dvd"]')) {
release.movie = { release.movie = {
url: query.url('.update_dvds a'), url: query.url('.player-scene-description a[href*="/dvd"]'),
title: query.cnt('.update_dvds a'), title: query.content('.player-scene-description a[href*="/dvd"]'),
}; };
release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', ''); release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase();
} }
release.stars = query.number('.avg_rating'); release.stars = query.number('.avg_rating');
@ -244,28 +244,40 @@ async function scrapeScene({ html, query }, context) {
return release; return release;
} }
function scrapeMovie({ el, query }, url, site) { function scrapeMovie({ query }, { url }) {
const movie = { url, site }; const movie = {};
movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase(); movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase();
movie.title = query.cnt('.title_bar span'); movie.title = query.attribute('meta[property="og:title"]', 'content');
movie.covers = query.urls('#dvd-cover-flip > a');
movie.channel = slugify(query.q('.update_date a', true), '');
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href); movie.covers = [query.img('img.dvd_box')]; // -2x etc is likely upscaled
const sceneQus = qu.initAll(el, '.dvd_details');
const scenes = scrapeAll(sceneQus, site);
const curatedScenes = scenes const sceneTitles = query.contents('.title-heading-content-black-dvd');
?.map((scene) => ({ ...scene, movie }))
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
movie.date = curatedScenes?.[0]?.date; const scenes = query.all('.grid-container-scene').map((sceneEl, index) => {
const scene = {};
return { scene.url = unprint.query.url(sceneEl, 'a[href*="/scenes"]');
...movie, scene.title = sceneTitles[index];
...(curatedScenes && { scenes: curatedScenes }),
}; scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY');
scene.actors = unprint.query.contents(sceneEl, '.update_models a');
scene.entryId = getEntryIdFromTitle(scene);
console.log(scene);
return scene;
});
movie.scenes = scenes?.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
movie.date = movie.scenes?.[0]?.date;
movie.datePrecision = 'month';
console.log('jj movie', movie);
return movie;
} }
function scrapeProfile({ query }, url, name, entity) { function scrapeProfile({ query }, url, name, entity) {
@ -325,12 +337,6 @@ async function fetchUpcoming(site) {
return res.status; return res.status;
} }
async function fetchMovie(url, site) {
const res = await qu.get(url);
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
async function fetchProfile({ name: actorName, url }, entity) { async function fetchProfile({ name: actorName, url }, entity) {
const actorSlugA = slugify(actorName, ''); const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName, '-'); const actorSlugB = slugify(actorName, '-');
@ -364,8 +370,8 @@ async function fetchProfile({ name: actorName, url }, entity) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchMovie,
fetchProfile, fetchProfile,
fetchUpcoming, fetchUpcoming,
scrapeScene, scrapeScene,
scrapeMovie,
}; };

View File

@ -63,6 +63,7 @@ const radical = require('./radical');
const rickysroom = require('./rickysroom'); const rickysroom = require('./rickysroom');
const sexlikereal = require('./sexlikereal'); const sexlikereal = require('./sexlikereal');
const score = require('./score'); const score = require('./score');
const snowvalley = require('./snowvalley');
const spizoo = require('./spizoo'); const spizoo = require('./spizoo');
const teamskeet = require('./teamskeet'); const teamskeet = require('./teamskeet');
const teencoreclub = require('./teencoreclub'); const teencoreclub = require('./teencoreclub');
@ -163,6 +164,7 @@ const scrapers = {
score, score,
sexlikereal, sexlikereal,
sexyhub: aylo, sexyhub: aylo,
snowvalley,
spizoo, spizoo,
swallowsalon: julesjordan, swallowsalon: julesjordan,
theflourish: archangel, theflourish: archangel,
@ -309,6 +311,15 @@ const scrapers = {
sexyhub: aylo, sexyhub: aylo,
silverstonedvd: famedigital, silverstonedvd: famedigital,
silviasaint: famedigital, silviasaint: famedigital,
spermmania: snowvalley,
handjobjapan: snowvalley,
fellatiojapan: snowvalley,
legsjapan: snowvalley,
cumbuffet: snowvalley,
cospuri: snowvalley,
cutebutts: snowvalley,
transexjapan: snowvalley,
uralesbian: snowvalley,
spizoo, spizoo,
swallowed: mikeadriano, swallowed: mikeadriano,
milfcandy: archangel, milfcandy: archangel,

867
src/scrapers/snowvalley.js Executable file
View File

@ -0,0 +1,867 @@
'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
// maps a lowercased scene type label, as read from the page, to the tags it implies;
// the scrapers below look labels up here, and a label missing from the map yields no tags
const tagsMap = {
'body bukkake': ['bukkake'],
'creampie gangbang': ['gangbang', 'creampie'],
'cum handjob': ['handjob'],
'facial bukkake': ['facial', 'bukkake'],
'massive creampie': ['creampie'],
'massive cum handjob': ['handjob'],
'panty cum': ['cum-in-panty'],
'pussy bukkake': ['cum-on-pussy'],
};
// derives a lowercase entry ID from the first media URL found on the release, checked in order:
// poster, trailer, photos; each may be a string, a source object with a `src` property,
// or a (nested) array of fallbacks; returns undefined when there is no media or the URL
// contains no preview/, samples/ or tour/ directory
function entryIdFromMedia(release) {
	const firstUrl = [release.poster, release.trailer, ...(release.photos || [])]
		.flat()
		// curated photos are { src, followRedirects } objects rather than plain strings
		.map((source) => (typeof source === 'string' ? source : source?.src))
		.find((source) => typeof source === 'string' && source.length > 0);

	// greedy capture: everything between the marker directory and the last slash of the URL
	return firstUrl?.match(/(?:preview|samples|tour)\/(.*)\//)?.[1].toLowerCase();
}
// scrapes the composed sample blocks into releases, then fills in any field the block
// did not set from the tile sharing the same entry ID
// NOTE(review): the original had URL and date lookups commented out, presumably because the sample feed lacks them
function scrapeAll(scenes, tilesByEntryId, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.content('.sample-title');
		release.duration = query.duration('//div[contains(text(), "Runtime")]');

		release.actors = query.all('a[href*="actress/"]').map((actorEl) => ({ // actors can be only in title or dedicated field
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		release.tags = tagsMap[query.content('a[href*="type/"]')?.toLowerCase()];

		const posterUrl = query.style('.player')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		// only set the poster when a URL was extracted, so the tile's poster can still fill the gap
		if (posterUrl) {
			release.poster = posterUrl;
		}

		// each thumb yields [linked image, thumbnail]; either may be missing, and links to the join page are dropped
		release.photos = query.all('.sample-thumbs .thumb a').map((linkEl) => [
			unprint.query.url(linkEl, null),
			unprint.query.img(linkEl, 'img'),
		].filter((src) => src && !src.includes('join')));

		release.trailer = query.video('.player source');

		release.photoCount = query.number('//div[contains(text(), "Photos")]');
		release.cumshots = query.number('//div[contains(text(), "Cumshots")]');

		release.entryId = entryIdFromMedia(release);

		// without an entry ID there is nothing to match on; never fall through to a tile keyed "undefined"
		const tile = release.entryId ? tilesByEntryId?.[release.entryId] : null;

		if (tile) {
			Object.entries(tile).forEach(([key, value]) => {
				if (!Object.hasOwn(release, key)) {
					release[key] = value;
				}
			});
		}

		return release;
	});
}
// page has no container divs, so a block is composed from the starting element and all following
// siblings, up to but not including the sibling carrying the 'join' link that marks the end of the block;
// returns an unprint context when init is true, otherwise the accumulated HTML string appended to acc
function composeBlock(element, init = true, acc = '') {
	let html = acc;
	let current = element;

	while (current) {
		html += current.outerHTML;

		const next = current.nextElementSibling;

		// the final block may not be followed by a join element; stop rather than dereference null
		if (!next) {
			break;
		}

		// image albums also contain a join link, make sure not to select that one
		const isBlockEnd = next.className.includes('join')
			|| !!next.querySelector('.item-join, .join-link')
			|| !!next.querySelector('h2 a[href*="join"]');

		if (isBlockEnd) {
			break;
		}

		current = next;
	}

	return init ? unprint.init(html) : html;
}
// used for both Sperm Mania and Fellatio Japan, but different layouts;
// scrapes scene tiles into releases, reading photo count, cumshot count and fallback duration from the tile's text
function scrapeAllTiles(tiles, channel) {
	return tiles.map(({ query }) => {
		const release = {};

		// full text content of the tile; accessed null-safely below in case the tile is empty
		const sceneString = query.content();
		const originalEntryId = query.attribute('.scene-hover', 'data-path');

		// the entry ID is lowercased, but the teaser URL below needs the original casing
		release.entryId = originalEntryId?.toLowerCase();
		release.title = query.content('.scene-title');

		release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD');
		release.duration = query.duration('.data.orange') || unprint.extractDuration(sceneString?.match(/([\d:]+)\s*min/)?.[1]);

		release.actors = query.all('a[href*="actress/"], .sGirl a').map((actorEl) => ({ // actors can be only in title or dedicated field
			name: unprint.query.content(actorEl),
			// must be the link's URL, not the element itself
			url: channel.slug === 'fellatiojapan'
				? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}`
				: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean);

		const posterBackground = query.style('.scene-img');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// fallback list: the -lg and -med variants of the URL first, then the URL as found
			release.poster = [
				posterUrl
					.replace('-sm', '-lg')
					.replace('-med', '-lg'),
				posterUrl.replace('-sm', '-med'),
				posterUrl,
			];
		}

		release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`;

		release.photoCount = Number(sceneString?.match(/(\d+) photos/)?.[1]) || null;
		release.cumshots = Number(sceneString?.match(/(\d+) cumshots/)?.[1]) || null;

		return release;
	});
}
// Sperm Mania: fetches the channel's tour page and returns its scene tiles keyed by entry ID,
// or the response status when the request fails
async function fetchLatestTiles(channel) {
	const tourUrl = `${channel.url}/tour`;
	const res = await unprint.get(tourUrl, { selectAll: '.scene' });

	if (!res.ok) {
		return res.status;
	}

	const tileEntries = scrapeAllTiles(res.context, channel).map((tile) => [tile.entryId, tile]);

	return Object.fromEntries(tileEntries);
}
// Sperm Mania, sample feed with limited info; the samples page and the tour tiles are requested
// concurrently, and the tiles are handed to scrapeAll along with the composed sample blocks
async function fetchLatest(channel, page = 1) {
	const samplesRequest = unprint.get(`${channel.url}/samples?page=${page}`, { selectAll: '.sample-title, .item-title' });
	const tilesRequest = fetchLatestTiles(channel);

	const [res, tilesByEntryId] = await Promise.all([samplesRequest, tilesRequest]);

	if (!res.ok) {
		return res.status;
	}

	// every matched title element is the start of a block that has to be pieced together from its siblings
	const sampleBlocks = res.context.map(({ element }) => composeBlock(element));

	return scrapeAll(sampleBlocks, tilesByEntryId, channel);
}
// scrapes the sample tiles used by the 'cospuri' layout into releases;
// the entry ID is the scene URL's `id` query parameter, or else the path segment after /sample/
function scrapeAllCospuri(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.scene-thumb a');

		// a tile without a usable link must not abort the whole page; relative links resolve against the channel
		const sceneUrl = release.url ? new URL(release.url, channel.url) : null;

		release.entryId = sceneUrl?.searchParams.get('id')
			|| sceneUrl?.pathname.match(/\/sample\/(.*)\//)?.[1];

		release.title = query.content('.title');

		release.date = query.date('.date', 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ });
		release.duration = query.duration('.length');
		release.photoCount = query.number('.photos');

		release.actors = query.all('.model a[href*="/model"]').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		// the channel label shown with the model is added as a tag alongside the regular tags
		release.tags = [...query.contents('.tags .tag, .tag-box .tag'), query.content('.model .channel')].filter(Boolean);

		const posterBackground = query.style('.scene-thumb');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// fallback list: the -lg and -med variants of the URL first, then the URL as found
			release.poster = [
				posterUrl
					.replace('-med', '-lg')
					.replace('-sm', '-lg'),
				posterUrl.replace('-sm', '-med'),
				posterUrl,
			];
		}

		release.teaser = query.video('.scene-hover', { attribute: 'data-path' });

		return release;
	});
}
// Cospuri, Cute Butts, paginated sample tiles with full info;
// returns the scraped releases, or the response status when the request fails
async function fetchLatestCospuri(channel, page = 1) { // defaults to the first page, like fetchLatest
	const url = `${channel.url}/samples?page=${page}`;
	const res = await unprint.get(url, { selectAll: '.scene' });

	if (res.ok) {
		return scrapeAllCospuri(res.context, channel);
	}

	return res.status;
}
// turns thumbnail URLs into fallback lists of photo sources: the URL with the 's' suffix stripped
// from the file number (e.g. 01s.jpg -> 01.jpg) first, then the thumbnail URL itself; empty sources are dropped
function curatePhotos(sources) {
	return sources
		.filter(Boolean)
		.map((src) => [
			// the dot is escaped so only a literal '<number>s.jpg' file name is rewritten
			src.replace(/(\d+)s\.jpg/, (match, photoIndex) => `${photoIndex}.jpg`),
			src,
		].map((url) => ({
			src: url,
			// NOTE(review): presumably a redirect means the guessed URL does not exist, so the fallback is tried; confirm
			followRedirects: false,
		})));
}
// scrapes the composed Fellatio Japan sample blocks into releases; the blocks expose no title here,
// the entry ID is derived from the media URLs and the release path is set to the first actor's URL
function scrapeAllFellatio(scenes, channel) {
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
	});

	return scenes.map(({ query }) => {
		const release = {
			duration: query.duration('.tour-data'),
			photoCount: query.number('.tour-data', { match: /(\d+) photos/, matchIndex: 1 }),
			actors: query.all('.tour-data a[href*="girl/"]').map(toActor),
			tags: query.contents('.tour-data a[href*="tag/"]'),
		};

		const [, rawPosterUrl] = query.style('.player')?.background?.match(/url\((.*)\)/) || [];
		const posterUrl = rawPosterUrl?.trim();

		// the poster is only set when the player's background actually holds a URL
		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('.tour-thumb img'));
		release.trailer = query.video();

		release.entryId = entryIdFromMedia(release);
		release.path = release.actors[0]?.url;

		return release;
	});
}
// Fellatio Japan: every `.tour-data` element is expanded with composeBlock before scraping.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestFellatio(channel, page) {
	const res = await unprint.get(`${channel.url}/en/samples/?page=${page}`, { selectAll: '.tour-data' });

	if (!res.ok) {
		return res.status;
	}

	const blocks = res.context.map((head) => composeBlock(head.element));

	return scrapeAllFellatio(blocks, channel);
}
// Scrapes composed sample blocks; actors are plain comma-separated names, and the
// entry ID is derived from the media because no scene URL is read here.
function scrapeAllHandjob(scenes, _channel) {
	return scenes.map(({ query }) => {
		const castLine = query.text('.item-ltitle h1');

		const release = {
			title: query.content('.blurb'),
			duration: query.duration('.item-rtitle'),
			photoCount: query.number('//h3[contains(text(), "Scene Photos")]/strong'),
			// stays undefined when the heading is missing
			actors: castLine?.split(/,\s*/).map((name) => name.trim()),
		};

		// poster is the player's CSS background image
		const playerPoster = query.style('.player')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (playerPoster) {
			release.poster = playerPoster;
		}

		release.photos = curatePhotos(query.imgs('img.thumb, img.rthumb'));
		release.trailer = query.video();
		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
// Handjob Japan: every `.item-title` element is expanded with composeBlock before scraping.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestHandjob(channel, page) {
	const res = await unprint.get(`${channel.url}/en/samples/?page=${page}`, { selectAll: '.item-title' });

	if (!res.ok) {
		return res.status;
	}

	const blocks = res.context.map((head) => composeBlock(head.element));

	return scrapeAllHandjob(blocks, channel);
}
// Scrapes composed sample blocks; the entry ID is derived from the media because
// no scene URL is read here.
function scrapeAllLegs(scenes, channel) {
	// actor links are relative to the English section of the site
	const toActor = (actorEl) => ({
		name: unprint.query.content(actorEl),
		url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
	});

	return scenes.map(({ query }) => {
		const release = {
			title: query.content('.tContent h3 strong'),
			duration: query.duration('//h3[contains(text(), "length")]/strong'),
			photoCount: query.number('//h3[contains(text(), "photos")]/strong'),
			actors: query.all('.tContent a[href*="girl/"]').map((actorEl) => toActor(actorEl)),
			tags: query.contents('a[href*="tag/"]'),
		};

		// poster is the player's CSS background image
		const playerPoster = query.style('.player')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (playerPoster) {
			release.poster = playerPoster;
		}

		release.photos = curatePhotos(query.imgs('.tThumbs img'));
		release.trailer = query.video();
		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
// Legs Japan: every `.player` element is expanded with composeBlock before scraping.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestLegs(channel, page) {
	const res = await unprint.get(`${channel.url}/en/samples/?page=${page}`, { selectAll: '.player' });

	if (!res.ok) {
		return res.status;
	}

	const blocks = res.context.map((head) => composeBlock(head.element));

	return scrapeAllLegs(blocks, channel);
}
// Scrapes the `.girl.box` entries of the front page into base releases.
// Actors are plain comma-separated names from the info heading; the entry ID is
// derived from the media because no scene URL is read here.
function scrapeAllFacefuck(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.description = query.content('.infotxt');

		// a missing heading must not abort the whole page, and stray commas
		// should not produce empty actor names
		release.actors = query.content('.info h1')
			?.split(',')
			.map((actor) => actor.trim())
			.filter(Boolean) ?? [];

		// poster is the player's CSS background image
		const posterBackground = query.style('.player');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			release.poster = posterUrl;
		}

		release.photos = curatePhotos(query.imgs('.thumb img'));
		release.trailer = query.video();
		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
// Tokyo Facefuck: scenes are listed as `.girl.box` entries on the paginated front page.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestFacefuck(channel, page) {
	const res = await unprint.get(`${channel.url}/en/?page=${page}`, { selectAll: '.girl.box' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllFacefuck(res.context, channel);
}
// Scrapes [video block, album block] pairs into base releases.
// The album block supplies the photo count and photos; it may be absent when the
// page lists fewer albums than videos, in which case those fields are left empty.
function scrapeAllTrans(scenes) {
	return scenes.map(([{ query }, album]) => {
		const albumQuery = album?.query;
		const release = {};

		release.title = query.content('.sample-info h1');

		// a missing cast line must not abort the whole page
		release.actors = query.content('.sample-info a strong')
			?.split(',')
			.map((actor) => actor.trim())
			.filter(Boolean) ?? [];

		release.description = query.content('.sample-desc')?.replace('""', '') || null; // usually empty, but let's try it just in case
		release.duration = query.duration('.sample-info');
		release.photoCount = albumQuery?.number('.sample-info', { match: /(\d+) photos/i, matchIndex: 1 });

		// poster is the player's CSS background image
		const posterBackground = query.style('.player');
		const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (posterUrl) {
			// fall back to the -2 and -1 variants of the same image; dot escaped so
			// only the literal `-<digit>.jpg` suffix is swapped
			release.poster = [
				posterUrl,
				posterUrl.replace(/-\d\.jpg/, '-2.jpg'),
				posterUrl.replace(/-\d\.jpg/, '-1.jpg'),
			];
		}

		const albumStyles = albumQuery?.styles('.sample-lg, .sample-thumb') ?? [];

		release.photos = curatePhotos(albumStyles.map((style) => style['background-image']?.match(/url\((.*)\)/)?.[1]));
		release.trailer = query.video();
		release.entryId = entryIdFromMedia(release);

		return release;
	});
}
// Trans Sex Japan: videos and photo albums are separate sibling blocks inside `.stage`,
// so both lists are collected and paired up by position before scraping.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestTrans(channel, page) {
	const url = `${channel.url}/samples?page=${page}`;
	const res = await unprint.get(url, { select: '.stage' });

	// check the response before touching res.context, which a failed request may not carry
	if (!res.ok) {
		return res.status;
	}

	const stageEl = res.context.element;

	const videoHeads = unprint.initAll(stageEl, '//div[contains(@class, "col-1") and .//div[contains(@class, "player")]]');
	const albumHeads = unprint.initAll(stageEl, '//div[div[contains(@class, "sample-thumbs")]]');

	const videoBlocks = videoHeads.map(({ element }) => composeBlock(element));
	const albumBlocks = albumHeads.map(({ element }) => composeBlock(element));

	// the n-th album is assumed to belong to the n-th video
	const mergedContext = videoBlocks.map((context, index) => [context, albumBlocks[index]]);

	return scrapeAllTrans(mergedContext, channel);
}
// Scrapes update tiles into per-video entries; tiles without an entry ID and
// tiles whose overlay mentions 'photo' are dropped.
function scrapeAllLesbianTiles(scenes, channel) {
	const scrapeTile = ({ query }) => {
		const entryId = query.attribute('.scene-hover', 'data-path');

		// supplementary data, filter items without entry ID
		if (!entryId) {
			return null;
		}

		if (query.content('.content-overlay')?.includes('photo')) {
			return null;
		}

		const release = {
			entryId,
			title: query.content('.content-title'),
			duration: query.duration('.content-size-model'),
			actors: query.all('.content-size-model a').map((actorEl) => ({
				name: unprint.query.content(actorEl),
				url: unprint.query.url(actorEl, null, { origin: channel.url }),
			})),
			tags: query.contents('.content-tags a'),
		};

		// the thumbnail is set as a CSS background, pull the URL out of url(...)
		const thumbUrl = query.style('.vidthumb')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

		if (thumbUrl) {
			const large = thumbUrl.replace('-sm', '-lg').replace('-med', '-lg');
			const medium = thumbUrl.replace('-sm', '-med');

			// largest candidate first, original thumbnail as last resort
			release.poster = [large, medium, thumbUrl];
		}

		release.teaser = `${channel.url}/content/${entryId}/hover.mp4`;

		return release;
	};

	return scenes
		.map((scene) => scrapeTile(scene))
		.filter(Boolean);
}
function scrapeAllLesbian(scenes, channel, tiles) {
return scenes.map(({ query }) => {
const release = {};
if (query.exists('a[href*="samples"]')) {
return null;
}
release.actors = query.all('a[href*="model/"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: `${channel.url}/en/${unprint.query.url(actorEl, null)}`,
}));
release.duration = unprint.extractTimestamp(`${query.content('.tour-datum')?.split(' ').at(-1)}M`);
release.videoCount = query.number('.tour-datum', { match: /(\d+) hd scenes/i, matchIndex: 1 });
release.photoCount = query.number('//div[text()[contains(., "Photos")]]', { match: /(\d+) photos/i, matchIndex: 1 });
const posterBackground = query.style('.player');
const posterUrl = posterBackground?.background?.match(/url\((.*)\)/)?.[1]?.trim();
if (posterUrl) {
release.poster = posterUrl;
}
release.trailer = query.video();
release.photos = curatePhotos(query.imgs('.tour-thumb img'));
release.entryId = slugify([entryIdFromMedia(release), ...release.actors.map((actor) => actor.name)]);
const relatedTiles = tiles.filter((tile) => tile.actors.length === release.actors.length && tile.actors.every((tileActor) => release.actors.some((releaseActor) => tileActor.name === releaseActor.name)));
// if we found the same number of tiles as videos in this set, we can be pretty sure they relate to this set
// if there are more, we have no way of determining which of the videos belong to this set
if (relatedTiles.length === release.videoCount) {
const sortedTiles = relatedTiles.toSorted((tileA, tileB) => tileA.entryId.localeCompare(tileB.entryId)); // entry IDs appear chronological
release.tags = relatedTiles.flatMap((tile) => tile.tags);
release.chapters = sortedTiles.map((tile, index, array) => {
const time = array.slice(0, index).reduce((acc, relatedTile) => acc + relatedTile.duration, 0);
return {
title: tile.title,
time,
duration: tile.duration,
tags: tile.tags,
poster: tile.poster,
};
});
}
return release;
}).filter(Boolean);
}
// Uralesbian update tiles.
// Resolves to the scraped tiles, or to the HTTP status when the request fails.
async function fetchLatestLesbianTiles(channel, _page) {
	// each sample on the samples page represents multiple videos, so for this site we start with the update tiles instead
	// l=0 language, 0 = English, 1 = Japanese
	// s=1 unclear, seems to be some sort of set, s=1 is everything, s=4 is front page
	// c=5000 limit, only seems to apply to 'everything' set, seemingly unlimited by default but apply for good measure
	// no known pagination parameter at this moment, so we try to get everything
	const res = await unprint.get(`${channel.url}/getdata.php?l=0&c=5000`, { selectAll: '.content-obj' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllLesbianTiles(res.context, channel);
}
// Uralesbian: the samples page and the update tiles are fetched in parallel; the tiles
// are only used to enrich the sample sets with tags and chapters.
// Resolves to the scraped releases, or to the HTTP status when the samples request fails.
async function fetchLatestLesbian(channel, page) {
	const url = `${channel.url}/en/samples?page=${page}`;

	const [res, tiles] = await Promise.all([
		unprint.get(url, { selectAll: '.tour-obj' }),
		fetchLatestLesbianTiles(channel),
	]);

	if (res.ok) {
		// the tile fetch resolves to an HTTP status instead of an array when it fails;
		// tiles are supplementary, so scrape the sets without them in that case
		const usableTiles = Array.isArray(tiles) ? tiles : [];

		return scrapeAllLesbian(res.context, channel, usableTiles);
	}

	return res.status;
}
// Scrapes the video entries of the buffet layout's samples page into base releases.
function scrapeAllBuffet(scenes, channel) {
	return scenes.map(({ query }) => {
		const sceneUrl = query.url('.video-link');

		const release = {
			url: sceneUrl,
			// the ID is the /sample/<id>/ path segment
			entryId: new URL(sceneUrl).pathname.match(/sample\/(\w+)\//)[1],
			title: query.content('.video-link'),
			date: query.date('.date', 'MMM D, YYYY'),
			actors: query.all('.model-name a').map((actorEl) => ({
				name: unprint.query.content(actorEl),
				url: unprint.query.url(actorEl, null, { origin: channel.url }),
			})),
		};

		const thumbUrl = query.img('.thumb');

		if (thumbUrl) {
			const large = thumbUrl.replace('-sm', '-lg').replace('-med', '-lg');
			const medium = thumbUrl.replace('-sm', '-med');

			// largest candidate first, original thumbnail as last resort
			release.poster = [large, medium, thumbUrl];
		}

		return release;
	});
}
// Buffet layout: a single samples page, the page argument is ignored.
// Resolves to the scraped releases, or to the HTTP status when the request fails.
async function fetchLatestBuffet(channel, _page) {
	// no pagination
	const res = await unprint.get(`${channel.url}/samples`, { selectAll: '.videos .video' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAllBuffet(res.context, channel);
}
// Scrapes a buffet-layout scene page; the entry ID comes from the page URL.
function scrapeSceneBuffet({ query }, { url, entity }) {
	const release = {
		// the ID is the /sample/<id>/ path segment
		entryId: new URL(url).pathname.match(/sample\/(\w+)\//)[1],
		title: query.text('.pg-nav h2'),
		actors: query.all('.tags a[href*="girl/"]').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: entity.url }),
		})),
		tags: query.contents('.tag-list a'),
	};

	// poster is the player's CSS background image
	const playerPoster = query.style('.player')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

	if (playerPoster) {
		const large = playerPoster.replace('-sm', '-lg'); // should already be -lg, but just in case
		const small = playerPoster.replace('-lg', '-sm');

		release.poster = [large, small];
	}

	release.trailer = query.video('.player source');
	// full-size photos are read from the href attribute
	release.photos = query.imgs('.photos .photo', { attribute: 'href' });

	return release;
}
// Scrapes a Cospuri / Cute Butts scene page; photos and trailer are built from the
// entity slug and the entry ID rather than read from the page.
function scrapeSceneCospuri({ query }, { url, entity }) {
	const location = new URL(url);

	// the ID is either the ?id= parameter or the /sample/<id>/ path segment
	const entryId = location.searchParams.get('id')
		|| location.pathname.match(/\/sample\/(.*)\//)[1];

	const previewBase = `https://img.${entity.slug}.com/preview/${entryId}`;

	const release = {
		entryId,
		description: query.content('.detail-box .description'),
		date: query.date([
			'.detail-box .date', // cospuri
			'//div[contains(@class, "details")]//span[strong[contains(text(), "Date")]]', // cute butts
		], 'YYYY・MM・DD', { match: /\d{4}・\d{2}・\d{2}/ }),
		duration: query.duration([
			'.detail-box .length',
			'//div[contains(@class, "details")]//span[strong[contains(text(), "Runtime")]]', // cute butts
		]),
		photoCount: query.number([
			'.detail-box .photos',
			'//div[contains(@class, "details")]//span[strong[contains(text(), "Photos")]]', // cute butts
		]),
		actors: query.all('.sample-model a, .model a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: entity.url }),
		})),
		tags: [
			...query.contents('.tag'),
			query.content('.sample-channel'),
		].filter(Boolean),
	};

	// poster is the player's CSS background image
	const playerPoster = query.style('.player')?.background?.match(/url\((.*)\)/)?.[1]?.trim();

	if (playerPoster) {
		release.poster = playerPoster;
	}

	// full-size candidate first, `s`-suffixed thumbnail as fallback
	release.photos = query.attributes('.thumb a', 'data-asset').map((photoIndex) => [
		`${previewBase}/${photoIndex}.jpg`,
		`${previewBase}/${photoIndex}s.jpg`,
	]);

	release.trailer = `${previewBase}/sample.mp4`;

	if (query.exists('.detail-box .fourK')) {
		release.qualities = [2160];
	}

	return release;
}
// Fellatio Japan: 'deep' scrape via the actor page.
// Resolves to the actor-page tile matching the base release's entry ID, to null when
// there is nothing to look up or no tile matches, or to the HTTP status when the
// request fails. The url argument is not used; everything comes from baseRelease.
async function fetchSceneFellatio(url, channel, baseRelease) {
	// without a base release (or its entry ID and actor path) there is nothing to match against
	if (!baseRelease?.entryId || !baseRelease.path) {
		return null;
	}

	// no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape
	// can't use front page like on Sperm Mania because dates are missing
	const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' });

	if (res.ok) {
		const tiles = scrapeAllTiles(res.context, channel);

		return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null;
	}

	return res.status;
}
// Splits a measurements string into cup, bust, waist and hip, based on the
// B<digits>-<cup>, W<digits> and H<digits> markers (case-insensitive).
function extractSizes(sizes) {
	// number following the given marker letter, passed through unprint's number extraction
	const numberAfter = (pattern) => unprint.extractNumber(sizes.match(pattern)?.[1]);

	const cup = sizes.match(/b\d+-(\w+)/i)?.[1];
	const bust = numberAfter(/b(\d+)/i);
	const waist = numberAfter(/w(\d+)/i);
	const hip = numberAfter(/h(\d+)/i);

	return {
		cup,
		bust,
		waist,
		hip,
	};
}
// SpermMania, Handjob Japan
// Generic profile scraper: bio rows are collected into a label -> value map keyed by
// the slugified label, and the profile fields are read from that map.
function scrapeProfile({ query }, channel, url) {
	const bioRows = query.all('.actr-item, .profile tr, #profile tr, .profile-info li, .model-detail .item, .model-item');

	const bio = Object.fromEntries(bioRows.map((rowEl) => {
		const label = unprint.query.content(rowEl, 'td, b, .model-item-title') || unprint.query.text(rowEl);

		// ensure social links have priority over text
		const value = unprint.query.url(rowEl)
			|| unprint.query.content(rowEl, 'strong, td:last-child, span, .model-item-contents')
			|| unprint.query.text(rowEl);

		return [slugify(label, '_'), value];
	}));

	const profile = { url };

	profile.birthPlace = bio.from || bio.country;

	const descriptionParts = [
		bio.hobbies && `Hobbies: ${bio.hobbies}`,
		bio.skills && `Skills: ${bio.skills}`,
		bio.fun_fact,
		query.content('h2 + p'),
	];

	profile.description = descriptionParts.filter(Boolean).join('. ') || null;

	profile.age = unprint.extractNumber(bio.age);
	profile.height = unprint.extractNumber(bio.height);

	const sizes = bio.sizes || bio.measurements;

	if (!/b\d+/i.test(sizes)) {
		// no B<digits> marker, pass the raw measurements entry on untouched
		profile.measurements = bio.measurements;
	} else {
		const { cup, bust, waist, hip } = extractSizes(sizes);

		Object.assign(profile, { cup, bust, waist, hip });
	}

	profile.foot = unprint.extractNumber(bio.foot_size);
	profile.leg = unprint.extractNumber(bio.leg_length);
	profile.thigh = unprint.extractNumber(bio.thigh_width);

	profile.social = [bio.homepage, bio.twitter].filter(Boolean);

	// try a regular image first, then the costume data attribute, then a CSS background
	const avatar = query.img('.scene-array img[src*="/actress"], img.portrait, .profile-img img')
		|| query.img('.costume-bg', { attribute: 'data-img' })
		|| query.style('.model-profile, #profile, .carousel-item')?.['background-image']?.match(/url\((.*)\)/)?.[1];

	if (avatar) {
		const portrait = avatar.replace('-header.jpg', '.jpg'); // Transex Japan, prefer avatar over header banner

		profile.avatar = [portrait, avatar];
	}

	// remaining costume images, plus the header banner when that is what was found
	const costumePhotos = query.imgs('.costume-bg', { attribute: 'data-img' }).slice(1);
	const headerBanner = avatar?.includes('-header.jpg') && avatar;

	profile.photos = [...costumePhotos, headerBanner].filter(Boolean);

	return profile;
}
// Scrapes a profile in the 'lesbian' layout, where the bio is loose text following
// <strong> labels and the avatar URL is only found in the raw page HTML.
// The channel argument is not used.
function scrapeProfileLesbian({ query, html }, channel, url) {
	const profile = { url };

	profile.age = query.number('//strong[contains(text(), "Age")]/following-sibling::text()[1]');
	profile.height = query.number('//strong[contains(text(), "Height")]/following-sibling::text()[1]');
	profile.birthPlace = query.content('//img[contains(@src, "from")]/following-sibling::text()[1]')?.replace(/^from/i, '').trim() || null;

	const sizes = query.content('//strong[contains(text(), "Measurements")]/following-sibling::text()[1]');

	if (/b\d+/i.test(sizes)) {
		const measurements = extractSizes(sizes);

		profile.cup = measurements.cup;
		profile.bust = measurements.bust;
		profile.waist = measurements.waist;
		profile.hip = measurements.hip;
	}

	// dots in the hostname are escaped so only the literal image host matches;
	// html is guarded in case the context carries no raw markup
	profile.avatar = html?.match(/https:\/\/img\.uralesbian\.com\/models\/\d+\.jpg/)?.[0];

	return profile;
}
// Fetches and scrapes an actor profile.
// The URL is the actor's own URL when known, otherwise it is built from the entity's
// `actors` parameter or, failing that, from `<entity url>/actress/<slug>`.
// Resolves to the scraped profile, or to the HTTP status when the request fails.
async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) {
	// parameters are optional; entities without any must fall back to the defaults
	const url = actorUrl || (parameters?.actors
		? `${parameters.actors}/${slug}`
		: `${entity.url}/actress/${slug}`);

	const res = await unprint.get(url);

	if (res.ok) {
		if (parameters?.layout === 'lesbian') {
			return scrapeProfileLesbian(res.context, entity, url);
		}

		return scrapeProfile(res.context, entity, url);
	}

	return res.status;
}
module.exports = {
fetchLatest,
fetchProfile,
cospuri: {
fetchLatest: fetchLatestCospuri,
scrapeScene: scrapeSceneCospuri,
fetchProfile,
},
fellatio: {
fetchLatest: fetchLatestFellatio,
fetchScene: fetchSceneFellatio,
fetchProfile,
},
handjob: {
fetchLatest: fetchLatestHandjob,
fetchProfile,
},
legs: {
fetchLatest: fetchLatestLegs,
fetchProfile,
},
facefuck: {
fetchLatest: fetchLatestFacefuck,
},
trans: {
fetchLatest: fetchLatestTrans,
fetchProfile,
},
lesbian: {
fetchLatest: fetchLatestLesbian,
fetchProfile,
},
buffet: {
fetchLatest: fetchLatestBuffet,
scrapeScene: scrapeSceneBuffet,
fetchProfile,
},
};

View File

@ -1,7 +1,9 @@
'use strict'; 'use strict';
const config = require('config');
const unprint = require('unprint'); const unprint = require('unprint');
const format = require('template-format'); const format = require('template-format');
const { HttpsProxyAgent } = require('https-proxy-agent');
const qu = require('../utils/qu'); const qu = require('../utils/qu');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
@ -137,11 +139,14 @@ function scrapeProfile({ query, el }) {
return profile; return profile;
} }
const agent = new HttpsProxyAgent(`http://${config.proxy.host}:${config.proxy.port}`);
async function fetchLatest(channel, page) { async function fetchLatest(channel, page) {
// const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail'); // const res = await qu.getAll(`${channel.url}/categories/movies_${page}_d.html`, '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail');
const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, { const res = await unprint.get(`${channel.url}${format(channel.parameters?.latest || '/categories/movies_{page}_d.html', { page })}`, {
selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail', selectAll: '.thumb-big, .thumb-video, .thumbnail, .thumbnail-popular, .full-thumbnail',
httpsAgent: agent,
}); });
if (res.ok) { if (res.ok) {

View File

@ -349,7 +349,7 @@ async function storeMovies(movies, useBatchId) {
return []; return [];
} }
const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies'); const { uniqueReleases, duplicateReleaseEntries } = await filterDuplicateReleases(movies, 'movies');
const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id'); const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id');
const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie'))); const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie')));
@ -362,7 +362,15 @@ async function storeMovies(movies, useBatchId) {
await associateReleaseMedia(moviesWithId, 'movie'); await associateReleaseMedia(moviesWithId, 'movie');
return moviesWithId; return [...moviesWithId, ...duplicateReleaseEntries.map((entry) => ({
// used to map new movie scenes to existing movie entries
id: entry.id,
entryId: entry.entry_id,
entityId: entry.entity_id,
entity: {
id: entry.entity_id,
},
}))];
} }
async function storeSeries(series, useBatchId) { async function storeSeries(series, useBatchId) {

View File

@ -483,4 +483,5 @@ module.exports = {
getCookieJar, getCookieJar,
destroyBypassSessions, destroyBypassSessions,
destroyBrowserSessions, destroyBrowserSessions,
proxyAgent,
}; };