forked from DebaucheryLibrarian/traxxx
Added Meiden van Holland and Vurig Vlaanderen.
This commit is contained in:
@@ -40,30 +40,50 @@ const hairColors = {
|
||||
'soft-black': 'black',
|
||||
'brunette/raven': 'brown',
|
||||
black: 'black',
|
||||
blond: 'blond',
|
||||
blonde: 'blonde',
|
||||
blondie: 'blonde',
|
||||
brown: 'brown',
|
||||
bruin: 'brown',
|
||||
brunette: 'brown',
|
||||
fair: 'blonde',
|
||||
raven: 'black',
|
||||
red: 'red',
|
||||
redhead: 'red',
|
||||
rood: 'red',
|
||||
blue: 'blue',
|
||||
green: 'green',
|
||||
purple: 'purple',
|
||||
pink: 'pink',
|
||||
zwart: 'black',
|
||||
};
|
||||
|
||||
// Maps eye colour labels as found on source sites (English and Dutch) to the
// canonical English value stored on a profile.
const eyeColors = {
	blauw: 'blue',
	blue: 'blue',
	brown: 'brown',
	bruin: 'brown', // Dutch label; must resolve to the canonical English value like every other entry
	dark: 'brown',
	gray: 'gray',
	green: 'green',
	groen: 'green',
	grey: 'gray',
	hazel: 'hazel',
};
|
||||
|
||||
// Maps sexual orientation labels as found on source sites (English and Dutch)
// to the canonical value stored on a profile. Keys are looked up verbatim
// (after trimming) by curateProfile, so they are kept lower-case.
const orientations = {
	bi: 'bisexual',
	biseksueel: 'bisexual',
	bisexual: 'bisexual',
	gay: 'gay',
	hetero: 'straight',
	heteroseksueel: 'straight',
	heterosexual: 'straight',
	homoseksueel: 'gay',
	homosexual: 'gay',
	straight: 'straight',
};
|
||||
|
||||
const ethnicities = {
|
||||
'african american': 'black',
|
||||
'african-american': 'black',
|
||||
@@ -205,6 +225,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
|
||||
slug: actor.slug,
|
||||
url: actor.url,
|
||||
gender: actor.gender,
|
||||
orientation: actor.orientation,
|
||||
entityId: actor.entity_id,
|
||||
aliasFor: actor.alias_for,
|
||||
dateOfBirth: actor.date_of_birth,
|
||||
@@ -303,6 +324,7 @@ function curateProfileEntry(profile) {
|
||||
age: profile.age,
|
||||
url: profile.url,
|
||||
gender: profile.gender,
|
||||
orientation: profile.orientation,
|
||||
ethnicity: profile.ethnicity,
|
||||
description: profile.description,
|
||||
description_hash: profile.descriptionHash,
|
||||
@@ -372,6 +394,8 @@ async function curateProfile(profile, actor) {
|
||||
|| (/male/i.test(profile.gender) && 'male')
|
||||
|| null;
|
||||
|
||||
curatedProfile.orientation = orientations[profile.orientation?.trim()] || null;
|
||||
|
||||
const dateOfBirth = profile.dateOfBirth || profile.birthdate;
|
||||
|
||||
curatedProfile.dateOfBirth = (!Number.isNaN(Number(dateOfBirth)) // possibly valid date
|
||||
@@ -528,6 +552,7 @@ async function interpolateProfiles(actorIdsOrNames) {
|
||||
|
||||
const mostFrequentValues = [
|
||||
'gender',
|
||||
'orientation',
|
||||
'ethnicity',
|
||||
'cup',
|
||||
'bust',
|
||||
@@ -669,6 +694,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
...entity,
|
||||
// legacy
|
||||
site: entity,
|
||||
channel: entity,
|
||||
network: entity?.parent,
|
||||
entity,
|
||||
scraper: scraperSlug,
|
||||
|
||||
@@ -56,12 +56,15 @@ function toBaseReleases(baseReleasesOrUrls, entity = null) {
|
||||
}
|
||||
|
||||
async function fetchUnprintScene(scraper, url, entity, baseRelease, options, type) {
|
||||
const releaseScraper = scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'];
|
||||
|
||||
const res = await unprint.get(url, {
|
||||
rejectUnauthorized: false,
|
||||
...(releaseScraper.scraper && releaseScraper), // options object
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scraper[type === 'movie' ? 'scrapeMovie' : 'scrapeScene'](res.context, {
|
||||
return (releaseScraper.scraper || releaseScraper)(res.context, {
|
||||
url,
|
||||
entity,
|
||||
baseRelease,
|
||||
@@ -78,7 +81,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc
|
||||
}
|
||||
|
||||
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||
if (scraper.useUnprint) {
|
||||
if (scraper.useUnprint || scraper.scrapeScene?.unprint || scraper.scrapeMovie?.unprint) {
|
||||
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
|
||||
}
|
||||
|
||||
|
||||
25
src/media.js
25
src/media.js
@@ -163,7 +163,7 @@ function toBaseSource(rawSource) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (rawSource.match(/.m3u8$/)) {
|
||||
if (new URL(rawSource).pathname.match(/.m3u8$/)) {
|
||||
return {
|
||||
src: rawSource,
|
||||
stream: rawSource,
|
||||
@@ -175,6 +175,12 @@ function toBaseSource(rawSource) {
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof rawSource === 'function') {
|
||||
return {
|
||||
defer: rawSource,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -266,12 +272,12 @@ async function findSourceDuplicates(baseMedias) {
|
||||
.filter(Boolean);
|
||||
|
||||
const extractUrls = baseMedias
|
||||
.map((baseMedia) => baseMedia.sources.map((source) => source.url))
|
||||
.map((baseMedia) => baseMedia.sources.map((source) => source.extract))
|
||||
.flat()
|
||||
.filter(Boolean);
|
||||
|
||||
const [existingSourceMedia, existingExtractMedia] = await Promise.all([
|
||||
// my try to check thousands of URLs at once, don't pass all of them to a single query
|
||||
// may try to check thousands of URLs at once, don't pass all of them to a single query
|
||||
chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
|
||||
const accUrls = await chain;
|
||||
const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);
|
||||
@@ -344,6 +350,17 @@ async function findHashDuplicates(medias) {
|
||||
}
|
||||
|
||||
async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
||||
if (typeof baseSource.defer === 'function') {
|
||||
const src = await baseSource.defer();
|
||||
|
||||
console.log('DEFERED', src);
|
||||
|
||||
return {
|
||||
...baseSource,
|
||||
...toBaseSource(src),
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof baseSource.extract !== 'function' || !baseSource.url) {
|
||||
return baseSource;
|
||||
}
|
||||
@@ -365,7 +382,7 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
||||
|
||||
return {
|
||||
...baseSource,
|
||||
src,
|
||||
...toBaseSource(src),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
242
src/scrapers/bluedonkeymedia.js
Normal file
242
src/scrapers/bluedonkeymedia.js
Normal file
@@ -0,0 +1,242 @@
|
||||
'use strict';
|
||||
|
||||
const crypto = require('crypto');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
|
||||
/**
 * Resolves the stream URL of a scene trailer through the Sysero free-stream API.
 *
 * @param {string|number} entryId - Scene ID, sent as `resource_id`.
 * @param {string|number} videoId - Trailer video ID, sent as `video_id`.
 * @param {object} channel - Channel entity; its `url` is sent as the Origin header.
 * @param {string} credentials - Value for the Credentials header, as built by getCredentials().
 * @returns {Promise<string|null>} The MPD stream URL, or null when the request
 *   failed or the response does not contain an MPD stream.
 */
async function fetchTrailer(entryId, videoId, channel, credentials) {
	const url = `https://api.sysero.nl/free-stream?resource_id=${entryId}&video_id=${videoId}`;

	const res = await http.get(url, {
		headers: {
			Origin: channel.url,
			Credentials: credentials,
		},
	});

	if (!res.ok) {
		return null;
	}

	// any level of the payload may be absent; an incomplete response should yield null rather than throw
	return res.body?.data?.attributes?.sources?.streams?.mpd?.url ?? null;
}
|
||||
|
||||
// MVH's slug system seems to break on non-alphanumerical characters, but also supports ID
|
||||
/**
 * Builds the public URL of a scene.
 *
 * The slug is only used when it consists solely of word characters and
 * hyphens; for any other slug (or none at all) the scene ID is used instead.
 *
 * @param {object} channel - Entity whose `url` is the site base URL.
 * @param {string} [slug] - Scene slug as reported by the site.
 * @param {string|number} sceneId - Scene ID, used as fallback path segment.
 * @returns {string} `<channel.url>/sexfilms/<slug or ID>`
 */
function getSceneUrl(channel, slug, sceneId) {
	// \w already covers both letter cases, so no case-insensitive flag is needed
	const hasUsableSlug = Boolean(slug) && /^[\w-]+$/.test(slug);

	return `${channel.url}/sexfilms/${hasUsableSlug ? slug : sceneId}`;
}
|
||||
|
||||
/**
 * Converts the scene records of a video listing response into releases.
 *
 * @param {object[]} scenes - Scene records (`id`, `attributes`, `relationships`).
 * @param {object} channel - Channel entity; `url` is used for scene URLs and
 *   `parameters.languages`, when set, restricts which scenes are kept.
 * @param {object} context - Lookup tables built by the caller: `images`,
 *   `clips` and `tags` keyed by ID, plus the request `credentials`.
 * @returns {{ scenes: object[], unextracted: object[] }} Releases for this
 *   channel in `scenes`; releases whose language is not in the channel's
 *   language list in `unextracted`.
 */
function scrapeAll(scenes, channel, context) {
	const accepted = [];
	const unextracted = [];
	const allowedLanguages = channel.parameters?.languages;

	scenes.forEach((scene) => {
		const { attributes, relationships } = scene;
		const release = {};

		release.entryId = scene.id;
		release.url = getSceneUrl(channel, attributes.slug, scene.id);
		release.date = unprint.extractDate(attributes.product.active_from, 'D/M/YY');

		release.title = attributes.title;
		release.description = attributes.description;
		release.duration = unprint.extractDuration(attributes.videos.film?.[0]?.duration);

		const posterPath = attributes.images.thumb?.[0]?.path || context.images[scene.id];
		const teaserPath = context.clips[relationships.clips?.data[0]?.id];

		if (posterPath) {
			release.poster = `https://cdndo.sysero.nl${posterPath}`;
		}

		if (attributes.videos.trailer?.[0]) {
			// deferred: the stream URL is only requested when the trailer is actually needed
			release.trailer = async () => fetchTrailer(scene.id, attributes.videos.trailer[0].id, channel, context.credentials);
		}

		if (teaserPath) {
			release.teaser = `https://cdndo.sysero.nl${teaserPath}`;
		}

		release.tags = relationships.categories?.data
			.map((category) => context.tags[category.id]?.replace(/-/g, ' '))
			.filter(Boolean);

		release.language = attributes.videos.film?.[0]?.language;

		if (release.language && allowedLanguages && !allowedLanguages.includes(release.language)) {
			// all MVH sites list the entire network, but we want to store Flemish scenes under Vurig Vlaanderen
			unextracted.push(release);
			return;
		}

		accepted.push(release);
	});

	return { scenes: accepted, unextracted };
}
|
||||
|
||||
/**
 * Builds the value of the `Credentials` request header for the Sysero API.
 *
 * The value has the form `Syserauth <frontend>-<hmac>-<timestamp>`, where the
 * HMAC-SHA256 (hex) is keyed with `channel.parameters.secret` and computed over
 * the frontend identifier followed by the current Unix time in decimal seconds;
 * the trailing timestamp is that same time in hexadecimal.
 *
 * @param {object} channel - Entity providing `parameters.frontend` and `parameters.secret`.
 * @returns {string} Header value, valid for the current timestamp.
 */
function getCredentials(channel) {
	const { frontend, secret } = channel.parameters;
	const timestamp = Math.floor(Date.now() / 1000);

	const signature = crypto
		.createHmac('sha256', secret)
		.update(`${frontend}${timestamp}`) // signed in decimal, transmitted in hex below
		.digest('hex');

	return `Syserauth ${frontend}-${signature}-${timestamp.toString(16)}`;
}
|
||||
|
||||
const falseCountry = /afghanistan/i; // no country defaults to Afghanistan

/**
 * Joins a model's city, county and country into a single location string.
 *
 * A country matching `falseCountry` is treated as not set. Segments are
 * trimmed, and empty or missing segments are left out.
 *
 * @param {object} model - Model record with optional `city`, `county` and `country` strings.
 * @returns {string|null} Comma-separated location, or null when no segment remains.
 */
function getLocation(model) {
	const hasRealCountry = Boolean(model.country) && !falseCountry.test(model.country);
	const segments = [model.city, model.county, hasRealCountry ? model.country : null];

	const location = segments
		.map((segment) => segment?.trim())
		.filter(Boolean)
		.join(', ');

	return location || null;
}
|
||||
|
||||
/**
 * Converts a model record into an actor profile.
 *
 * @param {object} model - Model record as found in the page state or nested in a scene.
 * @param {object} options
 * @param {object} options.entity - Entity whose `url` is the base for profile and scene URLs.
 * @param {boolean} [options.includeScenes=true] - Whether to map `model.videos` to `actor.scenes`.
 * @returns {object} Actor profile.
 */
function scrapeProfile(model, { entity, includeScenes = true }) {
	const actor = {};

	// sometimes the birth date seems to be the profile creation date, so date and age are only trusted for an adult age
	// NOTE(review): `> 18` also discards an age of exactly 18 - confirm this is intended
	const hasPlausibleAge = model.age > 18;

	actor.name = model.title;
	actor.url = unprint.prefixUrl(`/modellen/${model.slug}`, entity.url);

	actor.entryId = model.id;

	actor.description = model.description;

	actor.dateOfBirth = model.birth_date && hasPlausibleAge ? new Date(model.birth_date) : null;
	actor.age = hasPlausibleAge ? model.age : null;
	actor.orientation = model.sexual_orientation;

	actor.birthPlace = getLocation(model);

	// non-numeric or empty measurements become null
	actor.height = Number(model.length) || null;
	actor.weight = Number(model.weight) || null;

	actor.eyes = model.eye_color;
	actor.hairColor = model.hair_color;

	if (includeScenes) {
		actor.scenes = model.videos?.map((video) => ({
			entryId: video.id,
			url: getSceneUrl(entity, video.slug, video.id),
			title: video.title,
			description: video.description,
		}));
	}

	actor.avatar = unprint.prefixUrl(model.images?.[0]?.path, 'https://cdndo.sysero.nl');

	return actor;
}
|
||||
|
||||
/**
 * Converts the scene record embedded in a scene page into a release.
 *
 * @param {object} scene - Scene record from the page state.
 * @param {object} context
 * @param {object} context.entity - Entity used for URLs and for trailer credentials.
 * @returns {object} Release; `trailer`, when present, is an async function
 *   resolving to the stream URL so the request is only made on demand.
 */
function scrapeSceneData(scene, { entity }) {
	const release = {};

	release.entryId = scene.id;
	release.url = getSceneUrl(entity, scene.slug, scene.id);

	release.title = scene.title;
	release.description = scene.description;

	release.date = scene.uploadDate
		? new Date(scene.uploadDate)
		: unprint.extractDate(scene.product.active_from, 'D/M/YY');

	release.actors = scene.models?.map((model) => scrapeProfile(model, { entity, includeScenes: false }));

	// without any duration source the last fallback evaluates to NaN, which must not leak into the release
	release.duration = scene.seconds
		|| unprint.extractTimestamp(scene.isoDuration)
		|| Number(scene.video_paid?.duration) * 60
		|| null;

	release.tags = scene.categories?.map((category) => category.slug.replace(/-/g, ' '));

	if (scene.thumb) {
		release.poster = [
			scene.thumb.original,
			scene.thumb.xxl,
			scene.thumb.xl,
			// ... l, m, s, xs, xxs, probably little point trying all of them
		].map((poster) => unprint.prefixUrl(poster, 'https://cdndo.sysero.nl'));
	}

	release.photos = scene.gallery;

	if (scene.trailer) {
		release.trailer = async () => {
			// credentials embed the current time, so they are generated when the trailer is actually fetched
			const credentials = getCredentials(entity);

			return fetchTrailer(scene.id, scene.trailer.id, entity, credentials);
		};
	}

	return release;
}
|
||||
|
||||
/**
 * Scrapes a scene page from the state object the page exposes on
 * `window.__NUXT__`.
 *
 * @param {object} page - Page context; only `window` is used.
 * @param {object} context - Scrape context, passed on to scrapeSceneData().
 * @returns {object|null} Release, or null when the page state holds no video.
 */
function scrapeScene({ window }, context) {
	const video = window.__NUXT__?.state?.videoStore?.video;

	return video ? scrapeSceneData(video, context) : null;
}
|
||||
|
||||
/**
 * Fetches one page of the most recently published videos from the Sysero API.
 *
 * @param {object} channel - Channel entity; provides `url` (Origin header) and
 *   `parameters.frontend` / `parameters.secret` (request and credentials).
 * @param {number} page - Page number to request.
 * @param {object} context - Scrape context, extended with lookup tables and
 *   credentials before being handed to scrapeAll().
 * @returns {Promise<object|number>} Result of scrapeAll(), or the response
 *   status when the request failed or returned no data.
 */
async function fetchLatest(channel, page, context) {
	const credentials = getCredentials(channel);

	const res = await http.get(`https://api.sysero.nl/videos?page=${page}&count=20&type=video&include=images:types(thumb|thumb_mobile),categories,clips&filter[status]=published&filter[products]=1%2C2&sort[published_at]=DESC&frontend=${channel.parameters.frontend}`, {
		headers: {
			Origin: channel.url,
			Credentials: credentials,
		},
	});

	if (!res.ok || !res.body?.data) {
		return res.status;
	}

	const included = res.body.included || [];

	// builds an { id: attribute } lookup from the included resources matching the predicate
	const indexIncluded = (predicate, attribute) => Object.fromEntries(included
		.filter(predicate)
		.map((item) => [item.id, item.attributes[attribute]]));

	const tags = indexIncluded((item) => item.type === 'category', 'slug');
	const images = indexIncluded((item) => item.type === 'image' && item.attributes.types === 'thumb', 'path');
	const clips = indexIncluded((item) => item.type === 'clip', 'path');

	return scrapeAll(res.body.data, channel, { ...context, images, clips, tags, credentials });
}
|
||||
|
||||
/**
 * Fetches a model page and scrapes the profile from the state object the page
 * exposes on `window.__NUXT__`.
 *
 * @param {object} actor - Actor to look up; its `slug` forms the page URL.
 * @param {object} context
 * @param {object} context.entity - Entity providing the site `url` and the credential parameters.
 * @returns {Promise<object|number|null>} Actor profile, null when the page
 *   state holds no model, or the response status when the request failed.
 */
async function fetchProfile(actor, { entity }) {
	const credentials = getCredentials(entity);
	const url = `${entity.url}/modellen/${actor.slug}`;

	const res = await unprint.get(url, {
		headers: {
			Origin: entity.url,
			Credentials: credentials,
		},
		parser: {
			// the model data is read from window.__NUXT__, presumably populated by the page's own scripts
			runScripts: 'dangerously',
		},
	});

	if (!res.ok) {
		return res.status;
	}

	const model = res.context.window.__NUXT__?.state?.modelStore?.model;

	return model ? scrapeProfile(model, { entity }) : null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
scrapeScene: {
|
||||
scraper: scrapeScene,
|
||||
unprint: true,
|
||||
parser: {
|
||||
runScripts: 'dangerously',
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -38,6 +38,7 @@ const kink = require('./kink');
|
||||
const analvids = require('./analvids');
|
||||
const littlecapricedreams = require('./littlecapricedreams');
|
||||
const loveherfilms = require('./loveherfilms');
|
||||
const bluedonkeymedia = require('./bluedonkeymedia');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
const mindgeek = require('./mindgeek');
|
||||
const naughtyamerica = require('./naughtyamerica');
|
||||
@@ -125,6 +126,7 @@ const scrapers = {
|
||||
littlecapricedreams,
|
||||
loveherfilms,
|
||||
mamacitaz: porndoe,
|
||||
bluedonkeymedia,
|
||||
mikeadriano,
|
||||
mindgeek,
|
||||
mylf: teamskeet,
|
||||
@@ -187,6 +189,9 @@ const scrapers = {
|
||||
blackedraw: vixen,
|
||||
blackambush: elevatedx,
|
||||
blowpass,
|
||||
bluedonkeymedia,
|
||||
meidenvanholland: bluedonkeymedia,
|
||||
vurigvlaanderen: bluedonkeymedia,
|
||||
boobpedia,
|
||||
bradmontana,
|
||||
brattysis: nubiles,
|
||||
|
||||
@@ -43,7 +43,7 @@ const defaultOptions = {
|
||||
encodeJSON: true,
|
||||
parse: false,
|
||||
headers: {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
|
||||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user