diff --git a/seeds/01_networks.js b/seeds/01_networks.js index a8784bbd..36cf5c84 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -594,7 +594,9 @@ const networks = [ url: 'https://www.mylf.com', parent: 'paperstreetmedia', parameters: { - endpoint: 'mylf-elastic-hka5k7vyuw', + // endpoint: 'mylf-elastic-hka5k7vyuw', + fullEndpoint: 'mylf_bundle', + modelPrefix: 'model_', avatars: 'https://images.mylfcdn.net/tsv4/model/profiles', }, }, @@ -736,7 +738,9 @@ const networks = [ description: 'Welcome to teamskeet.com, the largest collection of exclusive teen porn sites and videos on the web. Check out our TeamSkeet porn sites now.', parent: 'paperstreetmedia', parameters: { - endpoint: 'ts-elastic-d5cat0jl5o', + // endpoint: 'ts-elastic-d5cat0jl5o', + fullEndpoint: 'ts_network', + modelPrefix: 'model_', avatars: 'https://images.mylfcdn.net/tsv4/model/profiles', }, }, diff --git a/src/deep.js b/src/deep.js index b20faebd..917964cf 100755 --- a/src/deep.js +++ b/src/deep.js @@ -208,7 +208,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { datePrecision: curatedScrapedRelease.date // don't inherit date precision from base release ? curatedScrapedRelease.datePrecision : baseRelease.datePrecision, - poster: Array.from(new Set([ + poster: Array.from(new Set([ // use base poster as fallback for deep poster ...[].concat(curatedScrapedRelease.poster), ...[].concat(baseRelease.poster), ])).filter(Boolean), diff --git a/src/scrapers/teamskeet.js b/src/scrapers/teamskeet.js index fda02142..98a06554 100755 --- a/src/scrapers/teamskeet.js +++ b/src/scrapers/teamskeet.js @@ -1,9 +1,10 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const http = require('../utils/http'); const slugify = require('../utils/slugify'); -const { lbsToKg, feetInchesToCm } = require('../utils/convert'); +const { convert } = require('../utils/convert'); function getChannelSlug(channelName, entity) { if (!channelName) { @@ -29,7 +30,7 @@ function scrapeScene(scene, channel, parameters) { release.title = scene.title; release.description = scene.description; - release.date = qu.extractDate(scene.publishedDate); + release.date = unprint.extractDate(scene.publishedDate, 'YYYY-MM-DD'); // release.actors = scene.models?.map((model) => model.modelName) || []; release.actors = scene.models?.map((model) => ({ @@ -38,10 +39,15 @@ function scrapeScene(scene, channel, parameters) { url: `${channel.url}/models/${model.modelId || model.id}`, })); - release.poster = [ - // scene.img.replace('med.jpg', 'hi.jpg'), // this image is not always from the same scene! for example on Petite Teens 18 - scene.img, - ]; + if (scene.img) { + const poster = new URL(scene.img); + + release.poster = [ + // scene.img.replace('med.jpg', 'hi.jpg'), // this image is not always from the same scene! for example on Petite Teens 18 + scene.img, + `${poster.origin}/cdn-cgi/image/width=640,quality=89${poster.pathname}`, // sometimes works when main poster is broken, observed on GotMYLF + ]; + } release.teaser = scene.videoTrailer; @@ -63,6 +69,80 @@ function scrapeAll(scenes, channel, parameters) { return scenes.map((scene) => scrapeScene(scene, channel, parameters)); } +async function fetchLatest(channel, page = 1, { parameters }) { + const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`); + + if (res.ok) { + return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters); + } + + return res.status; +} + +async function fetchLatestOrganic(channel, page, context) { + const res = await http.get(`https://store.psmcdn.net/${context.parameters.endpoint}/newestMovies/items.json?orderBy="$key"&startAt="${context.cursor || 'aaaaaaaa'}"&limitToFirst=100`); + + if (res.ok) { + const scenes = scrapeAll(Object.values(res.body), channel, context.parameters); + + return { + // cursor implies page > 1 and first scene is last scene on previous page, + // it probably won't trip up the pagination logic, but avoid the duplicate anyway + scenes: context.cursor ? scenes.slice(1) : scenes, + context: { + cursor: Object.keys(res.body).at(-1), // official page seems to derive cursor from last scene, too + }, + }; + } + + return res.status; +} + +async function fetchLatestSearch(channel, page = 1, { parameters }) { + const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?q=(site.seo.seoSlug:%22${parameters.id}%22%20AND%20type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`); + + if (res.ok) { + return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters); + } + + return res.status; +} + +async function fetchScene(url, channel, baseScene, { parameters }) { + if (parameters.layout !== 'organic' && baseScene?.entryId) { + // overview and deep data is the same in elastic API, don't hit server unnecessarily + return baseScene; + } + + const sceneSlug = new URL(url).pathname.match(/\/([\w-]+$)/)[1]; + + const res = await unprint.get(url, { + parser: { + runScripts: 'dangerously', + }, + }); + + if (res.ok) { + const videos = res.context.window.__INITIAL_STATE__?.content?.videosContent; + + res.context.window.fetch = () => {}; // suppress fetch missing error + + if (!videos) { + return null; + } + + const video = videos?.[sceneSlug] || Object.values(videos)[0]; + + if (video) { + return scrapeScene(video, channel, parameters); + } + + return null; + } + + return res.status; +} + function scrapeProfile(actor, entity, parameters) { const profile = {}; @@ -113,11 +193,12 @@ function scrapeProfile(actor, entity, parameters) { } if (actor.bio.heightFeet && actor.bio.heightInches) { - profile.height = feetInchesToCm(actor.bio.heightFeet, actor.bio.heightInches); + // reports 5 foot as 1 foot for some reason, but inches seem correct + profile.height = convert(`${actor.bio.heightFeet >= 4 ? actor.bio.heightFeet : 5}' ${actor.bio.heightInches}"`, 'cm'); } if (actor.bio.weight) { - profile.weight = lbsToKg(actor.bio.weight); + profile.weight = convert(actor.bio.weight, 'lb', 'kg'); } profile.avatar = actor.img; @@ -127,80 +208,16 @@ function scrapeProfile(actor, entity, parameters) { return profile; } -async function fetchLatest(channel, page = 1, { parameters }) { - const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`); - - if (res.ok) { - return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters); - } - - return res.status; -} - -async function fetchLatestOrganic(channel, page, context) { - const res = await http.get(`https://store.psmcdn.net/${context.parameters.endpoint}/newestMovies/items.json?orderBy="$key"&startAt="${context.cursor || 'aaaaaaaa'}"&limitToFirst=100`); - - if (res.ok) { - const scenes = scrapeAll(Object.values(res.body), channel, context.parameters); - - return { - // cursor implies page > 1 and first scene is last scene on previous page, - // it probably won't trip up the pagination logic, but avoid the duplicate anyway - scenes: context.cursor ? scenes.slice(1) : scenes, - context: { - cursor: Object.keys(res.body).at(-1), // official page seems to derive cursor from last scene, too - }, - }; - } - - return res.status; -} - -async function fetchLatestSearch(channel, page = 1, { parameters }) { - const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?q=(site.seo.seoSlug:%22${parameters.id}%22%20AND%20type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`); - - if (res.ok) { - return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters); - } - - return res.status; -} - -async function fetchScene(url, channel, baseScene, { parameters }) { - if (parameters.layout !== 'organic' && baseScene?.entryId) { - // overview and deep data is the same in elastic API, don't hit server unnecessarily - return baseScene; - } - - const sceneSlug = new URL(url).pathname.match(/\/([\w-]+$)/)[1]; - - const res = await http.get({ - organic: `https://store.psmcdn.net/${parameters.endpoint}/moviesContent/${sceneSlug}.json`, - search: `https://tours-store.psmcdn.net/ts_network/_search/?q=(id:${sceneSlug})&size=1`, - undefined: `https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_doc/${sceneSlug}`, - }[parameters.layout]); - - if (res.ok && res.body.found) { - return scrapeScene(res.body._source, channel, parameters); - } - - if (res.ok && parameters.layout === 'organic' && res.body.id) { - return scrapeScene(res.body, channel, parameters); - } - - return res.status; -} - async function fetchProfile(baseActor, { entity, parameters }) { // const url = format(parameters.profiles, { slug: baseActor.slug }); const url = parameters.layout === 'organic' ? `https://store.psmcdn.net/${parameters.endpoint}/modelsContent/${baseActor.slug}.json` : `https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-modelscontent`}/_doc/${parameters.modelPrefix || ''}${baseActor.slug}`; - const res = await qu.get(url); + const res = await unprint.get(url); - if (res.ok && res.body) { - return scrapeProfile(parameters.layout === 'organic' ? res.body : res.body._source || res.body, entity, parameters); + if (res.ok && res.data) { + return scrapeProfile(parameters.layout === 'organic' ? res.data : res.data._source || res.body, entity, parameters); } return res.status; diff --git a/tests/profiles.js b/tests/profiles.js index e1917e2d..a1d4df2d 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -25,7 +25,7 @@ const actors = [ { entity: 'wifey', name: 'Danielle Renae', fields: ['gender', 'avatar', 'description'] }, // teamskeet { entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] }, - { entity: 'mylf', name: 'Eliza Ibarra', fields: ['avatar', 'description', 'measurements', 'nationality', 'hairColor', 'hasPiercings', 'hasTattoos'] }, + { entity: 'mylf', name: 'Eliza Ibarra', fields: ['avatar', 'measurements', 'nationality', 'hairColor', 'hasPiercings', 'hasTattoos'] }, { entity: 'sayuncle', name: 'Greg McKeon', fields: ['avatar', 'description'] }, // mike adriano { entity: 'trueanal', name: 'Brenna McKenna', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },