'use strict'; const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const moment = require('moment'); function extractTitle(pathname) { return pathname .split('/') .slice(-2)[0] .split('_') .map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`) .join(' '); } function extractActors(str) { return str .split(/,|\band\b/ig) .filter(actor => !/\.{3}/.test(actor)) .map(actor => actor.trim()) .filter(actor => actor.length > 0); } function scrapeLatest(html, site) { const { document } = new JSDOM(html).window; const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white')); return scenes.map((scene) => { const release = { site }; const link = scene.querySelector('.info a'); const poster = scene.querySelector('img'); const { pathname } = new URL(link); [release.entryId] = poster.id.match(/\d+/); release.url = `https://www.teamskeet.com${pathname}`; release.title = extractTitle(pathname); release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate(); const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`)); [release.poster] = photos; release.photos = photos.slice(1); const actors = scene.querySelector('div span[rel="test"]').textContent; release.actors = extractActors(actors); return release; }); } function scrapeScene(html, site, url) { const { document } = new JSDOM(html).window; const release = { site }; release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value; release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent; const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim()); release.url = url; release.title = title; release.actors = extractActors(actors); release.channel = channel.toLowerCase(); release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel); const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim(); release.date = moment.utc(date, 'MMMM Do, YYYY').toDate(); const { poster } = document.querySelector('video'); if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster]; const siteId = document.querySelector('#story-and-tags img').src.match(/\w+.jpg/)[0].replace('.jpg', ''); const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0]; release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`); const trailer = document.querySelector('div.right.gray a').href; if (trailer) release.trailer = { src: trailer }; return release; } function scrapeSceneA(html, site, sceneX, url) { const scene = sceneX || new JSDOM(html).window.document; const release = { site }; release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim(); release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate(); release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat(); const durationString = scene.querySelector('.time').textContent.trim(); const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss release.duration = moment.duration(duration).asSeconds(); if (sceneX) { const titleEl = scene.querySelector(':scope > a'); release.url = titleEl.href; release.entryId = titleEl.id; release.title = titleEl.title; const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src); release.poster = [poster.replace('bio_big', 'video'), poster]; release.photos = photos; } if (!sceneX) { release.title = scene.querySelector('.title span').textContent; release.url = url; release.poster = scene.querySelector('video').poster; release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')]; } const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/'); release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA; return release; } function scrapeLatestA(html, site) { const { document } = new JSDOM(html).window; const scenes = Array.from(document.querySelectorAll('.scenewrapper')); return scenes.map(scene => scrapeSceneA(null, site, scene)); } async function fetchLatestTeamSkeet(site, page = 1) { const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`; const res = await bhttp.get(url); if (res.statusCode === 200) { return scrapeLatest(res.body.toString(), site); } return null; } async function fetchLatestA(site) { const url = `${site.url}/scenes`; const res = await bhttp.get(url); if (res.statusCode === 200) { return scrapeLatestA(res.body.toString(), site); } return null; } async function fetchLatest(site, page = 1) { if (site.parameters.id) { return fetchLatestTeamSkeet(site, page); } if (site.parameters.scraper === 'A') { return fetchLatestA(site, page); } return null; } async function fetchScene(url, site) { const session = bhttp.session(); // resolve redirects const res = await session.get(url); if (site.parameters?.scraper === 'A') { return scrapeSceneA(res.body.toString(), site, null, url); } return scrapeScene(res.body.toString(), site, url); } module.exports = { fetchLatest, fetchScene, };