'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const http = require('../utils/http'); function scrapeLatest(html, site) { const { document } = new JSDOM(html).window; const { origin } = new URL(site.url); const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0]; return Array.from(videos.querySelectorAll('.card'), (scene) => { const release = { site }; release.url = `${origin}${scene.querySelector(':scope > a').href}`; release.entryId = scene.dataset.videoId; release.title = scene.querySelector('.card-title').textContent; release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate(); release.actors = Array.from(scene.querySelectorAll('.actors a'), (el) => el.textContent); // slow CDN? const poster = scene.querySelector('.single-image').dataset.src; const teaserEl = scene.querySelector('source'); release.poster = { src: /^http/.test(poster) ? poster : `https:${poster}`, referer: site.url, attempts: 5, interval: 5000, concurrency: 1, }; release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), (el) => ({ src: (/^http/.test(el.dataset.src) ? el.dataset.src : `https:${el.dataset.src}`), referer: site.url, attempts: 5, interval: 5000, concurrency: 1, })); if (teaserEl) { release.teaser = { src: teaserEl.dataset.src, referer: site.url, attempts: 5, interval: 5000, concurrency: 1, }; } return release; }); } function scrapeScene(html, site, url) { const { document } = new JSDOM(html).window; const release = { site }; const scene = document.querySelector('#t2019-2col'); release.url = url; release.title = scene.querySelector('.t2019-stitle').textContent.trim(); release.description = scene.querySelector('#t2019-description').textContent.trim(); release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), (el) => el.textContent); const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span')); if (durationEls.length > 1) { release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate(); release.duration = Number(durationEls[1].textContent.match(/\d+/)[0]) * 60; } else { release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60; } // unreliable CDN release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), (el) => ({ src: (/^http/.test(el.src) ? el.src : `https:${el.src}`), referer: site.url, attempts: 5, interval: 5000, concurrency: 1, })); const posterEl = scene.querySelector('#no-player-image'); const videoEl = scene.querySelector('video'); const trailerEl = scene.querySelector('#t2019-video source'); if (posterEl) { release.poster = { src: /^http/.test(posterEl.src) ? posterEl.src : `https:${posterEl.src}`, referer: site.url, attempts: 5, interval: 5000, concurrency: 1, }; } else if (videoEl) { release.poster = { src: /^http/.test(videoEl.poster) ? videoEl.poster : `https:${videoEl.poster}`, referer: site.url, attempts: 5, interval: 5000, concurrency: 1, }; } if (trailerEl) { release.trailer = { src: trailerEl.src, referer: site.url, attempts: 5, interval: 5000, concurrency: 1, }; } return release; } async function fetchLatest(site, page = 1) { const url = `${site.url}?page=${page}`; const res = await http.get(url); if (res.statusCode === 200) { return scrapeLatest(res.body.toString(), site); } return []; } async function fetchScene(url, site) { const res = await http.get(url); if (res.statusCode === 200) { return scrapeScene(res.body.toString(), site, url); } return null; } module.exports = { fetchLatest, fetchScene, };