Added Men network and Icon Male to MindGeek. Added entropy filter to media module to help filter out generic avatars. Added Pure Taboo. Various logo updates.
This commit is contained in:
103
src/scrapers/puretaboo.js
Normal file
103
src/scrapers/puretaboo.js
Normal file
@@ -0,0 +1,103 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const moment = require('moment');
|
||||
|
||||
// const { fetchLatest } = require('./gamma');
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
const { getPhotos } = require('./gamma');
|
||||
|
||||
/**
 * Extract release summaries from a scene listing page.
 *
 * @param {string} html - Markup of a listing page (latest or upcoming).
 * @param {{ url?: string }} [site] - Site the page was fetched from. Its `url` is used as the
 *   base for scene links; when omitted, the Pure Taboo origin is used.
 * @returns {Object[]} One release object per `.sceneList .sceneContainer` element.
 */
function scrape(html, site) {
    const { document } = ex(html);

    // Callers already hand over the site; prefer its base URL (used slash-less by the fetchers)
    // over a hard-coded origin, and keep the old origin as the fallback.
    const origin = site?.url || 'https://www.puretaboo.com';

    return ctxa(document, '.sceneList .sceneContainer').map(({ q, qa }) => {
        const release = {};

        const linkEl = q('a.imgLink');

        release.entryId = linkEl.dataset.id;
        release.title = linkEl.title;
        release.url = `${origin}${linkEl.href}`;
        // NOTE(review): presumably `true` makes qa return the link texts - confirm against utils/q.
        release.actors = qa('.sceneActors a', true);

        // The poster URL is read from the `data-original` attribute rather than `src`.
        release.poster = q('.imgLink img').dataset.original;
        release.trailer = `https://videothumb.gammacdn.com/307x224/${release.entryId}.mp4`;

        return release;
    });
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * Three inline data sources are read from the markup, in addition to DOM queries:
 *  - the object inside the `dataLayer = [...]` assignment (its `sceneDetails` property),
 *  - the second entry of the `application/ld+json` script,
 *  - the object assigned to `window.ScenePlayerOptions`.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Absolute URL of the scene page; stored as-is on the release.
 * @param {Object} site - Site descriptor, forwarded to `getPhotos`.
 * @returns {Promise<Object>} The scraped release.
 * @throws {SyntaxError} When one of the inline JSON snippets cannot be parsed.
 */
async function scrapeScene(html, url, site) {
    const release = {};

    const { q, qa, qd, qm } = ex(html);
    const { pathname } = new URL(url);

    // Inline analytics data: take everything between `dataLayer = [` and the next `];`.
    const dataPrefix = 'dataLayer = [';
    const dataStart = html.indexOf(dataPrefix);
    const dataString = html.slice(dataStart + dataPrefix.length, html.indexOf('];', dataStart));
    const { sceneDetails: data } = JSON.parse(dataString);

    // Structured data; the scene description lives in the second entry of the array.
    const dataString2 = q('script[type="application/ld+json"]', true);
    const data2 = JSON.parse(dataString2)[1];

    // Player configuration: slice up to and including the `}` of the first `};`.
    const videoPrefix = 'window.ScenePlayerOptions = ';
    const videoStart = html.indexOf(videoPrefix);
    const videoString = html.slice(videoStart + videoPrefix.length, html.indexOf('};', videoStart) + 1);
    const videoData = JSON.parse(videoString);

    const { sceneInfos } = videoData.playerOptions;

    release.entryId = data.sceneId || sceneInfos.sceneId || pathname.slice(pathname.lastIndexOf('/') + 1);
    release.url = url;
    release.title = data.sceneTitle || sceneInfos.sceneTitle || qm('meta[name="twitter:title"]') || q('.sceneTitle', true);

    // The DOM fallback may not match anything; do not call replace() on a missing value.
    release.description = data.sceneDescription
        || data2.description
        || q('.sceneDesc', true)?.replace('Video Description:', '');

    // Drop the leading two characters of the duration string before handing it to moment.
    release.duration = moment.duration(data2.duration.slice(2)).asSeconds();

    // data2.dateCreated is the date the scene was first added to 'upcoming'
    release.date = sceneInfos.sceneReleaseDate
        ? moment.utc(sceneInfos.sceneReleaseDate, 'YYYY-MM-DD').toDate()
        : qd('.updatedDate', 'MM-DD-YYYY');

    release.actors = data.sceneActors?.map(actor => actor.actorName)
        || data2.actor?.map(actor => actor.name)
        || qa('.sceneColActors a', true);

    // split() always yields a (truthy) array, so the DOM fallback has to be chosen explicitly
    // when the keywords are missing or empty.
    release.tags = data2.keywords
        ? data2.keywords.split(', ')
        : qa('.sceneColCategories a', 'title');

    release.poster = videoData.picPreview;
    release.trailer = {
        src: `${videoData.playerOptions.host}${videoData.url}`,
    };

    // Only fetch photos when the page links to a photo set; `photos` stays unset otherwise.
    const photosLinkEl = q('.picturesItem a');

    if (photosLinkEl) {
        release.photos = await getPhotos(photosLinkEl.href, 'puretaboo.com', site);
    }

    return release;
}
|
||||
|
||||
/**
 * Fetch one page of the site's video listing and scrape its releases.
 *
 * @param {{ url: string }} site - Site descriptor; `url` is the base the listing path is appended to.
 * @param {number} [page=1] - Listing page number placed at the end of the path.
 * @returns {Promise<Object[]|null>} Scraped releases, or `null` when the response status is not 200.
 */
async function fetchLatest(site, page = 1) {
    const listingUrl = `${site.url}/en/videos/AllCategories/0/${page}`;
    const { statusCode, body } = await bhttp.get(listingUrl);

    // Anything but a plain 200 is treated as "nothing to scrape".
    if (statusCode !== 200) {
        return null;
    }

    return scrape(body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch the first page of the site's upcoming listing and scrape its releases.
 *
 * @param {{ url: string }} site - Site descriptor; `url` is the base the listing path is appended to.
 * @returns {Promise<Object[]|null>} Scraped releases, or `null` when the response status is not 200.
 */
async function fetchUpcoming(site) {
    const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);

    // Same contract as fetchLatest: do not try to scrape an error page.
    if (res.statusCode === 200) {
        return scrape(res.body.toString(), site);
    }

    return null;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {Object} site - Site descriptor, forwarded to `scrapeScene`.
 * @returns {Promise<Object|null>} The scraped release, or `null` when the response status is not 200.
 */
async function fetchScene(url, site) {
    const res = await bhttp.get(url);

    // Same contract as fetchLatest: an error page carries none of the inline JSON that
    // scrapeScene parses, so bail out instead of failing inside the parser.
    if (res.statusCode === 200) {
        return scrapeScene(res.body.toString(), url, site);
    }

    return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
Reference in New Issue
Block a user