Added Men network and Icon Male to MindGeek. Added entropy filter to media module to help filter out generic avatars. Added Pure Taboo. Various logo updates.

This commit is contained in:
2020-01-30 01:14:31 +01:00
parent 04e9d29010
commit ff61094b69
124 changed files with 360 additions and 16 deletions

103
src/scrapers/puretaboo.js Normal file
View File

@@ -0,0 +1,103 @@
'use strict';
const bhttp = require('bhttp');
const moment = require('moment');
// const { fetchLatest } = require('./gamma');
const { ex, ctxa } = require('../utils/q');
const { getPhotos } = require('./gamma');
/**
 * Turn a scene listing page into a list of release stubs.
 *
 * Every `.sceneList .sceneContainer` element yields one release carrying the
 * entry ID, title, absolute scene URL, actors, poster and a preview trailer
 * URL derived from the entry ID.
 *
 * @param {string} html - Markup of a listing page.
 * @returns {Object[]} One release object per scene container found.
 */
function scrape(html) {
  const { document } = ex(html);
  const sceneContexts = ctxa(document, '.sceneList .sceneContainer');

  return sceneContexts.map(({ q, qa }) => {
    // The image link carries the scene ID (data-id), the title and the relative scene path.
    const anchor = q('a.imgLink');
    const entryId = anchor.dataset.id;

    return {
      entryId,
      title: anchor.title,
      url: `https://www.puretaboo.com${anchor.href}`,
      // NOTE(review): the `true` flag presumably makes qa return text content — confirm in ../utils/q.
      actors: qa('.sceneActors a', true),
      // The poster URL is read from the image's data-original attribute.
      poster: q('.imgLink img').dataset.original,
      trailer: `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`,
    };
  });
}
/**
 * Build a release object from a scene page.
 *
 * Three data sources embedded in the page are combined, each falling back to
 * the next (and finally to the rendered markup) when a value is missing:
 *   1. the `dataLayer = [...]` script (`sceneDetails`),
 *   2. the second entry of the `application/ld+json` script,
 *   3. the `window.ScenePlayerOptions = {...}` script.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Absolute URL of the scene page; its last path segment
 *   is the final fallback for the entry ID.
 * @param {Object} site - Site record, passed through to `getPhotos`.
 * @returns {Promise<Object>} The scraped release.
 * @throws {SyntaxError} When one of the embedded JSON payloads cannot be parsed.
 */
async function scrapeScene(html, url, site) {
  const release = {};
  const { q, qa, qd, qm } = ex(html);
  const { pathname } = new URL(url);

  // Slice the object out from between `dataLayer = [` and the closing `];`.
  const dataPrefix = 'dataLayer = [';
  const dataStart = html.indexOf(dataPrefix);
  const dataString = html.slice(dataStart + dataPrefix.length, html.indexOf('];', dataStart));
  const { sceneDetails: data } = JSON.parse(dataString);

  // The ld+json script holds an array; the entry at index 1 is the one used here.
  const dataString2 = q('script[type="application/ld+json"]', true);
  const data2 = JSON.parse(dataString2)[1];

  // Slice the player options object up to and including the first `}` that is followed by `;`.
  const videoPrefix = 'window.ScenePlayerOptions = ';
  const videoStart = html.indexOf(videoPrefix);
  const videoString = html.slice(videoStart + videoPrefix.length, html.indexOf('};', videoStart) + 1);
  const videoData = JSON.parse(videoString);

  release.entryId = data.sceneId || videoData.playerOptions.sceneInfos.sceneId || pathname.slice(pathname.lastIndexOf('/') + 1);
  release.url = url;

  release.title = data.sceneTitle || videoData.playerOptions.sceneInfos.sceneTitle || qm('meta[name="twitter:title"]') || q('.sceneTitle', true);
  release.description = data.sceneDescription || data2.description || q('.sceneDesc', true).replace('Video Description:', '');

  // The first two characters of the ld+json duration are dropped before handing it to moment.
  release.duration = moment.duration(data2.duration.slice(2)).asSeconds();

  // data2.dateCreated is the date the scene was first added to 'upcoming'
  release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate
    ? moment.utc(videoData.playerOptions.sceneInfos.sceneReleaseDate, 'YYYY-MM-DD').toDate()
    : qd('.updatedDate', 'MM-DD-YYYY');

  release.actors = data.sceneActors?.map(actor => actor.actorName)
    || data2.actor?.map(actor => actor.name)
    || qa('.sceneColActors a', true);

  // `split` always returns an array, which is truthy, so `split(...) || fallback`
  // could never reach the fallback and threw when `keywords` was absent. Check
  // the keywords string itself before splitting.
  release.tags = data2.keywords
    ? data2.keywords.split(', ')
    : qa('.sceneColCategories a', 'title');

  release.poster = videoData.picPreview;

  release.trailer = {
    src: `${videoData.playerOptions.host}${videoData.url}`,
  };

  release.photos = await getPhotos(q('.picturesItem a').href, 'puretaboo.com', site);

  return release;
}
/**
 * Fetch one page of the site's latest scenes and scrape it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|null>} Scraped releases, or `null` when the
 *   response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const listingUrl = `${site.url}/en/videos/AllCategories/0/${page}`;
  const res = await bhttp.get(listingUrl);

  // Anything other than a plain 200 is treated as "nothing to scrape".
  if (res.statusCode !== 200) {
    return null;
  }

  const html = res.body.toString();

  return scrape(html, site);
}
/**
 * Fetch the site's upcoming scenes listing and scrape it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing URL.
 * @returns {Promise<Object[]|null>} Scraped releases, or `null` when the
 *   response status is not 200.
 */
async function fetchUpcoming(site) {
  const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);

  // Mirror fetchLatest: never feed an error page to the scraper.
  if (res.statusCode === 200) {
    return scrape(res.body.toString(), site);
  }

  return null;
}
/**
 * Fetch a single scene page and scrape it into a release.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {Object} site - Site record, passed through to `scrapeScene`.
 * @returns {Promise<Object|null>} The scraped release, or `null` when the
 *   response status is not 200.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  // Mirror fetchLatest: an error page lacks the embedded JSON that scrapeScene
  // parses, so return null instead of failing inside the scraper.
  if (res.statusCode === 200) {
    return scrapeScene(res.body.toString(), url, site);
  }

  return null;
}
// Functions exposed by this scraper module: latest and upcoming listing
// fetchers plus the single-scene fetcher. `scrape` and `scrapeScene` stay
// module-private.
module.exports = {
fetchLatest,
fetchScene,
fetchUpcoming,
};