Fixed Whale Member (Porn Pros, Holed) scraper.
This commit is contained in:
parent
cc67532fd9
commit
c2afa571bf
|
@ -1,145 +1,87 @@
|
|||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const { stripQuery } = require('../utils/url');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.parameters?.latest || site.url);
|
||||
function scrapeLatest(scenes, channel) {
|
||||
return scenes.map(({ query, element }) => {
|
||||
const release = {};
|
||||
|
||||
const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
|
||||
release.url = query.url('[href*="/video"]');
|
||||
release.entryId = unprint.query.attribute(element, null, 'data-vid');
|
||||
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
release.title = query.content('.video-thumbnail-footer a[href*="/video"]');
|
||||
release.date = query.date('.actor-list + span', 'MM/DD/YYYY');
|
||||
|
||||
release.url = `${origin}${scene.querySelector(':scope > a').href}`;
|
||||
release.entryId = scene.dataset.videoId;
|
||||
release.title = scene.querySelector('.card-title').textContent;
|
||||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), (el) => el.textContent);
|
||||
|
||||
// slow CDN?
|
||||
const poster = scene.querySelector('.single-image').dataset.src;
|
||||
const teaserEl = scene.querySelector('source');
|
||||
|
||||
release.poster = {
|
||||
src: /^http/.test(poster) ? poster : `https:${poster}`,
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
};
|
||||
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), (el) => ({
|
||||
src: (/^http/.test(el.dataset.src) ? el.dataset.src : `https:${el.dataset.src}`),
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
release.actors = query.all('.actor-list a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null, { origin: channel.url }),
|
||||
}));
|
||||
|
||||
if (teaserEl) {
|
||||
release.teaser = {
|
||||
src: teaserEl.dataset.src,
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
};
|
||||
const poster = query.poster() || query.img('a img', { attribute: 'data-src' });
|
||||
|
||||
if (poster) {
|
||||
release.poster = [
|
||||
stripQuery(poster),
|
||||
poster,
|
||||
];
|
||||
}
|
||||
|
||||
release.photos = query.imgs('img[data-index]', { attribute: 'data-src' }).map((src) => [
|
||||
stripQuery(src),
|
||||
src,
|
||||
]);
|
||||
|
||||
release.teaser = query.video('source', { attribute: 'data-src' });
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene(html, site, url) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { site };
|
||||
function scrapeScene({ query }, channel) {
|
||||
const release = {};
|
||||
|
||||
const scene = document.querySelector('#t2019-2col');
|
||||
release.entryId = query.attribute('div[data-id]', 'data-id');
|
||||
|
||||
release.url = url;
|
||||
release.title = scene.querySelector('.t2019-stitle').textContent.trim();
|
||||
release.description = scene.querySelector('#t2019-description').textContent.trim();
|
||||
release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), (el) => el.textContent);
|
||||
release.title = query.content('.scene-info h1');
|
||||
release.description = query.content('//div[contains(@class, \'scene-info\')]//i[contains(@class, \'fa-quote\')]/following-sibling::span');
|
||||
|
||||
const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));
|
||||
release.duration = (query.number('//div[contains(@class, \'scene-info\')]//span[contains(text(), \'Duration\')]/following-sibling::span[contains(text(), \'minutes\')]') * 60) || null;
|
||||
|
||||
if (durationEls.length > 1) {
|
||||
release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
|
||||
release.duration = Number(durationEls[1].textContent.match(/\d+/)[0]) * 60;
|
||||
} else {
|
||||
release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
|
||||
}
|
||||
|
||||
// unreliable CDN
|
||||
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), (el) => ({
|
||||
src: (/^http/.test(el.src) ? el.src : `https:${el.src}`),
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
release.actors = query.all('.scene-info a[href*="/models"]').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null, { origin: channel.url }),
|
||||
}));
|
||||
|
||||
const posterEl = scene.querySelector('#no-player-image');
|
||||
const videoEl = scene.querySelector('video');
|
||||
const trailerEl = scene.querySelector('#t2019-video source');
|
||||
release.poster = query.poster('#player-wrapper video');
|
||||
|
||||
if (posterEl) {
|
||||
release.poster = {
|
||||
src: /^http/.test(posterEl.src) ? posterEl.src : `https:${posterEl.src}`,
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
};
|
||||
} else if (videoEl) {
|
||||
release.poster = {
|
||||
src: /^http/.test(videoEl.poster) ? videoEl.poster : `https:${videoEl.poster}`,
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
};
|
||||
}
|
||||
release.photos = query.imgs('#trailer_player .hidden > a img').map((src) => [
|
||||
stripQuery(src),
|
||||
src,
|
||||
]);
|
||||
|
||||
if (trailerEl) {
|
||||
release.trailer = {
|
||||
src: trailerEl.src,
|
||||
referer: site.url,
|
||||
attempts: 5,
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
};
|
||||
}
|
||||
release.teaser = query.video('#player-wrapper source');
|
||||
release.qualities = query.contents('#trailer_player .resolution').map((resolution) => Number(resolution.split('x')[1])).filter(Boolean);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.parameters?.latest || site.url}?page=${page}`;
|
||||
const res = await http.get(url);
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.parameters?.latest || channel.url}?page=${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '//*[(starts-with(text(), \'Latest\') and contains(text(), \'Movies\')) or contains(text(), \'Most Recent\')]/following::div[contains(@class, \'video-thumbnail\') and @data-vid]' });
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
if (res.status === 200) {
|
||||
return scrapeLatest(res.context, channel);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), site, url);
|
||||
}
|
||||
|
||||
return null;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
scrapeScene: {
|
||||
scraper: scrapeScene,
|
||||
unprint: true,
|
||||
},
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue