// traxxx/src/scrapers/whalemember.js
//
// 94 lines
// 2.9 KiB
// JavaScript
// Executable File

'use strict';
const unprint = require('unprint');
const { stripQuery } = require('../utils/url');
const slugify = require('../utils/slugify');
/**
 * Build release summaries from the scene tiles of a listing page.
 *
 * @param {Array<{ query: Object }>} scenes - one context per tile, each exposing a `query` helper
 * @param {{ url: string }} channel - channel whose `url` is passed as origin when resolving actor links
 * @returns {Object[]} one release object per tile
 */
function scrapeLatest(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('[href*="/video"]');
		release.title = query.content('.video-thumbnail-footer a[href*="/video"]');
		release.date = query.date('.actor-list + span', 'MM/DD/YYYY');

		// The tile's data-vid attribute is deliberately not used as entry ID: it does not match old videos.
		// Empty path segments are dropped so a trailing slash in the URL cannot produce an empty ID.
		const urlSlug = release.url
			? new URL(release.url).pathname.split('/').filter(Boolean).at(-1)
			: null;

		// Fall back to the slugified title when there is no URL, or the URL has no usable path segment.
		release.entryId = urlSlug || slugify(release.title);

		release.actors = query.all('.actor-list a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null, { origin: channel.url }),
		}));

		const poster = query.poster() || query.img('a img', { attribute: 'data-src' });

		if (poster) {
			// Candidate sources in order: query-less URL first, original URL second.
			release.poster = [
				stripQuery(poster),
				poster,
			];
		}

		release.photos = query.imgs('img[data-index]', { attribute: 'data-src' }).map((src) => [
			stripQuery(src),
			src,
		]);

		release.teaser = query.video('source', { attribute: 'data-src' });

		return release;
	});
}
/**
 * Build a release object from a scene page.
 *
 * @param {{ query: Object }} context - page context exposing a `query` helper
 * @param {{ url: string, entity: { url: string } }} options - scene URL and the entity whose `url`
 *   is passed as origin when resolving actor links
 * @returns {Object} the scraped release
 */
function scrapeScene({ query }, { url, entity }) {
	const release = {};

	// The page's data-id attribute is not used; the ID is the last non-empty segment of the scene URL path.
	// Empty segments are dropped so a trailing slash in the URL cannot produce an empty ID.
	release.entryId = new URL(url).pathname.split('/').filter(Boolean).at(-1);

	release.title = query.content('.scene-info h1');
	release.description = query.content('//div[contains(@class, \'scene-info\')]//i[contains(@class, \'fa-quote\')]/following-sibling::span');

	// The selector targets a value given in minutes; multiply by 60 and use null when no usable number is found.
	release.duration = (query.number('//div[contains(@class, \'scene-info\')]//span[contains(text(), \'Duration\')]/following-sibling::span[contains(text(), \'minutes\')]') * 60) || null;

	release.actors = query.all('.scene-info a[href*="/models"]').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null, { origin: entity.url }),
	}));

	release.poster = query.poster('#player-wrapper video');

	// Candidate sources per photo: query-less URL first, original URL second.
	release.photos = query.imgs('#trailer_player .hidden > a img').map((src) => [
		stripQuery(src),
		src,
	]);

	release.teaser = query.video('#player-wrapper source');

	// Keep the number after the 'x' of each resolution label (presumably the height of WIDTHxHEIGHT);
	// labels without a numeric second part are discarded.
	release.qualities = query.contents('#trailer_player .resolution')
		.map((resolution) => Number(resolution.split('x')[1]))
		.filter(Boolean);

	return release;
}
/**
 * Fetch one page of the latest-scenes listing and scrape its tiles.
 *
 * @param {{ url: string, parameters?: { latest?: string } }} channel - `parameters.latest`
 *   overrides `url` as the listing address when set
 * @param {number} [page=1] - page number sent as the `page` query parameter
 * @returns {Promise<Object[]|number>} scraped releases on HTTP 200, otherwise the response status
 */
async function fetchLatest(channel, page = 1) {
	// Built with the URL API so a listing address that already carries a query string
	// gets `page` merged in, instead of a second '?' being appended.
	const url = new URL(channel.parameters?.latest || channel.url);

	url.searchParams.set('page', page);

	// Select the tiles carrying data-vid that follow a 'Latest ... Movies' or 'Most Recent' heading.
	const res = await unprint.get(url.href, { selectAll: '//*[(starts-with(text(), \'Latest\') and contains(text(), \'Movies\')) or contains(text(), \'Most Recent\')]/following::div[contains(@class, \'video-thumbnail\') and @data-vid]' });

	if (res.status === 200) {
		return scrapeLatest(res.context, channel);
	}

	return res.status;
}
module.exports = {
fetchLatest,
scrapeScene: {
scraper: scrapeScene,
unprint: true,
},
};