traxxx/src/scrapers/littlecapricedreams.js

80 lines
1.9 KiB
JavaScript
Raw Normal View History

'use strict';
const qu = require('../utils/qu');
/**
 * Build a release summary for every scene tile on the overview page.
 *
 * @param {Array<{query: Object, el: Object}>} scenes - One entry per scene tile; `query` exposes
 *   the lookup helpers used below and `el` is the tile element itself.
 * @returns {Array<Object>} One release per tile with url, entryId, title, date and poster.
 */
function scrapeAll(scenes) {
	const releases = [];

	for (const { query, el } of scenes) {
		const url = query.url('a');

		// the numeric post ID is embedded in the tile's own id attribute, e.g. "post-123"
		const idMatch = query.q(el, null, 'id')?.match(/post-(\d+)/);

		const title = query.cnt('.meta h3');
		const date = query.date('.meta .post-meta', 'MMMM D, YYYY');
		const poster = query.img('img');

		releases.push({
			url,
			entryId: idMatch?.[1],
			title,
			date,
			poster,
		});
	}

	return releases;
}
/**
 * Extract the poster URL from the page's Yoast schema graph, when one is usable.
 *
 * @param {?string} script - Text content of the `script.yoast-schema-graph` element, or a falsy value if absent.
 * @returns {?string} URL of the first `ImageObject` entry in `@graph`, or null when unavailable.
 */
function getSchemaPoster(script) {
	if (!script) {
		return null;
	}

	let schema;

	try {
		schema = JSON.parse(script);
	} catch (error) {
		// the schema is only one of several poster sources; a malformed blob should not
		// abort the whole scene, the caller falls back to the meta tags instead
		return null;
	}

	const graph = schema?.['@graph'];

	if (!Array.isArray(graph)) {
		return null;
	}

	return graph.find((item) => item?.['@type'] === 'ImageObject')?.url || null;
}

/**
 * Scrape a single scene page into a release object.
 *
 * @param {Object} context
 * @param {Object} context.query - Lookup helpers for the scene page (cnt, q, date, dur, all, cnts, url).
 * @returns {Object} Release with entryId, title, description, date, duration, actors, tags, poster and stars.
 */
function scrapeScene({ query }) {
	const release = {};
	const script = query.cnt('script.yoast-schema-graph');

	// the numeric post ID is embedded in the article's id attribute, e.g. "post-123"
	release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];

	release.title = query.cnt('.vid_title');
	release.description = query.cnt('.vid_desc p');

	release.date = query.date('.vid_date', 'MMMM D, YYYY');
	release.duration = query.dur('.vid_length');

	release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null),
	}));

	release.tags = query.cnts('.vid_infos a[rel="tag"]');

	// prefer the schema image, then fall back to the social media meta tags
	release.poster = getSchemaPoster(script)
		|| query.q('meta[property="og:image"]', 'content')
		|| query.q('meta[name="twitter:image"]', 'content');

	// rating out of 5, yet sometimes 5.07? clamp to 5; a missing rating yields NaN and becomes null
	const rating = Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]);
	release.stars = Math.min(rating, 5) || null;

	// TODO: photo gallery, find if any video has a trailer

	return release;
}
/**
 * Fetch the channel's video overview page and scrape every listed scene.
 *
 * @param {Object} channel - Channel descriptor; only `channel.url` is read here.
 * @returns {Promise<Array<Object>|*>} Scraped releases on success, otherwise the response status.
 */
async function fetchLatest(channel) {
	// no apparent pagination, all updates on one page
	const overviewUrl = `${channel.url}/videos/`;
	const res = await qu.getAll(overviewUrl, '.project');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, channel);
}
/**
 * Fetch a single scene page and scrape it into a release.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} channel - Channel descriptor, forwarded to the scraper.
 * @returns {Promise<Object|*>} Scraped release on success, otherwise the response status.
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, channel);
}
// Public interface of this scraper module: the latest-updates fetcher and the single-scene fetcher.
module.exports = {
fetchLatest,
fetchScene,
};