traxxx/src/scrapers/littlecapricedreams.js

138 lines
3.2 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Work out which series a release belongs to by looking for a series name,
 * a series slug or a known alias inside the release title.
 *
 * @param {{ title: ?string }} release - Release scraped so far; only `title` is read.
 * @param {Object} channel - Channel being scraped; the candidate series are taken from
 *   `channel.children`, falling back to `channel.parent.children`.
 * @returns {?{ channel: string, title: string }} The matched series slug and the title with
 *   the series name (plus any trailing whitespace, colons and hyphens) removed, or null when
 *   the title is missing or no series is recognised.
 */
function matchChannel(release, channel) {
  if (!release.title) {
    return null;
  }

  const series = channel.children || channel.parent?.children || [];
  // names are inserted into a pattern, so regex metacharacters have to be neutralised
  const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // aliases point straight at a channel slug, every other entry points at a series object
  const serieNames = {
    vr: 'littlecapricevr',
  };

  for (const serie of series) {
    if (serie.name) {
      serieNames[serie.name] = serie;
    }

    if (serie.slug) {
      serieNames[serie.slug] = serie;
    }
  }

  const pattern = Object.keys(serieNames).map(escapeRegExp).join('|');
  const serieName = release.title.match(new RegExp(pattern, 'i'))?.[0];

  // the title may spell the series with different casing or spacing, so look it up by slug
  const serie = serieName && serieNames[slugify(serieName, '')];

  if (!serie) {
    return null;
  }

  return {
    // alias entries are already slugs; series entries carry their slug as a property
    channel: typeof serie === 'string' ? serie : serie.slug,
    title: release.title.replace(new RegExp(`${escapeRegExp(serieName)}[\\s:-]*`), ''),
  };
}
/**
 * Turn the scene tiles of an overview page into release objects.
 *
 * @param {Array<{ query: Object, el: Object }>} scenes - One entry per tile, each with its
 *   query helpers and the tile element itself.
 * @param {Object} channel - Channel being scraped, handed on to matchChannel.
 * @returns {Object[]} One release per tile; channel and title are overridden whenever
 *   matchChannel recognises a series.
 */
function scrapeAll(scenes, channel) {
  return scenes.map((scene) => {
    const { query, el } = scene;

    const release = {
      url: query.url('a'),
      // the post ID is embedded in the id attribute of the tile element itself
      entryId: query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1],
      title: query.cnt('.meta h3'),
      date: query.date('.meta .post-meta', 'MMMM D, YYYY'),
      poster: query.img('img'),
    };

    return Object.assign({}, release, matchChannel(release, channel));
  });
}
/**
 * Fetch the photo gallery that belongs to a scene.
 *
 * @param {?string} url - Address of the gallery page.
 * @returns {Promise<?Array<{ src: string, referer: string }>>} One source per gallery link,
 *   or null when no URL was given or the request did not succeed.
 */
async function fetchPhotos(url) {
  if (!url) {
    return null;
  }

  const res = await qu.get(url, '.et_post_gallery');

  if (!res.ok) {
    return null;
  }

  const photoUrls = res.item.query.urls('a');

  // the gallery page is sent along as referer with every photo source
  return photoUrls.map((src) => ({ src, referer: url }));
}
/**
 * Parse JSON embedded in a page, tolerating absent or malformed content.
 *
 * @param {?string} text - Raw JSON text.
 * @returns {*} The parsed value, or null when there is nothing parseable.
 */
function parseEmbeddedJson(text) {
  if (!text) {
    return null;
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    // the schema is only one of several poster sources, so a broken document is not fatal
    return null;
  }
}

/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object }} item - Fetched page; only its query helpers are used.
 * @param {string} url - Address of the scene page, used as referer for the poster.
 * @param {Object} channel - Channel being scraped, handed on to matchChannel.
 * @param {?{ photos: boolean }} include - Optional extras; the gallery is only fetched when
 *   `photos` is truthy.
 * @returns {Promise<Object>} The release; channel and title are overridden whenever
 *   matchChannel recognises a series. `poster` is omitted when no image could be found.
 */
async function scrapeScene({ query }, url, channel, include) {
  const release = {};

  // the schema script may be missing or invalid, in which case the meta tags supply the poster
  const data = parseEmbeddedJson(query.cnt('script.yoast-schema-graph'));
  const graph = Array.isArray(data?.['@graph']) ? data['@graph'] : [];

  release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];
  release.title = query.cnt('.vid_title');
  release.description = query.cnt('.vid_desc p');

  release.date = query.date('.vid_date', 'MMMM D, YYYY');
  release.duration = query.dur('.vid_length');

  release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null),
  }));

  release.tags = query.cnts('.vid_infos a[rel="tag"]');

  const posterData = graph.find((item) => item?.['@type'] === 'ImageObject');
  const poster = posterData?.url
    || query.q('meta[property="og:image"]', 'content')
    || query.q('meta[name="twitter:image"]', 'content');

  if (poster) {
    release.poster = {
      src: poster,
      referer: url,
    };
  }

  // rating out of 5, yet sometimes 5.07? clamp to 5, and fall back to null when absent
  release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null;

  if (include?.photos) {
    release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
  }

  return {
    ...release,
    ...matchChannel(release, channel),
  };
}
/**
 * Fetch and scrape the overview page of a channel.
 *
 * The site shows no apparent pagination: all updates sit on one page. The channel pages are
 * used in part because the main overview mixes in photo albums that cannot be told apart.
 *
 * @param {{ url: string }} channel - Channel whose overview page is requested.
 * @returns {Promise<Object[]|*>} The scraped releases, or the response status when the
 *   request did not succeed.
 */
async function fetchLatest(channel) {
  const res = await qu.getAll(channel.url, '.project');

  return res.ok
    ? scrapeAll(res.items, channel)
    : res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} channel - Channel being scraped, handed on to scrapeScene.
 * @param {Object} baseRelease - Not used by this scraper; kept for the shared signature.
 * @param {Object} include - Optional extras to scrape, handed on to scrapeScene.
 * @returns {Promise<Object|*>} The scraped release, or the response status when the request
 *   did not succeed.
 */
async function fetchScene(url, channel, baseRelease, include) {
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, url, channel, include);
}
// Functions exposed by this module: the channel overview fetcher and the single-scene fetcher.
module.exports = {
fetchLatest,
fetchScene,
};