forked from DebaucheryLibrarian/traxxx
Added series as channels with logos and photo album scraping to Little Caprice. Added various tag photos.
This commit is contained in:
@@ -1,8 +1,33 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(scenes) {
|
||||
/**
 * Escape regular expression metacharacters so the string is matched literally.
 *
 * @param {string} string - Raw text to embed in a RegExp source.
 * @returns {string} The text with every metacharacter backslash-escaped.
 */
function escapeRegExp(string) {
	return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Work out which series a release belongs to by looking for a series name or
 * slug inside the release title.
 *
 * The candidate series are the channel's own children or, failing that, the
 * children of its parent.
 *
 * @param {{ title?: string }} release - Release scraped so far; only `title` is read.
 * @param {Object} channel - Channel being scraped; `children` and `parent.children` are read.
 * @returns {{ channel: string, title: string } | null} The matched series' slug and the
 *   title with the series name (plus any trailing whitespace, colons and dashes) removed,
 *   or null when nothing matches. Null is safe to spread into an object literal.
 */
function matchChannel(release, channel) {
	// a channel without children or a parent simply has no series to match against
	const series = channel.children || channel.parent?.children || [];
	const { title } = release;

	if (typeof title !== 'string') {
		return null;
	}

	// every usable name and slug, paired with the series it identifies;
	// missing values are dropped so they cannot turn into an "undefined" key
	const candidates = series.flatMap((serie) => [serie.name, serie.slug]
		.filter((key) => typeof key === 'string' && key.length > 0)
		.map((key) => ({ key, serie })));

	if (candidates.length === 0) {
		return null;
	}

	// Keys are normalised with the same slugify call used for the lookup below, so the
	// lookup no longer depends on the stored slug being identical to the slugified name.
	// NOTE(review): assumes slugify(value, '') is case-insensitive and drops separators - confirm in ../utils/slugify
	const seriesByKey = new Map(candidates.map(({ key, serie }) => [slugify(key, ''), serie]));

	// longest key first, so a short key cannot shadow a longer one starting at the same position
	const pattern = candidates
		.map(({ key }) => key)
		.sort((keyA, keyB) => keyB.length - keyA.length)
		.map((key) => escapeRegExp(key))
		.join('|');

	const serieName = title.match(new RegExp(pattern, 'i'))?.[0];
	const serie = serieName && seriesByKey.get(slugify(serieName, ''));

	if (!serie) {
		return null;
	}

	return {
		channel: serie.slug,
		// serieName is the exact substring found in the title, so it is escaped and matched case-sensitively
		title: title.replace(new RegExp(`${escapeRegExp(serieName)}[\\s:–-]*`), ''),
	};
}
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
return scenes.map(({ query, el }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -14,11 +39,29 @@ function scrapeAll(scenes) {
|
||||
|
||||
release.poster = query.img('img');
|
||||
|
||||
return release;
|
||||
return {
|
||||
...release,
|
||||
...matchChannel(release, channel),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ query }) {
|
||||
/**
 * Fetch the photo album page and collect the photos linked from its gallery.
 *
 * @param {string} [url] - Album page URL; when falsy no request is made.
 * @returns {Promise<Array<{ src: string, referer: string }> | null>} One entry per URL
 *   returned by the gallery's `a` query, each with the album URL as referer, or null
 *   when there is no URL or the request result is not ok.
 */
async function fetchPhotos(url) {
	if (!url) {
		return null;
	}

	// the query is scoped to the '.et_post_gallery' element of the fetched page
	const galleryRes = await qu.get(url, '.et_post_gallery');

	if (!galleryRes.ok) {
		return null;
	}

	const photoUrls = galleryRes.item.query.urls('a');

	return photoUrls.map((src) => ({
		src,
		referer: url,
	}));
}
|
||||
|
||||
async function scrapeScene({ query }, url, channel, include) {
|
||||
const release = {};
|
||||
|
||||
const script = query.cnt('script.yoast-schema-graph');
|
||||
@@ -41,20 +84,31 @@ function scrapeScene({ query }) {
|
||||
|
||||
const posterData = data['@graph']?.find(item => item['@type'] === 'ImageObject');
|
||||
|
||||
release.poster = posterData?.url
|
||||
const poster = posterData?.url
|
||||
|| query.q('meta[property="og:image"]', 'content')
|
||||
|| query.q('meta[name="twitter:image"]', 'content');
|
||||
|
||||
release.poster = {
|
||||
src: poster,
|
||||
referer: url,
|
||||
};
|
||||
|
||||
release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?
|
||||
|
||||
// TODO: photo gallery, find if any video has a trailer
|
||||
console.log(release);
|
||||
return release;
|
||||
if (include.photos) {
|
||||
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
|
||||
}
|
||||
|
||||
return {
|
||||
...release,
|
||||
...matchChannel(release, channel),
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchLatest(channel) {
|
||||
// no apparent pagination, all updates on one page
|
||||
const res = await qu.getAll(`${channel.url}/videos/`, '.project');
|
||||
// using channels in part because main overview contains indistinguishable photo albums
|
||||
const res = await qu.getAll(channel.url, '.project');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
@@ -63,11 +117,11 @@ async function fetchLatest(channel) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
async function fetchScene(url, channel, baseRelease, include) {
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, channel);
|
||||
return scrapeScene(res.item, url, channel, include);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
Reference in New Issue
Block a user