Fixed empty page breaking Vixen scraper.

This commit is contained in:
DebaucheryLibrarian
2020-11-26 03:13:43 +01:00
parent 980efbc93d
commit 54df9d0c78
8 changed files with 54 additions and 22 deletions

View File

@@ -79,13 +79,12 @@ async function fetchIncludedEntities() {
};
const rawNetworks = await knex.raw(`
WITH RECURSIVE channels AS (
WITH RECURSIVE included_entities AS (
/* select configured channels and networks */
SELECT
entities.*, json_agg(siblings) as siblings
entities.*
FROM
entities
LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
WHERE
CASE WHEN :includeAll
THEN
@@ -102,17 +101,16 @@ async function fetchIncludedEntities() {
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
GROUP BY entities.id
UNION ALL
/* select recursive children of configured networks */
SELECT
entities.*, null as siblings
entities.*
FROM
entities
INNER JOIN
channels ON channels.id = entities.parent_id
included_entities ON included_entities.id = entities.parent_id
WHERE
NOT ((entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
@@ -121,17 +119,20 @@ async function fetchIncludedEntities() {
)
/* select recursive channels as children of networks */
SELECT
entities.*, json_agg(channels ORDER BY channels.id) as children
parents.*, json_agg(included_entities ORDER BY included_entities.id) as children
FROM
channels
included_entities
LEFT JOIN
entities ON entities.id = channels.parent_id
entities AS parents ON parents.id = included_entities.parent_id
WHERE
channels.type = 'channel'
included_entities.type = 'channel'
GROUP BY
entities.id;
parents.id;
`, include);
// console.log(rawNetworks.rows[0]);
// console.log(rawNetworks.toString());
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
return curatedNetworks;

View File

@@ -6,17 +6,21 @@ const slugify = require('../utils/slugify');
function matchChannel(release, channel) {
const series = channel.children || channel.parent.children;
console.log(channel, series);
// console.log(series?.length, release.url, channel.name);
const serieNames = series.reduce((acc, serie) => ({
...acc,
[serie.name]: serie,
[serie.slug]: serie,
}), {});
}), {
vr: 'littlecapricevr',
});
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
const serie = serieName && serieNames[slugify(serieName, '')];
console.log(release.title, serieName);
if (serie) {
return {
channel: serie.slug,

View File

@@ -207,8 +207,12 @@ async function fetchLatest(site, page = 1) {
const url = `${site.url}/api/videos?page=${page}`;
const res = await http.get(url);
if (res.status === 200) {
return scrapeAll(res.body.data.videos, site);
if (res.ok) {
if (res.body.data.videos) {
return scrapeAll(res.body.data.videos, site);
}
return null;
}
return res.status;
@@ -218,8 +222,12 @@ async function fetchUpcoming(site) {
const apiUrl = `${site.url}/api`;
const res = await http.get(apiUrl);
if (res.status === 200) {
return scrapeUpcoming(res.body.data.nextScene, site);
if (res.ok) {
if (res.body.data.nextScene) {
return scrapeUpcoming(res.body.data.nextScene, site);
}
return null;
}
return res.status;
@@ -231,7 +239,7 @@ async function fetchScene(url, site, baseRelease) {
const res = await http.get(apiUrl);
if (res.status === 200) {
if (res.ok) {
return scrapeScene(res.body.data, url, site, baseRelease);
}
@@ -244,7 +252,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
const url = `${origin}/api/${actorSlug}`;
const res = await http.get(url);
if (res.status === 200) {
if (res.ok) {
return scrapeProfile(res.body.data, origin, include.scenes);
}