Fixed empty page breaking Vixen scraper.

This commit is contained in:
DebaucheryLibrarian 2020-11-26 03:13:43 +01:00
parent 980efbc93d
commit 54df9d0c78
8 changed files with 54 additions and 22 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 412 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -507,8 +507,8 @@ const networks = [
exports.seed = knex => Promise.resolve()
.then(async () => {
const { inserted, updated } = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
const parentNetworksBySlug = [].concat(inserted, updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
const parentNetworkEntries = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
const parentNetworksBySlug = [].concat(parentNetworkEntries.inserted, parentNetworkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
const networksWithParent = networks.map(network => ({
slug: network.slug,
@ -521,5 +521,23 @@ exports.seed = knex => Promise.resolve()
parent_id: parentNetworksBySlug[network.parent] || null,
}));
return upsert('entities', networksWithParent, ['slug', 'type'], knex);
const networkEntries = await upsert('entities', networksWithParent, ['slug', 'type'], knex);
const networkIdsBySlug = [].concat(networkEntries.inserted, networkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
const tagSlugs = networks.map(network => network.tags).flat().filter(Boolean);
const tagEntries = await knex('tags').whereIn('slug', tagSlugs);
const tagIdsBySlug = tagEntries.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag.id }), {});
const tagAssociations = networks
.map(network => (network.tags
? network.tags.map(tagSlug => ({
entity_id: networkIdsBySlug[network.slug],
tag_id: tagIdsBySlug[tagSlug],
inherit: true,
}))
: []))
.flat();
await upsert('entities_tags', tagAssociations, ['entity_id', 'tag_id'], knex);
});

View File

@ -806,6 +806,7 @@ const tagPhotos = [
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
['fake-cum', 3, 'Alexia Anders in "Thanksgiving Creampies" for Cum 4K'],
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
['fake-cum', 1, 'Ricki White for Fucked Up Facials'],
['fingering', 2, 'Kylie Page and Hadley Viscara in "Busty Blonde Bombshells" for LesbianX'],

View File

@ -79,13 +79,12 @@ async function fetchIncludedEntities() {
};
const rawNetworks = await knex.raw(`
WITH RECURSIVE channels AS (
WITH RECURSIVE included_entities AS (
/* select configured channels and networks */
SELECT
entities.*, json_agg(siblings) as siblings
entities.*
FROM
entities
LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
WHERE
CASE WHEN :includeAll
THEN
@ -102,17 +101,16 @@ async function fetchIncludedEntities() {
AND entities.type = 'network')
OR (entities.slug = ANY(:excludedChannels)
AND entities.type = 'channel'))
GROUP BY entities.id
UNION ALL
/* select recursive children of configured networks */
SELECT
entities.*, null as siblings
entities.*
FROM
entities
INNER JOIN
channels ON channels.id = entities.parent_id
included_entities ON included_entities.id = entities.parent_id
WHERE
NOT ((entities.slug = ANY(:excludedNetworks)
AND entities.type = 'network')
@ -121,17 +119,20 @@ async function fetchIncludedEntities() {
)
/* select recursive channels as children of networks */
SELECT
entities.*, json_agg(channels ORDER BY channels.id) as children
parents.*, json_agg(included_entities ORDER BY included_entities.id) as children
FROM
channels
included_entities
LEFT JOIN
entities ON entities.id = channels.parent_id
entities AS parents ON parents.id = included_entities.parent_id
WHERE
channels.type = 'channel'
included_entities.type = 'channel'
GROUP BY
entities.id;
parents.id;
`, include);
// console.log(rawNetworks.rows[0]);
// console.log(rawNetworks.toString());
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
return curatedNetworks;

View File

@ -6,17 +6,21 @@ const slugify = require('../utils/slugify');
function matchChannel(release, channel) {
const series = channel.children || channel.parent.children;
console.log(channel, series);
// console.log(series?.length, release.url, channel.name);
const serieNames = series.reduce((acc, serie) => ({
...acc,
[serie.name]: serie,
[serie.slug]: serie,
}), {});
}), {
vr: 'littlecapricevr',
});
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
const serie = serieName && serieNames[slugify(serieName, '')];
console.log(release.title, serieName);
if (serie) {
return {
channel: serie.slug,

View File

@ -207,10 +207,14 @@ async function fetchLatest(site, page = 1) {
const url = `${site.url}/api/videos?page=${page}`;
const res = await http.get(url);
if (res.status === 200) {
if (res.ok) {
if (res.body.data.videos) {
return scrapeAll(res.body.data.videos, site);
}
return null;
}
return res.status;
}
@ -218,10 +222,14 @@ async function fetchUpcoming(site) {
const apiUrl = `${site.url}/api`;
const res = await http.get(apiUrl);
if (res.status === 200) {
if (res.ok) {
if (res.body.data.nextScene) {
return scrapeUpcoming(res.body.data.nextScene, site);
}
return null;
}
return res.status;
}
@ -231,7 +239,7 @@ async function fetchScene(url, site, baseRelease) {
const res = await http.get(apiUrl);
if (res.status === 200) {
if (res.ok) {
return scrapeScene(res.body.data, url, site, baseRelease);
}
@ -244,7 +252,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
const url = `${origin}/api/${actorSlug}`;
const res = await http.get(url);
if (res.status === 200) {
if (res.ok) {
return scrapeProfile(res.body.data, origin, include.scenes);
}