Fixed empty page breaking Vixen scraper.

This commit is contained in:
DebaucheryLibrarian 2020-11-26 03:13:43 +01:00
parent 980efbc93d
commit 54df9d0c78
8 changed files with 54 additions and 22 deletions

Binary image added (412 KiB, not shown)

Binary image added (6.8 KiB, not shown)

Binary image added (29 KiB, not shown)

View File

@@ -507,8 +507,8 @@ const networks = [
 exports.seed = knex => Promise.resolve()
   .then(async () => {
-    const { inserted, updated } = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
-    const parentNetworksBySlug = [].concat(inserted, updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
+    const parentNetworkEntries = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
+    const parentNetworksBySlug = [].concat(parentNetworkEntries.inserted, parentNetworkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
 
     const networksWithParent = networks.map(network => ({
       slug: network.slug,
@@ -521,5 +521,23 @@ exports.seed = knex => Promise.resolve()
       parent_id: parentNetworksBySlug[network.parent] || null,
     }));
 
-    return upsert('entities', networksWithParent, ['slug', 'type'], knex);
+    const networkEntries = await upsert('entities', networksWithParent, ['slug', 'type'], knex);
+    const networkIdsBySlug = [].concat(networkEntries.inserted, networkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
+
+    const tagSlugs = networks.map(network => network.tags).flat().filter(Boolean);
+    const tagEntries = await knex('tags').whereIn('slug', tagSlugs);
+    const tagIdsBySlug = tagEntries.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag.id }), {});
+
+    const tagAssociations = networks
+      .map(network => (network.tags
+        ? network.tags.map(tagSlug => ({
+          entity_id: networkIdsBySlug[network.slug],
+          tag_id: tagIdsBySlug[tagSlug],
+          inherit: true,
+        }))
+        : []))
+      .flat();
+
+    await upsert('entities_tags', tagAssociations, ['entity_id', 'tag_id'], knex);
   });
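For reference, a minimal self-contained sketch of the new tag-association step above, with hypothetical slugs and ids; the `inherit: true` flag presumably lets child channels inherit the network-level tag:

// Hypothetical rows: one network carrying tag slugs, plus the lookup maps the seed builds.
const networks = [
  { slug: 'vixen', tags: ['4k'] }, // tag list is illustrative
];
const networkIdsBySlug = { vixen: 101 }; // as derived from networkEntries.inserted/.updated
const tagIdsBySlug = { '4k': 7 }; // as derived from the tags query

const tagAssociations = networks
  .map(network => (network.tags
    ? network.tags.map(tagSlug => ({
      entity_id: networkIdsBySlug[network.slug],
      tag_id: tagIdsBySlug[tagSlug],
      inherit: true, // presumably: child channels inherit the network's tag
    }))
    : []))
  .flat();

// => [{ entity_id: 101, tag_id: 7, inherit: true }]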

View File

@@ -806,6 +806,7 @@ const tagPhotos = [
   ['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
   // ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
   ['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
+  ['fake-cum', 3, 'Alexia Anders in "Thanksgiving Creampies" for Cum 4K'],
   ['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
   ['fake-cum', 1, 'Ricki White for Fucked Up Facials'],
   ['fingering', 2, 'Kylie Page and Hadley Viscara in "Busty Blonde Bombshells" for LesbianX'],

View File

@@ -79,13 +79,12 @@ async function fetchIncludedEntities() {
   };
 
   const rawNetworks = await knex.raw(`
-    WITH RECURSIVE channels AS (
+    WITH RECURSIVE included_entities AS (
       /* select configured channels and networks */
       SELECT
-        entities.*, json_agg(siblings) as siblings
+        entities.*
       FROM
         entities
-      LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
       WHERE
         CASE WHEN :includeAll
         THEN
@@ -102,17 +101,16 @@ async function fetchIncludedEntities() {
         AND entities.type = 'network')
         OR (entities.slug = ANY(:excludedChannels)
         AND entities.type = 'channel'))
-      GROUP BY entities.id
 
       UNION ALL
 
       /* select recursive children of configured networks */
       SELECT
-        entities.*, null as siblings
+        entities.*
       FROM
         entities
       INNER JOIN
-        channels ON channels.id = entities.parent_id
+        included_entities ON included_entities.id = entities.parent_id
       WHERE
         NOT ((entities.slug = ANY(:excludedNetworks)
         AND entities.type = 'network')
@@ -121,17 +119,20 @@ async function fetchIncludedEntities() {
     )
     /* select recursive channels as children of networks */
     SELECT
-      entities.*, json_agg(channels ORDER BY channels.id) as children
+      parents.*, json_agg(included_entities ORDER BY included_entities.id) as children
     FROM
-      channels
+      included_entities
     LEFT JOIN
-      entities ON entities.id = channels.parent_id
+      entities AS parents ON parents.id = included_entities.parent_id
     WHERE
-      channels.type = 'channel'
+      included_entities.type = 'channel'
     GROUP BY
-      entities.id;
+      parents.id;
   `, include);
 
+  // console.log(rawNetworks.rows[0]);
+  // console.log(rawNetworks.toString());
+
   const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
 
   return curatedNetworks;
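For reference, an in-memory sketch of what the renamed recursive CTE computes, with hypothetical rows: the anchor member selects the configured entities, the recursive member walks their descendants, and the final SELECT groups included channels under their parent entities as `children`:

// Hypothetical entity rows standing in for the entities table.
const entities = [
  { id: 1, slug: 'vixen', type: 'network', parent_id: null },
  { id: 2, slug: 'blacked', type: 'channel', parent_id: 1 },
  { id: 3, slug: 'tushy', type: 'channel', parent_id: 1 },
];

// Anchor member: the configured networks/channels.
const included = entities.filter(entity => entity.slug === 'vixen');

// Recursive member: for...of visits elements appended during iteration,
// mirroring the UNION ALL recursion over parent_id.
for (const parent of included) {
  included.push(...entities.filter(child => child.parent_id === parent.id));
}

// Final SELECT: group included channels under their parents as `children`.
const byParent = {};

included
  .filter(entity => entity.type === 'channel')
  .forEach((channel) => {
    const parent = entities.find(entity => entity.id === channel.parent_id);
    byParent[parent.id] = byParent[parent.id] || { ...parent, children: [] };
    byParent[parent.id].children.push(channel);
  });

// byParent[1] => { slug: 'vixen', ..., children: [blacked, tushy] }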

View File

@@ -6,17 +6,21 @@ const slugify = require('../utils/slugify');
 function matchChannel(release, channel) {
   const series = channel.children || channel.parent.children;
 
-  console.log(channel, series);
+  // console.log(series?.length, release.url, channel.name);
 
   const serieNames = series.reduce((acc, serie) => ({
     ...acc,
     [serie.name]: serie,
     [serie.slug]: serie,
-  }), {});
+  }), {
+    vr: 'littlecapricevr',
+  });
 
   const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
   const serie = serieName && serieNames[slugify(serieName, '')];
 
+  console.log(release.title, serieName);
+
   if (serie) {
     return {
       channel: serie.slug,
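For reference, a self-contained sketch of the series lookup this function performs, with hypothetical series and release title; `toLowerCase()` stands in for the module's `slugify(serieName, '')`:

// Hypothetical sub-series of a channel, keyed by both name and slug.
const series = [
  { name: 'Wecumtoyou', slug: 'wecumtoyou' },
  { name: 'Nasstyx', slug: 'nasstyx' },
];

const serieNames = series.reduce((acc, serie) => ({
  ...acc,
  [serie.name]: serie,
  [serie.slug]: serie,
}), {});

const title = 'Wecumtoyou Part 4'; // hypothetical release title
const serieName = title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0]; // 'Wecumtoyou'
const serie = serieName && serieNames[serieName.toLowerCase()]; // stand-in for slugify(serieName, '')

// serie.slug === 'wecumtoyou', so the release gets reassigned to that sub-channel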

View File

@@ -207,8 +207,12 @@ async function fetchLatest(site, page = 1) {
   const url = `${site.url}/api/videos?page=${page}`;
   const res = await http.get(url);
 
-  if (res.status === 200) {
-    return scrapeAll(res.body.data.videos, site);
+  if (res.ok) {
+    if (res.body.data.videos) {
+      return scrapeAll(res.body.data.videos, site);
+    }
+
+    return null;
   }
 
   return res.status;
@@ -218,8 +222,12 @@ async function fetchUpcoming(site) {
   const apiUrl = `${site.url}/api`;
   const res = await http.get(apiUrl);
 
-  if (res.status === 200) {
-    return scrapeUpcoming(res.body.data.nextScene, site);
+  if (res.ok) {
+    if (res.body.data.nextScene) {
+      return scrapeUpcoming(res.body.data.nextScene, site);
+    }
+
+    return null;
   }
 
   return res.status;
@@ -231,7 +239,7 @@ async function fetchScene(url, site, baseRelease) {
   const res = await http.get(apiUrl);
 
-  if (res.status === 200) {
+  if (res.ok) {
     return scrapeScene(res.body.data, url, site, baseRelease);
   }
 
@@ -244,7 +252,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
   const url = `${origin}/api/${actorSlug}`;
   const res = await http.get(url);
 
-  if (res.status === 200) {
+  if (res.ok) {
     return scrapeProfile(res.body.data, origin, include.scenes);
   }
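For reference, the guard pattern the fix applies, as one sketch; it assumes the module's `http` helper exposes `ok` for 2xx responses and a parsed JSON `body`, and uses the file's own `scrapeAll`:

// Sketch: treat a 2xx response whose payload lacks a videos array (an empty
// page) as "no data" and return null, instead of handing undefined to the
// scraper; non-2xx responses still propagate the HTTP status as before.
async function fetchLatestGuarded(site, page = 1) {
  const res = await http.get(`${site.url}/api/videos?page=${page}`);

  if (res.ok) {
    if (res.body.data.videos) {
      return scrapeAll(res.body.data.videos, site);
    }

    return null; // empty page: nothing to scrape
  }

  return res.status; // let the caller handle the failure
}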