Fixed empty page breaking Vixen scraper.
This commit is contained in:
parent
980efbc93d
commit
54df9d0c78
Binary file not shown.
After Width: | Height: | Size: 412 KiB |
Binary file not shown.
After Width: | Height: | Size: 6.8 KiB |
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
|
@ -507,8 +507,8 @@ const networks = [
|
||||||
|
|
||||||
exports.seed = knex => Promise.resolve()
|
exports.seed = knex => Promise.resolve()
|
||||||
.then(async () => {
|
.then(async () => {
|
||||||
const { inserted, updated } = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
|
const parentNetworkEntries = await upsert('entities', parentNetworks.map(network => ({ ...network, type: 'network' })), ['slug', 'type'], knex);
|
||||||
const parentNetworksBySlug = [].concat(inserted, updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
|
const parentNetworksBySlug = [].concat(parentNetworkEntries.inserted, parentNetworkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
|
||||||
|
|
||||||
const networksWithParent = networks.map(network => ({
|
const networksWithParent = networks.map(network => ({
|
||||||
slug: network.slug,
|
slug: network.slug,
|
||||||
|
@ -521,5 +521,23 @@ exports.seed = knex => Promise.resolve()
|
||||||
parent_id: parentNetworksBySlug[network.parent] || null,
|
parent_id: parentNetworksBySlug[network.parent] || null,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
return upsert('entities', networksWithParent, ['slug', 'type'], knex);
|
const networkEntries = await upsert('entities', networksWithParent, ['slug', 'type'], knex);
|
||||||
|
|
||||||
|
const networkIdsBySlug = [].concat(networkEntries.inserted, networkEntries.updated).reduce((acc, network) => ({ ...acc, [network.slug]: network.id }), {});
|
||||||
|
const tagSlugs = networks.map(network => network.tags).flat().filter(Boolean);
|
||||||
|
|
||||||
|
const tagEntries = await knex('tags').whereIn('slug', tagSlugs);
|
||||||
|
const tagIdsBySlug = tagEntries.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag.id }), {});
|
||||||
|
|
||||||
|
const tagAssociations = networks
|
||||||
|
.map(network => (network.tags
|
||||||
|
? network.tags.map(tagSlug => ({
|
||||||
|
entity_id: networkIdsBySlug[network.slug],
|
||||||
|
tag_id: tagIdsBySlug[tagSlug],
|
||||||
|
inherit: true,
|
||||||
|
}))
|
||||||
|
: []))
|
||||||
|
.flat();
|
||||||
|
|
||||||
|
await upsert('entities_tags', tagAssociations, ['entity_id', 'tag_id'], knex);
|
||||||
});
|
});
|
||||||
|
|
|
@ -806,6 +806,7 @@ const tagPhotos = [
|
||||||
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
|
['fake-boobs', 15, 'Amber Jade and Karma Rx in "Amber In The Hills: Part 1" for Brazzers'],
|
||||||
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
|
// ['fake-boobs', 6, 'Cathy Heaven in "Heavenly Ass" for Big Wett Butts'],
|
||||||
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
|
['fake-boobs', 12, 'Nikki Monroe and Kortney Kane for Big Tits In Uniform'],
|
||||||
|
['fake-cum', 3, 'Alexia Anders in "Thanksgiving Creampies" for Cum 4K'],
|
||||||
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
|
['fake-cum', 0, 'Jynx Maze for Cumshot Surprise (Porn Pros)'],
|
||||||
['fake-cum', 1, 'Ricki White for Fucked Up Facials'],
|
['fake-cum', 1, 'Ricki White for Fucked Up Facials'],
|
||||||
['fingering', 2, 'Kylie Page and Hadley Viscara in "Busty Blonde Bombshells" for LesbianX'],
|
['fingering', 2, 'Kylie Page and Hadley Viscara in "Busty Blonde Bombshells" for LesbianX'],
|
||||||
|
|
|
@ -79,13 +79,12 @@ async function fetchIncludedEntities() {
|
||||||
};
|
};
|
||||||
|
|
||||||
const rawNetworks = await knex.raw(`
|
const rawNetworks = await knex.raw(`
|
||||||
WITH RECURSIVE channels AS (
|
WITH RECURSIVE included_entities AS (
|
||||||
/* select configured channels and networks */
|
/* select configured channels and networks */
|
||||||
SELECT
|
SELECT
|
||||||
entities.*, json_agg(siblings) as siblings
|
entities.*
|
||||||
FROM
|
FROM
|
||||||
entities
|
entities
|
||||||
LEFT JOIN entities AS siblings ON siblings.parent_id = entities.parent_id
|
|
||||||
WHERE
|
WHERE
|
||||||
CASE WHEN :includeAll
|
CASE WHEN :includeAll
|
||||||
THEN
|
THEN
|
||||||
|
@ -102,17 +101,16 @@ async function fetchIncludedEntities() {
|
||||||
AND entities.type = 'network')
|
AND entities.type = 'network')
|
||||||
OR (entities.slug = ANY(:excludedChannels)
|
OR (entities.slug = ANY(:excludedChannels)
|
||||||
AND entities.type = 'channel'))
|
AND entities.type = 'channel'))
|
||||||
GROUP BY entities.id
|
|
||||||
|
|
||||||
UNION ALL
|
UNION ALL
|
||||||
|
|
||||||
/* select recursive children of configured networks */
|
/* select recursive children of configured networks */
|
||||||
SELECT
|
SELECT
|
||||||
entities.*, null as siblings
|
entities.*
|
||||||
FROM
|
FROM
|
||||||
entities
|
entities
|
||||||
INNER JOIN
|
INNER JOIN
|
||||||
channels ON channels.id = entities.parent_id
|
included_entities ON included_entities.id = entities.parent_id
|
||||||
WHERE
|
WHERE
|
||||||
NOT ((entities.slug = ANY(:excludedNetworks)
|
NOT ((entities.slug = ANY(:excludedNetworks)
|
||||||
AND entities.type = 'network')
|
AND entities.type = 'network')
|
||||||
|
@ -121,17 +119,20 @@ async function fetchIncludedEntities() {
|
||||||
)
|
)
|
||||||
/* select recursive channels as children of networks */
|
/* select recursive channels as children of networks */
|
||||||
SELECT
|
SELECT
|
||||||
entities.*, json_agg(channels ORDER BY channels.id) as children
|
parents.*, json_agg(included_entities ORDER BY included_entities.id) as children
|
||||||
FROM
|
FROM
|
||||||
channels
|
included_entities
|
||||||
LEFT JOIN
|
LEFT JOIN
|
||||||
entities ON entities.id = channels.parent_id
|
entities AS parents ON parents.id = included_entities.parent_id
|
||||||
WHERE
|
WHERE
|
||||||
channels.type = 'channel'
|
included_entities.type = 'channel'
|
||||||
GROUP BY
|
GROUP BY
|
||||||
entities.id;
|
parents.id;
|
||||||
`, include);
|
`, include);
|
||||||
|
|
||||||
|
// console.log(rawNetworks.rows[0]);
|
||||||
|
// console.log(rawNetworks.toString());
|
||||||
|
|
||||||
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
|
||||||
|
|
||||||
return curatedNetworks;
|
return curatedNetworks;
|
||||||
|
|
|
@ -6,17 +6,21 @@ const slugify = require('../utils/slugify');
|
||||||
function matchChannel(release, channel) {
|
function matchChannel(release, channel) {
|
||||||
const series = channel.children || channel.parent.children;
|
const series = channel.children || channel.parent.children;
|
||||||
|
|
||||||
console.log(channel, series);
|
// console.log(series?.length, release.url, channel.name);
|
||||||
|
|
||||||
const serieNames = series.reduce((acc, serie) => ({
|
const serieNames = series.reduce((acc, serie) => ({
|
||||||
...acc,
|
...acc,
|
||||||
[serie.name]: serie,
|
[serie.name]: serie,
|
||||||
[serie.slug]: serie,
|
[serie.slug]: serie,
|
||||||
}), {});
|
}), {
|
||||||
|
vr: 'littlecapricevr',
|
||||||
|
});
|
||||||
|
|
||||||
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
|
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
|
||||||
const serie = serieName && serieNames[slugify(serieName, '')];
|
const serie = serieName && serieNames[slugify(serieName, '')];
|
||||||
|
|
||||||
|
console.log(release.title, serieName);
|
||||||
|
|
||||||
if (serie) {
|
if (serie) {
|
||||||
return {
|
return {
|
||||||
channel: serie.slug,
|
channel: serie.slug,
|
||||||
|
|
|
@ -207,8 +207,12 @@ async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}/api/videos?page=${page}`;
|
const url = `${site.url}/api/videos?page=${page}`;
|
||||||
const res = await http.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.status === 200) {
|
if (res.ok) {
|
||||||
return scrapeAll(res.body.data.videos, site);
|
if (res.body.data.videos) {
|
||||||
|
return scrapeAll(res.body.data.videos, site);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -218,8 +222,12 @@ async function fetchUpcoming(site) {
|
||||||
const apiUrl = `${site.url}/api`;
|
const apiUrl = `${site.url}/api`;
|
||||||
const res = await http.get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.status === 200) {
|
if (res.ok) {
|
||||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
if (res.body.data.nextScene) {
|
||||||
|
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -231,7 +239,7 @@ async function fetchScene(url, site, baseRelease) {
|
||||||
|
|
||||||
const res = await http.get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.status === 200) {
|
if (res.ok) {
|
||||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,7 +252,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
|
||||||
const url = `${origin}/api/${actorSlug}`;
|
const url = `${origin}/api/${actorSlug}`;
|
||||||
const res = await http.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.status === 200) {
|
if (res.ok) {
|
||||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue