Compare commits

ab83dd2e55f919b7176a3de01672ca3f64d40d4e..46c0b269c339fff3508f00cfbce283c141e281a2

No commits in common. "ab83dd2e55f919b7176a3de01672ca3f64d40d4e" and "46c0b269c339fff3508f00cfbce283c141e281a2" have entirely different histories.

6 changed files with 82 additions and 103 deletions

package-lock.json generated

@@ -1,6 +1,6 @@
 {
   "name": "traxxx",
-  "version": "1.161.0",
+  "version": "1.160.4",
   "lockfileVersion": 1,
   "requires": true,
   "dependencies": {

package.json

@ -1,6 +1,6 @@
{ {
"name": "traxxx", "name": "traxxx",
"version": "1.161.0", "version": "1.160.4",
"description": "All the latest porn releases in one place", "description": "All the latest porn releases in one place",
"main": "src/app.js", "main": "src/app.js",
"scripts": { "scripts": {


@@ -5,10 +5,10 @@ const merge = require('object-merge-advanced');
 const argv = require('./argv');
 const include = require('./utils/argv-include')(argv);
-const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
 const { fetchReleaseEntities, urlToSiteSlug } = require('./entities');
 const logger = require('./logger')(__filename);
 const qu = require('./utils/qu');
+const scrapers = require('./scrapers/scrapers');
 
 function toBaseReleases(baseReleasesOrUrls, entity = null) {
 	if (!baseReleasesOrUrls) {
@@ -68,6 +68,30 @@ async function fetchScene(scraper, url, entity, baseRelease, options) {
 	return null;
 }
 
+function findScraper(entity) {
+	if (scrapers.releases[entity.slug]) {
+		return scrapers.releases[entity.slug];
+	}
+
+	if (entity.parent) {
+		return findScraper(entity.parent);
+	}
+
+	return null;
+}
+
+function findLayoutScraper(entity, scraper) {
+	if (scraper?.[entity.parameters?.layout]) {
+		return scraper[entity.parameters.layout];
+	}
+
+	if (entity.parent) {
+		return findLayoutScraper(entity.parent, scraper);
+	}
+
+	return scraper;
+}
+
 async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
 	const entity = baseRelease.entity || entitiesBySlug[urlToSiteSlug(baseRelease.url)];
@@ -83,8 +107,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
 		};
 	}
 
-	const scraper = resolveScraper(entity);
-	const layoutScraper = resolveLayoutScraper(entity, scraper);
+	const scraper = findScraper(entity);
+	const layoutScraper = findLayoutScraper(entity, scraper);
 
 	if (!layoutScraper) {
 		logger.warn(`Could not find scraper for ${baseRelease.url}`);

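Taken on its own, the resolution logic inlined above walks up the entity tree until it finds a release scraper keyed by slug, then picks a layout variant the same way. A minimal, runnable sketch of that behaviour — the stub scraper map and the 'examplenetwork'/'examplechannel' slugs are hypothetical stand-ins for the real map in src/scrapers/scrapers.js:

// Hypothetical scraper map: only the network has an entry, with a
// default scraper and an 'api' layout variant.
const scrapers = {
	releases: {
		examplenetwork: {
			fetchScene: () => 'default layout scraper',
			api: { fetchScene: () => 'api layout scraper' },
		},
	},
};

// Copied from the diff above: recurse up entity.parent until a slug matches.
function findScraper(entity) {
	if (scrapers.releases[entity.slug]) {
		return scrapers.releases[entity.slug];
	}

	if (entity.parent) {
		return findScraper(entity.parent);
	}

	return null;
}

// Copied from the diff above: recurse up entity.parent until a layout
// parameter selects a variant, falling back to the base scraper.
function findLayoutScraper(entity, scraper) {
	if (scraper?.[entity.parameters?.layout]) {
		return scraper[entity.parameters.layout];
	}

	if (entity.parent) {
		return findLayoutScraper(entity.parent, scraper);
	}

	return scraper;
}

// A channel without its own scraper entry resolves via its network's slug,
// and picks up the layout variant configured on the network.
const channel = {
	slug: 'examplechannel',
	parameters: null,
	parent: { slug: 'examplenetwork', parameters: { layout: 'api' }, parent: null },
};

const scraper = findScraper(channel);
const layoutScraper = findLayoutScraper(channel, scraper);
console.log(layoutScraper.fetchScene()); // => 'api layout scraper'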
src/entities.js

@@ -94,80 +94,62 @@ async function fetchIncludedEntities() {
 		WITH RECURSIVE included_entities AS (
 			/* select configured channels and networks */
 			SELECT
 				entities.*
 			FROM
 				entities
 			WHERE
 				CASE WHEN :includeAll
 				THEN
 					/* select all top level networks and independent channels */
 					entities.parent_id IS NULL
 				ELSE
 					((entities.slug = ANY(:includedNetworks)
 						AND entities.type = 'network')
 					OR (entities.slug = ANY(:includedChannels)
 						AND entities.type = 'channel'))
 				END
 				AND NOT (
 					(entities.slug = ANY(:excludedNetworks)
 						AND entities.type = 'network')
 					OR (entities.slug = ANY(:excludedChannels)
 						AND entities.type = 'channel'))
 			UNION ALL
 			/* select recursive children of configured networks */
 			SELECT
 				entities.*
 			FROM
 				entities
 			INNER JOIN
 				included_entities ON included_entities.id = entities.parent_id
 			WHERE
 				NOT ((entities.slug = ANY(:excludedNetworks)
 					AND entities.type = 'network')
 				OR (entities.slug = ANY(:excludedChannels)
 					AND entities.type = 'channel'))
-		), included_per_network AS (
-			/* select recursive channels as children of networks */
-			SELECT
-				parents.*,
-				json_agg(included_entities ORDER BY included_entities.id) included_children,
-				row_to_json(grandparents) AS parent,
-				(SELECT json_agg(children)
-					FROM entities AS children
-					WHERE children.parent_id = parents.id) children
-			FROM
-				included_entities
-			LEFT JOIN
-				entities AS parents ON parents.id = included_entities.parent_id
-			LEFT JOIN
-				entities AS grandparents ON grandparents.id = parents.parent_id
-			WHERE
-				included_entities.type = 'channel'
-			GROUP BY
-				parents.id, grandparents.id
-		), entity_tree as (
-			/* get recursive parents of networks (necessary for scraper resolve) */
-			SELECT to_jsonb(included_per_network) as entity,
-				parent_id,
-				array['parent'] as parent_path,
-				0 as depth
-			FROM included_per_network
-			UNION ALL
-			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
-				entities.parent_id,
-				entity_tree.parent_path || array['parent'],
-				depth + 1
-			FROM entity_tree
-			JOIN entities ON entity_tree.parent_id = entities.id
 		)
-		SELECT entity FROM entity_tree WHERE parent_id is null;
+		/* select recursive channels as children of networks */
+		SELECT
+			parents.*,
+			json_agg(included_entities ORDER BY included_entities.id) included_children,
+			row_to_json(grandparents) AS parent,
+			(SELECT json_agg(children)
+				FROM entities AS children
+				WHERE children.parent_id = parents.id) children
+		FROM
+			included_entities
+		LEFT JOIN
+			entities AS parents ON parents.id = included_entities.parent_id
+		LEFT JOIN
+			entities AS grandparents ON grandparents.id = parents.parent_id
+		WHERE
+			included_entities.type = 'channel'
+		GROUP BY
+			parents.id, grandparents.id;
 	`, include);
 
-	const curatedNetworks = rawNetworks.rows.map(({ entity }) => curateEntity(entity, true));
+	const curatedNetworks = rawNetworks.rows.map(entity => curateEntity(entity, true));
 
 	return curatedNetworks;
 }
@@ -182,7 +164,7 @@ async function fetchReleaseEntities(baseReleases) {
 	));
 
 	const entities = await knex.raw(`
-		WITH RECURSIVE entity_tree as (
+		WITH RECURSIVE tree as (
 			SELECT to_jsonb(entities) as entity,
 				parent_id,
 				array['parent'] as parent_path,
@@ -192,14 +174,14 @@ async function fetchReleaseEntities(baseReleases) {
 			UNION ALL
-			SELECT jsonb_set(entity_tree.entity, entity_tree.parent_path, to_jsonb(entities)),
+			SELECT jsonb_set(tree.entity, tree.parent_path, to_jsonb(entities)),
 				entities.parent_id,
-				entity_tree.parent_path || array['parent'],
+				tree.parent_path || array['parent'],
 				depth + 1
-			FROM entity_tree
-			JOIN entities ON entity_tree.parent_id = entities.id
+			FROM tree
+			JOIN entities ON tree.parent_id = entities.id
 		)
-		SELECT entity FROM entity_tree WHERE parent_id is null
+		SELECT entity FROM tree WHERE parent_id is null
 		ORDER BY entity->'type' ASC;
 	`, { entitySlugs });

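The jsonb_set/parent_path recursion kept in fetchReleaseEntities is worth reading in isolation: each iteration writes the next ancestor at the current parent_path and appends 'parent' to that path, so every fully resolved row becomes { ..., parent: { ..., parent: { ... } } }. Below is a minimal sketch of the same technique, assuming a configured knex instance and this repo's entities(id, slug, parent_id) table; the function name fetchEntityTrees and the anchor's WHERE filter are assumptions inferred from the { entitySlugs } binding above, not code from the diff:

const knex = require('knex')({ client: 'pg', connection: process.env.DATABASE_URL });

async function fetchEntityTrees(entitySlugs) {
	// Anchor: one JSON document per matched entity, a ['parent'] path cursor,
	// and the entity's own parent_id left to follow.
	// Recursive step: jsonb_set() embeds the next ancestor at the cursor,
	// then the cursor grows by || array['parent'] for the iteration after.
	const { rows } = await knex.raw(`
		WITH RECURSIVE tree as (
			SELECT to_jsonb(entities) as entity,
				parent_id,
				array['parent'] as parent_path,
				0 as depth
			FROM entities
			WHERE entities.slug = ANY(:entitySlugs) /* assumed anchor filter */
			UNION ALL
			SELECT jsonb_set(tree.entity, tree.parent_path, to_jsonb(entities)),
				entities.parent_id,
				tree.parent_path || array['parent'],
				depth + 1
			FROM tree
			JOIN entities ON tree.parent_id = entities.id
		)
		/* parent_id is null only once the whole ancestor chain is embedded */
		SELECT entity FROM tree WHERE parent_id is null
		ORDER BY entity->'type' ASC;
	`, { entitySlugs });

	return rows.map(({ entity }) => entity);
}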
src/scrapers/resolve.js

@@ -1,32 +0,0 @@
-'use strict';
-
-const scrapers = require('./scrapers');
-
-function resolveScraper(entity) {
-	if (scrapers.releases[entity.slug]) {
-		return scrapers.releases[entity.slug];
-	}
-
-	if (entity.parent) {
-		return resolveScraper(entity.parent);
-	}
-
-	return null;
-}
-
-function resolveLayoutScraper(entity, scraper) {
-	if (scraper?.[entity.parameters?.layout]) {
-		return scraper[entity.parameters.layout];
-	}
-
-	if (entity.parent) {
-		return resolveLayoutScraper(entity.parent, scraper);
-	}
-
-	return scraper;
-}
-
-module.exports = {
-	resolveScraper,
-	resolveLayoutScraper,
-};


@@ -9,7 +9,7 @@ const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const { curateRelease } = require('./releases');
 const include = require('./utils/argv-include')(argv);
-const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
+const scrapers = require('./scrapers/scrapers');
 const { fetchIncludedEntities } = require('./entities');
 
 const emptyReleases = { uniqueReleases: [], duplicateReleases: [] };
@@ -205,8 +205,13 @@ async function scrapeChannelReleases(scraper, channelEntity, preData) {
 }
 
 async function scrapeChannel(channelEntity, accNetworkReleases) {
-	const scraper = resolveScraper(channelEntity);
-	const layoutScraper = resolveLayoutScraper(channelEntity, scraper);
+	console.log(channelEntity);
+
+	const scraper = scrapers.releases[channelEntity.slug]
+		|| scrapers.releases[channelEntity.parent?.slug]
+		|| scrapers.releases[channelEntity.parent?.parent?.slug];
+
+	const layoutScraper = scraper?.[channelEntity.parameters?.layout] || scraper?.[channelEntity.parent?.parameters?.layout] || scraper?.[channelEntity.parent?.parent?.parameters?.layout] || scraper;
 
 	if (!layoutScraper) {
 		logger.warn(`No scraper found for '${channelEntity.name}' (${channelEntity.parent?.name})`);