Added virtual entity spawning for multi-page updates (e.g. Elegant Angel). Fixed ffmpeg error freezing the process. Refactored Adult Empire/Elegant Angel scraper.

This commit is contained in:
DebaucheryLibrarian
2024-08-16 23:26:52 +02:00
parent 958c6d83fa
commit bca677b0a8
16 changed files with 287 additions and 249 deletions

View File

@@ -55,7 +55,8 @@ function curateEntity(entity, includeParameters = false) {
}
if (includeParameters) {
curatedEntity.parameters = entity.parameters;
curatedEntity.options = entity.options; // global internal options
curatedEntity.parameters = entity.parameters; // scraper-specific parameters
}
if (entity.children) {
@@ -66,10 +67,25 @@ function curateEntity(entity, includeParameters = false) {
}
if (entity.included_children) {
curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({
...child,
parent: curatedEntity.id ? curatedEntity : null,
}, includeParameters));
curatedEntity.includedChildren = entity.included_children.flatMap((child) => {
const curatedChild = curateEntity({
...child,
parent: curatedEntity.id ? curatedEntity : null,
}, includeParameters);
// allow entities to 'spawn' virtual copies of themselves, this is useful for sites that use two separate update pages (i.e. Elegant Angel)
if (child.options?.spawn) {
return [
curatedChild,
...child.options.spawn.map((spawnEntity) => ({
...curatedChild,
...spawnEntity,
})),
];
}
return curatedChild;
});
}
const scraper = resolveScraper(curatedEntity);
@@ -199,7 +215,7 @@ async function fetchIncludedEntities() {
return curatedNetworks;
}
async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' }) {
const entities = await knex.raw(`
WITH RECURSIVE entity_tree as (
SELECT to_jsonb(entities) as entity,
@@ -208,7 +224,7 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
FROM entities
WHERE (slug = ANY(:entitySlugs)
OR url ILIKE ANY(:entityHosts))
AND type IN ('channel', 'network')
AND type = ANY(:entityTypes)
UNION ALL
@@ -236,7 +252,8 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
`, {
entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`),
sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'),
entityTypes: options.types || ['channel', 'network'],
sort: knex.raw(options.prefer === 'channel' ? 'asc' : 'desc'),
});
// channel entity will overwrite network entity
@@ -263,7 +280,7 @@ async function fetchReleaseEntities(baseReleases) {
.filter(Boolean),
));
return fetchEntitiesBySlug(entitySlugs, argv.prefer || 'network');
return fetchEntitiesBySlug(entitySlugs, { prefer: argv.prefer || 'network' });
}
async function fetchEntity(entityId, type) {