Matching URLs to entity using hostname rather than slug to minimize collisions. Fixed missing Cum Louder POV logo.

This commit is contained in:
DebaucheryLibrarian
2023-06-04 21:50:59 +02:00
parent 7e2840a00d
commit 164757ee26
55 changed files with 67 additions and 24 deletions

View File

@@ -82,6 +82,7 @@ async function curateEntities(entities, includeParameters) {
return Promise.all(entities.map(async (entity) => curateEntity(entity, includeParameters)));
}
/* obsolete in favor of urlToHostname
function urlToSiteSlug(url) {
try {
const slug = new URL(url)
@@ -96,6 +97,21 @@ function urlToSiteSlug(url) {
return null;
}
}
*/
/**
 * Derive a normalized hostname from a URL, for matching URLs to entities.
 *
 * A single leading `www.` is stripped, so `https://www.example.com/x` and
 * `https://example.com/x` both yield `example.com`. Other subdomains are kept.
 *
 * @param {string} url - Absolute URL to extract the hostname from.
 * @returns {string|null} The hostname without a leading `www.`, or `null` when
 *   the URL cannot be parsed or has no hostname.
 */
function urlToHostname(url) {
	try {
		const { hostname } = new URL(url);

		// The prefix must be optional: requiring a literal 'www.' made every
		// URL without it resolve to undefined, so it was never matched.
		const normalizedHostname = hostname.replace(/^www\./, '');

		// Host-less URLs (e.g. mailto:) have an empty hostname; report them
		// as "no hostname" with the same falsy null as the failure path.
		return normalizedHostname || null;
	} catch (error) {
		logger.warn(`Failed to derive entity hostname from '${url}': ${error.message}`);

		return null;
	}
}
async function fetchIncludedEntities() {
const include = {
@@ -191,6 +207,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
array['parent'] as parent_path
FROM entities
WHERE slug = ANY(:entitySlugs)
OR url ILIKE ANY(:entityHosts)
UNION ALL
@@ -215,14 +232,23 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
WHERE entity_tree.parent_id IS NULL
GROUP BY entity_tree.entity
ORDER BY entity->'type' :sort;
`, { entitySlugs, sort: knex.raw(sort) });
`, {
entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}%`),
sort: knex.raw(sort),
});
// channel entity will overwrite network entity
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
...accEntities,
[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true),
[urlToSiteSlug(entity.url)]: accEntities[urlToSiteSlug(entity.url)] || curateEntity(entity, true),
}), {});
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => {
const host = urlToHostname(entity.url);
const curatedEntity = accEntities[entity.slug] || accEntities[host] || curateEntity(entity, true);
return {
...accEntities,
[entity.slug]: curatedEntity,
[host]: curatedEntity,
};
}, {});
return entitiesBySlug;
}
@@ -232,7 +258,7 @@ async function fetchReleaseEntities(baseReleases) {
const entitySlugs = Array.from(new Set(
baseReleasesWithoutEntity
.map((baseRelease) => urlToSiteSlug(baseRelease.url))
.map((baseRelease) => urlToHostname(baseRelease.url))
.filter(Boolean),
));
@@ -409,5 +435,6 @@ module.exports = {
getRecursiveParent,
searchEntities,
flushEntities,
urlToSiteSlug,
urlToHostname,
// urlToSiteSlug,
};