2019-03-24 00:29:22 +00:00
|
|
|
'use strict';
|
|
|
|
|
2019-03-25 02:57:33 +00:00
|
|
|
const knex = require('./knex');
|
2020-03-22 02:50:24 +00:00
|
|
|
const slugify = require('./utils/slugify');
|
|
|
|
|
|
|
|
/**
 * Look up the database tag ID for every textual tag on the given releases.
 *
 * Each tag name is queried as scraped, lower-cased and upper-cased. Rows with
 * a truthy `alias_for` map to that value instead of their own `id`.
 *
 * @param {Array<{ tags?: Array<string|number> }>} releases - Releases whose tags should be matched.
 * @returns {Promise<Object<string, number>>} Tag ID (or `alias_for`) keyed by the slugified tag name.
 */
async function matchReleaseTags(releases) {
  // Only non-empty strings can be matched by name. Numeric entries (pre-matched
  // tag IDs) have no toLowerCase()/toUpperCase() and must not reach the query.
  const rawTags = releases
    .flatMap(release => release.tags)
    .filter(tag => typeof tag === 'string' && tag.length > 0);

  if (rawTags.length === 0) {
    // Nothing to look up, skip the query.
    return {};
  }

  const casedTags = [...new Set([
    ...rawTags,
    ...rawTags.map(tag => tag.toLowerCase()),
    ...rawTags.map(tag => tag.toUpperCase()),
  ])];

  const tagEntries = await knex('tags')
    .select('tags.id', 'tags.name', 'tags.alias_for')
    .whereIn('tags.name', casedTags);

  // When several rows slugify to the same key, the last row wins.
  return Object.fromEntries(tagEntries.map(tag => [slugify(tag.name), tag.alias_for || tag.id]));
}
|
|
|
|
|
2020-03-22 02:50:24 +00:00
|
|
|
/**
 * Fetch the `sites_tags` rows for the sites of the given releases and group
 * their tag IDs by site.
 *
 * @param {Array<{ site: { id: number } }>} releases - Releases; each must carry `site.id`.
 * @returns {Promise<Object<string, number[]>>} Tag IDs keyed by site ID. Sites
 *   without any `sites_tags` row have no key in the result.
 */
async function getSiteTags(releases) {
  // Several releases may share a site, so query each site ID only once.
  const siteIds = [...new Set(releases.map(release => release.site.id))];
  const siteTags = await knex('sites_tags').whereIn('site_id', siteIds);

  const siteTagIdsBySiteId = {};

  for (const { site_id: siteId, tag_id: tagId } of siteTags) {
    if (!siteTagIdsBySiteId[siteId]) {
      siteTagIdsBySiteId[siteId] = [];
    }

    siteTagIdsBySiteId[siteId].push(tagId);
  }

  return siteTagIdsBySiteId;
}
|
|
|
|
|
2020-03-22 02:50:24 +00:00
|
|
|
/**
 * Build the `{ release_id, tag_id }` rows linking each release to its own
 * tags plus the tags of its site.
 *
 * @param {Array<{ id: number, site: { id: number }, tags?: Array<string|number> }>} releases
 * @param {Object<string, number>} tagIdsBySlug - Tag ID keyed by slugified tag name.
 * @param {Object<string, number[]>} siteTagIdsBySiteId - Tag IDs keyed by site ID.
 * @returns {Array<{ release_id: number, tag_id: number }>} One row per unique
 *   release/tag pair; tags that could not be resolved are omitted.
 */
function buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId) {
  return releases.flatMap((release) => {
    const siteTagIds = siteTagIdsBySiteId[release.site.id] || [];

    // Resolve per tag so a release may mix pre-matched numeric IDs (obsolete
    // scrapers) with tag names; anything else is dropped.
    const releaseTagIds = (release.tags || []).map((tag) => {
      if (typeof tag === 'number') {
        return tag;
      }

      return typeof tag === 'string' ? tagIdsBySlug[slugify(tag)] : undefined;
    });

    // Drop unmatched (falsy) entries first, then let the Set remove duplicates.
    const uniqueTagIds = new Set([...releaseTagIds, ...siteTagIds].filter(Boolean));

    return [...uniqueTagIds].map(tagId => ({
      release_id: release.id,
      tag_id: tagId,
    }));
  });
}
|
2020-01-13 22:45:09 +00:00
|
|
|
|
2020-03-28 03:37:04 +00:00
|
|
|
/**
 * Remove the release/tag pairs that already have a row in `releases_tags`.
 *
 * @param {Array<{ release_id: number, tag_id: number }>} tagAssociations - Candidate rows.
 * @returns {Promise<Array<{ release_id: number, tag_id: number }>>} The
 *   candidates that are not stored yet, in their original order.
 */
async function filterUniqueAssociations(tagAssociations) {
  if (tagAssociations.length === 0) {
    // Nothing to check, skip the query.
    return [];
  }

  const storedAssociations = await knex('releases_tags')
    .whereIn(
      ['release_id', 'tag_id'],
      tagAssociations.map(association => [association.release_id, association.tag_id]),
    );

  // IDs are compared as strings so that a numeric ID and its string form
  // count as the same ID.
  const storedTagIdsByReleaseId = new Map();

  for (const { release_id: releaseId, tag_id: tagId } of storedAssociations) {
    const releaseKey = String(releaseId);

    if (!storedTagIdsByReleaseId.has(releaseKey)) {
      storedTagIdsByReleaseId.set(releaseKey, new Set());
    }

    storedTagIdsByReleaseId.get(releaseKey).add(String(tagId));
  }

  return tagAssociations.filter(({ release_id: releaseId, tag_id: tagId }) => (
    !storedTagIdsByReleaseId.get(String(releaseId))?.has(String(tagId))
  ));
}
|
|
|
|
|
2020-03-29 02:00:46 +00:00
|
|
|
/**
 * Resolve the tags of the given releases (their own tags plus the tags of
 * their sites) and insert the release/tag pairs that are not stored yet into
 * `releases_tags`.
 *
 * @param {Array<Object>} releases - Releases with `id`, `site.id` and optional `tags`.
 * @returns {Promise<void>}
 */
async function associateReleaseTags(releases) {
  // The two lookups do not depend on each other, so run them concurrently.
  const [tagIdsBySlug, siteTagIdsBySiteId] = await Promise.all([
    matchReleaseTags(releases),
    getSiteTags(releases),
  ]);

  const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, siteTagIdsBySiteId);
  const uniqueAssociations = await filterUniqueAssociations(tagAssociations);

  if (uniqueAssociations.length === 0) {
    // Nothing new to store, so do not issue an insert without rows.
    return;
  }

  await knex('releases_tags').insert(uniqueAssociations);
}
|
|
|
|
|
2019-11-11 02:20:00 +00:00
|
|
|
// Public API of this module.
module.exports = { associateReleaseTags };
|