From 5a3d2dd0300c287832be8c4521b76b3b6e1ffd97 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:54 +0200 Subject: [PATCH] Ignoring empty interpolated metadata. Added URL query wildcard to imgur and reddit images. Using host ID rather than URL to improve duplicate detection. --- src/curate/submissions.js | 8 +++++--- src/dissectLink.js | 4 ++-- src/fetch/content.js | 7 +++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/curate/submissions.js b/src/curate/submissions.js index d9f7583..83b322b 100644 --- a/src/curate/submissions.js +++ b/src/curate/submissions.js @@ -7,7 +7,9 @@ function curateSubmissions(submissions) { const processed = new Set(); return submissions.reduce((acc, submission, index) => { - if(config.fetch.avoidDuplicates && processed.has(submission.url)) { + const host = dissectLink(submission.url); + + if(config.fetch.avoidDuplicates && processed.has(host.id)) { console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`); return acc; @@ -23,10 +25,10 @@ function curateSubmissions(submissions) { url: submission.url, datetime: new Date(submission.created_utc * 1000), subreddit: submission.subreddit.display_name, - host: dissectLink(submission.url) + host }; - processed.add(submission.url); + processed.add(host.id); return acc.concat(curatedSubmission); }, []); diff --git a/src/dissectLink.js b/src/dissectLink.js index b9629ad..3cbaeb1 100644 --- a/src/dissectLink.js +++ b/src/dissectLink.js @@ -13,11 +13,11 @@ const hosts = [{ }, { method: 'reddit', label: 'reddit', - pattern: new urlPattern('https\\://i.reddituploads.com/:id?(fit=:fit)(&h=:height)(&w=:width)(&s=:signature)') + pattern: new urlPattern('https\\://i.reddituploads.com/:id(?*)') }, { method: 'imgurImage', label: 'imgur', - pattern: new urlPattern('http(s)\\://(i.)imgur.com/:id(.:ext)(?:num)') + pattern: new urlPattern('http(s)\\://(i.)imgur.com/:id(.:ext)(?*)') }, { method: 'imgurAlbum', label: 'imgur', diff --git a/src/fetch/content.js b/src/fetch/content.js index 5ac2b04..c110533 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -43,7 +43,11 @@ module.exports = function(posts, user) { return save(filepath, item.stream); }).then(() => { const interpolatedMeta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { - acc[key] = interpolate(value, user, post, item); + const interpolatedValue = interpolate(value, user, post, item); + + if(interpolatedValue) { + acc[key] = interpolatedValue; + } return acc; }, {}); @@ -58,7 +62,6 @@ module.exports = function(posts, user) { }); })); }).then(() => { - console.log('closing...'); return ep.close(); }).catch(error => { return ep.close();