Ignore empty interpolated metadata. Add URL query wildcard to imgur and reddit images. Use host ID rather than URL to improve duplicate detection.

This commit is contained in:
2018-04-26 00:22:56 +02:00
parent dce9ef3379
commit 0b9fba7af2
3 changed files with 12 additions and 7 deletions

View File

@@ -7,7 +7,9 @@ function curateSubmissions(submissions) {
const processed = new Set();
return submissions.reduce((acc, submission, index) => {
if(config.fetch.avoidDuplicates && processed.has(submission.url)) {
const host = dissectLink(submission.url);
if(config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring cross-post or repost '${submission.title}' - ${submission.url}`);
return acc;
@@ -23,10 +25,10 @@ function curateSubmissions(submissions) {
url: submission.url,
datetime: new Date(submission.created_utc * 1000),
subreddit: submission.subreddit.display_name,
host: dissectLink(submission.url)
host
};
processed.add(submission.url);
processed.add(host.id);
return acc.concat(curatedSubmission);
}, []);