Removed HTML scraper from imgur module, no longer supported due to login restriction. Preserving 'preview' property in index file.

This commit is contained in:
Niels Simenon 2019-10-31 05:22:07 +01:00
parent cd0fde24f0
commit db6317e560
7 changed files with 392 additions and 372 deletions

740
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -77,6 +77,7 @@ function fetchSavePosts(userPosts, ep) {
async function initApp() { async function initApp() {
try { try {
const userPosts = await getCompleteUserPosts(); const userPosts = await getCompleteUserPosts();
const ep = new exiftool.ExiftoolProcess(exiftoolBin); const ep = new exiftool.ExiftoolProcess(exiftoolBin);
await ep.open(); await ep.open();

View File

@ -10,7 +10,7 @@ const { isAfter, isBefore, isEqual } = require('date-fns');
function report(curatedPosts, indexed, user, args) { function report(curatedPosts, indexed, user, args) {
const { const {
posts, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored,
} = curatedPosts; } = curatedPosts;
if (indexedUpdated.length > 0) { if (indexedUpdated.length > 0) {
@ -68,7 +68,11 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
}; };
if (indexed.entries.length) { if (indexed.entries.length) {
if (!args.redownload && indexed.entries.find(entry => entry.id === post.id)) { const indexedPost = indexed.entries.find(entry => entry.id === post.id);
if (indexedPost && !args.redownload) {
curatedPost.previewFallback = indexedPost.preview;
return { ...acc, indexedUpdated: [...acc.indexedUpdated, curatedPost] }; return { ...acc, indexedUpdated: [...acc.indexedUpdated, curatedPost] };
} }

View File

@ -19,6 +19,7 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a
return [...accPosts, { ...post, content: await methods[post.host.method](post) }]; return [...accPosts, { ...post, content: await methods[post.host.method](post) }];
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
console.log(error);
if (config.fetch.archives.preview && post.preview) { if (config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);

View File

@ -44,6 +44,10 @@ async function imgurAlbumApi(post) {
} }
async function imgurAlbum(post) { async function imgurAlbum(post) {
return imgurAlbumApi(post);
/*
* as of late 2019, imgur requires log in to view albums and gallery images
const res = await fetch(`https://imgur.com/a/${post.host.id}`); const res = await fetch(`https://imgur.com/a/${post.host.id}`);
const html = await res.text(); const html = await res.text();
@ -80,6 +84,7 @@ async function imgurAlbum(post) {
datetime: new Date(item.datetime), datetime: new Date(item.datetime),
})), })),
}; };
*/
} }
module.exports = imgurAlbum; module.exports = imgurAlbum;

View File

@ -29,6 +29,10 @@ async function imgurImageApi(post) {
} }
async function imgurImage(post) { async function imgurImage(post) {
return imgurImageApi(post);
/*
* as of late 2019, imgur requires log in to view albums and gallery images
const res = await fetch(`https://imgur.com/${post.host.id}`); const res = await fetch(`https://imgur.com/${post.host.id}`);
const html = await res.text(); const html = await res.text();
@ -56,6 +60,7 @@ async function imgurImage(post) {
datetime: new Date(data.timestamp || data.datetime), datetime: new Date(data.timestamp || data.datetime),
}], }],
}; };
*/
} }
module.exports = imgurImage; module.exports = imgurImage;

View File

@ -10,6 +10,8 @@ async function getUser(username, reddit) {
try { try {
const user = await reddit.getUser(username).fetch(); const user = await reddit.getUser(username).fetch();
console.log(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`);
return curateUser(user); return curateUser(user);
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
@ -28,6 +30,8 @@ async function getPosts(username, reddit, args) {
limit: Infinity, limit: Infinity,
}); });
console.log(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);
return submissions; return submissions;
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`); console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);