Added include and exclude source arguments. Improved stream fetch failure handling and improved logging details.
This commit is contained in:
parent
77e12d3c0f
commit
068ffbdbd8
|
@ -41,6 +41,8 @@ reddit-post-dump requires an arbitrarily recent version of Node.js. Before use, d
|
|||
* `--limit <number>`: Maximum number of posts per user to fetch content from. The limit is applied after filtering out ignored posts, cross-posts and reposts.
|
||||
* `--sort <method>`: How posts should be sorted when fetched. This affects the `$postIndex` variable and, in combination with `--limit`, decides which posts will be included.
|
||||
* `--ignore <prop> [<prop>...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`.
|
||||
* `--exclude <source> [<source>...]`: Do not include posts from these sources (e.g. `self`, `reddit`, `imgur`, `gfycat`, ...). Should not be used in combination with `--include`.
|
||||
* `--include <source> [<source>...]`: Only include posts from these sources (e.g. `self`, `reddit`, `imgur`, `gfycat`, ...). Should not be used in combination with `--exclude`.
|
||||
|
||||
### Examples
|
||||
* `npm start -- --user AWildSketchAppeared`
|
||||
|
|
|
@ -34,6 +34,7 @@ module.exports = {
|
|||
sort: 'new',
|
||||
limit: 1000,
|
||||
avoidDuplicates: true,
|
||||
retries: 3,
|
||||
archives: {
|
||||
search: false,
|
||||
preview: true,
|
||||
|
|
|
@ -38,7 +38,7 @@ Promise.resolve().then(() => {
|
|||
|
||||
return userPosts;
|
||||
}).then(posts => {
|
||||
return curatePosts(posts, args.ignore).slice(0, args.limit);
|
||||
return curatePosts(posts, args).slice(0, args.limit);
|
||||
}).then(posts => {
|
||||
return attachContentInfo(posts);
|
||||
}).then(posts => {
|
||||
|
|
|
@ -23,6 +23,12 @@ module.exports = yargs.command('npm start -- --user <username>').option('users',
|
|||
describe: 'Ignore posts with any of these properties',
|
||||
type: 'array',
|
||||
choices: ['pinned', 'stickied', 'hidden', 'spoiler', 'over_18']
|
||||
}).option('include', {
|
||||
describe: 'Include only these sources',
|
||||
type: 'array'
|
||||
}).option('exclude', {
|
||||
describe: 'Do not include these sources',
|
||||
type: 'array'
|
||||
}).option('archives', {
|
||||
describe: 'Search archives for deleted posts',
|
||||
type: 'boolean'
|
||||
|
|
|
@ -3,12 +3,14 @@
|
|||
const config = require('config');
|
||||
const dissectLink = require('../dissectLink.js');
|
||||
|
||||
function curatePosts(posts, ignore) {
|
||||
function curatePosts(posts, args) {
|
||||
const processed = new Set();
|
||||
|
||||
return posts.reduce((acc, post, index) => {
|
||||
const host = dissectLink(post.url);
|
||||
const ignoring = ignore ? ignore.find(prop => {
|
||||
post.permalink = 'https://reddit.com' + post.permalink;
|
||||
|
||||
const ignoring = args.ignore ? args.ignore.find(prop => {
|
||||
return post[prop];
|
||||
}) : null;
|
||||
|
||||
|
@ -25,6 +27,12 @@ function curatePosts(posts, ignore) {
|
|||
return acc;
|
||||
}
|
||||
|
||||
if((args.include && !args.include.includes(host.label)) || (args.exclude && args.exclude.includes(host.label))) {
|
||||
console.log('\x1b[33m%s\x1b[0m', `Ignoring source '${host.label}' from post '${post.url}' (${post.permalink})`);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
processed.add(host.id);
|
||||
}
|
||||
|
||||
|
@ -34,7 +42,7 @@ function curatePosts(posts, ignore) {
|
|||
title: post.title,
|
||||
text: post.selftext,
|
||||
user: post.user,
|
||||
permalink: 'https://reddit.com' + post.permalink,
|
||||
permalink: post.permalink,
|
||||
url: post.url,
|
||||
datetime: new Date(post.created_utc * 1000),
|
||||
subreddit: post.subreddit.display_name,
|
||||
|
|
|
@ -32,9 +32,15 @@ module.exports = function(posts) {
|
|||
const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
|
||||
|
||||
return Promise.all(sources.map(source => {
|
||||
return fetchItem(source, 0);
|
||||
})).then(streams => Object.assign({}, item, {streams}));
|
||||
})).then(items => {
|
||||
return fetchItem(source, 0, post);
|
||||
})).then(streams => {
|
||||
if(streams.filter(stream => stream).length > 0) {
|
||||
Object.assign({}, item, {streams})
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
})).then(items => items.filter(item => item)).then(items => {
|
||||
return Promise.all(items.map(item => {
|
||||
const type = item.type.split('/')[0];
|
||||
const filepath = post.content.album ? interpolate(config.library.album[type], post.user, post, item) : interpolate(config.library[type], post.user, post, item);
|
||||
|
@ -42,7 +48,7 @@ module.exports = function(posts) {
|
|||
return Promise.resolve().then(() => {
|
||||
return fs.ensureDir(path.dirname(filepath));
|
||||
}).then(() => {
|
||||
return save(filepath, item.streams || item.stream, item);
|
||||
return save(filepath, item.streams || item.stream, item, post);
|
||||
}).then(sourcePaths => {
|
||||
if(item.mux) {
|
||||
return mux(filepath, sourcePaths, item);
|
||||
|
|
|
@ -1,22 +1,29 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const fetch = require('node-fetch');
|
||||
|
||||
function fetchItem(url, attempt) {
|
||||
function fetchItem(url, attempt, post) {
|
||||
function retry(error) {
|
||||
console.log(error);
|
||||
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post.permalink})`);
|
||||
|
||||
if(attempt < 3) {
|
||||
if(attempt < config.fetch.retries) {
|
||||
console.log('Retrying...');
|
||||
|
||||
return fetchItem(url, ++attempt);
|
||||
return fetchItem(url, ++attempt, post);
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
||||
return fetch(url).then(res => {
|
||||
return res.ok ? res : Promise.reject(`Failed to fetch ${url}`);
|
||||
if(!res.ok) {
|
||||
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
|
||||
}
|
||||
|
||||
return res;
|
||||
}).then(res => {
|
||||
console.log(`Fetched '${url}'`);
|
||||
console.log(`Fetched '${url}' (${post.permalink})`);
|
||||
|
||||
return res.body;
|
||||
}).catch(retry);
|
||||
|
|
|
@ -4,7 +4,7 @@ const fs = require('fs-extra');
|
|||
const path = require('path');
|
||||
const ffmpeg = require('fluent-ffmpeg');
|
||||
|
||||
function save(filepath, streams, item) {
|
||||
function save(filepath, streams, item, post) {
|
||||
const pathComponents = path.parse(filepath);
|
||||
|
||||
// allow for single stream argument
|
||||
|
|
Loading…
Reference in New Issue