Added support for RedGifs and Reddit albums. Improved command line logger. Added rate limiters for reddit and host URLs.

This commit is contained in:
2021-12-17 02:04:25 +01:00
parent 196f318316
commit 66e5bfa50a
33 changed files with 586 additions and 256 deletions

View File

@@ -7,21 +7,21 @@ const Promise = require('bluebird');
const exiftool = require('node-exiftool');
const exiftoolBin = require('dist-exiftool');
const cron = require('node-cron');
const { format } = require('date-fns');
require('array.prototype.flatten').shim();
const reddit = new Snoowrap(config.reddit.api);
const args = require('./cli.js')();
const args = require('./cli')();
const logger = require('./logger')(__filename);
const dissectLink = require('./dissectLink.js');
const curatePosts = require('./curate/posts.js');
const dissectLink = require('./dissectLink');
const curatePosts = require('./curate/posts');
const { attachContentInfo, getInfo } = require('./fetch/info.js');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js');
const { attachContentInfo, getInfo } = require('./fetch/info');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content');
const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
const getPosts = require('./sources/getPosts')(reddit, args);
const getUserPosts = require('./sources/getUserPosts')(reddit, args);
async function getFileContents(location, label) {
try {
@@ -29,7 +29,7 @@ async function getFileContents(location, label) {
return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#');
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
logger.error(`Could not read ${label} file '${location}': ${error}.`);
return [];
}
@@ -120,13 +120,13 @@ async function initApp() {
await ep.close();
if (args.watch) {
console.log(`[${format(new Date(), 'YYYY-MM-DD HH:mm:ss')}] Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
logger.info(`Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
}
} catch (error) {
if (args.debug) {
console.log('\x1b[31m%s\x1b[0m', error.stack);
logger.error(error.stack);
} else {
console.log('\x1b[31m%s\x1b[0m', error.message);
logger.error(error.message);
}
}
}

View File

@@ -2,18 +2,20 @@
const config = require('config');
const archives = require('./archives.js');
const logger = require('../logger')(__filename);
const archives = require('./archives');
function getArchivePostIds(username, exclude) {
console.log(`Finding archived posts for '${username}'...`);
logger.info(`Finding archived posts for '${username}'...`);
return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username))).then(postIds => postIds.flatten()).then(postIds => {
return exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds;
}).then(postIds => {
console.log(`Found ${postIds.length} unique archived posts for user '${username}'`);
return Promise.all(config.fetch.archives.reddit.map(source => archives[source](username)))
.then(postIds => postIds.flatten())
.then(postIds => (exclude ? postIds.filter(postId => !exclude.includes(postId)) : postIds))
.then((postIds) => {
logger.info(`Found ${postIds.length} unique archived posts for user '${username}'`);
return postIds;
});
};
return postIds;
});
}
module.exports = getArchivePostIds;

View File

@@ -6,6 +6,22 @@ const yargs = require('yargs');
function getArgs() {
const args = yargs
.command('npm start -- --user <username>')
.option('log-level', {
alias: 'level',
describe: 'CLI log verbosity',
type: 'string',
default: config.logger.level,
})
.option('interval', {
describe: 'Minimum wait time between HTTP requests',
type: 'number',
default: config.limiter.interval,
})
.option('concurrency', {
describe: 'Maximum HTTP requests pending at the same time',
type: 'number',
default: config.limiter.concurrency,
})
.option('users', {
alias: 'user',
describe: 'Reddit usernames to fetch posts from',

View File

@@ -7,43 +7,39 @@ const dissectLink = require('../dissectLink.js');
const hashPost = require('./hashPost.js');
const { isAfter, isBefore, isEqual } = require('date-fns');
const logger = require('../logger')(__filename);
function report(curatedPosts, indexed, user, args) {
const {
indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored,
indexedUpdated, tooOldCount, tooRecentCount, beforeIndexedCount, afterIndexedCount, requestedIgnored, duplicates,
} = curatedPosts;
if (indexedUpdated.length > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
logger.info(`Ignoring ${indexedUpdated.length} indexed posts for '${user.name}'`);
}
if (requestedIgnored.length > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`,
);
logger.info(`Ignoring ${requestedIgnored.length} posts because their IDs are specified to be ignored for '${user.name}'`);
}
if (tooOldCount > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
logger.info(`Ignoring ${tooOldCount} older posts for '${user.name}' for specified date limit '${args.after}'`);
}
if (tooRecentCount > 0) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`);
logger.info(`Ignoring ${tooRecentCount} newer posts for '${user.name}' for specified date limit '${args.before}'`);
}
if (beforeIndexedCount > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`,
);
logger.info(`Ignoring ${beforeIndexedCount} posts older than the ${args.afterIndexed} indexed post (${indexed[args.afterIndexed].id}, ${indexed[args.afterIndexed].date}) for '${user.name}'`);
}
if (afterIndexedCount > 0) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`,
);
logger.info(`Ignoring ${afterIndexedCount} posts newer than the ${args.beforeIndexed} indexed post (${indexed[args.beforeIndexed].id}, ${indexed[args.beforeIndexed].date}) for '${user.name}'`);
}
if (duplicates.length > 0) {
logger.info(`Ignoring ${duplicates.length} duplicate posts for '${user.name}'`);
}
}
@@ -103,7 +99,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
const ignoring = args.ignore ? args.ignore.find(prop => post[prop]) : null;
if (ignoring) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring ${ignoring} post '${post.title}' (${permalink})`);
logger.verbose(`Ignoring ${ignoring} post '${post.title}' (${permalink})`);
return acc;
}
@@ -117,21 +113,15 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
}
if (hostIncludes || hostExcluded) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`,
);
logger.info(`Ignoring source '${host.label}' from post '${post.url}' (${permalink})`);
return acc;
}
if (config.fetch.avoidDuplicates && processed.has(host.id)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`,
);
logger.verbose(`Ignoring duplicate content '${post.url}' (cross-post, repost or superfluous --post ID) (${permalink})`);
return acc;
return { ...acc, duplicates: [...acc.duplicates, curatedPost] };
}
processed.add(host.id);
@@ -139,6 +129,7 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
return {
...acc,
processed,
posts: [
...acc.posts,
curatedPost,
@@ -161,6 +152,8 @@ const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts
posts: [],
indexedUpdated: [],
requestedIgnored: [],
duplicates: [],
duplicateCount: 0,
tooOldCount: 0,
tooRecentCount: 0,
beforeIndexedCount: 0,

View File

@@ -18,6 +18,11 @@ const hosts = [
label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
},
{
method: 'redditAlbum',
label: 'reddit',
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/gallery/:id'),
},
{
method: 'redditVideo',
label: 'reddit',
@@ -53,6 +58,11 @@ const hosts = [
label: 'gfycat',
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
},
{
method: 'redgifs',
label: 'redgifs',
pattern: new UrlPattern('http(s)\\://(:subdomain.)redgifs.com(/watch)/(:id-mobile)(:id)(.:ext)(?*)'),
},
{
method: 'erome',
label: 'erome',
@@ -139,7 +149,5 @@ module.exports = function dissectLink(url) {
}
}
console.log(url);
return null;
};

View File

@@ -4,14 +4,15 @@ const config = require('config');
const Promise = require('bluebird');
const yaml = require('js-yaml');
const saveProfileDetails = require('../save/profileDetails.js');
const fetchItem = require('./item.js');
const interpolate = require('../interpolate.js');
const save = require('../save/save.js');
// const textToStream = require('../save/textToStream.js');
const saveMeta = require('../save/meta.js');
const mux = require('../save/mux.js');
const writeToIndex = require('../save/writeToIndex.js');
const logger = require('../logger')(__filename);
const saveProfileDetails = require('../save/profileDetails');
const fetchItem = require('./item');
const interpolate = require('../interpolate');
const save = require('../save/save');
// const textToStream = require('../save/textToStream');
const saveMeta = require('../save/meta');
const mux = require('../save/mux');
const writeToIndex = require('../save/writeToIndex');
function curateComments(comments) {
return comments.map((comment) => {
@@ -107,6 +108,10 @@ async function fetchSaveUserContent(user, ep, args) {
const hashes = new Set(user.indexed.original.map(item => item.hash));
const posts = await Promise.map(user.posts, async (post) => {
if (!post.content) {
return null;
}
const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const buffers = await getBuffers(item, post, post.host);
@@ -118,10 +123,7 @@ async function fetchSaveUserContent(user, ep, args) {
// prevent duplicates
if (config.fetch.avoidDuplicates && hashes.has(buffers[0].hash)) {
console.log(
'\x1b[33m%s\x1b[0m',
`Ignoring duplicate file '${post.url}' (${post.permalink})`,
);
logger.verbose(`Ignoring duplicate file '${post.url}' (${post.permalink})`);
return buffers[0].hash;
}
@@ -148,12 +150,12 @@ async function fetchSaveUserContent(user, ep, args) {
concurrency: config.fetch.concurrency,
});
return writeToIndex(posts, profilePaths, user, args);
return writeToIndex(posts.filter(Boolean), profilePaths, user, args);
}
async function fetchSaveDirectContent(content, host, ep) {
return Promise.reduce(content.items, async (accItems, originalItem, index) => {
console.log(`Fetching and saving '${host.url}'`);
logger.info(`Fetching and saving '${host.url}'`);
const item = { ...originalItem, index };
const buffers = await getBuffers(item, null, host);

View File

@@ -3,7 +3,8 @@
const config = require('config');
const Promise = require('bluebird');
const methods = require('../methods/methods.js');
const logger = require('../logger')(__filename);
const methods = require('../methods/methods');
const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users), async (accUsers, user) => ({
...accUsers,
@@ -11,7 +12,7 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
...user,
posts: await Promise.reduce(user.posts, async (accPosts, post) => {
if (!post.host || !methods[post.host.method]) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${post.url}' (${post.permalink})`);
logger.warn(`Ignoring unsupported content '${post.url}' (${post.permalink})`);
return accPosts;
}
@@ -25,10 +26,10 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
},
];
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
logger.warn(`${error.message} (${post.permalink})`);
if (config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
return [
...accPosts,
@@ -53,7 +54,7 @@ async function getInfo(host, reddit, url) {
return info;
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`);
logger.verbose(`Ignoring unsupported content '${url}'`);
return null;
}

View File

@@ -4,13 +4,14 @@ const config = require('config');
const bhttp = require('bhttp');
const blake2 = require('blake2');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').items;
async function fetchItem(url, attempt, post, host) {
async function retry(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
logger.warn(`Failed to fetch '${url}', ${attempt < config.fetch.retries ? 'retrying' : 'giving up'}: ${error.message} (${post ? post.permalink : 'no post'})`);
if (attempt < config.fetch.retries) {
console.log('Retrying...');
return fetchItem(url, attempt + 1, post);
}
@@ -18,13 +19,17 @@ async function fetchItem(url, attempt, post, host) {
}
try {
const res = await bhttp.get(url);
const res = await limiter.schedule(async () => bhttp.get(url));
if (!res.statusCode === 200) {
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
}
console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`);
if (!Buffer.isBuffer(res.body)) {
throw new Error(`Unexpected response for '${url}' (${res.status}): ${res.body}`);
}
logger.debug(`Fetched '${host ? host.url : url}' (${post ? post.permalink : 'no post'})`);
const hash = blake2.createHash('blake2b', { digestLength: 24 });
hash.update(res.body);
@@ -32,7 +37,6 @@ async function fetchItem(url, attempt, post, host) {
return Object.assign(res.body, { hash: contentHash });
} catch (error) {
console.log(error);
return retry(error);
}
}

19
src/limiter.js Normal file
View File

@@ -0,0 +1,19 @@
'use strict';
const Bottleneck = require('bottleneck');
const args = require('./cli')();
module.exports = {
reddit: new Bottleneck({
reservoir: 30,
reservoirRefreshAmount: 30,
reservoirRefreshInterval: 60000,
maxConcurrent: 1,
minTime: 100,
}),
items: new Bottleneck({
maxConcurrent: args.concurrency,
minTime: args.interval,
}),
};

26
src/logger.js Normal file
View File

@@ -0,0 +1,26 @@
'use strict';

const winston = require('winston');

// Extension-less require for consistency with the rest of the codebase
// (this commit normalizes './cli.js' style requires to './cli' elsewhere).
const args = require('./cli')();

// Single shared winston logger; verbosity is controlled by --log-level.
const logger = winston.createLogger({
  level: args.logLevel,
  transports: [
    new winston.transports.Console({
      level: args.logLevel,
      format: winston.format.combine(
        winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
        winston.format.colorize(),
        winston.format.printf(info => `${info.timestamp} ${info.level}: ${info.message}`),
      ),
      // NOTE: the legacy winston 2 `timestamp: true` transport option was
      // removed here; winston 3 ignores it, and timestamps are already
      // injected by format.timestamp above.
    }),
  ],
});

/**
 * Returns the shared logger instance. Callers invoke the module as
 * require('./logger')(__filename); the filename argument is currently
 * unused but accepted for interface stability.
 *
 * @returns {winston.Logger} the shared logger.
 */
function getLogger() {
  return logger;
}

module.exports = getLogger;

View File

@@ -4,6 +4,8 @@ const config = require('config');
const fetch = require('node-fetch');
const cheerio = require('cheerio');
const logger = require('../logger')(__filename);
const base = 'https://www.erome.com/';
async function erome(host, post) {
@@ -50,7 +52,7 @@ async function erome(host, post) {
const extract = config.library.extractSingleAlbumItem && (items.length === 1);
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
}
return {

View File

@@ -1,13 +1,28 @@
'use strict';
const fetch = require('node-fetch');
const bhttp = require('bhttp');
const redgifs = require('./redgifs');
async function gfycat(host) {
const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.json();
const res = await bhttp.get(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.body;
if (data.error) {
throw new Error(data.error);
if (data.errorMessage) {
const redirectRes = await bhttp.head(host.url, {
followRedirects: false,
});
if (redirectRes.statusCode === 301) {
// Gfycat redirects all NSFW gifs to RedGifs, likely the case
return redgifs({
...host,
url: `https://www.redgifs.com/watch/${host.id}`,
method: 'redgifs',
label: 'redgifs',
});
}
throw new Error(`Gfycat API returned error for source '${host.url}' (${res.status}): ${data.errorMessage}`);
}
return {

View File

@@ -2,7 +2,8 @@
const config = require('config');
const fetch = require('node-fetch');
// const mime = require('mime-types');
const logger = require('../logger')(__filename);
async function imgurAlbumApi(host, post) {
const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, {
@@ -20,7 +21,7 @@ async function imgurAlbumApi(host, post) {
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
}
return {
@@ -45,46 +46,4 @@ async function imgurAlbumApi(host, post) {
};
}
/*
* as of late 2019, imgur requires log in to view albums and gallery images
async function imgurAlbum(host, post) {
const res = await fetch(`https://imgur.com/a/${post.host.id}`);
const html = await res.text();
if (res.status !== 200) {
if (config.methods.imgur.clientId) {
console.log('\x1b[31m%s\x1b[0m', `Could not fetch info for direct imgur album '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`);
return imgurAlbumApi(post);
}
throw new Error(`Could not fetch info for imgur album '${post.host.id}' (${res.statusText}) no API fallback configured`);
}
const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1];
const data = JSON.parse(dataString);
const extract = config.library.album.extractSingleItem && data.album_images.images.length === 1;
return {
album: extract ? null : {
id: data.id,
url: `https://imgur.com/a/${post.host.id}`,
title: data.title,
description: data.description,
datetime: new Date(data.datetime),
},
items: data.album_images.images.map(item => ({
extracted: extract,
id: item.hash,
url: data.animated ? `https://i.imgur.com/${item.hash}.mp4` : `https://i.imgur.com/${item.hash}${item.ext}`,
title: item.title || (extract ? data.title : null),
description: item.description || (extract ? data.description : null),
type: item.animated ? 'video/mp4' : mime.lookup(item.ext.split('?')[0]),
datetime: new Date(item.datetime),
})),
};
}
*/
module.exports = imgurAlbumApi;

View File

@@ -10,6 +10,10 @@ async function imgurImageApi(host) {
},
});
if (res.status !== 200) {
throw new Error(`Imgur API returned HTTP ${res.status} for source '${host.url}'`);
}
const { data } = await res.json();
if (res.status !== 200) {
@@ -32,37 +36,6 @@ async function imgurImageApi(host) {
async function imgurImage(host, post) {
return imgurImageApi(host, post);
/*
* as of late 2019, imgur requires log in to view albums and gallery images
const res = await fetch(`https://imgur.com/${post.host.id}`);
const html = await res.text();
if (res.status !== 200) {
if (config.methods.imgur.clientId) {
console.log('\x1b[31m%s\x1b[0m', `Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`);
return imgurImageApi(post);
}
throw new Error(`Could not scrape info for imgur image '${post.host.id}' (${res.statusText}), no API fallback configured`);
}
const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1];
const data = JSON.parse(dataString);
return {
album: null,
items: [{
id: data.hash,
url: data.animated ? `https://i.imgur.com/${post.host.id}.mp4` : `https://i.imgur.com/${post.host.id}${data.ext}`,
title: data.title,
description: data.description,
type: data.animated ? 'video/mp4' : data.mimetype,
datetime: new Date(data.timestamp || data.datetime),
}],
};
*/
}
module.exports = imgurImage;

View File

@@ -7,8 +7,10 @@ const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage');
const redditImage = require('./redditImage');
const redditAlbum = require('./redditAlbum');
const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo');
const redgifs = require('./redgifs');
const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage');
@@ -23,8 +25,10 @@ module.exports = {
imgurAlbum,
imgurImage,
redditImage,
redditAlbum,
redditPreview,
redditVideo,
redgifs,
self,
tube,
vidbleAlbum,

View File

@@ -0,0 +1,32 @@
'use strict';
const mime = require('mime');
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
/**
 * Scrapes a Reddit gallery page and curates it into the internal album format.
 *
 * @param {Object} host - dissected link info ({ id, url, ... }).
 * @param {Object} post - the Reddit post the gallery belongs to.
 * @returns {Promise<Object>} album descriptor plus one item per gallery entry.
 * @throws {Error} when the gallery page does not respond with HTTP 200.
 */
async function redditAlbum(host, post) {
  const res = await bhttp.get(host.url);

  if (res.statusCode !== 200) {
    throw new Error(res.body.toString());
  }

  // NOTE(review): runScripts 'dangerously' executes scripts embedded in the
  // fetched HTML; confirm script execution is actually required to render the
  // gallery links, otherwise drop it to avoid running untrusted code.
  const { document } = new JSDOM(res.body.toString(), { runScripts: 'dangerously' }).window;
  const items = Array.from(document.querySelectorAll('li a'), el => el.href);

  return {
    album: {
      id: host.id,
      url: host.url,
      title: post.title,
    },
    items: items.map((url) => {
      // Extract the media ID from the final path segment regardless of file
      // extension; the previous /\/(.*).jpg/ pattern threw a TypeError for
      // any non-JPEG gallery entry (.png, .gif) and had an unescaped dot.
      const match = new URL(url).pathname.match(/\/([^/]+?)(?:\.[a-z0-9]+)?$/i);

      return {
        id: match ? match[1] : url,
        url,
        datetime: post.datetime,
        type: mime.getType(url) || 'image/jpeg',
      };
    }),
  };
}

module.exports = redditAlbum;

View File

@@ -1,6 +1,6 @@
'use strict';
const mime = require('mime-types');
const mime = require('mime');
async function redditImage(host, post) {
return {
@@ -10,7 +10,7 @@ async function redditImage(host, post) {
url: post.url,
title: post.title,
datetime: post.datetime,
type: mime.lookup(post.url.split('/.')[0]) || 'image/jpeg',
type: mime.getType(post.url) || 'image/jpeg',
original: post,
}],
};

69
src/methods/redgifs.js Normal file
View File

@@ -0,0 +1,69 @@
'use strict';
const fetch = require('node-fetch');
const mime = require('mime');
/**
 * Curates a RedGifs v2 gallery API payload into the internal album format.
 *
 * @param {Object} data - gallery payload containing a `gifs` array.
 * @returns {Object} album metadata plus one curated item per gif.
 */
function scrapeGallery(data) {
  // The album's datetime is that of its earliest gif (epoch seconds).
  const timestamps = data.gifs.map(gif => gif.createDate);
  const oldestDate = Math.min(...timestamps);

  const items = data.gifs.map(gif => ({
    id: gif.id,
    url: gif.urls.hd,
    description: gif.tags.join(', '),
    type: mime.getType(gif.urls.hd),
    datetime: new Date(gif.createDate * 1000),
    original: gif,
  }));

  return {
    album: {
      id: data.id,
      datetime: new Date(oldestDate * 1000),
    },
    items,
  };
}
/**
 * Fetches a RedGifs gallery by ID and curates it.
 *
 * @param {string} galleryId - RedGifs gallery identifier.
 * @returns {Promise<Object|null>} curated album, or null when unavailable.
 */
async function fetchGallery(galleryId) {
  const res = await fetch(`https://api.redgifs.com/v2/gallery/${galleryId}`);
  const data = await res.json();

  // Guard against a missing *and* an empty gif list; an empty array
  // previously slipped past the truthiness check and produced an album
  // with an invalid date (Math.min of no values is Infinity).
  if (!data.gifs || data.gifs.length === 0) {
    return null;
  }

  return scrapeGallery(data);
}
/**
 * Resolves a RedGifs link into the internal album/item format via the v2 API.
 *
 * @param {Object} host - dissected link info ({ id, url, ... }).
 * @returns {Promise<Object|null>} curated content, or null when no gif found.
 * @throws {Error} when the RedGifs API reports an error for the source.
 */
async function redgifs(host) {
  const res = await fetch(`https://api.redgifs.com/v2/gifs/${host.id.toLowerCase()}`);
  const data = await res.json();

  if (data.errorMessage) {
    throw new Error(`RedGifs API returned error for source '${host.url}' (${res.status}): ${data.errorMessage.description}`);
  }

  // Some responses already carry a full gallery payload.
  if (data.id && data.gifs) {
    return scrapeGallery(data);
  }

  if (!data.gif) {
    return null;
  }

  // A single gif that belongs to a gallery: fetch the whole gallery instead.
  if (data.gif.gallery) {
    return fetchGallery(data.gif.gallery);
  }

  const { gif } = data;

  return {
    album: null,
    items: [{
      id: gif.id,
      url: gif.urls.hd,
      description: gif.tags.join(', '),
      type: mime.getType(gif.urls.hd),
      datetime: new Date(gif.createDate * 1000),
      original: gif,
    }],
  };
}

module.exports = redgifs;

View File

@@ -3,6 +3,8 @@
const youtubedl = require('youtube-dl');
const dateFns = require('date-fns');
const logger = require('../logger')(__filename);
async function tube(host, post) {
try {
const data = await new Promise((resolve, reject) => {
@@ -15,7 +17,7 @@ async function tube(host, post) {
});
});
host.id = data.display_id;
host.id = data.display_id; // eslint-disable-line no-param-reassign
return {
album: null,
@@ -32,7 +34,7 @@ async function tube(host, post) {
],
};
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring possible profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
logger.warn(`Ignoring possible image or profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
return null;
}

View File

@@ -6,6 +6,8 @@ const UrlPattern = require('url-pattern');
const cheerio = require('cheerio');
const mime = require('mime-types');
const logger = require('../logger')(__filename);
const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)');
async function vidbleAlbum(host, post) {
@@ -23,7 +25,7 @@ async function vidbleAlbum(host, post) {
const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`);
logger.verbose(`Extracting single item from album '${post.title}' - ${res.link}`);
}
return {
@@ -43,7 +45,7 @@ async function vidbleAlbum(host, post) {
id,
url: `https://vidble.com/${id}.${components.ext}`,
type: mimetype,
datetime: post.datetime,
datetime: post ? post.datetime : null,
};
}),
};

View File

@@ -1,9 +1,11 @@
'use strict';
const logger = require('../logger')(__filename);
async function saveMeta(filepath, meta, ep) {
await ep.writeMetadata(filepath, meta, ['overwrite_original']);
console.log('\x1b[36m%s\x1b[0m', `Wrote metadata to '${filepath}'`);
logger.debug(`Wrote metadata to '${filepath}'`);
}
module.exports = saveMeta;

View File

@@ -3,24 +3,25 @@
const ffmpeg = require('fluent-ffmpeg');
const fs = require('fs-extra');
function mux(target, sources, item) {
return new Promise((resolve, reject) => {
return sources.reduce((acc, source) => {
return acc.input(source);
}, ffmpeg()).videoCodec('copy').audioCodec('copy').on('start', cmd => {
console.log('\x1b[36m%s\x1b[0m', `Muxing ${sources.length} streams to '${target}'`);
}).on('end', (stdout) => {
console.log('\x1b[32m%s\x1b[0m', `Muxed and saved '${target}'`);
const logger = require('../logger')(__filename);
function mux(target, sources) {
return new Promise((resolve, reject) => sources.reduce((acc, source) => acc.input(source), ffmpeg())
.videoCodec('copy')
.audioCodec('copy')
.on('start', () => {
logger.verbose(`Muxing ${sources.length} streams to '${target}'`);
})
.on('end', (stdout) => {
logger.verbose(`Muxed and saved '${target}'`);
resolve(stdout);
}).on('error', error => reject).save(target);
}).then(() => {
return Promise.all(sources.map(source => {
return fs.remove(source);
})).then(() => {
console.log('\x1b[36m%s\x1b[0m', `Cleaned up temporary files for '${target}'`);
});
});
};
})
.on('error', () => reject)
.save(target))
.then(() => Promise.all(sources.map(source => fs.remove(source))).then(() => {
logger.verbose(`Cleaned up temporary files for '${target}'`);
}));
}
module.exports = mux;

View File

@@ -4,14 +4,15 @@ const config = require('config');
const Promise = require('bluebird');
const UrlPattern = require('url-pattern');
const interpolate = require('../interpolate.js');
const fetchItem = require('../fetch/item.js');
// const textToStream = require('./textToStream.js');
const save = require('./save.js');
const interpolate = require('../interpolate');
const fetchItem = require('../fetch/item');
// const textToStream = require('./textToStream');
const save = require('./save');
const logger = require('../logger')(__filename);
async function saveProfileImage(user, args) {
if (!args.redownloadProfile && user.indexed.profile.image) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring already present profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
return user.indexed.profile.image;
}
@@ -20,7 +21,7 @@ async function saveProfileImage(user, args) {
const image = user.profile ? user.profile.image : user.image;
if (config.library.profile.avoidAvatar && new UrlPattern('http(s)\\://(www.)redditstatic.com/avatars/:id(.:ext)(?:query)').match(image)) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring standard avatar profile image for '${user.name}' (https://reddit.com/user/${user.name})`);
return null;
}
@@ -44,7 +45,7 @@ async function saveProfileImage(user, args) {
return targets[0];
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
logger.warn(`Could not save profile image for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
return null;
}
@@ -55,7 +56,7 @@ async function saveProfileImage(user, args) {
async function saveProfileDescription(user, args) {
if (!args.redownloadProfile && user.indexed.profile.description) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`Ignoring already present profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
return user.indexed.profile.description;
}
@@ -70,13 +71,13 @@ async function saveProfileDescription(user, args) {
return targets[0];
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
logger.error(`Could not save profile description for '${user.name}': ${error} (https://reddit.com/user/${user.name})`);
return null;
}
}
console.log('\x1b[33m%s\x1b[0m', `No profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
logger.verbose(`No profile description for '${user.name}' (https://reddit.com/user/${user.name})`);
return null;
}

View File

@@ -4,6 +4,7 @@ const config = require('config');
const fs = require('fs-extra');
const path = require('path');
const truncate = require('../utils/truncate-bytes');
const logger = require('../logger')(__filename);
function limitPathElement(element, limit) {
return element.split('/').map((component) => {
@@ -30,48 +31,14 @@ async function writeBufferToFile(target, buffer, item) {
await fs.writeFile(target, buffer);
if (item && item.mux) {
console.log(`Temporarily saved '${target}', queued for muxing`);
logger.debug(`Temporarily saved '${target}', queued for muxing`);
} else {
console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`);
logger.verbose(`Saved '${target}'`);
}
return target;
}
/*
async function pipeStreamToFile(target, stream, item) {
const file = fs.createWriteStream(target);
return new Promise((resolve, reject) => {
stream.pipe(file);
stream.on('error', reject);
stream.on('end', () => {
if (item && item.mux) {
console.log(`Temporarily saved '${target}', queued for muxing`);
} else {
console.log('\x1b[32m%s\x1b[0m', `Saved '${target}'`);
}
resolve(target);
});
});
}
async function save(requestedFilepath, streamOrStreams, item) {
const pathElements = getPathElements(requestedFilepath);
const streams = [].concat(streamOrStreams); // allow for single stream argument
await fs.ensureDir(pathElements.dir);
return Promise.all(streams.map((stream, index) => {
const target = path.join(pathElements.root, pathElements.dir, `${pathElements.name}${streams.length > 1 ? `-${index}` : ''}${pathElements.ext}`);
return pipeStreamToFile(target, stream, item);
}));
}
*/
async function save(requestedFilepath, bufferOrBuffers, item) {
const pathElements = getPathElements(requestedFilepath);
const buffers = [].concat(bufferOrBuffers); // allow for single stream argument

View File

@@ -6,6 +6,7 @@ const yaml = require('js-yaml');
const interpolate = require('../interpolate');
// const textToStream = require('./textToStream');
const save = require('./save');
const logger = require('../logger')(__filename);
async function writeToIndex(posts, profilePaths, user, args) {
const filepath = interpolate(config.library.index.file, null, null, null, null, user, false);
@@ -44,8 +45,14 @@ async function writeToIndex(posts, profilePaths, user, args) {
return false;
}
// return save(filepath, textToStream(yaml.safeDump(data)));
return save(filepath, Buffer.from(yaml.safeDump(data), 'utf8'));
try {
const yamlIndex = yaml.safeDump(data);
return save(filepath, Buffer.from(yamlIndex, 'utf8'));
} catch (error) {
logger.error(`Could not save index for ${user.username}: ${error.message}`);
return null;
}
}
module.exports = writeToIndex;

View File

@@ -4,7 +4,8 @@ const config = require('config');
const fs = require('fs-extra');
const yaml = require('js-yaml');
const interpolate = require('../interpolate.js');
const logger = require('../logger')(__filename);
const interpolate = require('../interpolate');
async function getIndex(user) {
const indexFilePath = interpolate(config.library.index.file, null, null, null, null, user, false);
@@ -14,7 +15,7 @@ async function getIndex(user) {
return yaml.safeLoad(indexFile);
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `No index file found for '${user.name}' at '${indexFilePath}'`);
logger.info(`No index file found for '${user.name}' at '${indexFilePath}'`);
return { profile: { image: null, description: null }, posts: [] };
}

View File

@@ -5,33 +5,38 @@ const Promise = require('bluebird');
const getIndex = require('./getIndex.js');
const curateUser = require('../curate/user.js');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').reddit;
async function getUser(username, reddit) {
try {
const user = await reddit.getUser(username).fetch();
const user = await limiter.schedule(async () => reddit.getUser(username).fetch());
return curateUser(user);
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return {
name: username,
fallback: true,
};
}
};
}
const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
return Promise.reduce(postIds, (accUserPosts, postId) => Promise.resolve().then(async () => {
const post = await reddit
.getSubmission(postId)
.fetch();
const post = await limiter.schedule(async () => reddit.getSubmission(postId).fetch());
post.direct = true;
if (accUserPosts[post.author.name]) {
accUserPosts[post.author.name].posts = accUserPosts[post.author.name].posts.concat(post);
return accUserPosts;
return {
...accUserPosts,
[post.author.name]: {
...accUserPosts[post.author.name],
posts: [...accUserPosts[post.author.name].posts, post],
},
};
}
// don't attempt to fetch deleted user

View File

@@ -6,15 +6,18 @@ const getIndex = require('./getIndex.js');
const getArchivePostIds = require('../archives/getArchivePostIds.js');
const curateUser = require('../curate/user.js');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').reddit;
async function getUser(username, reddit) {
try {
const user = await reddit.getUser(username).fetch();
const user = await limiter.schedule(async () => reddit.getUser(username).fetch());
console.log(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`);
logger.info(`Fetched user profile for '${username}' (https://reddit.com/user/${username})`);
return curateUser(user);
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.error(`Failed to fetch reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return {
name: username,
@@ -25,18 +28,18 @@ async function getUser(username, reddit) {
async function getPosts(username, reddit, args) {
try {
const submissions = await reddit
const submissions = await limiter.schedule(async () => reddit
.getUser(username)
.getSubmissions({
sort: args.sort,
limit: Infinity,
});
}));
console.log(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);
logger.info(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);
return submissions;
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
logger.warn(`Failed to fetch posts from reddit user '${username}': ${error.message} (https://reddit.com/user/${username})`);
return [];
}
@@ -45,7 +48,7 @@ async function getPosts(username, reddit, args) {
async function getArchivedPosts(username, posts, reddit) {
const postIds = await getArchivePostIds(username, posts.map(post => post.id));
return Promise.all(postIds.map(postId => reddit.getSubmission(postId).fetch()));
return Promise.all(postIds.map(postId => limiter.schedule(async () => reddit.getSubmission(postId).fetch())));
}
function getUserPostsWrap(reddit, args) {
@@ -78,12 +81,12 @@ function getUserPostsWrap(reddit, args) {
return null;
} catch (error) {
console.log(username, error);
logger.error(`Failed to fetch posts from 'username': ${error.message}`);
return null;
}
}, {
concurrency: 5,
concurrency: 10,
});
return users.reduce(
@@ -97,7 +100,7 @@ function getUserPostsWrap(reddit, args) {
{},
);
} catch (error) {
console.log(error);
logger.error(error);
throw error;
}