15 changed files with 5660 additions and 2 deletions
-
14.editorconfig
-
20.eslintrc
-
1.gitignore
-
1.nvmrc
-
6config/default.js
-
5242package-lock.json
-
31package.json
-
35src/app.js
-
77src/args.js
-
7src/content/content.js
-
21src/content/redgifs.js
-
10src/feeds/feeds.js
-
51src/feeds/reddit.js
-
39src/logger.js
-
107src/utils/http.js
@ -0,0 +1,14 @@ |
|||
# top-most EditorConfig file |
|||
root = true |
|||
|
|||
# Unix-style newlines with a newline ending every file |
|||
[*] |
|||
end_of_line = lf |
|||
insert_final_newline = true |
|||
indent_style = tab |
|||
indent_size = 4 |
|||
|
|||
# Matches multiple files with brace expansion notation |
|||
# Set default charset |
|||
[*.js] |
|||
charset = utf-8 |
@ -0,0 +1,20 @@ |
|||
{ |
|||
"extends": "airbnb-base", |
|||
"parserOptions": { |
|||
"parser": "babel-eslint", |
|||
"sourceType": "script", |
|||
"ecmaVersion": 2020 |
|||
}, |
|||
"rules": { |
|||
"strict": 0, |
|||
"indent": ["error", "tab"], |
|||
"no-tabs": "off", |
|||
"no-unused-vars": ["error", {"argsIgnorePattern": "^_"}], |
|||
"no-console": 0, |
|||
"no-underscore-dangle": 0, |
|||
"prefer-destructuring": "off", |
|||
"template-curly-spacing": "off", |
|||
"object-curly-newline": "off", |
|||
"max-len": [2, {"code": 300, "tabWidth": 4, "ignoreUrls": true}], |
|||
} |
|||
} |
@ -0,0 +1 @@ |
|||
14.13.0 |
@ -0,0 +1,6 @@ |
|||
// Default application configuration, loaded by the `config` package
// (overridable with environment-specific files under config/).
module.exports = {
	// HTTP rate-limiting defaults consumed by src/utils/http.js.
	limits: {
		// Minimum delay between consecutive requests, in milliseconds.
		requestInterval: 1000,
		// Maximum number of requests in flight at once.
		requestConcurrency: 1,
	},
};
5242
package-lock.json
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,35 @@ |
|||
'use strict'; |
|||
|
|||
// const util = require('util');
|
|||
const { argv } = require('./args'); |
|||
const feeds = require('./feeds/feeds'); |
|||
const content = require('./content/content'); |
|||
|
|||
/**
 * Entry point: fetches posts for every feed type requested on the CLI,
 * optionally fetches individual media items, and prints the results.
 */
async function init() {
	// For each known feed type (see src/feeds/feeds.js), fetch posts for every
	// channel name the user supplied for that type, all in parallel.
	const postsPerFeed = (await Promise.all(Object.keys(feeds).map(async (feedType) => {
		if (argv[feedType]) {
			return Promise.all(argv[feedType].map(async (channelName) => {
				const posts = await feeds[feedType](channelName);

				return {
					name: channelName,
					type: feedType,
					posts,
				};
			}));
		}

		return null;
	}))).flat().filter(Boolean); // flatten per-type results, drop unused feed types

	if (argv.items) {
		// Direct media-item URLs bypass the feed layer entirely.
		const items = await Promise.all(argv.items.map((url) => content.redgifs.fetchContent(url)));

		console.log(items);
	}

	console.log(postsPerFeed);
}

// Surface failures instead of leaving an unhandled promise rejection.
init().catch((error) => {
	console.error(error);
	process.exitCode = 1;
});
@ -0,0 +1,77 @@ |
|||
const yargs = require('yargs'); |
|||
const moment = require('moment'); |
|||
|
|||
/**
 * Converts a CLI date argument to a Date.
 * Accepts a calendar date ("2020-01-31" or "31-01-2020") or a relative
 * timespan ("1 month"), interpreted as that long before now, in UTC.
 * Returns null for empty input.
 */
function toDate(dateString) {
	if (!dateString) return null;

	const looksLikeCalendarDate = /\d{2,4}-\d{2}-\d{2,4}/.test(dateString);

	if (looksLikeCalendarDate) {
		return moment.utc(dateString, ['YYYY-MM-DD', 'DD-MM-YYYY']).toDate();
	}

	// Timespan: "1 month" becomes subtract(1, 'month') from the current time.
	return moment.utc().subtract(...dateString.split(' ')).toDate();
}
|||
|
|||
// CLI definition. Note that yargs exposes camelCased keys, so consumers read
// e.g. argv.redditUsers / argv.redditUser (via the alias).
const args = yargs
	.command('npm start')
	.option('reddit-users', {
		describe: 'Fetch content from a reddit user by username.',
		type: 'array',
		alias: 'reddit-user',
	})
	.option('reddit-posts', {
		describe: 'Fetch content from a reddit post by post ID.',
		type: 'array',
		alias: 'reddit-post',
	})
	.option('reddit-subs', {
		describe: 'Fetch content from a subreddit by subreddit name.',
		alias: ['reddit-sub', 'subreddit', 'subreddits'],
		type: 'array',
	})
	.option('items', {
		describe: 'Fetch media items directly from host URL.',
		type: 'array',
		alias: 'item',
	})
	.option('limit', {
		describe: 'Maximum number of items to fetch content from.',
		// A count, not a flag: `--limit 50`. Was mistakenly declared boolean.
		type: 'number',
	})
	.option('after', {
		describe: 'Only include items uploaded after this date or timespan.',
		type: 'string',
		default: toDate(),
	})
	.option('before', {
		describe: 'Only include items uploaded before this date or timespan.',
		type: 'string',
		default: toDate(),
	})
	.option('sort', {
		describe: 'How to sort the items before applying limits.',
		type: 'string',
	})
	.option('redownload', {
		describe: 'Ignore index file and force a redownload of every item in the selection.',
		alias: 'force',
		type: 'boolean',
	})
	.option('url', {
		describe: 'Base URL override.',
		type: 'string',
	})
	.option('log-level', {
		describe: 'Log level',
		type: 'string',
		default: process.env.NODE_ENV === 'development' ? 'silly' : 'info',
	})
	// Normalize date/timespan strings into Date objects after parsing.
	.coerce('after', toDate)
	.coerce('before', toDate);

module.exports = args;
@ -0,0 +1,7 @@ |
|||
'use strict'; |
|||
|
|||
const redgifs = require('./redgifs'); |
|||
|
|||
// Registry of content hosts; src/app.js reaches them as content.<host>.
module.exports = {
	redgifs,
};
@ -0,0 +1,21 @@ |
|||
'use strict'; |
|||
|
|||
const http = require('../utils/http'); |
|||
|
|||
async function fetchContent(url, _post) { |
|||
const res = await http.get(url); |
|||
|
|||
if (res.ok) { |
|||
const id = new URL(url).pathname.match(/\/watch\/(\w+)/)[1]; |
|||
|
|||
return { |
|||
src: `https://thcf8.redgifs.com/${id}.webm`, |
|||
}; |
|||
} |
|||
|
|||
return null; |
|||
} |
|||
|
|||
module.exports = { |
|||
fetchContent, |
|||
}; |
@ -0,0 +1,10 @@ |
|||
'use strict'; |
|||
|
|||
const reddit = require('./reddit'); |
|||
|
|||
// Maps camelCased CLI option names to fetchers: src/app.js iterates these keys
// and reads argv[feedType] (e.g. --reddit-user -> argv.redditUser).
const feeds = {
	redditUser: reddit.fetchUserPosts,
	redditSub: reddit.fetchSubPosts,
};

module.exports = feeds;
@ -0,0 +1,51 @@ |
|||
'use strict'; |
|||
|
|||
const moment = require('moment'); |
|||
|
|||
const http = require('../utils/http'); |
|||
|
|||
/**
 * Maps a raw reddit/pushshift submission to the post shape used internally.
 * The full raw submission is preserved under `raw` for downstream consumers.
 */
function curatePost(rawPost) {
	const post = {
		id: rawPost.id,
		title: rawPost.title,
		// NOTE(review): this key holds the permalink URL, not the raw post —
		// looks like it was meant to be named `permalink`; renaming would
		// change the output shape, so it is only flagged here.
		rawPost: `https://reddit.com${rawPost.permalink}`,
		subreddit: rawPost.subreddit,
		author: rawPost.author,
		pinned: rawPost.pinned,
		stickied: rawPost.stickied,
		upvotes: rawPost.ups,
		nsfw: rawPost.over_18,
		// created_utc is in seconds; convert to milliseconds for moment.
		date: moment.utc(rawPost.created_utc * 1000).toDate(),
		url: rawPost.url,
		raw: rawPost,
	};

	return post;
}
|||
|
|||
async function fetchUserPosts(username, _options) { |
|||
// const res = await http.get(`https://www.reddit.com/user/${username}/submitted.json?limit=100&sort=new`);
|
|||
const res = await http.get(`https://api.pushshift.io/reddit/submission/search?author=${username}&sort_type=created_utc`); |
|||
|
|||
if (res.ok) { |
|||
return res.body.data.map((post) => curatePost(post)); |
|||
} |
|||
|
|||
return null; |
|||
} |
|||
|
|||
async function fetchSubPosts(subreddit, _options) { |
|||
// const res = await http.get(`https://www.reddit.com/r/${subreddit}.json?limit=100&sort=new`);
|
|||
const res = await http.get(`https://api.pushshift.io/reddit/submission/search?subreddit=${subreddit}&sort_type=created_utc`); |
|||
|
|||
if (res.ok) { |
|||
return res.body.data.map((post) => curatePost(post)); |
|||
} |
|||
|
|||
return null; |
|||
} |
|||
|
|||
module.exports = { |
|||
fetchUserPosts, |
|||
fetchSubPosts, |
|||
}; |
@ -0,0 +1,39 @@ |
|||
'use strict'; |
|||
|
|||
const util = require('util'); |
|||
const path = require('path'); |
|||
const winston = require('winston'); |
|||
|
|||
require('winston-daily-rotate-file'); |
|||
|
|||
const { argv } = require('./args'); |
|||
|
|||
/**
 * Creates a winston logger labeled with the calling module's path.
 * @param {string} context - Caller's __filename; the part after src/ or dist/
 *   (extension stripped) becomes the default log label.
 * @returns {winston.Logger} Logger writing to the console at the CLI log
 *   level and to a daily-rotated file at silly level.
 */
function logger(context) {
	// Derive a short module label, e.g. .../src/utils/http.js -> utils/http.
	// Fall back to the bare filename when the path contains no src/dist
	// segment — the original dereferenced a possibly-null match and crashed.
	const root = context.match(/src[/\\]|dist[/\\]/);
	const filename = (root
		? context.slice(root.index + root[0].length)
		: path.basename(context))
		.replace(path.extname(context), '');

	return winston.createLogger({
		format: winston.format.combine(
			winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
			// Render Errors as stack traces; inspect non-string messages.
			winston.format((info) => (info instanceof Error
				? { ...info, message: info.stack }
				: { ...info, message: typeof info.message === 'string' ? info.message : util.inspect(info.message) }))(),
			winston.format.colorize(),
			winston.format.printf(({ level, timestamp, label, message }) => `${timestamp} ${level} [${label || filename}] ${message}`),
		),
		transports: [
			new winston.transports.Console({
				level: argv.logLevel,
				timestamp: true,
			}),
			new winston.transports.DailyRotateFile({
				datePattern: 'YYYY-MM-DD',
				filename: path.join('log', '%DATE%.log'),
				level: 'silly',
			}),
		],
	});
}

module.exports = logger;
@ -0,0 +1,107 @@ |
|||
'use strict'; |
|||
|
|||
const config = require('config'); |
|||
const bhttp = require('bhttp'); |
|||
const Bottleneck = require('bottleneck'); |
|||
|
|||
const logger = require('../logger')(__filename); |
|||
|
|||
// Rate limits applied to requests that specify no custom `limit` option.
const defaultLimiterOptions = {
	minTime: config.limits.requestInterval,
	maxConcurrent: config.limits.requestConcurrency,
};

const defaultLimiter = new Bottleneck(defaultLimiterOptions);

// Cache of custom limiters, keyed by interval then concurrency (see getLimiter).
const limiters = {};

// Merged into every request; callers may override via requestOptions.
const defaultOptions = {
	encodeJSON: true,
	headers: {
		'user-agent': 'ripunzel',
	},
};
|||
|
|||
/**
 * Returns a Bottleneck limiter for the given limit settings, creating and
 * caching one per (interval, concurrency) pair; falls back to the default
 * limiter when no limit is given.
 * @param {?{interval: number, concurrency: number}} limit
 * @returns {Bottleneck}
 */
function getLimiter(limit) {
	if (!limit) {
		return defaultLimiter;
	}

	// Normalize the cache keys once so lookup and store always agree — the
	// original mixed `limit.interval` and `limit.interval || null`.
	const interval = limit.interval || null;
	const concurrency = limit.concurrency || null;

	// Initialize the nested level before assigning into it; the original
	// threw a TypeError on the first custom limit for a given interval.
	if (!limiters[interval]) {
		limiters[interval] = {};
	}

	if (!limiters[interval][concurrency]) {
		limiters[interval][concurrency] = new Bottleneck({
			// Fixed `mineTime` typo: Bottleneck's option is `minTime`.
			...(limit.interval && { minTime: limit.interval }),
			...(limit.concurrency && { maxConcurrent: limit.concurrency }),
		});
	}

	return limiters[interval][concurrency];
}
|||
|
|||
/**
 * Performs an HTTP request via bhttp and normalizes the response.
 * @param {string} [method='get'] - bhttp method name.
 * @param {string} url - Target URL.
 * @param {?Object} body - Request payload for body-carrying methods.
 * @param {?Object} requestOptions - bhttp options plus optional `limit`
 *   and `timeout`/`responseTimeout` (defaults to 60s).
 * @param {?Object} session - Optional bhttp session (see getSession).
 * @returns {Promise<Object>} bhttp response extended with `status`,
 *   `ok` (2xx), and `body` decoded to a string when it arrived as a Buffer.
 */
async function request(method = 'get', url, body, requestOptions, session) {
	const http = session || bhttp;

	const options = {
		...defaultOptions,
		...requestOptions,
		responseTimeout: requestOptions?.responseTimeout || requestOptions?.timeout || 60000,
	};

	// Log the actual method (the original hard-coded "GET") with the
	// effective interval/concurrency limits.
	logger.silly(`${method.toUpperCase()} (${options.limit?.interval || defaultLimiterOptions.minTime}ms/${options.limit?.concurrency || defaultLimiterOptions.maxConcurrent}p) ${url}`);

	// Pass the payload through when one is supplied — the original ternary
	// was inverted and dropped the body exactly when it existed.
	const res = body
		? await http[method](url, body, options)
		: await http[method](url, options);

	return {
		...res,
		body: Buffer.isBuffer(res.body) ? res.body.toString() : res.body,
		status: res.statusCode,
		ok: res.statusCode >= 200 && res.statusCode <= 299,
	};
}
|||
|
|||
// Routes the request through the appropriate rate limiter before executing it.
async function scheduleRequest(method = 'get', url, body, options, session) {
	const limiter = getLimiter(options?.limit);

	return limiter.schedule(() => request(method, url, body, options, session));
}
|||
|
|||
// Thin per-verb wrappers around scheduleRequest; GET and DELETE carry no body.
const get = async (url, options, session) => scheduleRequest('get', url, null, options, session);

const post = async (url, body, options, session) => scheduleRequest('post', url, body, options, session);

const put = async (url, body, options, session) => scheduleRequest('put', url, body, options, session);

const patch = async (url, body, options, session) => scheduleRequest('patch', url, body, options, session);

const del = async (url, options, session) => scheduleRequest('delete', url, null, options, session);
|||
|
|||
function getSession(options) { |
|||
return bhttp.session(options); |
|||
} |
|||
|
|||
module.exports = { |
|||
get, |
|||
post, |
|||
delete: del, |
|||
put, |
|||
patch, |
|||
session: getSession, |
|||
}; |
Write
Preview
Loading…
Cancel
Save
Reference in new issue