Saving index per user as TSV. Refactoring.
This commit is contained in:
parent
6696438da0
commit
5eb2eb651a
|
@ -1,5 +1,8 @@
|
|||
{
|
||||
"extends": "airbnb-base",
|
||||
"parserOptions": {
|
||||
"sourceType": "script"
|
||||
},
|
||||
"rules": {
|
||||
"no-console": 0,
|
||||
"indent": ["error", 4],
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
'use strict';
|
||||
|
||||
module.exports = {
|
||||
library: {
|
||||
base: 'output/$user/',
|
||||
|
@ -7,17 +9,17 @@ module.exports = {
|
|||
album: {
|
||||
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
||||
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
||||
extractSingleItem: true
|
||||
extractSingleItem: true,
|
||||
},
|
||||
profile: {
|
||||
image: '$base$userCreated - profile$ext',
|
||||
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
|
||||
avoidAvatar: true
|
||||
avoidAvatar: true,
|
||||
},
|
||||
index: {
|
||||
file: '$base/index',
|
||||
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle',
|
||||
unique: true
|
||||
format: 'tsv',
|
||||
keys: ['postId', 'postTitle', 'subreddit', 'postDate', 'url'],
|
||||
},
|
||||
booleans: {
|
||||
extracted: 'extracted-',
|
||||
|
@ -25,15 +27,15 @@ module.exports = {
|
|||
verified: '✔',
|
||||
verifiedEmail: '✉',
|
||||
gold: '★',
|
||||
over18: '♥'
|
||||
over18: '♥',
|
||||
},
|
||||
meta: {
|
||||
comment: '$itemDescription'
|
||||
comment: '$itemDescription',
|
||||
},
|
||||
dateFormat: 'YYYYMMDD',
|
||||
truncate: {
|
||||
limit: 250,
|
||||
truncator: '...'
|
||||
truncator: '...',
|
||||
},
|
||||
indexOffset: 1,
|
||||
slashSubstitute: '#',
|
||||
|
@ -47,8 +49,8 @@ module.exports = {
|
|||
search: false,
|
||||
preview: true,
|
||||
reddit: ['ip'],
|
||||
reupload: []
|
||||
}
|
||||
reupload: [],
|
||||
},
|
||||
},
|
||||
reddit: {
|
||||
api: {
|
||||
|
@ -58,12 +60,12 @@ module.exports = {
|
|||
token_type: 'bearer',
|
||||
expires_in: 3600,
|
||||
refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr',
|
||||
scope: 'history identity mysubreddits read subscribe'
|
||||
}
|
||||
scope: 'history identity mysubreddits read subscribe',
|
||||
},
|
||||
},
|
||||
methods: {
|
||||
imgur: {
|
||||
clientId: '1234567abcdefgh'
|
||||
}
|
||||
}
|
||||
clientId: '1234567abcdefgh',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
@ -461,6 +461,35 @@
|
|||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz",
|
||||
"integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0="
|
||||
},
|
||||
"csv": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/csv/-/csv-3.1.0.tgz",
|
||||
"integrity": "sha512-SfnePMkhjljB7ehvubZESGjgrnM7V/gBe5ubZWKxeKwgmTl/HtVCdfSaGRgH/i/vG7qJaSLMpP0krNbAuunRBg==",
|
||||
"requires": {
|
||||
"csv-generate": "2.0.2",
|
||||
"csv-parse": "2.5.0",
|
||||
"csv-stringify": "3.1.1",
|
||||
"stream-transform": "1.0.2"
|
||||
}
|
||||
},
|
||||
"csv-generate": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-2.0.2.tgz",
|
||||
"integrity": "sha512-oyidhQ/sQcqKOyt+hRnL9oiqFFWsEkOwBE7tEV3pwku6dSuFUQqTGfhYXH/HZ3rKy8xBtcrwsspmXVo+LPijuA=="
|
||||
},
|
||||
"csv-parse": {
|
||||
"version": "2.5.0",
|
||||
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-2.5.0.tgz",
|
||||
"integrity": "sha512-4OcjOJQByI0YDU5COYw9HAqjo8/MOLLmT9EKyMCXUzgvh30vS1SlMK+Ho84IH5exN44cSnrYecw/7Zpu2m4lkA=="
|
||||
},
|
||||
"csv-stringify": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-3.1.1.tgz",
|
||||
"integrity": "sha512-Ni9r/BdQM2cGnWzwAP09zp12LVOAMHLJ86azNHGC7s4OUo2WidGfcM3QwYEjD8c4ELCL/a4AzfIsVCzroeys+g==",
|
||||
"requires": {
|
||||
"lodash.get": "4.4.2"
|
||||
}
|
||||
},
|
||||
"dashdash": {
|
||||
"version": "1.14.1",
|
||||
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
|
||||
|
@ -1444,6 +1473,11 @@
|
|||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
|
||||
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
|
||||
},
|
||||
"lodash.get": {
|
||||
"version": "4.4.2",
|
||||
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
|
||||
"integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk="
|
||||
},
|
||||
"lru-cache": {
|
||||
"version": "4.1.2",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
|
||||
|
@ -2146,6 +2180,11 @@
|
|||
"tweetnacl": "0.14.5"
|
||||
}
|
||||
},
|
||||
"stream-transform": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-1.0.2.tgz",
|
||||
"integrity": "sha512-LNcZSF01PZ+bM0OqwPY7UHPiKoxSmLGHAcqakvh01DCU98ONEslLORdyBPdmTqjTpZSfCiaYLV4sci9y5M47oA=="
|
||||
},
|
||||
"string-width": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
"description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.",
|
||||
"main": "app.js",
|
||||
"scripts": {
|
||||
"start": "node src/app.js"
|
||||
"start": "node src/app.js",
|
||||
"inspect": "node --inspect src/app.js"
|
||||
},
|
||||
"pkg": {
|
||||
"scripts": "src/app.js",
|
||||
|
@ -31,6 +32,7 @@
|
|||
"bluebird": "^3.5.1",
|
||||
"cheerio": "^1.0.0-rc.2",
|
||||
"config": "^1.30.0",
|
||||
"csv": "^3.1.0",
|
||||
"date-fns": "^1.29.0",
|
||||
"dist-exiftool": "^10.53.0",
|
||||
"fluent-ffmpeg": "^2.1.2",
|
||||
|
|
|
@ -19,7 +19,7 @@ const curatePosts = require('./curate/posts.js');
|
|||
const interpolate = require('./interpolate.js');
|
||||
|
||||
const attachContentInfo = require('./fetch/info.js');
|
||||
const fetchContent = require('./fetch/content.js');
|
||||
const fetchSaveContent = require('./fetch/content.js');
|
||||
|
||||
const getPosts = require('./sources/getPosts.js')(reddit, args);
|
||||
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
|
||||
|
@ -42,7 +42,7 @@ Promise.resolve().then(async () => {
|
|||
const infoUserPosts = await attachContentInfo(curatedUserPosts);
|
||||
|
||||
await ep.open();
|
||||
await Promise.all(Object.values(infoUserPosts).map(user => fetchContent(user, ep)));
|
||||
await Promise.all(Object.values(infoUserPosts).map(user => fetchSaveContent(user, ep)));
|
||||
await ep.close();
|
||||
}).catch(error => {
|
||||
return console.error(error);
|
||||
|
|
|
@ -8,9 +8,10 @@ const save = require('../save/save.js');
|
|||
const textToStream = require('../save/textToStream.js');
|
||||
const saveMeta = require('../save/meta.js');
|
||||
const mux = require('../save/mux.js');
|
||||
const writeToIndex = require('../save/writeToIndex.js');
|
||||
|
||||
async function getStreams(item, post) {
|
||||
if(item.self) {
|
||||
if (item.self) {
|
||||
return [textToStream(item.text)];
|
||||
}
|
||||
|
||||
|
@ -24,28 +25,7 @@ async function getStreams(item, post) {
|
|||
return null;
|
||||
}
|
||||
|
||||
async function fetchContent(user, ep) {
|
||||
await Promise.map(user.posts, async (post) => {
|
||||
const items = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
|
||||
const item = { ...originalItem, index };
|
||||
const streams = await getStreams(item, post);
|
||||
|
||||
// no streams, ignore item
|
||||
if (streams.length <= 0) {
|
||||
return accItems;
|
||||
}
|
||||
|
||||
const type = item.type.split('/')[0];
|
||||
const filepath = post.content.album
|
||||
? interpolate(config.library.album[type], user, post, item)
|
||||
: interpolate(config.library[type], user, post, item);
|
||||
|
||||
const sourcePaths = await save(filepath, streams, item, post);
|
||||
|
||||
if (item.mux) {
|
||||
await mux(filepath, sourcePaths, item);
|
||||
}
|
||||
|
||||
async function addMeta(filepath, ep, item, post, user) {
|
||||
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
||||
const interpolatedValue = interpolate(value, user, post, item);
|
||||
|
||||
|
@ -55,16 +35,43 @@ async function fetchContent(user, ep) {
|
|||
if (Object.keys(meta).length > 0) {
|
||||
await saveMeta(filepath, meta, ep);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Picks the configured path pattern for a content item and interpolates it.
 *
 * The part of `item.type` before the first slash (e.g. `image` for
 * `image/jpeg`) is used as the pattern key. When the post's content has an
 * album, the pattern is read from `config.library.album`; otherwise it is
 * read from `config.library` directly.
 *
 * @param {Object} item - Content item; `item.type` must be a string, it is split on '/'.
 * @param {Object} post - Post owning the item; `post.content.album` is tested for truthiness.
 * @param {Object} user - User owning the post; passed through to `interpolate`.
 * @returns {*} The value returned by `interpolate` (presumably the target file path - confirm against interpolate.js).
 */
function getFilepath(item, post, user) {
    const [type] = item.type.split('/');

    // Album items use their own pattern set so they can be grouped per album.
    const patterns = post.content.album
        ? config.library.album
        : config.library;

    return interpolate(patterns[type], user, post, item);
}
|
||||
|
||||
/**
 * Fetches and saves every content item of every post of a user, then passes
 * the processed posts to `writeToIndex`.
 *
 * For each item of `post.content.items` the function:
 *   1. copies the item and attaches its position as `index`,
 *   2. obtains its streams through `getStreams`,
 *   3. skips the item when there are no streams,
 *   4. saves the streams to the path computed by `getFilepath`,
 *   5. muxes the saved sources when `item.mux` is set,
 *   6. writes metadata through `addMeta`.
 *
 * NOTE(review): `Promise.map` / `Promise.reduce` are not native Promise
 * methods; presumably `Promise` is bluebird (listed in package.json) -
 * confirm against this file's requires.
 *
 * @param {Object} user - User whose `posts` array is processed; also passed to `writeToIndex`.
 * @param {*} ep - Passed through untouched to `addMeta` (presumably the exiftool process opened by the caller - confirm).
 * @returns {Promise<*>} Resolves with whatever `writeToIndex(posts, user)` returns.
 */
async function fetchSaveContent(user, ep) {
    const posts = await Promise.map(user.posts, async (post) => {
        // The reduced value is not used afterwards; only the per-item side
        // effects (save, mux, meta) matter here.
        await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
            const item = { ...originalItem, index };
            const streams = await getStreams(item, post);

            // no streams, ignore item; getStreams has a `return null` path,
            // so null must be handled before reading `.length`
            if (!streams || streams.length <= 0) {
                return accItems;
            }

            const filepath = getFilepath(item, post, user);
            const sourcePaths = await save(filepath, streams, item, post);

            if (item.mux) {
                await mux(filepath, sourcePaths, item);
            }

            await addMeta(filepath, ep, item, post, user);

            return sourcePaths;
        }, []);

        // Index entries are written once per user by writeToIndex below,
        // not appended per post.
        return post;
    });

    return writeToIndex(posts, user);
}
|
||||
module.exports = fetchContent;
|
||||
|
||||
module.exports = fetchSaveContent;
|
||||
|
|
|
@ -6,14 +6,12 @@ const url = require('url');
|
|||
const dateFns = require('date-fns');
|
||||
const mime = require('mime-types');
|
||||
|
||||
function interpolate(pattern, user, post, item, strip = true) {
|
||||
const dateFormat = config.library.dateFormat || 'YYYYMMDD';
|
||||
|
||||
function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) {
|
||||
const vars = {
|
||||
$base: config.library.base
|
||||
$base: config.library.base,
|
||||
};
|
||||
|
||||
if(user) {
|
||||
if (user) {
|
||||
Object.assign(vars, {
|
||||
$user: user.name,
|
||||
$username: user.name,
|
||||
|
@ -24,40 +22,41 @@ function interpolate(pattern, user, post, item, strip = true) {
|
|||
$userGold: user.gold ? config.library.booleans.gold : '',
|
||||
});
|
||||
|
||||
if(user.profile) {
|
||||
if (user.profile) {
|
||||
Object.assign(vars, {
|
||||
$profileId: user.profile.id,
|
||||
$profileTitle: user.profile.title,
|
||||
$profileDescription: user.profile.description,
|
||||
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : ''
|
||||
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : '',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if(post) {
|
||||
if (post) {
|
||||
Object.assign(vars, {
|
||||
$postId: post.id,
|
||||
$postTitle: (post.title || '').slice(0, config.library.titleLength),
|
||||
$postUser: post.user,
|
||||
$postDate: dateFns.format(post.datetime, dateFormat),
|
||||
$postIndex: post.index + config.library.indexOffset,
|
||||
$postHash: post.hash,
|
||||
$url: post.url,
|
||||
$subreddit: post.subreddit,
|
||||
$hostLabel: post.host.label,
|
||||
$hostId: post.host.id
|
||||
$hostId: post.host.id,
|
||||
});
|
||||
|
||||
if(post.content.album) {
|
||||
if (post.content.album) {
|
||||
Object.assign(vars, {
|
||||
$albumId: post.content.album.id,
|
||||
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength),
|
||||
$albumDescription: post.content.album.description,
|
||||
$albumDate: dateFns.format(post.content.album.datetime, dateFormat)
|
||||
$albumDate: dateFns.format(post.content.album.datetime, dateFormat),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if(item) {
|
||||
if (item) {
|
||||
Object.assign(vars, {
|
||||
$itemId: item.id,
|
||||
$itemTitle: (item.title || '').slice(0, config.library.titleLength),
|
||||
|
@ -66,18 +65,18 @@ function interpolate(pattern, user, post, item, strip = true) {
|
|||
$itemIndex: item.index + config.library.indexOffset,
|
||||
$extracted: item.extracted ? config.library.booleans.extracted : '',
|
||||
$preview: item.preview ? config.library.booleans.preview : '',
|
||||
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname)
|
||||
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname),
|
||||
});
|
||||
}
|
||||
|
||||
return Object.entries(vars).reduce((acc, [key, value], index) => {
|
||||
return Object.entries(vars).reduce((acc, [key, value]) => {
|
||||
// substitute slashes for filesystem compatability
|
||||
if(key !== '$base' && strip) {
|
||||
if (key !== '$base' && strip) {
|
||||
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
|
||||
}
|
||||
|
||||
return acc.replace(key, value);
|
||||
}, pattern);
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = interpolate;
|
||||
|
|
Loading…
Reference in New Issue