Saving index per user as TSV. Refactoring.
This commit is contained in:
parent
6696438da0
commit
5eb2eb651a
|
@ -1,5 +1,8 @@
|
||||||
{
|
{
|
||||||
"extends": "airbnb-base",
|
"extends": "airbnb-base",
|
||||||
|
"parserOptions": {
|
||||||
|
"sourceType": "script"
|
||||||
|
},
|
||||||
"rules": {
|
"rules": {
|
||||||
"no-console": 0,
|
"no-console": 0,
|
||||||
"indent": ["error", 4],
|
"indent": ["error", 4],
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
library: {
|
library: {
|
||||||
base: 'output/$user/',
|
base: 'output/$user/',
|
||||||
|
@ -7,17 +9,17 @@ module.exports = {
|
||||||
album: {
|
album: {
|
||||||
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
||||||
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
|
||||||
extractSingleItem: true
|
extractSingleItem: true,
|
||||||
},
|
},
|
||||||
profile: {
|
profile: {
|
||||||
image: '$base$userCreated - profile$ext',
|
image: '$base$userCreated - profile$ext',
|
||||||
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
|
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
|
||||||
avoidAvatar: true
|
avoidAvatar: true,
|
||||||
},
|
},
|
||||||
index: {
|
index: {
|
||||||
file: '$base/index',
|
file: '$base/index',
|
||||||
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle',
|
format: 'tsv',
|
||||||
unique: true
|
keys: ['postId', 'postTitle', 'subreddit', 'postDate', 'url'],
|
||||||
},
|
},
|
||||||
booleans: {
|
booleans: {
|
||||||
extracted: 'extracted-',
|
extracted: 'extracted-',
|
||||||
|
@ -25,15 +27,15 @@ module.exports = {
|
||||||
verified: '✔',
|
verified: '✔',
|
||||||
verifiedEmail: '✉',
|
verifiedEmail: '✉',
|
||||||
gold: '★',
|
gold: '★',
|
||||||
over18: '♥'
|
over18: '♥',
|
||||||
},
|
},
|
||||||
meta: {
|
meta: {
|
||||||
comment: '$itemDescription'
|
comment: '$itemDescription',
|
||||||
},
|
},
|
||||||
dateFormat: 'YYYYMMDD',
|
dateFormat: 'YYYYMMDD',
|
||||||
truncate: {
|
truncate: {
|
||||||
limit: 250,
|
limit: 250,
|
||||||
truncator: '...'
|
truncator: '...',
|
||||||
},
|
},
|
||||||
indexOffset: 1,
|
indexOffset: 1,
|
||||||
slashSubstitute: '#',
|
slashSubstitute: '#',
|
||||||
|
@ -47,8 +49,8 @@ module.exports = {
|
||||||
search: false,
|
search: false,
|
||||||
preview: true,
|
preview: true,
|
||||||
reddit: ['ip'],
|
reddit: ['ip'],
|
||||||
reupload: []
|
reupload: [],
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
reddit: {
|
reddit: {
|
||||||
api: {
|
api: {
|
||||||
|
@ -58,12 +60,12 @@ module.exports = {
|
||||||
token_type: 'bearer',
|
token_type: 'bearer',
|
||||||
expires_in: 3600,
|
expires_in: 3600,
|
||||||
refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr',
|
refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr',
|
||||||
scope: 'history identity mysubreddits read subscribe'
|
scope: 'history identity mysubreddits read subscribe',
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
methods: {
|
methods: {
|
||||||
imgur: {
|
imgur: {
|
||||||
clientId: '1234567abcdefgh'
|
clientId: '1234567abcdefgh',
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -461,6 +461,35 @@
|
||||||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz",
|
||||||
"integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0="
|
"integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0="
|
||||||
},
|
},
|
||||||
|
"csv": {
|
||||||
|
"version": "3.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/csv/-/csv-3.1.0.tgz",
|
||||||
|
"integrity": "sha512-SfnePMkhjljB7ehvubZESGjgrnM7V/gBe5ubZWKxeKwgmTl/HtVCdfSaGRgH/i/vG7qJaSLMpP0krNbAuunRBg==",
|
||||||
|
"requires": {
|
||||||
|
"csv-generate": "2.0.2",
|
||||||
|
"csv-parse": "2.5.0",
|
||||||
|
"csv-stringify": "3.1.1",
|
||||||
|
"stream-transform": "1.0.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"csv-generate": {
|
||||||
|
"version": "2.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-2.0.2.tgz",
|
||||||
|
"integrity": "sha512-oyidhQ/sQcqKOyt+hRnL9oiqFFWsEkOwBE7tEV3pwku6dSuFUQqTGfhYXH/HZ3rKy8xBtcrwsspmXVo+LPijuA=="
|
||||||
|
},
|
||||||
|
"csv-parse": {
|
||||||
|
"version": "2.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-2.5.0.tgz",
|
||||||
|
"integrity": "sha512-4OcjOJQByI0YDU5COYw9HAqjo8/MOLLmT9EKyMCXUzgvh30vS1SlMK+Ho84IH5exN44cSnrYecw/7Zpu2m4lkA=="
|
||||||
|
},
|
||||||
|
"csv-stringify": {
|
||||||
|
"version": "3.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-3.1.1.tgz",
|
||||||
|
"integrity": "sha512-Ni9r/BdQM2cGnWzwAP09zp12LVOAMHLJ86azNHGC7s4OUo2WidGfcM3QwYEjD8c4ELCL/a4AzfIsVCzroeys+g==",
|
||||||
|
"requires": {
|
||||||
|
"lodash.get": "4.4.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
"dashdash": {
|
"dashdash": {
|
||||||
"version": "1.14.1",
|
"version": "1.14.1",
|
||||||
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
|
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
|
||||||
|
@ -1444,6 +1473,11 @@
|
||||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
|
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
|
||||||
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
|
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
|
||||||
},
|
},
|
||||||
|
"lodash.get": {
|
||||||
|
"version": "4.4.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
|
||||||
|
"integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk="
|
||||||
|
},
|
||||||
"lru-cache": {
|
"lru-cache": {
|
||||||
"version": "4.1.2",
|
"version": "4.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
|
||||||
|
@ -2146,6 +2180,11 @@
|
||||||
"tweetnacl": "0.14.5"
|
"tweetnacl": "0.14.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"stream-transform": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-LNcZSF01PZ+bM0OqwPY7UHPiKoxSmLGHAcqakvh01DCU98ONEslLORdyBPdmTqjTpZSfCiaYLV4sci9y5M47oA=="
|
||||||
|
},
|
||||||
"string-width": {
|
"string-width": {
|
||||||
"version": "2.1.1",
|
"version": "2.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",
|
||||||
|
|
|
@ -4,7 +4,8 @@
|
||||||
"description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.",
|
"description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.",
|
||||||
"main": "app.js",
|
"main": "app.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node src/app.js"
|
"start": "node src/app.js",
|
||||||
|
"inspect": "node --inspect src/app.js"
|
||||||
},
|
},
|
||||||
"pkg": {
|
"pkg": {
|
||||||
"scripts": "src/app.js",
|
"scripts": "src/app.js",
|
||||||
|
@ -31,6 +32,7 @@
|
||||||
"bluebird": "^3.5.1",
|
"bluebird": "^3.5.1",
|
||||||
"cheerio": "^1.0.0-rc.2",
|
"cheerio": "^1.0.0-rc.2",
|
||||||
"config": "^1.30.0",
|
"config": "^1.30.0",
|
||||||
|
"csv": "^3.1.0",
|
||||||
"date-fns": "^1.29.0",
|
"date-fns": "^1.29.0",
|
||||||
"dist-exiftool": "^10.53.0",
|
"dist-exiftool": "^10.53.0",
|
||||||
"fluent-ffmpeg": "^2.1.2",
|
"fluent-ffmpeg": "^2.1.2",
|
||||||
|
|
|
@ -19,7 +19,7 @@ const curatePosts = require('./curate/posts.js');
|
||||||
const interpolate = require('./interpolate.js');
|
const interpolate = require('./interpolate.js');
|
||||||
|
|
||||||
const attachContentInfo = require('./fetch/info.js');
|
const attachContentInfo = require('./fetch/info.js');
|
||||||
const fetchContent = require('./fetch/content.js');
|
const fetchSaveContent = require('./fetch/content.js');
|
||||||
|
|
||||||
const getPosts = require('./sources/getPosts.js')(reddit, args);
|
const getPosts = require('./sources/getPosts.js')(reddit, args);
|
||||||
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
|
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
|
||||||
|
@ -42,7 +42,7 @@ Promise.resolve().then(async () => {
|
||||||
const infoUserPosts = await attachContentInfo(curatedUserPosts);
|
const infoUserPosts = await attachContentInfo(curatedUserPosts);
|
||||||
|
|
||||||
await ep.open();
|
await ep.open();
|
||||||
await Promise.all(Object.values(infoUserPosts).map(user => fetchContent(user, ep)));
|
await Promise.all(Object.values(infoUserPosts).map(user => fetchSaveContent(user, ep)));
|
||||||
await ep.close();
|
await ep.close();
|
||||||
}).catch(error => {
|
}).catch(error => {
|
||||||
return console.error(error);
|
return console.error(error);
|
||||||
|
|
|
@ -8,9 +8,10 @@ const save = require('../save/save.js');
|
||||||
const textToStream = require('../save/textToStream.js');
|
const textToStream = require('../save/textToStream.js');
|
||||||
const saveMeta = require('../save/meta.js');
|
const saveMeta = require('../save/meta.js');
|
||||||
const mux = require('../save/mux.js');
|
const mux = require('../save/mux.js');
|
||||||
|
const writeToIndex = require('../save/writeToIndex.js');
|
||||||
|
|
||||||
async function getStreams(item, post) {
|
async function getStreams(item, post) {
|
||||||
if(item.self) {
|
if (item.self) {
|
||||||
return [textToStream(item.text)];
|
return [textToStream(item.text)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,28 +25,7 @@ async function getStreams(item, post) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchContent(user, ep) {
|
async function addMeta(filepath, ep, item, post, user) {
|
||||||
await Promise.map(user.posts, async (post) => {
|
|
||||||
const items = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
|
|
||||||
const item = { ...originalItem, index };
|
|
||||||
const streams = await getStreams(item, post);
|
|
||||||
|
|
||||||
// no streams, ignore item
|
|
||||||
if (streams.length <= 0) {
|
|
||||||
return accItems;
|
|
||||||
}
|
|
||||||
|
|
||||||
const type = item.type.split('/')[0];
|
|
||||||
const filepath = post.content.album
|
|
||||||
? interpolate(config.library.album[type], user, post, item)
|
|
||||||
: interpolate(config.library[type], user, post, item);
|
|
||||||
|
|
||||||
const sourcePaths = await save(filepath, streams, item, post);
|
|
||||||
|
|
||||||
if (item.mux) {
|
|
||||||
await mux(filepath, sourcePaths, item);
|
|
||||||
}
|
|
||||||
|
|
||||||
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
|
||||||
const interpolatedValue = interpolate(value, user, post, item);
|
const interpolatedValue = interpolate(value, user, post, item);
|
||||||
|
|
||||||
|
@ -55,16 +35,43 @@ async function fetchContent(user, ep) {
|
||||||
if (Object.keys(meta).length > 0) {
|
if (Object.keys(meta).length > 0) {
|
||||||
await saveMeta(filepath, meta, ep);
|
await saveMeta(filepath, meta, ep);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getFilepath(item, post, user) {
|
||||||
|
const type = item.type.split('/')[0];
|
||||||
|
|
||||||
|
return post.content.album
|
||||||
|
? interpolate(config.library.album[type], user, post, item)
|
||||||
|
: interpolate(config.library[type], user, post, item);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchSaveContent(user, ep) {
|
||||||
|
const posts = await Promise.map(user.posts, async (post) => {
|
||||||
|
await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
|
||||||
|
const item = { ...originalItem, index };
|
||||||
|
const streams = await getStreams(item, post);
|
||||||
|
|
||||||
|
// no streams, ignore item
|
||||||
|
if (streams.length <= 0) {
|
||||||
|
return accItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
const filepath = getFilepath(item, post, user);
|
||||||
|
const sourcePaths = await save(filepath, streams, item, post);
|
||||||
|
|
||||||
|
if (item.mux) {
|
||||||
|
await mux(filepath, sourcePaths, item);
|
||||||
|
}
|
||||||
|
|
||||||
|
await addMeta(filepath, ep, item, post, user);
|
||||||
|
|
||||||
return sourcePaths;
|
return sourcePaths;
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
console.log(items);
|
return post;
|
||||||
|
|
||||||
const filename = interpolate(config.library.index.file, user, post);
|
|
||||||
const entry = `${interpolate(config.library.index.entry, user, post, null, false)}\n`;
|
|
||||||
|
|
||||||
await fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return writeToIndex(posts, user);
|
||||||
}
|
}
|
||||||
module.exports = fetchContent;
|
|
||||||
|
module.exports = fetchSaveContent;
|
||||||
|
|
|
@ -6,14 +6,12 @@ const url = require('url');
|
||||||
const dateFns = require('date-fns');
|
const dateFns = require('date-fns');
|
||||||
const mime = require('mime-types');
|
const mime = require('mime-types');
|
||||||
|
|
||||||
function interpolate(pattern, user, post, item, strip = true) {
|
function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) {
|
||||||
const dateFormat = config.library.dateFormat || 'YYYYMMDD';
|
|
||||||
|
|
||||||
const vars = {
|
const vars = {
|
||||||
$base: config.library.base
|
$base: config.library.base,
|
||||||
};
|
};
|
||||||
|
|
||||||
if(user) {
|
if (user) {
|
||||||
Object.assign(vars, {
|
Object.assign(vars, {
|
||||||
$user: user.name,
|
$user: user.name,
|
||||||
$username: user.name,
|
$username: user.name,
|
||||||
|
@ -24,40 +22,41 @@ function interpolate(pattern, user, post, item, strip = true) {
|
||||||
$userGold: user.gold ? config.library.booleans.gold : '',
|
$userGold: user.gold ? config.library.booleans.gold : '',
|
||||||
});
|
});
|
||||||
|
|
||||||
if(user.profile) {
|
if (user.profile) {
|
||||||
Object.assign(vars, {
|
Object.assign(vars, {
|
||||||
$profileId: user.profile.id,
|
$profileId: user.profile.id,
|
||||||
$profileTitle: user.profile.title,
|
$profileTitle: user.profile.title,
|
||||||
$profileDescription: user.profile.description,
|
$profileDescription: user.profile.description,
|
||||||
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : ''
|
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : '',
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(post) {
|
if (post) {
|
||||||
Object.assign(vars, {
|
Object.assign(vars, {
|
||||||
$postId: post.id,
|
$postId: post.id,
|
||||||
$postTitle: (post.title || '').slice(0, config.library.titleLength),
|
$postTitle: (post.title || '').slice(0, config.library.titleLength),
|
||||||
$postUser: post.user,
|
$postUser: post.user,
|
||||||
$postDate: dateFns.format(post.datetime, dateFormat),
|
$postDate: dateFns.format(post.datetime, dateFormat),
|
||||||
$postIndex: post.index + config.library.indexOffset,
|
$postIndex: post.index + config.library.indexOffset,
|
||||||
|
$postHash: post.hash,
|
||||||
$url: post.url,
|
$url: post.url,
|
||||||
$subreddit: post.subreddit,
|
$subreddit: post.subreddit,
|
||||||
$hostLabel: post.host.label,
|
$hostLabel: post.host.label,
|
||||||
$hostId: post.host.id
|
$hostId: post.host.id,
|
||||||
});
|
});
|
||||||
|
|
||||||
if(post.content.album) {
|
if (post.content.album) {
|
||||||
Object.assign(vars, {
|
Object.assign(vars, {
|
||||||
$albumId: post.content.album.id,
|
$albumId: post.content.album.id,
|
||||||
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength),
|
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength),
|
||||||
$albumDescription: post.content.album.description,
|
$albumDescription: post.content.album.description,
|
||||||
$albumDate: dateFns.format(post.content.album.datetime, dateFormat)
|
$albumDate: dateFns.format(post.content.album.datetime, dateFormat),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(item) {
|
if (item) {
|
||||||
Object.assign(vars, {
|
Object.assign(vars, {
|
||||||
$itemId: item.id,
|
$itemId: item.id,
|
||||||
$itemTitle: (item.title || '').slice(0, config.library.titleLength),
|
$itemTitle: (item.title || '').slice(0, config.library.titleLength),
|
||||||
|
@ -66,18 +65,18 @@ function interpolate(pattern, user, post, item, strip = true) {
|
||||||
$itemIndex: item.index + config.library.indexOffset,
|
$itemIndex: item.index + config.library.indexOffset,
|
||||||
$extracted: item.extracted ? config.library.booleans.extracted : '',
|
$extracted: item.extracted ? config.library.booleans.extracted : '',
|
||||||
$preview: item.preview ? config.library.booleans.preview : '',
|
$preview: item.preview ? config.library.booleans.preview : '',
|
||||||
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname)
|
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return Object.entries(vars).reduce((acc, [key, value], index) => {
|
return Object.entries(vars).reduce((acc, [key, value]) => {
|
||||||
// substitute slashes for filesystem compatibility
|
// substitute slashes for filesystem compatibility
|
||||||
if(key !== '$base' && strip) {
|
if (key !== '$base' && strip) {
|
||||||
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
|
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
|
||||||
}
|
}
|
||||||
|
|
||||||
return acc.replace(key, value);
|
return acc.replace(key, value);
|
||||||
}, pattern);
|
}, pattern);
|
||||||
};
|
}
|
||||||
|
|
||||||
module.exports = interpolate;
|
module.exports = interpolate;
|
||||||
|
|
Loading…
Reference in New Issue