Saving index per user as TSV. Refactoring.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent 6696438da0
commit 5eb2eb651a
7 changed files with 110 additions and 58 deletions

View File

@ -1,5 +1,8 @@
{
"extends": "airbnb-base",
"parserOptions": {
"sourceType": "script"
},
"rules": {
"no-console": 0,
"indent": ["error", 4],

View File

@ -1,3 +1,5 @@
'use strict';
module.exports = {
library: {
base: 'output/$user/',
@ -7,17 +9,17 @@ module.exports = {
album: {
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
extractSingleItem: true
extractSingleItem: true,
},
profile: {
image: '$base$userCreated - profile$ext',
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
avoidAvatar: true
avoidAvatar: true,
},
index: {
file: '$base/index',
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle',
unique: true
format: 'tsv',
keys: ['postId', 'postTitle', 'subreddit', 'postDate', 'url'],
},
booleans: {
extracted: 'extracted-',
@ -25,15 +27,15 @@ module.exports = {
verified: '✔',
verifiedEmail: '✉',
gold: '★',
over18: '♥'
over18: '♥',
},
meta: {
comment: '$itemDescription'
comment: '$itemDescription',
},
dateFormat: 'YYYYMMDD',
truncate: {
limit: 250,
truncator: '...'
truncator: '...',
},
indexOffset: 1,
slashSubstitute: '#',
@ -47,8 +49,8 @@ module.exports = {
search: false,
preview: true,
reddit: ['ip'],
reupload: []
}
reupload: [],
},
},
reddit: {
api: {
@ -58,12 +60,12 @@ module.exports = {
token_type: 'bearer',
expires_in: 3600,
refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr',
scope: 'history identity mysubreddits read subscribe'
}
scope: 'history identity mysubreddits read subscribe',
},
},
methods: {
imgur: {
clientId: '1234567abcdefgh'
}
}
clientId: '1234567abcdefgh',
},
},
};

39
package-lock.json generated
View File

@ -461,6 +461,35 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz",
"integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0="
},
"csv": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/csv/-/csv-3.1.0.tgz",
"integrity": "sha512-SfnePMkhjljB7ehvubZESGjgrnM7V/gBe5ubZWKxeKwgmTl/HtVCdfSaGRgH/i/vG7qJaSLMpP0krNbAuunRBg==",
"requires": {
"csv-generate": "2.0.2",
"csv-parse": "2.5.0",
"csv-stringify": "3.1.1",
"stream-transform": "1.0.2"
}
},
"csv-generate": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-2.0.2.tgz",
"integrity": "sha512-oyidhQ/sQcqKOyt+hRnL9oiqFFWsEkOwBE7tEV3pwku6dSuFUQqTGfhYXH/HZ3rKy8xBtcrwsspmXVo+LPijuA=="
},
"csv-parse": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-2.5.0.tgz",
"integrity": "sha512-4OcjOJQByI0YDU5COYw9HAqjo8/MOLLmT9EKyMCXUzgvh30vS1SlMK+Ho84IH5exN44cSnrYecw/7Zpu2m4lkA=="
},
"csv-stringify": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-3.1.1.tgz",
"integrity": "sha512-Ni9r/BdQM2cGnWzwAP09zp12LVOAMHLJ86azNHGC7s4OUo2WidGfcM3QwYEjD8c4ELCL/a4AzfIsVCzroeys+g==",
"requires": {
"lodash.get": "4.4.2"
}
},
"dashdash": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
@ -1444,6 +1473,11 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
},
"lodash.get": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
"integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk="
},
"lru-cache": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
@ -2146,6 +2180,11 @@
"tweetnacl": "0.14.5"
}
},
"stream-transform": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-1.0.2.tgz",
"integrity": "sha512-LNcZSF01PZ+bM0OqwPY7UHPiKoxSmLGHAcqakvh01DCU98ONEslLORdyBPdmTqjTpZSfCiaYLV4sci9y5M47oA=="
},
"string-width": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",

View File

@ -4,7 +4,8 @@
"description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.",
"main": "app.js",
"scripts": {
"start": "node src/app.js"
"start": "node src/app.js",
"inspect": "node --inspect src/app.js"
},
"pkg": {
"scripts": "src/app.js",
@ -31,6 +32,7 @@
"bluebird": "^3.5.1",
"cheerio": "^1.0.0-rc.2",
"config": "^1.30.0",
"csv": "^3.1.0",
"date-fns": "^1.29.0",
"dist-exiftool": "^10.53.0",
"fluent-ffmpeg": "^2.1.2",

View File

@ -19,7 +19,7 @@ const curatePosts = require('./curate/posts.js');
const interpolate = require('./interpolate.js');
const attachContentInfo = require('./fetch/info.js');
const fetchContent = require('./fetch/content.js');
const fetchSaveContent = require('./fetch/content.js');
const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
@ -42,7 +42,7 @@ Promise.resolve().then(async () => {
const infoUserPosts = await attachContentInfo(curatedUserPosts);
await ep.open();
await Promise.all(Object.values(infoUserPosts).map(user => fetchContent(user, ep)));
await Promise.all(Object.values(infoUserPosts).map(user => fetchSaveContent(user, ep)));
await ep.close();
}).catch(error => {
return console.error(error);

View File

@ -8,9 +8,10 @@ const save = require('../save/save.js');
const textToStream = require('../save/textToStream.js');
const saveMeta = require('../save/meta.js');
const mux = require('../save/mux.js');
const writeToIndex = require('../save/writeToIndex.js');
async function getStreams(item, post) {
if(item.self) {
if (item.self) {
return [textToStream(item.text)];
}
@ -24,9 +25,29 @@ async function getStreams(item, post) {
return null;
}
async function fetchContent(user, ep) {
await Promise.map(user.posts, async (post) => {
const items = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
/**
 * Build the metadata described by `config.library.meta` and hand it to `saveMeta`.
 *
 * Every value in `config.library.meta` is treated as a pattern and run through
 * `interpolate(pattern, user, post, item)`. Keys whose interpolated value is
 * falsy (e.g. an empty string) are left out. `saveMeta` is only invoked when at
 * least one key survives.
 *
 * @param {string} filepath - Passed through unchanged as the first argument of `saveMeta`.
 * @param {*} ep - Passed through unchanged as the third argument of `saveMeta`
 *     (NOTE(review): presumably the opened exiftool process — confirm against caller).
 * @param {object} item - Forwarded to `interpolate`.
 * @param {object} post - Forwarded to `interpolate`.
 * @param {object} user - Forwarded to `interpolate`.
 * @returns {Promise<void>} Resolves once `saveMeta` has finished, or immediately when there is nothing to save.
 */
async function addMeta(filepath, ep, item, post, user) {
    const filledPairs = Object.entries(config.library.meta)
        .map(([field, pattern]) => [field, interpolate(pattern, user, post, item)])
        .filter(([, text]) => Boolean(text));

    // Nothing interpolated to a usable value, so there is no metadata to write.
    if (filledPairs.length === 0) {
        return;
    }

    const meta = filledPairs.reduce((collected, [field, text]) => ({ ...collected, [field]: text }), {});

    await saveMeta(filepath, meta, ep);
}
/**
 * Resolve the path pattern for an item and interpolate it.
 *
 * The part of `item.type` before the first `/` selects the pattern key.
 * When `post.content.album` is truthy the pattern is read from
 * `config.library.album`, otherwise from `config.library` directly.
 *
 * @param {object} item - Must carry a string `type` containing the pattern key before any `/`.
 * @param {object} post - Must carry a `content` object; its `album` property selects the pattern table.
 * @param {object} user - Forwarded to `interpolate`.
 * @returns {*} Whatever `interpolate(pattern, user, post, item)` returns.
 */
function getFilepath(item, post, user) {
    const [mediaType] = item.type.split('/');
    const patterns = post.content.album ? config.library.album : config.library;

    return interpolate(patterns[mediaType], user, post, item);
}
async function fetchSaveContent(user, ep) {
const posts = await Promise.map(user.posts, async (post) => {
await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const streams = await getStreams(item, post);
@ -35,36 +56,22 @@ async function fetchContent(user, ep) {
return accItems;
}
const type = item.type.split('/')[0];
const filepath = post.content.album
? interpolate(config.library.album[type], user, post, item)
: interpolate(config.library[type], user, post, item);
const filepath = getFilepath(item, post, user);
const sourcePaths = await save(filepath, streams, item, post);
if (item.mux) {
await mux(filepath, sourcePaths, item);
}
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
const interpolatedValue = interpolate(value, user, post, item);
return interpolatedValue ? { ...acc, [key]: interpolatedValue } : acc;
}, {});
if (Object.keys(meta).length > 0) {
await saveMeta(filepath, meta, ep);
}
await addMeta(filepath, ep, item, post, user);
return sourcePaths;
}, []);
console.log(items);
const filename = interpolate(config.library.index.file, user, post);
const entry = `${interpolate(config.library.index.entry, user, post, null, false)}\n`;
await fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry);
return post;
});
return writeToIndex(posts, user);
}
module.exports = fetchContent;
module.exports = fetchSaveContent;

View File

@ -6,14 +6,12 @@ const url = require('url');
const dateFns = require('date-fns');
const mime = require('mime-types');
function interpolate(pattern, user, post, item, strip = true) {
const dateFormat = config.library.dateFormat || 'YYYYMMDD';
function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) {
const vars = {
$base: config.library.base
$base: config.library.base,
};
if(user) {
if (user) {
Object.assign(vars, {
$user: user.name,
$username: user.name,
@ -24,40 +22,41 @@ function interpolate(pattern, user, post, item, strip = true) {
$userGold: user.gold ? config.library.booleans.gold : '',
});
if(user.profile) {
if (user.profile) {
Object.assign(vars, {
$profileId: user.profile.id,
$profileTitle: user.profile.title,
$profileDescription: user.profile.description,
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : ''
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : '',
});
}
}
if(post) {
if (post) {
Object.assign(vars, {
$postId: post.id,
$postTitle: (post.title || '').slice(0, config.library.titleLength),
$postUser: post.user,
$postDate: dateFns.format(post.datetime, dateFormat),
$postIndex: post.index + config.library.indexOffset,
$postHash: post.hash,
$url: post.url,
$subreddit: post.subreddit,
$hostLabel: post.host.label,
$hostId: post.host.id
$hostId: post.host.id,
});
if(post.content.album) {
if (post.content.album) {
Object.assign(vars, {
$albumId: post.content.album.id,
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength),
$albumDescription: post.content.album.description,
$albumDate: dateFns.format(post.content.album.datetime, dateFormat)
$albumDate: dateFns.format(post.content.album.datetime, dateFormat),
});
}
}
if(item) {
if (item) {
Object.assign(vars, {
$itemId: item.id,
$itemTitle: (item.title || '').slice(0, config.library.titleLength),
@ -66,18 +65,18 @@ function interpolate(pattern, user, post, item, strip = true) {
$itemIndex: item.index + config.library.indexOffset,
$extracted: item.extracted ? config.library.booleans.extracted : '',
$preview: item.preview ? config.library.booleans.preview : '',
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname)
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname),
});
}
return Object.entries(vars).reduce((acc, [key, value], index) => {
return Object.entries(vars).reduce((acc, [key, value]) => {
// substitute slashes for filesystem compatibility
if(key !== '$base' && strip) {
if (key !== '$base' && strip) {
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
}
return acc.replace(key, value);
}, pattern);
};
}
module.exports = interpolate;