Saving index per user as TSV. Refactoring.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:56 +02:00
parent 6696438da0
commit 5eb2eb651a
7 changed files with 110 additions and 58 deletions

View File

@ -1,5 +1,8 @@
{ {
"extends": "airbnb-base", "extends": "airbnb-base",
"parserOptions": {
"sourceType": "script"
},
"rules": { "rules": {
"no-console": 0, "no-console": 0,
"indent": ["error", 4], "indent": ["error", 4],

View File

@ -1,3 +1,5 @@
'use strict';
module.exports = { module.exports = {
library: { library: {
base: 'output/$user/', base: 'output/$user/',
@ -7,17 +9,17 @@ module.exports = {
album: { album: {
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext', image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext', video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
extractSingleItem: true extractSingleItem: true,
}, },
profile: { profile: {
image: '$base$userCreated - profile$ext', image: '$base$userCreated - profile$ext',
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)', description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)',
avoidAvatar: true avoidAvatar: true,
}, },
index: { index: {
file: '$base/index', file: '$base/index',
entry: '$postId (r/$subreddit) - $hostId ($url) - $postTitle', format: 'tsv',
unique: true keys: ['postId', 'postTitle', 'subreddit', 'postDate', 'url'],
}, },
booleans: { booleans: {
extracted: 'extracted-', extracted: 'extracted-',
@ -25,15 +27,15 @@ module.exports = {
verified: '✔', verified: '✔',
verifiedEmail: '✉', verifiedEmail: '✉',
gold: '★', gold: '★',
over18: '♥' over18: '♥',
}, },
meta: { meta: {
comment: '$itemDescription' comment: '$itemDescription',
}, },
dateFormat: 'YYYYMMDD', dateFormat: 'YYYYMMDD',
truncate: { truncate: {
limit: 250, limit: 250,
truncator: '...' truncator: '...',
}, },
indexOffset: 1, indexOffset: 1,
slashSubstitute: '#', slashSubstitute: '#',
@ -47,8 +49,8 @@ module.exports = {
search: false, search: false,
preview: true, preview: true,
reddit: ['ip'], reddit: ['ip'],
reupload: [] reupload: [],
} },
}, },
reddit: { reddit: {
api: { api: {
@ -58,12 +60,12 @@ module.exports = {
token_type: 'bearer', token_type: 'bearer',
expires_in: 3600, expires_in: 3600,
refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr', refresh_token: '1234567-A-Bc-defg8912hij-klm345opqr',
scope: 'history identity mysubreddits read subscribe' scope: 'history identity mysubreddits read subscribe',
} },
}, },
methods: { methods: {
imgur: { imgur: {
clientId: '1234567abcdefgh' clientId: '1234567abcdefgh',
} },
} },
}; };

39
package-lock.json generated
View File

@ -461,6 +461,35 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz", "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz",
"integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0=" "integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0="
}, },
"csv": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/csv/-/csv-3.1.0.tgz",
"integrity": "sha512-SfnePMkhjljB7ehvubZESGjgrnM7V/gBe5ubZWKxeKwgmTl/HtVCdfSaGRgH/i/vG7qJaSLMpP0krNbAuunRBg==",
"requires": {
"csv-generate": "2.0.2",
"csv-parse": "2.5.0",
"csv-stringify": "3.1.1",
"stream-transform": "1.0.2"
}
},
"csv-generate": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-2.0.2.tgz",
"integrity": "sha512-oyidhQ/sQcqKOyt+hRnL9oiqFFWsEkOwBE7tEV3pwku6dSuFUQqTGfhYXH/HZ3rKy8xBtcrwsspmXVo+LPijuA=="
},
"csv-parse": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-2.5.0.tgz",
"integrity": "sha512-4OcjOJQByI0YDU5COYw9HAqjo8/MOLLmT9EKyMCXUzgvh30vS1SlMK+Ho84IH5exN44cSnrYecw/7Zpu2m4lkA=="
},
"csv-stringify": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-3.1.1.tgz",
"integrity": "sha512-Ni9r/BdQM2cGnWzwAP09zp12LVOAMHLJ86azNHGC7s4OUo2WidGfcM3QwYEjD8c4ELCL/a4AzfIsVCzroeys+g==",
"requires": {
"lodash.get": "4.4.2"
}
},
"dashdash": { "dashdash": {
"version": "1.14.1", "version": "1.14.1",
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
@ -1444,6 +1473,11 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.5.tgz",
"integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw==" "integrity": "sha512-svL3uiZf1RwhH+cWrfZn3A4+U58wbP0tGVTLQPbjplZxZ8ROD9VLuNgsRniTlLe7OlSqR79RUehXgpBW/s0IQw=="
}, },
"lodash.get": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
"integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk="
},
"lru-cache": { "lru-cache": {
"version": "4.1.2", "version": "4.1.2",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.2.tgz",
@ -2146,6 +2180,11 @@
"tweetnacl": "0.14.5" "tweetnacl": "0.14.5"
} }
}, },
"stream-transform": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-1.0.2.tgz",
"integrity": "sha512-LNcZSF01PZ+bM0OqwPY7UHPiKoxSmLGHAcqakvh01DCU98ONEslLORdyBPdmTqjTpZSfCiaYLV4sci9y5M47oA=="
},
"string-width": { "string-width": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",

View File

@ -4,7 +4,8 @@
"description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.", "description": "Reddit user post dump tool with versatile saving patterns and fallback searches for deleted users.",
"main": "app.js", "main": "app.js",
"scripts": { "scripts": {
"start": "node src/app.js" "start": "node src/app.js",
"inspect": "node --inspect src/app.js"
}, },
"pkg": { "pkg": {
"scripts": "src/app.js", "scripts": "src/app.js",
@ -31,6 +32,7 @@
"bluebird": "^3.5.1", "bluebird": "^3.5.1",
"cheerio": "^1.0.0-rc.2", "cheerio": "^1.0.0-rc.2",
"config": "^1.30.0", "config": "^1.30.0",
"csv": "^3.1.0",
"date-fns": "^1.29.0", "date-fns": "^1.29.0",
"dist-exiftool": "^10.53.0", "dist-exiftool": "^10.53.0",
"fluent-ffmpeg": "^2.1.2", "fluent-ffmpeg": "^2.1.2",

View File

@ -19,7 +19,7 @@ const curatePosts = require('./curate/posts.js');
const interpolate = require('./interpolate.js'); const interpolate = require('./interpolate.js');
const attachContentInfo = require('./fetch/info.js'); const attachContentInfo = require('./fetch/info.js');
const fetchContent = require('./fetch/content.js'); const fetchSaveContent = require('./fetch/content.js');
const getPosts = require('./sources/getPosts.js')(reddit, args); const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args); const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
@ -42,7 +42,7 @@ Promise.resolve().then(async () => {
const infoUserPosts = await attachContentInfo(curatedUserPosts); const infoUserPosts = await attachContentInfo(curatedUserPosts);
await ep.open(); await ep.open();
await Promise.all(Object.values(infoUserPosts).map(user => fetchContent(user, ep))); await Promise.all(Object.values(infoUserPosts).map(user => fetchSaveContent(user, ep)));
await ep.close(); await ep.close();
}).catch(error => { }).catch(error => {
return console.error(error); return console.error(error);

View File

@ -8,6 +8,7 @@ const save = require('../save/save.js');
const textToStream = require('../save/textToStream.js'); const textToStream = require('../save/textToStream.js');
const saveMeta = require('../save/meta.js'); const saveMeta = require('../save/meta.js');
const mux = require('../save/mux.js'); const mux = require('../save/mux.js');
const writeToIndex = require('../save/writeToIndex.js');
async function getStreams(item, post) { async function getStreams(item, post) {
if (item.self) { if (item.self) {
@ -24,28 +25,7 @@ async function getStreams(item, post) {
return null; return null;
} }
async function fetchContent(user, ep) { async function addMeta(filepath, ep, item, post, user) {
await Promise.map(user.posts, async (post) => {
const items = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const streams = await getStreams(item, post);
// no streams, ignore item
if (streams.length <= 0) {
return accItems;
}
const type = item.type.split('/')[0];
const filepath = post.content.album
? interpolate(config.library.album[type], user, post, item)
: interpolate(config.library[type], user, post, item);
const sourcePaths = await save(filepath, streams, item, post);
if (item.mux) {
await mux(filepath, sourcePaths, item);
}
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
const interpolatedValue = interpolate(value, user, post, item); const interpolatedValue = interpolate(value, user, post, item);
@ -55,16 +35,43 @@ async function fetchContent(user, ep) {
if (Object.keys(meta).length > 0) { if (Object.keys(meta).length > 0) {
await saveMeta(filepath, meta, ep); await saveMeta(filepath, meta, ep);
} }
}
function getFilepath(item, post, user) {
const type = item.type.split('/')[0];
return post.content.album
? interpolate(config.library.album[type], user, post, item)
: interpolate(config.library[type], user, post, item);
}
async function fetchSaveContent(user, ep) {
const posts = await Promise.map(user.posts, async (post) => {
await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const streams = await getStreams(item, post);
// no streams, ignore item
if (streams.length <= 0) {
return accItems;
}
const filepath = getFilepath(item, post, user);
const sourcePaths = await save(filepath, streams, item, post);
if (item.mux) {
await mux(filepath, sourcePaths, item);
}
await addMeta(filepath, ep, item, post, user);
return sourcePaths; return sourcePaths;
}, []); }, []);
console.log(items); return post;
const filename = interpolate(config.library.index.file, user, post);
const entry = `${interpolate(config.library.index.entry, user, post, null, false)}\n`;
await fs.appendFile(filename, config.library.index.unique ? `${post.hash} ${entry}` : entry);
}); });
return writeToIndex(posts, user);
} }
module.exports = fetchContent;
module.exports = fetchSaveContent;

View File

@ -6,11 +6,9 @@ const url = require('url');
const dateFns = require('date-fns'); const dateFns = require('date-fns');
const mime = require('mime-types'); const mime = require('mime-types');
function interpolate(pattern, user, post, item, strip = true) { function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) {
const dateFormat = config.library.dateFormat || 'YYYYMMDD';
const vars = { const vars = {
$base: config.library.base $base: config.library.base,
}; };
if (user) { if (user) {
@ -29,7 +27,7 @@ function interpolate(pattern, user, post, item, strip = true) {
$profileId: user.profile.id, $profileId: user.profile.id,
$profileTitle: user.profile.title, $profileTitle: user.profile.title,
$profileDescription: user.profile.description, $profileDescription: user.profile.description,
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : '' $profileOver18: user.profile.over18 ? config.library.booleans.over18 : '',
}); });
} }
} }
@ -41,10 +39,11 @@ function interpolate(pattern, user, post, item, strip = true) {
$postUser: post.user, $postUser: post.user,
$postDate: dateFns.format(post.datetime, dateFormat), $postDate: dateFns.format(post.datetime, dateFormat),
$postIndex: post.index + config.library.indexOffset, $postIndex: post.index + config.library.indexOffset,
$postHash: post.hash,
$url: post.url, $url: post.url,
$subreddit: post.subreddit, $subreddit: post.subreddit,
$hostLabel: post.host.label, $hostLabel: post.host.label,
$hostId: post.host.id $hostId: post.host.id,
}); });
if (post.content.album) { if (post.content.album) {
@ -52,7 +51,7 @@ function interpolate(pattern, user, post, item, strip = true) {
$albumId: post.content.album.id, $albumId: post.content.album.id,
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength), $albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength),
$albumDescription: post.content.album.description, $albumDescription: post.content.album.description,
$albumDate: dateFns.format(post.content.album.datetime, dateFormat) $albumDate: dateFns.format(post.content.album.datetime, dateFormat),
}); });
} }
} }
@ -66,11 +65,11 @@ function interpolate(pattern, user, post, item, strip = true) {
$itemIndex: item.index + config.library.indexOffset, $itemIndex: item.index + config.library.indexOffset,
$extracted: item.extracted ? config.library.booleans.extracted : '', $extracted: item.extracted ? config.library.booleans.extracted : '',
$preview: item.preview ? config.library.booleans.preview : '', $preview: item.preview ? config.library.booleans.preview : '',
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname) $ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname),
}); });
} }
return Object.entries(vars).reduce((acc, [key, value], index) => { return Object.entries(vars).reduce((acc, [key, value]) => {
// substitute slashes for filesystem compatibility // substitute slashes for filesystem compatibility
if (key !== '$base' && strip) { if (key !== '$base' && strip) {
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute); value = (value || '').toString().replace(/\//g, config.library.slashSubstitute);
@ -78,6 +77,6 @@ function interpolate(pattern, user, post, item, strip = true) {
return acc.replace(key, value); return acc.replace(key, value);
}, pattern); }, pattern);
}; }
module.exports = interpolate; module.exports = interpolate;