Fetching and storing comments for self posts.

This commit is contained in:
2019-11-05 01:58:00 +01:00
parent 804867626f
commit e175ce38c8
11 changed files with 272 additions and 149 deletions

View File

@@ -67,7 +67,7 @@ async function getCompletePosts() {
const curatedUserPosts = curatePosts(userPosts, ignoreIds, args);
return attachContentInfo(curatedUserPosts);
return attachContentInfo(curatedUserPosts, reddit);
}
async function getDirectContent(links, ep) {

View File

@@ -2,8 +2,9 @@
const crypto = require('crypto');
/**
 * Compute a fingerprint for a post.
 *
 * The post's `id`, `subreddit_id`, `created_utc` and `title` are added together
 * in that order (no separators) and the MD5 digest of the result is returned.
 *
 * @param {Object} post - object providing `id`, `subreddit_id`, `created_utc` and `title`
 * @returns {string} hex-encoded MD5 digest
 */
const hashPost = (post) => {
  const md5 = crypto.createHash('md5');

  // Left fold without a seed: ((id + subreddit_id) + created_utc) + title.
  const fingerprint = [
    post.id,
    post.subreddit_id,
    post.created_utc,
    post.title,
  ].reduce((joined, part) => joined + part);

  md5.update(fingerprint);

  return md5.digest('hex');
};
/**
 * Derive the MD5 hex digest that identifies a post.
 *
 * @param {Object} post - object providing `id`, `subreddit_id`, `created_utc` and `title`
 * @returns {string} MD5 digest (hex) of those four fields added together in order
 */
const hashPost = (post) => {
  const hasher = crypto.createHash('md5');
  const identity = post.id + post.subreddit_id + post.created_utc + post.title;

  return hasher.update(identity).digest('hex');
};
module.exports = hashPost;

View File

@@ -64,6 +64,8 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
score: post.score,
preview: post.preview ? post.preview.images.map(image => image.source) : null,
host,
direct: post.direct,
comments: post.comments,
hash: hashPost(post),
};
@@ -135,7 +137,13 @@ function curatePost(acc, post, user, index, indexed, ignoreIds, processed, args)
processed.add(host.id);
}
return { ...acc, posts: [...acc.posts, curatedPost] };
return {
...acc,
posts: [
...acc.posts,
curatedPost,
],
};
}
const curatePosts = (userPosts, ignoreIdsArray, args) => Object.values(userPosts).reduce((accPosts, user) => {

View File

@@ -11,10 +11,46 @@ const textToStream = require('../save/textToStream.js');
const saveMeta = require('../save/meta.js');
const mux = require('../save/mux.js');
const writeToIndex = require('../save/writeToIndex.js');
const yaml = require('js-yaml');
/**
 * Reduce a comment tree to the fields written to disk.
 *
 * Each comment becomes `{ text, author, score, date }`; a `replies` key holding
 * the recursively curated children is only present when the comment has at
 * least one reply.
 *
 * @param {Array<Object>} comments - comments with `body`, `author`, `score`, `datetime` and a `replies` array
 * @returns {Array<Object>} curated comments in the same order
 */
function curateComments(comments) {
  const toSavedShape = (comment) => {
    const saved = {
      text: comment.body,
      author: comment.author,
      score: comment.score,
      date: comment.datetime,
    };

    // Leaf comments are emitted without a `replies` key at all.
    if (!comment.replies.length) {
      return saved;
    }

    saved.replies = curateComments(comment.replies);

    return saved;
  };

  return comments.map(comment => toSavedShape(comment));
}
/**
 * Render a self post and its comments as a YAML document.
 *
 * @param {Object} item - content item; its `comments` are passed through `curateComments`
 * @param {Object} post - post providing `title`, `text`, `user.name`, `subreddit`, `score`, `datetime` and `url`
 * @returns {string} the result of `yaml.safeDump` for the assembled document
 */
function selfPostToText(item, post) {
  const {
    title,
    text,
    user,
    subreddit,
    score,
    datetime,
    url,
  } = post;

  // Key order here is the key order handed to the YAML dumper.
  const document = {
    title,
    text,
    author: user.name,
    subreddit,
    score,
    datetime,
    url,
    comments: curateComments(item.comments),
  };

  return yaml.safeDump(document);
}
async function getStreams(item, post) {
if (item.self) {
return [textToStream(post.text)];
return [textToStream(selfPostToText(item, post))];
}
const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
@@ -97,7 +133,7 @@ async function fetchSaveUserContent(user, ep, args) {
}
async function fetchSaveDirectContent(content, host, ep) {
const data = await Promise.reduce(content.items, async (accItems, originalItem, index) => {
return Promise.reduce(content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const streams = await getStreams(item, null);

View File

@@ -5,7 +5,7 @@ const Promise = require('bluebird');
const methods = require('../methods/methods.js');
const attachContentInfo = users => Promise.reduce(Object.values(users), async (accUsers, user) => ({
const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users), async (accUsers, user) => ({
...accUsers,
[user.name]: {
...user,
@@ -17,14 +17,27 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a
}
try {
return [...accPosts, { ...post, content: await methods[post.host.method](post.host, post) }];
return [
...accPosts,
{
...post,
content: await methods[post.host.method](post.host, post, reddit),
},
];
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
if (config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
return [...accPosts, { ...post, previewFallback: true, content: await methods.redditPreview(post.host, post) }];
return [
...accPosts,
{
...post,
previewFallback: true,
content: await methods.redditPreview(post.host, post),
},
];
}
return accPosts;
@@ -33,8 +46,8 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a
},
}), {});
async function getInfo(host) {
return methods[host.method](host);
/**
 * Look up content info for a host through the method named by `host.method`.
 *
 * @param {Object} host - host descriptor; `host.method` selects the entry in `methods`
 * @param {Object} reddit - passed through to the method as its third argument
 * @returns {Promise<*>} whatever the selected method returns
 */
async function getInfo(host, reddit) {
  const { method } = host;

  // The second argument is the post position; it is passed as null here.
  return methods[method](host, null, reddit);
}
module.exports = {

View File

@@ -1,7 +1,39 @@
'use strict';
function self(host, post) {
console.log(post);
/**
 * Convert fetched comments into plain objects, recursing into replies.
 *
 * @param {Array<Object>} comments - comment objects exposing the fields read below
 *   (`id`, `permalink`, `author.name`, `body`, `body_html`, `score`, `created_utc`/`created`, ...)
 * @returns {Array<Object>} curated comments; `replies` is always an array (empty when
 *   the source comment has no truthy `replies`)
 */
function curateComments(comments) {
  return comments.map((comment) => {
    // Reddit documents `created` as a local-epoch value and `created_utc` as the
    // UTC epoch, so the UTC field is preferred; `created` is kept as a fallback
    // for objects that do not carry `created_utc`.
    const createdSeconds = typeof comment.created_utc === 'number'
      ? comment.created_utc
      : comment.created;

    return {
      id: comment.id,
      url: `https://reddit.com${comment.permalink}`,
      author: comment.author.name,
      body: comment.body,
      html: comment.body_html,
      score: comment.score,
      datetime: new Date(createdSeconds * 1000), // epoch seconds -> milliseconds
      edited: comment.edited,
      controversiality: comment.controversiality,
      gilded: comment.gilded,
      stickied: comment.stickied,
      distinguished: comment.distinguished,
      locked: comment.locked,
      archived: comment.archived,
      parent: comment.parent_id,
      replies: comment.replies ? curateComments(comment.replies) : [],
    };
  });
}
/**
 * Fetch a submission with its reply tree expanded without limit or depth bounds.
 *
 * Calls `reddit.getSubmission(postId).expandReplies(...).fetch()`.
 * NOTE(review): `reddit` is presumably a snoowrap client — confirm against the caller.
 *
 * @param {string} postId - ID handed to `reddit.getSubmission`
 * @param {Object} reddit - client exposing `getSubmission`
 * @returns {Promise<*>} the result of the final `fetch()` call
 */
async function getFullPost(postId, reddit) {
  const expansion = {
    limit: Infinity,
    depth: Infinity,
  };
  const submission = reddit.getSubmission(postId);

  return submission.expandReplies(expansion).fetch();
}
async function self(host, originalPost, reddit) {
const post = await getFullPost(originalPost.id, reddit) || originalPost;
const curatedComments = curateComments(post.comments);
return {
album: null,
@@ -10,11 +42,13 @@ function self(host, post) {
url: post.url,
title: post.title,
text: post.text,
author: post.author.name,
datetime: post.datetime,
comments: curatedComments,
type: 'text/plain',
self: true,
original: post,
}]
}],
};
}

View File

@@ -1,6 +1,6 @@
'use strict';
const Readable = require('stream').Readable;
const { Readable } = require('stream');
function textToStream(text) {
const stream = new Readable();
@@ -9,6 +9,6 @@ function textToStream(text) {
stream.push(null);
return stream;
};
}
module.exports = textToStream;

View File

@@ -5,7 +5,7 @@ const Promise = require('bluebird');
const getIndex = require('./getIndex.js');
const curateUser = require('../curate/user.js');
const getUser = async (username, reddit) => {
async function getUser(username, reddit) {
try {
const user = await reddit.getUser(username).fetch();
@@ -22,7 +22,9 @@ const getUser = async (username, reddit) => {
const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
return Promise.reduce(postIds, (accUserPosts, postId) => Promise.resolve().then(async () => {
const post = await reddit.getSubmission(postId).fetch();
const post = await reddit
.getSubmission(postId)
.fetch();
post.direct = true;
@@ -34,13 +36,31 @@ const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
// don't attempt to fetch deleted user
if (post.author.name === '[deleted]') {
return { ...accUserPosts, '[deleted]': { name: '[deleted]', deleted: true, posts: [post] } };
return {
...accUserPosts,
'[deleted]': {
name: '[deleted]',
deleted: true,
posts: [post],
},
};
}
const user = await getUser(post.author.name, reddit);
const { profile, posts: indexed } = await getIndex(user);
return { ...accUserPosts, [post.author.name]: { ...user, posts: [post], indexed: { profile, original: indexed, updated: [] } } };
return {
...accUserPosts,
[post.author.name]: {
...user,
posts: [post],
indexed: {
profile,
original: indexed,
updated: [],
},
},
};
}), userPosts);
};

View File

@@ -25,10 +25,12 @@ async function getUser(username, reddit) {
async function getPosts(username, reddit, args) {
try {
const submissions = await reddit.getUser(username).getSubmissions({
sort: args.sort,
limit: Infinity,
});
const submissions = await reddit
.getUser(username)
.getSubmissions({
sort: args.sort,
limit: Infinity,
});
console.log(`Fetched ${submissions.length} submissions for '${username}' (https://reddit.com/user/${username})`);