Added support for fetching content directly from host. Improved pattern interpolation. Refactored content modules.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:57 +02:00
parent 20cb522689
commit b9a7e4b83a
26 changed files with 572 additions and 440 deletions

View File

@ -2,26 +2,41 @@
module.exports = { module.exports = {
library: { library: {
base: 'output/$user/', base: {
image: '$base$postDate - $preview$itemId - $postTitle$ext', posts: 'output/{user.name}/',
video: '$base$postDate - $preview$itemId - $postTitle$ext', direct: 'output/{host.name}/',
text: '$base$postDate - $preview$postId - $postTitle',
album: {
image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext',
extractSingleItem: true,
}, },
posts: {
image: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}',
video: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}',
text: '{base.posts}{post.date} - {tags.preview}{post.id} - {post.title}',
album: {
image: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}',
video: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}',
},
},
direct: {
image: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}',
video: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}',
text: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}',
album: {
image: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}',
video: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}',
},
},
extractSingleAlbumItem: true,
profile: { profile: {
image: '$base$userCreated - profile$ext', image: '{base.posts}{user.created} - profile{ext}',
description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)', description: '{base.posts}{user.created} - profile ({tags.verified}{tags.verifiedEmail}{tags.gold}{tags.over18})',
avoidAvatar: true, avoidAvatar: true,
}, },
index: { index: {
file: '$base/index', file: '{base.posts}index',
format: 'tsv',
keys: ['postId', 'subreddit', 'postDate', 'url', 'hostId', 'postTitle'],
}, },
booleans: { meta: {
comment: '{item.description}',
},
tags: {
extracted: 'extracted-', extracted: 'extracted-',
preview: 'preview-', preview: 'preview-',
verified: '✔', verified: '✔',
@ -29,9 +44,6 @@ module.exports = {
gold: '★', gold: '★',
over18: '♥', over18: '♥',
}, },
meta: {
comment: '$itemDescription',
},
dateFormat: 'YYYYMMDD', dateFormat: 'YYYYMMDD',
truncate: { truncate: {
limit: 250, limit: 250,

7
package-lock.json generated
View File

@ -528,7 +528,7 @@
"integrity": "sha1-i9F7rB5D3qzYHZrazQ8eJMaQEA8=", "integrity": "sha1-i9F7rB5D3qzYHZrazQ8eJMaQEA8=",
"requires": { "requires": {
"exiftool.exe": "10.53", "exiftool.exe": "10.53",
"exiftool.pl": "10.53", "exiftool.pl": "^10.53.0",
"platform-dependent-modules": "0.0.14" "platform-dependent-modules": "0.0.14"
} }
}, },
@ -2204,6 +2204,11 @@
"string-width": "^2.1.1" "string-width": "^2.1.1"
} }
}, },
"template-format": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/template-format/-/template-format-1.2.4.tgz",
"integrity": "sha512-+8ItNMtMTBbsEHyPR1l7Ke1WZfl91PAcoTvwAcx5U28CRLd7ylzDLazv0kuDTzNmdq/RAOnsxFVWzr4QwVIFVg=="
},
"text-table": { "text-table": {
"version": "0.2.0", "version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",

View File

@ -44,6 +44,7 @@
"object.omit": "^3.0.0", "object.omit": "^3.0.0",
"object.pick": "^1.3.0", "object.pick": "^1.3.0",
"snoowrap": "^1.15.2", "snoowrap": "^1.15.2",
"template-format": "^1.2.4",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"yargs": "^11.0.0" "yargs": "^11.0.0"
}, },

View File

@ -14,10 +14,11 @@ require('array.prototype.flatten').shim();
const reddit = new Snoowrap(config.reddit.api); const reddit = new Snoowrap(config.reddit.api);
const args = require('./cli.js')(); const args = require('./cli.js')();
const dissectLink = require('./dissectLink.js');
const curatePosts = require('./curate/posts.js'); const curatePosts = require('./curate/posts.js');
const attachContentInfo = require('./fetch/info.js'); const { attachContentInfo, getInfo } = require('./fetch/info.js');
const fetchSaveContent = require('./fetch/content.js'); const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js');
const getPosts = require('./sources/getPosts.js')(reddit, args); const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args); const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
@ -34,7 +35,7 @@ async function getFileContents(location, label) {
} }
} }
async function getCompleteUserPosts() { async function getCompletePosts() {
let userPosts = {}; let userPosts = {};
let ignoreIds = []; let ignoreIds = [];
let usernames = args.users || []; let usernames = args.users || [];
@ -69,17 +70,51 @@ async function getCompleteUserPosts() {
return attachContentInfo(curatedUserPosts); return attachContentInfo(curatedUserPosts);
} }
/**
 * Fetch and save the content behind a single host link, bypassing reddit.
 *
 * The link is dissected into a host descriptor, the matching host method is
 * queried for content info, and the resulting items are handed to
 * fetchSaveDirectContent.
 *
 * @param {string} link - URL of the content on the host
 * @param {Object} ep - exiftool process passed through for metadata writing
 * @returns {Promise<*>} whatever fetchSaveDirectContent resolves with
 * @throws {Error} when the link cannot be matched to a host
 */
async function getDirectContent(link, ep) {
    const host = dissectLink(link);

    // dissectLink's result for an unrecognised link is not visible from here;
    // fail with a readable message instead of a TypeError further down
    if (!host) {
        throw new Error(`Could not match '${link}' to a supported host`);
    }

    // resolve the info first, so a failing host lookup never leaves exiftool running
    const info = await getInfo(host);

    // initApp's direct branch returns before it reaches ep.open(), so take
    // ownership of the process here unless a caller has already opened it
    const openedHere = Boolean(ep) && !ep.isOpen;

    if (openedHere) {
        await ep.open();
    }

    try {
        return await fetchSaveDirectContent(info, host, ep);
    } finally {
        // only close what was opened here, the caller keeps control otherwise
        if (openedHere) {
            await ep.close();
        }
    }
}
function fetchSavePosts(userPosts, ep) { function fetchSavePosts(userPosts, ep) {
// don't map to apply concurrency limit and reduce network stress // don't map to apply concurrency limit and reduce network stress
return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveContent(user, ep, args), null); return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveUserContent(user, ep, args), null);
} }
async function initApp() { async function initApp() {
try { try {
const userPosts = await getCompleteUserPosts();
const ep = new exiftool.ExiftoolProcess(exiftoolBin); const ep = new exiftool.ExiftoolProcess(exiftoolBin);
if (args.fetch) {
await getDirectContent(args.fetch, ep);
return;
}
const userPosts = await getCompletePosts();
await ep.open(); await ep.open();
await fetchSavePosts(userPosts, ep); await fetchSavePosts(userPosts, ep);
await ep.close(); await ep.close();
@ -94,8 +129,6 @@ async function initApp() {
console.log('\x1b[31m%s\x1b[0m', error.message); console.log('\x1b[31m%s\x1b[0m', error.message);
} }
} }
return true;
} }
initApp(); initApp();

View File

@ -24,6 +24,11 @@ function getArgs() {
describe: 'Load reddit post IDs from file', describe: 'Load reddit post IDs from file',
type: 'string', type: 'string',
}) })
.option('direct', {
describe: 'Get content directly from imgur and other hosts',
type: 'string',
alias: 'fetch',
})
.option('limit', { .option('limit', {
describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts', describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts',
type: 'number', type: 'number',

View File

@ -14,7 +14,7 @@ const writeToIndex = require('../save/writeToIndex.js');
async function getStreams(item, post) { async function getStreams(item, post) {
if (item.self) { if (item.self) {
return [textToStream(item.text)]; return [textToStream(post.text)];
} }
const sources = item.mux ? [item.url].concat(item.mux) : [item.url]; const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
@ -33,7 +33,7 @@ async function addMeta(filepath, item, post, user, ep) {
} }
const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => {
const interpolatedValue = interpolate(value, user, post, item); const interpolatedValue = interpolate(value, item, null, null, post, user);
return interpolatedValue ? { ...acc, [key]: interpolatedValue } : acc; return interpolatedValue ? { ...acc, [key]: interpolatedValue } : acc;
}, {}); }, {});
@ -45,15 +45,25 @@ async function addMeta(filepath, item, post, user, ep) {
return false; return false;
} }
function getFilepath(item, post, user) { function getFilepath(item, content, host, post, user) {
const type = item.type.split('/')[0]; const type = item.type.split('/')[0];
return post.content.album if (post && content.album) {
? interpolate(config.library.album[type], user, post, item) return interpolate(config.library.posts.album[type], item, content, host, post, user);
: interpolate(config.library[type], user, post, item); }
if (post) {
return interpolate(config.library.posts[type], item, content, host, post, user);
}
if (content.album) {
return interpolate(config.library.direct.album[type], item, content, host);
}
return interpolate(config.library.direct[type], item, content, host);
} }
async function fetchSaveContent(user, ep, args) { async function fetchSaveUserContent(user, ep, args) {
const profilePaths = await saveProfileDetails(user, args); const profilePaths = await saveProfileDetails(user, args);
const posts = await Promise.map(user.posts, async (post) => { const posts = await Promise.map(user.posts, async (post) => {
@ -66,7 +76,7 @@ async function fetchSaveContent(user, ep, args) {
return accItems; return accItems;
} }
const filepath = getFilepath(item, post, user); const filepath = getFilepath(item, post.content, post.host, post, user);
const sourcePaths = await save(filepath, streams, item, post); const sourcePaths = await save(filepath, streams, item, post);
if (item.mux) { if (item.mux) {
@ -86,4 +96,30 @@ async function fetchSaveContent(user, ep, args) {
return writeToIndex(posts, profilePaths, user, args); return writeToIndex(posts, profilePaths, user, args);
} }
module.exports = fetchSaveContent; async function fetchSaveDirectContent(content, host, ep) {
const data = await Promise.reduce(content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const streams = await getStreams(item, null);
// no streams, ignore item
if (!streams || streams.length === 0) {
return accItems;
}
const filepath = getFilepath(item, content, host, null, null);
const sourcePaths = await save(filepath, streams, item, null);
if (item.mux) {
await mux(filepath, sourcePaths, item);
}
await addMeta(filepath, item, null, null, ep);
return sourcePaths;
}, []);
}
module.exports = {
fetchSaveUserContent,
fetchSaveDirectContent,
};

View File

@ -2,6 +2,7 @@
const config = require('config'); const config = require('config');
const Promise = require('bluebird'); const Promise = require('bluebird');
const methods = require('../methods/methods.js'); const methods = require('../methods/methods.js');
const attachContentInfo = users => Promise.reduce(Object.values(users), async (accUsers, user) => ({ const attachContentInfo = users => Promise.reduce(Object.values(users), async (accUsers, user) => ({
@ -16,14 +17,14 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a
} }
try { try {
return [...accPosts, { ...post, content: await methods[post.host.method](post) }]; return [...accPosts, { ...post, content: await methods[post.host.method](post.host, post) }];
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`);
if (config.fetch.archives.preview && post.preview) { if (config.fetch.archives.preview && post.preview) {
console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`);
return [...accPosts, { ...post, previewFallback: true, content: await methods.redditPreview(post) }]; return [...accPosts, { ...post, previewFallback: true, content: await methods.redditPreview(post.host, post) }];
} }
return accPosts; return accPosts;
@ -32,4 +33,11 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a
}, },
}), {}); }), {});
module.exports = attachContentInfo; async function getInfo(host) {
return methods[host.method](host);
}
module.exports = {
attachContentInfo,
getInfo,
};

View File

@ -3,30 +3,32 @@
const config = require('config'); const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
function fetchItem(url, attempt, post) { async function fetchItem(url, attempt, post) {
function retry(error) { async function retry(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post.permalink})`); console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
if(attempt < config.fetch.retries) { if (attempt < config.fetch.retries) {
console.log('Retrying...'); console.log('Retrying...');
return fetchItem(url, ++attempt, post); return fetchItem(url, attempt + 1, post);
} }
return null; return null;
}; }
return fetch(url).then(res => { try {
if(!res.ok) { const res = await fetch(url);
if (!res.ok) {
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`); throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
} }
return res; console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`);
}).then(res => {
console.log(`Fetched '${url}' (${post.permalink})`);
return res.body; return res.body;
}).catch(retry); } catch (error) {
}; return retry(error);
}
}
module.exports = fetchItem; module.exports = fetchItem;

View File

@ -5,79 +5,130 @@ const path = require('path');
const url = require('url'); const url = require('url');
const dateFns = require('date-fns'); const dateFns = require('date-fns');
const mime = require('mime-types'); const mime = require('mime-types');
const format = require('template-format');
function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) { function interpolate(pattern, item = null, content = null, host = null, post = null, user = null, strip = true, dateFormat = config.library.dateFormat) {
const vars = { const data = {
$base: config.library.base, tags: {},
}; };
if (user) { if (item) {
Object.assign(vars, { Object.assign(data, {
$user: user.name, item: {
$username: user.name, id: item.id,
$userId: user.id, title: item.title && item.title.slice(0, config.library.titleLength),
$userCreated: dateFns.format(user.created, dateFormat), description: item.description,
$userVerified: user.verified ? config.library.booleans.verified : '', date: dateFns.format(item.datetime, dateFormat),
$userVerifiedEmail: user.verifiedEmail ? config.library.booleans.verifiedEmail : '', index: item.index + config.library.indexOffset,
$userGold: user.gold ? config.library.booleans.gold : '', },
ext: item.type
? `.${mime.extension(item.type)}`
: path.extname(url.parse(item.url).pathname),
tags: {
...data.tags,
extracted: item.extracted && config.library.tags.extracted,
},
}); });
}
if (user.profile) { if (host) {
Object.assign(vars, { Object.assign(data, {
$profileId: user.profile.id, host: {
$profileTitle: user.profile.title, id: host.id,
$profileDescription: user.profile.description, label: host.label,
$profileOver18: user.profile.over18 ? config.library.booleans.over18 : '', name: host.label,
}); },
} });
}
if (content && content.album) {
Object.assign(data, {
album: {
id: content.album.id,
title: content.album.title && content.album.title.slice(0, config.library.titleLength),
description: content.album.description,
date: dateFns.format(content.album.datetime, dateFormat),
},
});
} }
if (post) { if (post) {
Object.assign(vars, { Object.assign(data, {
$postId: post.id, post: {
$postTitle: (post.title || '').slice(0, config.library.titleLength), id: post.id,
$postUser: post.user, title: post.title && post.title.slice(0, config.library.titleLength),
$postDate: dateFns.format(post.datetime, dateFormat), url: post.url,
$postIndex: post.index + config.library.indexOffset, user: post.user,
$postHash: post.hash, date: dateFns.format(post.datetime, dateFormat),
$url: post.url, index: post.index + config.library.indexOffset,
$score: post.score, hash: post.hash,
$subreddit: post.subreddit, score: post.score,
$hostLabel: post.host.label, subreddit: post.subreddit,
$hostId: post.host.id, },
});
}
if (user) {
Object.assign(data, {
user: {
name: user.name,
username: user.name,
id: user.id,
created: dateFns.format(user.created, dateFormat),
},
tags: {
...data.tags,
verified: user.verified && config.library.tags.verified,
verifiedEmail: user.verifiedEmail && config.library.tags.verifiedEmail,
gold: user.gold && config.library.tags.gold,
},
}); });
if (post.content.album) { if (user.profile) {
Object.assign(vars, { Object.assign(data, {
$albumId: post.content.album.id, profile: {
$albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength), id: user.profile.id,
$albumDescription: post.content.album.description, title: user.profile.title,
$albumDate: dateFns.format(post.content.album.datetime, dateFormat), description: user.profile.description,
},
tags: {
...data.tags,
over18: user.profile.over18 && config.library.tags.over18,
},
}); });
} }
} }
if (item) { const strippedData = strip
Object.assign(vars, { ? Object.entries(data).reduce((acc, [key, value]) => {
$itemId: item.id, if (typeof value === 'string') {
$itemTitle: (item.title || '').slice(0, config.library.titleLength), return {
$itemDescription: item.description, ...acc,
$itemDate: dateFns.format(item.datetime, dateFormat), [key]: value && value.toString().replace(/\//g, config.library.slashSubstitute),
$itemIndex: item.index + config.library.indexOffset, };
$extracted: item.extracted ? config.library.booleans.extracted : '', }
$preview: item.preview ? config.library.booleans.preview : '',
$ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname),
});
}
return Object.entries(vars).reduce((acc, [key, value]) => { return {
// substitute slashes for filesystem compatability ...acc,
if (key !== '$base' && strip) { [key]: Object.entries(value).reduce((subacc, [subkey, subvalue]) => ({
value = (value || '').toString().replace(/\//g, config.library.slashSubstitute); ...subacc,
} [subkey]: subvalue && subvalue.toString().replace(/\//g, config.library.slashSubstitute),
}), {}),
};
}, {})
: data;
return acc.replace(key, value); const base = {
}, pattern); posts: format(config.library.base.posts, strippedData),
direct: format(config.library.base.direct, strippedData),
};
const interpolated = format(pattern, {
base,
...strippedData,
});
return interpolated;
} }
module.exports = interpolate; module.exports = interpolate;

View File

@ -1,32 +1,30 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const base = 'https://www.erome.com/'; const base = 'https://www.erome.com/';
function erome(post) { async function erome(host) {
return fetch(`${base}a/${post.host.id}`).then(res => { const res = await fetch(`${base}a/${host.id}`);
if(res.ok) {
return res.text();
}
throw new Error(`Unable to retrieve info for Erome album '${post.host.id}' :(`); if (res.ok) {
}).then(res => { throw new Error(`Unable to retrieve info for Erome album '${host.id}' :(`);
const $ = cheerio.load(res); }
const videoUrls = $('video').toArray().map(videoEl => {
const sourceHd = $(videoEl).find('source[label="HD"]');
const sourceSd = $(videoEl).find('source[label="SD"]');
console.log(sourceHd.attr('src')); const html = await res.text();
return sourceHd ? base + sourceHd.attr('src') : base + sourceSd.attr('src'); const $ = cheerio.load(html);
}); const videoUrls = $('video').toArray().map((videoEl) => {
const sourceHd = $(videoEl).find('source[label="HD"]');
const sourceSd = $(videoEl).find('source[label="SD"]');
console.log(videoUrls); console.log(sourceHd.attr('src'));
return sourceHd ? base + sourceHd.attr('src') : base + sourceSd.attr('src');
}); });
};
console.log(videoUrls);
}
module.exports = erome; module.exports = erome;

View File

@ -1,42 +1,38 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config'); const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
function eroshareAlbum(post) { async function eroshareAlbum(host) {
return fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/${post.host.id}`).then(res => { const res = await fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/${host.id}`);
if(res.ok) {
return res.text();
}
return Promise.reject(`Unable to recover Eroshare album or item '${post.host.id}' :(`); if (!res.ok) {
}).then(res => { throw new Error(`Unable to recover Eroshare album or item '${host.id}' :(`);
const data = JSON.parse(res.match(/var album = .*/)[0].slice(12, -1)); }
const extract = config.library.album.extractSingleItem && data.items.length === 1;
return { const html = await res.text();
album: extract ? null : { const data = JSON.parse(html.match(/var album = .*/)[0].slice(12, -1));
id: data.slug, const extract = config.library.extractSingleAlbumItem && data.items.length === 1;
title: data.title,
datetime: new Date(data.created_at) return {
}, album: extract ? null : {
items: data.items.map(item => { id: data.slug,
return { title: data.title,
extracted: extract, datetime: new Date(data.created_at),
id: item.slug, },
url: item.type === 'Image' ? item.url_full_protocol : item.url_mp4, items: data.items.map(item => ({
title: data.title, extracted: extract,
description: item.description, id: item.slug,
type: item.type === 'Image' ? 'image/jpeg' : 'video/mp4', url: item.type === 'Image' ? item.url_full_protocol : item.url_mp4,
datetime: new Date(data.created_at), title: data.title,
width: data.width, description: item.description,
height: data.height, type: item.type === 'Image' ? 'image/jpeg' : 'video/mp4',
original: item datetime: new Date(data.created_at),
}; width: data.width,
}) height: data.height,
}; original: item,
}); })),
}; };
}
module.exports = eroshareAlbum; module.exports = eroshareAlbum;

View File

@ -1,34 +1,30 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const mime = require('mime-types');
function eroshareItem(post) { async function eroshareItem(host, post) {
return fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/i/${post.host.id}`).then(res => { const res = await fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/i/${host.id}`);
if(res.ok) { if (!res.ok) {
return res.text(); throw new Error(`Unable to recover Eroshare item '${host.id}' :(`);
} }
return Promise.reject(`Unable to recover Eroshare item '${post.host.id}' :(`); const html = await res.text();
}).then(res => {
const $ = cheerio.load(res);
const videoElement = $('source[data-default="true"]');
return { const $ = cheerio.load(html);
album: null, const videoElement = $('source[data-default="true"]');
items: [{
id: post.host.id, return {
url: videoElement.attr('src'), album: null,
title: post.title, items: [{
type: videoElement.attr('type'), id: host.id,
datetime: post.datetime, url: videoElement.attr('src'),
original: post title: post ? post.title : null,
}] type: videoElement.attr('type'),
}; datetime: post ? post.datetime : null,
}); original: post || null,
}; }],
};
}
module.exports = eroshareItem; module.exports = eroshareItem;

View File

@ -2,8 +2,8 @@
const fetch = require('node-fetch'); const fetch = require('node-fetch');
async function gfycat(post) { async function gfycat(host) {
const res = await fetch(`https://api.gfycat.com/v1/gfycats/${post.host.id}`); const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.json(); const data = await res.json();
if (data.error) { if (data.error) {

View File

@ -2,52 +2,51 @@
const config = require('config'); const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const mime = require('mime-types'); // const mime = require('mime-types');
async function imgurAlbumApi(post) { async function imgurAlbumApi(host, post) {
return fetch(`https://api.imgur.com/3/album/${post.host.id}`, { const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, {
headers: { headers: {
Authorization: `Client-ID ${config.methods.imgur.clientId}`, Authorization: `Client-ID ${config.methods.imgur.clientId}`,
}, },
}).then(res => res.json()).then((res) => {
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur album '${post.host.id}': ${res.data.error}`);
}
const extract = config.library.album.extractSingleItem && res.data.images.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.data.link}`);
}
return {
album: extract ? null : {
id: res.data.id,
url: res.data.link,
title: res.data.title,
description: res.data.description,
datetime: new Date(res.data.datetime * 1000),
original: res.data,
},
items: res.data.images.map(item => ({
extracted: extract,
id: item.id,
url: item.animated ? item.mp4 : item.link,
title: item.title || (extract ? res.data.title : null),
description: item.description || (extract ? res.data.description : null),
type: item.animated ? 'video/mp4' : item.type,
datetime: item.datetime * 1000,
original: item,
})),
};
}); });
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`);
}
const { data } = await res.json();
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
}
return {
album: extract ? null : {
id: data.id,
url: data.link,
title: data.title,
description: data.description,
datetime: new Date(data.datetime * 1000),
original: data,
},
items: data.images.map(item => ({
extracted: extract,
id: item.id,
url: item.animated ? item.mp4 : item.link,
title: item.title || data.title || null,
description: item.description || data.description || null,
type: item.animated ? 'video/mp4' : item.type,
datetime: item.datetime * 1000,
original: item,
})),
};
} }
async function imgurAlbum(post) { /*
return imgurAlbumApi(post); * as of late 2019, imgur requires log in to view albums and gallery images
async function imgurAlbum(host, post) {
/*
* as of late 2019, imgur requires log in to view albums and gallery images
const res = await fetch(`https://imgur.com/a/${post.host.id}`); const res = await fetch(`https://imgur.com/a/${post.host.id}`);
const html = await res.text(); const html = await res.text();
@ -84,7 +83,7 @@ async function imgurAlbum(post) {
datetime: new Date(item.datetime), datetime: new Date(item.datetime),
})), })),
}; };
*/
} }
*/
module.exports = imgurAlbum; module.exports = imgurAlbumApi;

View File

@ -3,33 +3,35 @@
const config = require('config'); const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
async function imgurImageApi(post) { async function imgurImageApi(host) {
return fetch(`https://api.imgur.com/3/image/${post.host.id}`, { const res = await fetch(`https://api.imgur.com/3/image/${host.id}`, {
headers: { headers: {
Authorization: `Client-ID ${config.methods.imgur.clientId}`, Authorization: `Client-ID ${config.methods.imgur.clientId}`,
}, },
}).then(res => res.json()).then((res) => {
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur image '${post.host.id}': ${res.data.error}`);
}
return {
album: null,
items: [{
id: res.data.id,
url: res.data.animated ? res.data.mp4 : res.data.link,
title: res.data.title,
description: res.data.description,
type: res.data.animated ? 'video/mp4' : res.data.type,
datetime: new Date(res.data.datetime * 1000),
original: res.data,
}],
};
}); });
const { data } = await res.json();
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`);
}
return {
album: null,
items: [{
id: data.id,
url: data.animated ? data.mp4 : data.link,
title: data.title,
description: data.description,
type: data.animated ? 'video/mp4' : data.type,
datetime: new Date(data.datetime * 1000),
original: data,
}],
};
} }
async function imgurImage(post) { async function imgurImage(host, post) {
return imgurImageApi(post); return imgurImageApi(host, post);
/* /*
* as of late 2019, imgur requires log in to view albums and gallery images * as of late 2019, imgur requires log in to view albums and gallery images

View File

@ -1,31 +1,33 @@
'use strict'; 'use strict';
const self = require('./self'); const erome = require('./erome');
const redditImage = require('./redditImage');
const redditVideo = require('./redditVideo');
const redditPreview = require('./redditPreview');
const imgurImage = require('./imgurImage');
const imgurAlbum = require('./imgurAlbum');
const vidbleImage = require('./vidbleImage');
const vidbleVideo = require('./vidbleVideo');
const vidbleAlbum = require('./vidbleAlbum');
const gfycat = require('./gfycat');
const eroshareAlbum = require('./eroshareAlbum'); const eroshareAlbum = require('./eroshareAlbum');
const eroshareItem = require('./eroshareItem'); const eroshareItem = require('./eroshareItem');
const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage');
const pornhub = require('./pornhub'); const pornhub = require('./pornhub');
const redditImage = require('./redditImage');
const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo');
const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage');
const vidbleVideo = require('./vidbleVideo');
module.exports = { module.exports = {
self, erome,
redditImage,
redditVideo,
redditPreview,
imgurImage,
imgurAlbum,
vidbleImage,
vidbleVideo,
vidbleAlbum,
gfycat,
eroshareAlbum, eroshareAlbum,
eroshareItem, eroshareItem,
gfycat,
imgurAlbum,
imgurImage,
pornhub, pornhub,
redditImage,
redditPreview,
redditVideo,
self,
vidbleAlbum,
vidbleImage,
vidbleVideo,
}; };

View File

@ -2,11 +2,11 @@
const fetch = require('node-fetch'); const fetch = require('node-fetch');
async function pornhub(post) { async function pornhub(host, post) {
const res = await fetch(`https://www.pornhub.com/view_video.php?viewkey=${post.host.id}`); const res = await fetch(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`);
if (res.status !== 200) { if (res.status !== 200) {
throw new Error(`Could not fetch info for vidble album '${post.host.id}': '${res.error}'`); throw new Error(`Could not fetch info PornHub video '${host.id}': '${res.error}'`);
} }
const html = await res.text(); const html = await res.text();
@ -27,11 +27,11 @@ async function pornhub(post) {
return { return {
album: null, album: null,
items: [{ items: [{
id: post.host.id, id: host.id,
url, url,
title: post.title, title: post ? post.title : null,
type: 'video/mp4', type: 'video/mp4',
datetime: post.datetime, datetime: post ? post.datetime : null,
}], }],
}; };
} }

View File

@ -1,21 +1,19 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const mime = require('mime-types'); const mime = require('mime-types');
/**
 * Build the content descriptor for an image linked directly by a reddit post.
 *
 * @param {Object} host - Host match for the post; `host.id` is preferred as the item ID.
 * @param {Object} post - Reddit post; `id`, `url`, `title` and `datetime` are read.
 * @returns {Promise<Object>} `{ album: null, items: [item] }` holding a single image item.
 */
async function redditImage(host, post) {
    // Drop the query string and fragment before the MIME lookup; with them attached
    // the extension is not recognised and every image falls back to JPEG.
    const [cleanUrl] = post.url.split(/[?#]/);

    return {
        album: null,
        items: [{
            id: host.id || post.id,
            url: post.url,
            title: post.title,
            datetime: post.datetime,
            type: mime.lookup(cleanUrl) || 'image/jpeg',
            original: post,
        }],
    };
}
module.exports = redditImage; module.exports = redditImage;

View File

@ -1,33 +1,26 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const path = require('path');
const fetch = require('node-fetch');
const mime = require('mime-types'); const mime = require('mime-types');
const urlPattern = require('url-pattern');
/**
 * Build content descriptors from the preview images attached to a reddit post.
 *
 * @param {Object} host - Host match for the post; `host.id` is preferred as the ID.
 * @param {Object} post - Reddit post; `id`, `url`, `title`, `datetime` and the
 *   `preview` array (entries with a `url`) are read.
 * @returns {Promise<Object>} `{ album, items }`; `album` is null unless the post
 *   has more than one preview image.
 */
async function redditPreview(host, post) {
    // Use the host passed in, like the other content modules, rather than post.host.
    const id = host.id || post.id;

    return {
        album: post.preview.length > 1 ? {
            id,
            url: post.url,
            title: post.title,
            datetime: post.datetime,
            original: post,
        } : null,
        items: post.preview.map(image => ({
            id,
            url: image.url,
            title: post.title,
            datetime: post.datetime,
            // Preview URLs carry a query string that must not reach the MIME lookup.
            type: mime.lookup(image.url.split('?')[0]),
            preview: true,
            original: post,
        })),
    };
}
module.exports = redditPreview; module.exports = redditPreview;

View File

@ -1,38 +1,35 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const fs = require('fs-extra');
/**
 * Resolve the video (and optional separate audio track) of a reddit-hosted video post.
 *
 * @param {Object} host - Host match for the post; `host.id` is preferred as the item ID.
 * @param {Object} post - Reddit post; `id`, `permalink`, `title` and `datetime` are read.
 * @returns {Promise<Object>} `{ album: null, items: [item] }`; the item gains a
 *   `mux` array with the audio URL when the audio track exists.
 * @throws {Error} When the post JSON cannot be fetched or contains no reddit video.
 */
async function redditVideo(host, post) {
    const res = await fetch(`${post.permalink}.json`);

    if (res.status !== 200) {
        throw new Error(`Could not fetch info for reddit video '${post.permalink}': '${res.status} ${res.statusText}'`);
    }

    const [{ data }] = await res.json();
    const { media } = data.children[0].data;

    if (!media || !media.reddit_video) {
        throw new Error(`Could not find reddit video for '${post.permalink}'`);
    }

    const videoUrl = media.reddit_video.fallback_url;
    // The audio track sits next to the video file, under the same parent path.
    const audioUrl = `${videoUrl.split('/').slice(0, -1).join('/')}/audio`;

    // HEAD is enough: only the existence of the audio track matters here.
    const audioRes = await fetch(audioUrl, {
        method: 'HEAD',
    });

    const item = {
        album: null,
        items: [{
            id: host.id || post.id,
            url: videoUrl,
            title: post.title,
            datetime: post.datetime,
            type: 'video/mp4',
            original: post,
        }],
    };

    if (audioRes.status === 200) {
        item.items[0].mux = [audioUrl];
    }

    return item;
}
module.exports = redditVideo; module.exports = redditVideo;

View File

@ -1,55 +1,52 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config'); const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const urlPattern = require('url-pattern'); const UrlPattern = require('url-pattern');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const mime = require('mime-types'); const mime = require('mime-types');
const pattern = new urlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)'); const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)');
/**
 * Scrape a vidble album page and describe the album and its images.
 *
 * @param {Object} host - Host match; `host.id` is the vidble album ID.
 * @param {Object} [post] - Originating reddit post, absent when the album is
 *   fetched directly from the host. `url`, `title` and `datetime` are read.
 * @returns {Promise<Object>} `{ album, items }`; `album` is null when the album
 *   holds a single image and `config.library.extractSingleAlbumItem` is set.
 * @throws {Error} When the album page does not respond with HTTP 200.
 */
async function vidbleAlbum(host, post) {
    const res = await fetch(`https://www.vidble.com/album/${host.id}`);

    if (res.status !== 200) {
        throw new Error(`Could not fetch info for vidble album '${host.id}': '${res.status} ${res.statusText}'`);
    }

    const html = await res.text();
    const $ = cheerio.load(html);

    const title = $('h2').text();
    const imgUrls = $('img.img2').toArray().map(img => `https://vidble.com${img.attribs.src || img.attribs['data-original']}`);

    const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1;

    // Without a post there is no post metadata; fall back to what the page offers.
    const albumTitle = post ? post.title : title;
    const datetime = post ? post.datetime : null;

    if (extract) {
        console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${albumTitle}' - ${res.url}`);
    }

    return {
        album: extract ? null : {
            id: host.id,
            url: post ? post.url : null,
            title: albumTitle,
            datetime,
        },
        items: imgUrls.map((url) => {
            const components = pattern.match(url);
            // Album pages link the '_med' rendition; drop the suffix from the ID.
            const id = components.id.replace('_med', '');
            const mimetype = mime.lookup(components.ext);

            return {
                extracted: extract,
                id,
                url: `https://vidble.com/${id}.${components.ext}`,
                type: mimetype,
                datetime,
            };
        }),
    };
}
module.exports = vidbleAlbum; module.exports = vidbleAlbum;

View File

@ -1,34 +1,30 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const path = require('path');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const $ = require('cheerio'); const $ = require('cheerio');
const mime = require('mime-types'); const mime = require('mime-types');
/**
 * Scrape a vidble image page and describe the image it shows.
 *
 * @param {Object} host - Host match; `host.id` is the vidble image ID.
 * @param {Object} [post] - Originating reddit post, absent when the image is
 *   fetched directly from the host. `title` and `datetime` are read.
 * @returns {Promise<Object>} `{ album: null, items: [item] }` holding a single image item.
 * @throws {Error} When the page does not respond with HTTP 200 or contains no image.
 */
async function vidbleImage(host, post) {
    const res = await fetch(`https://vidble.com/${host.id}`);

    if (res.status !== 200) {
        throw new Error(`Could not fetch info for vidble image '${host.id}': '${res.status} ${res.statusText}'`);
    }

    const html = await res.text();
    const resource = $('img', html).attr('src');

    if (!resource) {
        throw new Error(`Failed to retrieve (likely removed) vidble image '${host.id}'`);
    }

    return {
        album: null,
        items: [{
            id: host.id,
            // Strip leading slashes so a root-relative src does not produce '//'.
            url: `https://vidble.com/${resource.replace(/^\/+/, '')}`,
            title: post ? post.title : null,
            datetime: post ? post.datetime : null,
            type: mime.lookup(resource),
            original: post || null,
        }],
    };
}
module.exports = vidbleImage; module.exports = vidbleImage;

View File

@ -1,39 +1,37 @@
'use strict'; 'use strict';
const util = require('util');
const config = require('config');
const fetch = require('node-fetch'); const fetch = require('node-fetch');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
/**
 * Scrape a vidble watch page and describe the video it embeds.
 *
 * @param {Object} host - Host match; `host.id` is the vidble video ID.
 * @param {Object} [post] - Originating reddit post, absent when the video is
 *   fetched directly from the host. `title` and `datetime` are read.
 * @returns {Promise<Object>} `{ album: null, items: [item] }` holding a single video item.
 * @throws {Error} When the page does not respond with HTTP 200, or when it has
 *   no video source (likely a removed video).
 */
async function vidbleVideo(host, post) {
    const res = await fetch(`https://www.vidble.com/watch?v=${host.id}`);

    if (res.status !== 200) {
        throw new Error(`Could not fetch info for vidble video '${host.id}': '${res.status} ${res.statusText}'`);
    }

    const html = await res.text();
    const $ = cheerio.load(html);

    const resource = $('video source');
    const source = resource.attr('src');
    const type = resource.attr('type');

    if (!source || !type) {
        throw new Error(`Failed to retrieve (likely removed) vidble video '${host.id}'`);
    }

    return {
        album: null,
        items: [{
            id: host.id,
            // Strip leading slashes so a root-relative src does not produce '//'.
            url: `https://vidble.com/${source.replace(/^\/+/, '')}`,
            title: post ? post.title : null,
            datetime: post ? post.datetime : null,
            type,
            original: post || null,
        }],
    };
}
module.exports = vidbleVideo; module.exports = vidbleVideo;

View File

@ -25,10 +25,17 @@ async function saveProfileImage(user, args) {
return null; return null;
} }
const filepath = interpolate(config.library.profile.image, user, null, { const filepath = interpolate(
config.library.profile.image,
{
// pass profile image as item to interpolate extension variable // pass profile image as item to interpolate extension variable
url: image, url: image,
}); },
null,
null,
null,
user,
);
try { try {
const stream = await fetchItem(image, 0, { permalink: `https://reddit.com/user/${user.name}` }); const stream = await fetchItem(image, 0, { permalink: `https://reddit.com/user/${user.name}` });
@ -54,7 +61,7 @@ async function saveProfileDescription(user, args) {
if (config.library.profile.description && !user.fallback && !user.deleted) { if (config.library.profile.description && !user.fallback && !user.deleted) {
if (user.profile && user.profile.description) { if (user.profile && user.profile.description) {
const filepath = interpolate(config.library.profile.description, user); const filepath = interpolate(config.library.profile.description, null, null, null, null, user);
const stream = textToStream(user.profile.description); const stream = textToStream(user.profile.description);
try { try {

View File

@ -8,7 +8,7 @@ const textToStream = require('./textToStream');
const save = require('./save'); const save = require('./save');
async function writeToIndex(posts, profilePaths, user, args) { async function writeToIndex(posts, profilePaths, user, args) {
const filepath = interpolate(config.library.index.file, user, null, false); const filepath = interpolate(config.library.index.file, null, null, null, null, user, false);
const now = new Date(); const now = new Date();
const newAndUpdatedEntries = posts.concat(user.indexed.updated, args.indexIgnored ? user.indexed.ignored : []).map((post) => { const newAndUpdatedEntries = posts.concat(user.indexed.updated, args.indexIgnored ? user.indexed.ignored : []).map((post) => {

View File

@ -7,7 +7,7 @@ const yaml = require('js-yaml');
const interpolate = require('../interpolate.js'); const interpolate = require('../interpolate.js');
async function getIndex(user) { async function getIndex(user) {
const indexFilePath = interpolate(config.library.index.file, user, null, null, false); const indexFilePath = interpolate(config.library.index.file, null, null, null, null, user, false);
try { try {
const indexFile = await fs.readFile(indexFilePath, 'utf8'); const indexFile = await fs.readFile(indexFilePath, 'utf8');