From eee3ba0abc813f65e2454aad0d9dc8eaa61680ba Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Fri, 1 Nov 2019 04:22:36 +0100 Subject: [PATCH] Added support for fetching content directly from host. Improved pattern interpolation. Refactored content modules. --- config/default.js | 46 ++++++---- package-lock.json | 7 +- package.json | 1 + src/app.js | 49 +++++++++-- src/cli.js | 5 ++ src/fetch/content.js | 54 ++++++++++-- src/fetch/info.js | 14 ++- src/fetch/item.js | 28 +++--- src/interpolate.js | 165 +++++++++++++++++++++++------------ src/methods/erome.js | 36 ++++---- src/methods/eroshareAlbum.js | 64 +++++++------- src/methods/eroshareItem.js | 48 +++++----- src/methods/gfycat.js | 4 +- src/methods/imgurAlbum.js | 81 +++++++++-------- src/methods/imgurImage.js | 44 +++++----- src/methods/methods.js | 42 ++++----- src/methods/pornhub.js | 12 +-- src/methods/redditImage.js | 18 ++-- src/methods/redditPreview.js | 37 ++++---- src/methods/redditVideo.js | 57 ++++++------ src/methods/vidbleAlbum.js | 77 ++++++++-------- src/methods/vidbleImage.js | 46 +++++----- src/methods/vidbleVideo.js | 58 ++++++------ src/save/profileDetails.js | 15 +++- src/save/writeToIndex.js | 2 +- src/sources/getIndex.js | 2 +- 26 files changed, 572 insertions(+), 440 deletions(-) diff --git a/config/default.js b/config/default.js index 32fd4f1..a06bdcf 100644 --- a/config/default.js +++ b/config/default.js @@ -2,26 +2,41 @@ module.exports = { library: { - base: 'output/$user/', - image: '$base$postDate - $preview$itemId - $postTitle$ext', - video: '$base$postDate - $preview$itemId - $postTitle$ext', - text: '$base$postDate - $preview$postId - $postTitle', - album: { - image: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext', - video: '$base$postDate - $preview$albumId - $postTitle/$itemIndex - $itemId$ext', - extractSingleItem: true, + base: { + posts: 'output/{user.name}/', + direct: 'output/{host.name}/', }, + posts: { + image: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}', + video: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}', + text: '{base.posts}{post.date} - {tags.preview}{post.id} - {post.title}', + album: { + image: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}', + video: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}', + }, + }, + direct: { + image: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}', + video: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}', + text: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}', + album: { + image: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}', + video: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}', + }, + }, + extractSingleAlbumItem: true, profile: { - image: '$base$userCreated - profile$ext', - description: '$base$userCreated - profile ($userVerified$userVerifiedEmail$userGold$profileOver18)', + image: '{base.posts}{user.created} - profile{ext}', + description: '{base.posts}{user.created} - profile ({tags.verified}{tags.verifiedEmail}{tags.gold}{tags.over18})', avoidAvatar: true, }, index: { - file: '$base/index', - format: 'tsv', - keys: ['postId', 'subreddit', 'postDate', 'url', 'hostId', 'postTitle'], + file: '{base.posts}index', }, - booleans: { + meta: { + comment: '{item.description}', + }, + tags: { extracted: 'extracted-', preview: 'preview-', verified: '✔', @@ -29,9 +44,6 @@ module.exports = { gold: '★', over18: '♥', }, - meta: { - comment: '$itemDescription', - }, dateFormat: 'YYYYMMDD', truncate: { limit: 250, diff --git a/package-lock.json b/package-lock.json index 041be3f..a8280f0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -528,7 +528,7 @@ "integrity": "sha1-i9F7rB5D3qzYHZrazQ8eJMaQEA8=", "requires": { "exiftool.exe": "10.53", - "exiftool.pl": "10.53", + "exiftool.pl": "^10.53.0", "platform-dependent-modules": "0.0.14" } }, @@ -2204,6 +2204,11 @@ "string-width": "^2.1.1" } }, + "template-format": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/template-format/-/template-format-1.2.4.tgz", + "integrity": "sha512-+8ItNMtMTBbsEHyPR1l7Ke1WZfl91PAcoTvwAcx5U28CRLd7ylzDLazv0kuDTzNmdq/RAOnsxFVWzr4QwVIFVg==" + }, "text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", diff --git a/package.json b/package.json index c2264f5..9da83dc 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "object.omit": "^3.0.0", "object.pick": "^1.3.0", "snoowrap": "^1.15.2", + "template-format": "^1.2.4", "url-pattern": "^1.0.3", "yargs": "^11.0.0" }, diff --git a/src/app.js b/src/app.js index 1e41c00..465f5cd 100644 --- a/src/app.js +++ b/src/app.js @@ -14,10 +14,11 @@ require('array.prototype.flatten').shim(); const reddit = new Snoowrap(config.reddit.api); const args = require('./cli.js')(); +const dissectLink = require('./dissectLink.js'); const curatePosts = require('./curate/posts.js'); -const attachContentInfo = require('./fetch/info.js'); -const fetchSaveContent = require('./fetch/content.js'); +const { attachContentInfo, getInfo } = require('./fetch/info.js'); +const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js'); const getPosts = require('./sources/getPosts.js')(reddit, args); const getUserPosts = require('./sources/getUserPosts.js')(reddit, args); @@ -34,7 +35,7 @@ async function getFileContents(location, label) { } } -async function getCompleteUserPosts() { +async function getCompletePosts() { let userPosts = {}; let ignoreIds = []; let usernames = args.users || []; @@ -69,17 +70,51 @@ async function getCompleteUserPosts() { return attachContentInfo(curatedUserPosts); } +async function getDirectContent(link, ep) { + const host = dissectLink(link); + const info = await getInfo(host); + + console.log(info); + + return fetchSaveDirectContent(info, host, ep); + + /* + console.log(info); + + await Promise.all(info.items.map(async (item, index) => { + const stream = await fetchItem(item.url); + + if (info.album) { + const filepath = `./output/${host.label}/${host.id} - ${info.album.title}/${index + 1} - ${item.id}${path.extname(url.parse(item.url).pathname)}`; + console.log(filepath); + + return saveItem(filepath, stream, item); + } + + const filepath = `./output/${host.label}/${item.id}${path.extname(url.parse(item.url).pathname)}`; + console.log(filepath); + + return saveItem(filepath, stream, item); + })); + */ +} + function fetchSavePosts(userPosts, ep) { // don't map to apply concurrency limit and reduce network stress - return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveContent(user, ep, args), null); + return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveUserContent(user, ep, args), null); } async function initApp() { try { - const userPosts = await getCompleteUserPosts(); - const ep = new exiftool.ExiftoolProcess(exiftoolBin); + if (args.fetch) { + await getDirectContent(args.fetch, ep); + return; + } + + const userPosts = await getCompletePosts(); + await ep.open(); await fetchSavePosts(userPosts, ep); await ep.close(); @@ -94,8 +129,6 @@ async function initApp() { console.log('\x1b[31m%s\x1b[0m', error.message); } } - - return true; } initApp(); diff --git a/src/cli.js b/src/cli.js index 3ad8243..46a449e 100644 --- a/src/cli.js +++ b/src/cli.js @@ -24,6 +24,11 @@ function getArgs() { describe: 'Load reddit post IDs from file', type: 'string', }) + .option('direct', { + describe: 'Get content directly from imgur and other hosts', + type: 'string', + alias: 'fetch', + }) .option('limit', { describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts', type: 'number', diff --git a/src/fetch/content.js b/src/fetch/content.js index 054299e..3aeebd4 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -14,7 +14,7 @@ const writeToIndex = require('../save/writeToIndex.js'); async function getStreams(item, post) { if (item.self) { - return [textToStream(item.text)]; + return [textToStream(post.text)]; } const sources = item.mux ? [item.url].concat(item.mux) : [item.url]; @@ -33,7 +33,7 @@ async function addMeta(filepath, item, post, user, ep) { } const meta = Object.entries(config.library.meta).reduce((acc, [key, value]) => { - const interpolatedValue = interpolate(value, user, post, item); + const interpolatedValue = interpolate(value, item, null, null, post, user); return interpolatedValue ? { ...acc, [key]: interpolatedValue } : acc; }, {}); @@ -45,15 +45,25 @@ async function addMeta(filepath, item, post, user, ep) { return false; } -function getFilepath(item, post, user) { +function getFilepath(item, content, host, post, user) { const type = item.type.split('/')[0]; - return post.content.album - ? interpolate(config.library.album[type], user, post, item) - : interpolate(config.library[type], user, post, item); + if (post && content.album) { + return interpolate(config.library.posts.album[type], item, content, host, post, user); + } + + if (post) { + return interpolate(config.library.posts[type], item, content, host, post, user); + } + + if (content.album) { + return interpolate(config.library.direct.album[type], item, content, host); + } + + return interpolate(config.library.direct[type], item, content, host); } -async function fetchSaveContent(user, ep, args) { +async function fetchSaveUserContent(user, ep, args) { const profilePaths = await saveProfileDetails(user, args); const posts = await Promise.map(user.posts, async (post) => { @@ -66,7 +76,7 @@ async function fetchSaveContent(user, ep, args) { return accItems; } - const filepath = getFilepath(item, post, user); + const filepath = getFilepath(item, post.content, post.host, post, user); const sourcePaths = await save(filepath, streams, item, post); if (item.mux) { @@ -86,4 +96,30 @@ async function fetchSaveContent(user, ep, args) { return writeToIndex(posts, profilePaths, user, args); } -module.exports = fetchSaveContent; +async function fetchSaveDirectContent(content, host, ep) { + const data = await Promise.reduce(content.items, async (accItems, originalItem, index) => { + const item = { ...originalItem, index }; + const streams = await getStreams(item, null); + + // no streams, ignore item + if (!streams || streams.length === 0) { + return accItems; + } + + const filepath = getFilepath(item, content, host, null, null); + const sourcePaths = await save(filepath, streams, item, null); + + if (item.mux) { + await mux(filepath, sourcePaths, item); + } + + await addMeta(filepath, item, null, null, ep); + + return sourcePaths; + }, []); +} + +module.exports = { + fetchSaveUserContent, + fetchSaveDirectContent, +}; diff --git a/src/fetch/info.js b/src/fetch/info.js index c28bbfd..0cd88a8 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -2,6 +2,7 @@ const config = require('config'); const Promise = require('bluebird'); + const methods = require('../methods/methods.js'); const attachContentInfo = users => Promise.reduce(Object.values(users), async (accUsers, user) => ({ @@ -16,14 +17,14 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a } try { - return [...accPosts, { ...post, content: await methods[post.host.method](post) }]; + return [...accPosts, { ...post, content: await methods[post.host.method](post.host, post) }]; } catch (error) { console.log('\x1b[31m%s\x1b[0m', `${error} (${post.permalink})`); if (config.fetch.archives.preview && post.preview) { console.log(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); - return [...accPosts, { ...post, previewFallback: true, content: await methods.redditPreview(post) }]; + return [...accPosts, { ...post, previewFallback: true, content: await methods.redditPreview(post.host, post) }]; } return accPosts; @@ -32,4 +33,11 @@ const attachContentInfo = users => Promise.reduce(Object.values(users), async (a }, }), {}); -module.exports = attachContentInfo; +async function getInfo(host) { + return methods[host.method](host); +} + +module.exports = { + attachContentInfo, + getInfo, +}; diff --git a/src/fetch/item.js b/src/fetch/item.js index ccea093..3eb9234 100644 --- a/src/fetch/item.js +++ b/src/fetch/item.js @@ -3,30 +3,32 @@ const config = require('config'); const fetch = require('node-fetch'); -function fetchItem(url, attempt, post) { - function retry(error) { - console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post.permalink})`); +async function fetchItem(url, attempt, post) { + async function retry(error) { + console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`); - if(attempt < config.fetch.retries) { + if (attempt < config.fetch.retries) { console.log('Retrying...'); - return fetchItem(url, ++attempt, post); + return fetchItem(url, attempt + 1, post); } return null; - }; + } - return fetch(url).then(res => { - if(!res.ok) { + try { + const res = await fetch(url); + + if (!res.ok) { throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`); } - return res; - }).then(res => { - console.log(`Fetched '${url}' (${post.permalink})`); + console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`); return res.body; - }).catch(retry); -}; + } catch (error) { + return retry(error); + } +} module.exports = fetchItem; diff --git a/src/interpolate.js b/src/interpolate.js index 27e1873..aa71e7b 100644 --- a/src/interpolate.js +++ b/src/interpolate.js @@ -5,79 +5,130 @@ const path = require('path'); const url = require('url'); const dateFns = require('date-fns'); const mime = require('mime-types'); +const format = require('template-format'); -function interpolate(pattern, user, post, item, strip = true, dateFormat = config.library.dateFormat) { - const vars = { - $base: config.library.base, +function interpolate(pattern, item = null, content = null, host = null, post = null, user = null, strip = true, dateFormat = config.library.dateFormat) { + const data = { + tags: {}, }; - if (user) { - Object.assign(vars, { - $user: user.name, - $username: user.name, - $userId: user.id, - $userCreated: dateFns.format(user.created, dateFormat), - $userVerified: user.verified ? config.library.booleans.verified : '', - $userVerifiedEmail: user.verifiedEmail ? config.library.booleans.verifiedEmail : '', - $userGold: user.gold ? config.library.booleans.gold : '', + if (item) { + Object.assign(data, { + item: { + id: item.id, + title: item.title && item.title.slice(0, config.library.titleLength), + description: item.description, + date: dateFns.format(item.datetime, dateFormat), + index: item.index + config.library.indexOffset, + }, + ext: item.type + ? `.${mime.extension(item.type)}` + : path.extname(url.parse(item.url).pathname), + tags: { + ...data.tags, + extracted: item.extracted && config.library.tags.extracted, + }, }); + } - if (user.profile) { - Object.assign(vars, { - $profileId: user.profile.id, - $profileTitle: user.profile.title, - $profileDescription: user.profile.description, - $profileOver18: user.profile.over18 ? config.library.booleans.over18 : '', - }); - } + if (host) { + Object.assign(data, { + host: { + id: host.id, + label: host.label, + name: host.label, + }, + }); + } + + if (content && content.album) { + Object.assign(data, { + album: { + id: content.album.id, + title: content.album.title && content.album.title.slice(0, config.library.titleLength), + description: content.album.description, + date: dateFns.format(content.album.datetime, dateFormat), + }, + }); } if (post) { - Object.assign(vars, { - $postId: post.id, - $postTitle: (post.title || '').slice(0, config.library.titleLength), - $postUser: post.user, - $postDate: dateFns.format(post.datetime, dateFormat), - $postIndex: post.index + config.library.indexOffset, - $postHash: post.hash, - $url: post.url, - $score: post.score, - $subreddit: post.subreddit, - $hostLabel: post.host.label, - $hostId: post.host.id, + Object.assign(data, { + post: { + id: post.id, + title: post.title && post.title.slice(0, config.library.titleLength), + url: post.url, + user: post.user, + date: dateFns.format(post.datetime, dateFormat), + index: post.index + config.library.indexOffset, + hash: post.hash, + score: post.score, + subreddit: post.subreddit, + }, + }); + } + + if (user) { + Object.assign(data, { + user: { + name: user.name, + username: user.name, + id: user.id, + created: dateFns.format(user.created, dateFormat), + }, + tags: { + ...data.tags, + verified: user.verified && config.library.tags.verified, + verifiedEmail: user.verifiedEmail && config.library.tags.verifiedEmail, + gold: user.gold && config.library.tags.gold, + }, }); - if (post.content.album) { - Object.assign(vars, { - $albumId: post.content.album.id, - $albumTitle: (post.content.album.title || '').slice(0, config.library.titleLength), - $albumDescription: post.content.album.description, - $albumDate: dateFns.format(post.content.album.datetime, dateFormat), + if (user.profile) { + Object.assign(data, { + profile: { + id: user.profile.id, + title: user.profile.title, + description: user.profile.description, + }, + tags: { + ...data.tags, + over18: user.profile.over18 && config.library.tags.over18, + }, }); } } - if (item) { - Object.assign(vars, { - $itemId: item.id, - $itemTitle: (item.title || '').slice(0, config.library.titleLength), - $itemDescription: item.description, - $itemDate: dateFns.format(item.datetime, dateFormat), - $itemIndex: item.index + config.library.indexOffset, - $extracted: item.extracted ? config.library.booleans.extracted : '', - $preview: item.preview ? config.library.booleans.preview : '', - $ext: item.type ? `.${mime.extension(item.type)}` : path.extname(url.parse(item.url).pathname), - }); - } + const strippedData = strip + ? Object.entries(data).reduce((acc, [key, value]) => { + if (typeof value === 'string') { + return { + ...acc, + [key]: value && value.toString().replace(/\//g, config.library.slashSubstitute), + }; + } - return Object.entries(vars).reduce((acc, [key, value]) => { - // substitute slashes for filesystem compatability - if (key !== '$base' && strip) { - value = (value || '').toString().replace(/\//g, config.library.slashSubstitute); - } + return { + ...acc, + [key]: Object.entries(value).reduce((subacc, [subkey, subvalue]) => ({ + ...subacc, + [subkey]: subvalue && subvalue.toString().replace(/\//g, config.library.slashSubstitute), + }), {}), + }; + }, {}) + : data; - return acc.replace(key, value); - }, pattern); + const base = { + posts: format(config.library.base.posts, strippedData), + direct: format(config.library.base.direct, strippedData), + }; + + const interpolated = format(pattern, { + base, + ...strippedData, + }); + + return interpolated; } module.exports = interpolate; diff --git a/src/methods/erome.js b/src/methods/erome.js index c2a93b5..3c1c7e6 100644 --- a/src/methods/erome.js +++ b/src/methods/erome.js @@ -1,32 +1,30 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); const fetch = require('node-fetch'); const cheerio = require('cheerio'); const base = 'https://www.erome.com/'; -function erome(post) { - return fetch(`${base}a/${post.host.id}`).then(res => { - if(res.ok) { - return res.text(); - } +async function erome(host) { + const res = await fetch(`${base}a/${host.id}`); - throw new Error(`Unable to retrieve info for Erome album '${post.host.id}' :(`); - }).then(res => { - const $ = cheerio.load(res); - const videoUrls = $('video').toArray().map(videoEl => { - const sourceHd = $(videoEl).find('source[label="HD"]'); - const sourceSd = $(videoEl).find('source[label="SD"]'); + if (res.ok) { + throw new Error(`Unable to retrieve info for Erome album '${host.id}' :(`); + } - console.log(sourceHd.attr('src')); + const html = await res.text(); - return sourceHd ? base + sourceHd.attr('src') : base + sourceSd.attr('src'); - }); + const $ = cheerio.load(html); + const videoUrls = $('video').toArray().map((videoEl) => { + const sourceHd = $(videoEl).find('source[label="HD"]'); + const sourceSd = $(videoEl).find('source[label="SD"]'); - console.log(videoUrls); + console.log(sourceHd.attr('src')); + + return sourceHd ? base + sourceHd.attr('src') : base + sourceSd.attr('src'); }); -}; + + console.log(videoUrls); +} module.exports = erome; diff --git a/src/methods/eroshareAlbum.js b/src/methods/eroshareAlbum.js index 141b620..a12b833 100644 --- a/src/methods/eroshareAlbum.js +++ b/src/methods/eroshareAlbum.js @@ -1,42 +1,38 @@ - 'use strict'; +'use strict'; -const util = require('util'); const config = require('config'); const fetch = require('node-fetch'); -function eroshareAlbum(post) { - return fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/${post.host.id}`).then(res => { - if(res.ok) { - return res.text(); - } +async function eroshareAlbum(host) { + const res = await fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/${host.id}`); - return Promise.reject(`Unable to recover Eroshare album or item '${post.host.id}' :(`); - }).then(res => { - const data = JSON.parse(res.match(/var album = .*/)[0].slice(12, -1)); - const extract = config.library.album.extractSingleItem && data.items.length === 1; + if (!res.ok) { + throw new Error(`Unable to recover Eroshare album or item '${host.id}' :(`); + } - return { - album: extract ? null : { - id: data.slug, - title: data.title, - datetime: new Date(data.created_at) - }, - items: data.items.map(item => { - return { - extracted: extract, - id: item.slug, - url: item.type === 'Image' ? item.url_full_protocol : item.url_mp4, - title: data.title, - description: item.description, - type: item.type === 'Image' ? 'image/jpeg' : 'video/mp4', - datetime: new Date(data.created_at), - width: data.width, - height: data.height, - original: item - }; - }) - }; - }); -}; + const html = await res.text(); + const data = JSON.parse(html.match(/var album = .*/)[0].slice(12, -1)); + const extract = config.library.extractSingleAlbumItem && data.items.length === 1; + + return { + album: extract ? null : { + id: data.slug, + title: data.title, + datetime: new Date(data.created_at), + }, + items: data.items.map(item => ({ + extracted: extract, + id: item.slug, + url: item.type === 'Image' ? item.url_full_protocol : item.url_mp4, + title: data.title, + description: item.description, + type: item.type === 'Image' ? 'image/jpeg' : 'video/mp4', + datetime: new Date(data.created_at), + width: data.width, + height: data.height, + original: item, + })), + }; +} module.exports = eroshareAlbum; diff --git a/src/methods/eroshareItem.js b/src/methods/eroshareItem.js index aadccdb..b9078ab 100644 --- a/src/methods/eroshareItem.js +++ b/src/methods/eroshareItem.js @@ -1,34 +1,30 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); const fetch = require('node-fetch'); const cheerio = require('cheerio'); -const mime = require('mime-types'); -function eroshareItem(post) { - return fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/i/${post.host.id}`).then(res => { - if(res.ok) { - return res.text(); - } +async function eroshareItem(host, post) { + const res = await fetch(`https://web.archive.org/web/20170630040157im_/https://eroshare.com/i/${host.id}`); + if (!res.ok) { + throw new Error(`Unable to recover Eroshare item '${host.id}' :(`); + } - return Promise.reject(`Unable to recover Eroshare item '${post.host.id}' :(`); - }).then(res => { - const $ = cheerio.load(res); - const videoElement = $('source[data-default="true"]'); + const html = await res.text(); - return { - album: null, - items: [{ - id: post.host.id, - url: videoElement.attr('src'), - title: post.title, - type: videoElement.attr('type'), - datetime: post.datetime, - original: post - }] - }; - }); -}; + const $ = cheerio.load(html); + const videoElement = $('source[data-default="true"]'); + + return { + album: null, + items: [{ + id: host.id, + url: videoElement.attr('src'), + title: post ? post.title : null, + type: videoElement.attr('type'), + datetime: post ? post.datetime : null, + original: post || null, + }], + }; +} module.exports = eroshareItem; diff --git a/src/methods/gfycat.js b/src/methods/gfycat.js index 5972ae6..34df6a9 100644 --- a/src/methods/gfycat.js +++ b/src/methods/gfycat.js @@ -2,8 +2,8 @@ const fetch = require('node-fetch'); -async function gfycat(post) { - const res = await fetch(`https://api.gfycat.com/v1/gfycats/${post.host.id}`); +async function gfycat(host) { + const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`); const data = await res.json(); if (data.error) { diff --git a/src/methods/imgurAlbum.js b/src/methods/imgurAlbum.js index 1fda363..75e7fe9 100644 --- a/src/methods/imgurAlbum.js +++ b/src/methods/imgurAlbum.js @@ -2,52 +2,51 @@ const config = require('config'); const fetch = require('node-fetch'); -const mime = require('mime-types'); +// const mime = require('mime-types'); -async function imgurAlbumApi(post) { - return fetch(`https://api.imgur.com/3/album/${post.host.id}`, { +async function imgurAlbumApi(host, post) { + const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, { headers: { Authorization: `Client-ID ${config.methods.imgur.clientId}`, }, - }).then(res => res.json()).then((res) => { - if (res.status !== 200) { - throw new Error(`Could not fetch info for imgur album '${post.host.id}': ${res.data.error}`); - } - - const extract = config.library.album.extractSingleItem && res.data.images.length === 1; - - if (extract) { - console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.data.link}`); - } - - return { - album: extract ? null : { - id: res.data.id, - url: res.data.link, - title: res.data.title, - description: res.data.description, - datetime: new Date(res.data.datetime * 1000), - original: res.data, - }, - items: res.data.images.map(item => ({ - extracted: extract, - id: item.id, - url: item.animated ? item.mp4 : item.link, - title: item.title || (extract ? res.data.title : null), - description: item.description || (extract ? res.data.description : null), - type: item.animated ? 'video/mp4' : item.type, - datetime: item.datetime * 1000, - original: item, - })), - }; }); + + if (res.status !== 200) { + throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`); + } + + const { data } = await res.json(); + const extract = config.library.extractSingleAlbumItem && data.images.length === 1; + + if (extract) { + console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`); + } + + return { + album: extract ? null : { + id: data.id, + url: data.link, + title: data.title, + description: data.description, + datetime: new Date(data.datetime * 1000), + original: data, + }, + items: data.images.map(item => ({ + extracted: extract, + id: item.id, + url: item.animated ? item.mp4 : item.link, + title: item.title || data.title || null, + description: item.description || data.description || null, + type: item.animated ? 'video/mp4' : item.type, + datetime: item.datetime * 1000, + original: item, + })), + }; } -async function imgurAlbum(post) { - return imgurAlbumApi(post); - - /* - * as of late 2019, imgur requires log in to view albums and gallery images +/* + * as of late 2019, imgur requires log in to view albums and gallery images +async function imgurAlbum(host, post) { const res = await fetch(`https://imgur.com/a/${post.host.id}`); const html = await res.text(); @@ -84,7 +83,7 @@ async function imgurAlbum(post) { datetime: new Date(item.datetime), })), }; - */ } +*/ -module.exports = imgurAlbum; +module.exports = imgurAlbumApi; diff --git a/src/methods/imgurImage.js b/src/methods/imgurImage.js index 24ac738..7d8d598 100644 --- a/src/methods/imgurImage.js +++ b/src/methods/imgurImage.js @@ -3,33 +3,35 @@ const config = require('config'); const fetch = require('node-fetch'); -async function imgurImageApi(post) { - return fetch(`https://api.imgur.com/3/image/${post.host.id}`, { +async function imgurImageApi(host) { + const res = await fetch(`https://api.imgur.com/3/image/${host.id}`, { headers: { Authorization: `Client-ID ${config.methods.imgur.clientId}`, }, - }).then(res => res.json()).then((res) => { - if (res.status !== 200) { - throw new Error(`Could not fetch info for imgur image '${post.host.id}': ${res.data.error}`); - } - - return { - album: null, - items: [{ - id: res.data.id, - url: res.data.animated ? res.data.mp4 : res.data.link, - title: res.data.title, - description: res.data.description, - type: res.data.animated ? 'video/mp4' : res.data.type, - datetime: new Date(res.data.datetime * 1000), - original: res.data, - }], - }; }); + + const { data } = await res.json(); + + if (res.status !== 200) { + throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`); + } + + return { + album: null, + items: [{ + id: data.id, + url: data.animated ? data.mp4 : data.link, + title: data.title, + description: data.description, + type: data.animated ? 'video/mp4' : data.type, + datetime: new Date(data.datetime * 1000), + original: data, + }], + }; } -async function imgurImage(post) { - return imgurImageApi(post); +async function imgurImage(host, post) { + return imgurImageApi(host, post); /* * as of late 2019, imgur requires log in to view albums and gallery images diff --git a/src/methods/methods.js b/src/methods/methods.js index 82d80aa..28bc1b3 100644 --- a/src/methods/methods.js +++ b/src/methods/methods.js @@ -1,31 +1,33 @@ 'use strict'; -const self = require('./self'); -const redditImage = require('./redditImage'); -const redditVideo = require('./redditVideo'); -const redditPreview = require('./redditPreview'); -const imgurImage = require('./imgurImage'); -const imgurAlbum = require('./imgurAlbum'); -const vidbleImage = require('./vidbleImage'); -const vidbleVideo = require('./vidbleVideo'); -const vidbleAlbum = require('./vidbleAlbum'); -const gfycat = require('./gfycat'); +const erome = require('./erome'); const eroshareAlbum = require('./eroshareAlbum'); const eroshareItem = require('./eroshareItem'); +const gfycat = require('./gfycat'); +const imgurAlbum = require('./imgurAlbum'); +const imgurImage = require('./imgurImage'); const pornhub = require('./pornhub'); +const redditImage = require('./redditImage'); +const redditPreview = require('./redditPreview'); +const redditVideo = require('./redditVideo'); +const self = require('./self'); +const vidbleAlbum = require('./vidbleAlbum'); +const vidbleImage = require('./vidbleImage'); +const vidbleVideo = require('./vidbleVideo'); module.exports = { - self, - redditImage, - redditVideo, - redditPreview, - imgurImage, - imgurAlbum, - vidbleImage, - vidbleVideo, - vidbleAlbum, - gfycat, + erome, eroshareAlbum, eroshareItem, + gfycat, + imgurAlbum, + imgurImage, pornhub, + redditImage, + redditPreview, + redditVideo, + self, + vidbleAlbum, + vidbleImage, + vidbleVideo, }; diff --git a/src/methods/pornhub.js b/src/methods/pornhub.js index 6240c30..630f03c 100644 --- a/src/methods/pornhub.js +++ b/src/methods/pornhub.js @@ -2,11 +2,11 @@ const fetch = require('node-fetch'); -async function pornhub(post) { - const res = await fetch(`https://www.pornhub.com/view_video.php?viewkey=${post.host.id}`); +async function pornhub(host, post) { + const res = await fetch(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`); if (res.status !== 200) { - throw new Error(`Could not fetch info for vidble album '${post.host.id}': '${res.error}'`); + throw new Error(`Could not fetch info PornHub video '${host.id}': '${res.error}'`); } const html = await res.text(); @@ -27,11 +27,11 @@ async function pornhub(post) { return { album: null, items: [{ - id: post.host.id, + id: host.id, url, - title: post.title, + title: post ? post.title : null, type: 'video/mp4', - datetime: post.datetime, + datetime: post ? post.datetime : null, }], }; } diff --git a/src/methods/redditImage.js b/src/methods/redditImage.js index fcfdb20..8db1c5f 100644 --- a/src/methods/redditImage.js +++ b/src/methods/redditImage.js @@ -1,21 +1,19 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); const mime = require('mime-types'); -function redditImage(post) { - return Promise.resolve({ +async function redditImage(host, post) { + return { album: null, items: [{ - id: post.host.id || post.id, + id: host.id || post.id, url: post.url, title: post.title, datetime: post.datetime, type: mime.lookup(post.url.split('/.')[0]) || 'image/jpeg', - original: post - }] - }); -}; + original: post, + }], + }; +} module.exports = redditImage; diff --git a/src/methods/redditPreview.js b/src/methods/redditPreview.js index 657e730..4289634 100644 --- a/src/methods/redditPreview.js +++ b/src/methods/redditPreview.js @@ -1,33 +1,26 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); -const path = require('path'); -const fetch = require('node-fetch'); const mime = require('mime-types'); -const urlPattern = require('url-pattern'); -function redditPreview(post) { - return Promise.resolve({ +async function redditPreview(host, post) { + return { album: post.preview.length > 1 ? { id: post.host.id || post.id, url: post.url, title: post.title, datetime: post.datetime, - original: post + original: post, } : null, - items: post.preview.map(image => { - return { - id: post.host.id || post.id, - url: image.url, - title: post.title, - datetime: post.datetime, - type: mime.lookup(image.url.split('?')[0]), - preview: true, - original: post - }; - }) - }); -}; + items: post.preview.map(image => ({ + id: post.host.id || post.id, + url: image.url, + title: post.title, + datetime: post.datetime, + type: mime.lookup(image.url.split('?')[0]), + preview: true, + original: post, + })), + }; +} module.exports = redditPreview; diff --git a/src/methods/redditVideo.js b/src/methods/redditVideo.js index 137aabf..889bf8c 100644 --- a/src/methods/redditVideo.js +++ b/src/methods/redditVideo.js @@ -1,38 +1,35 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); const fetch = require('node-fetch'); -const fs = require('fs-extra'); -function redditVideo(post) { - return fetch(`${post.permalink}.json`).then(res => res.json()).then(res => { - return res[0].data.children[0].data.media.reddit_video.fallback_url; - }).then(videoUrl => { - const audioUrl = videoUrl.split('/').slice(0, -1).join('/') + '/audio'; +async function redditVideo(host, post) { + const res = await fetch(`${post.permalink}.json`); + const [{ data }] = await res.json(); - return fetch(audioUrl, { - method: 'HEAD' - }).then(res => { - const item = { - album: null, - items: [{ - id: post.host.id || post.id, - url: videoUrl, - title: post.title, - datetime: post.datetime, - type: 'video/mp4', - original: post - }] - }; + const videoUrl = data.children[0].data.media.reddit_video.fallback_url; + const audioUrl = `${videoUrl.split('/').slice(0, -1).join('/')}/audio`; - if(res.status === 200) { - item.items[0].mux = [audioUrl]; - } - - return item; - }); + const audioRes = await fetch(audioUrl, { + method: 'HEAD', }); -}; + + const item = { + album: null, + items: [{ + id: post.host.id || post.id, + url: videoUrl, + title: post.title, + datetime: post.datetime, + type: 'video/mp4', + original: post, + }], + }; + + if (audioRes.status === 200) { + item.items[0].mux = [audioUrl]; + } + + return item; +} module.exports = redditVideo; diff --git a/src/methods/vidbleAlbum.js b/src/methods/vidbleAlbum.js index b70ebaf..1066a1f 100644 --- a/src/methods/vidbleAlbum.js +++ b/src/methods/vidbleAlbum.js @@ -1,55 +1,52 @@ - 'use strict'; +'use strict'; -const util = require('util'); const config = require('config'); const fetch = require('node-fetch'); -const urlPattern = require('url-pattern'); +const UrlPattern = require('url-pattern'); const cheerio = require('cheerio'); const mime = require('mime-types'); -const pattern = new urlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)'); +const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)'); -function vidbleAlbum(post) { - return fetch(`https://www.vidble.com/album/${post.host.id}`).then(res => { - if(res.status !== 200) { - throw new Error(`Could not fetch info for vidble album '${post.host.id}': '${res.error}'`); - } +async function vidbleAlbum(host, post) { + const res = await fetch(`https://www.vidble.com/album/${host.id}`); - return res.text(); - }).then(res => { - const $ = cheerio.load(res); + if (res.status !== 200) { + throw new Error(`Could not fetch info for vidble album '${host.id}': '${res.error}'`); + } - const title = $('h2').text(); - const imgUrls = $('img.img2').toArray().map(img => `https://vidble.com${img.attribs.src || img.attribs['data-original']}`); + const html = await res.text(); + const $ = cheerio.load(html); - const extract = config.library.album.extractSingleItem && imgUrls.length === 1; + const title = $('h2').text(); + const imgUrls = $('img.img2').toArray().map(img => `https://vidble.com${img.attribs.src || img.attribs['data-original']}`); + const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1; - if(extract) { - console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`); - } + if (extract) { + console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`); + } - return { - album: extract ? null : { - id: post.host.id, - url: post.url, - title: post.title, - datetime: post.datetime - }, - items: imgUrls.map(url => { - const components = pattern.match(url); - const id = components.id.replace('_med', ''); - const mimetype = mime.lookup(components.ext); + return { + album: extract ? null : { + id: host.id, + url: post ? post.url : null, + title: post ? post.title : title, + datetime: post ? post.datetime : null, + }, + items: imgUrls.map((url) => { + const components = pattern.match(url); + const id = components.id.replace('_med', ''); + const mimetype = mime.lookup(components.ext); - return { - extracted: extract, - id: id, - url: `https://vidble.com/${id}.${components.ext}`, - type: mimetype, - datetime: post.datetime - }; - }) - }; - }); -}; + return { + extracted: extract, + id, + url: `https://vidble.com/${id}.${components.ext}`, + type: mimetype, + datetime: post.datetime, + }; + }), + }; +} module.exports = vidbleAlbum; diff --git a/src/methods/vidbleImage.js b/src/methods/vidbleImage.js index e817cc9..b102424 100644 --- a/src/methods/vidbleImage.js +++ b/src/methods/vidbleImage.js @@ -1,34 +1,30 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); -const path = require('path'); const fetch = require('node-fetch'); const $ = require('cheerio'); const mime = require('mime-types'); -function vidbleImage(post) { - return fetch(`https://vidble.com/${post.host.id}`).then(res => { - if(res.status !== 200) { - throw new Error(`Could not fetch info for vidble album '${post.host.id}': '${res.error}'`); - } +async function vidbleImage(host, post) { + const res = await fetch(`https://vidble.com/${host.id}`); - return res.text(); - }).then(res => { - const resource = $('img', res).attr('src'); + if (res.status !== 200) { + throw new Error(`Could not fetch info for vidble album '${host.id}': '${res.error}'`); + } - return { - album: null, - items: [{ - id: post.host.id, - url: `https://vidble.com/${resource}`, - title: post.title, - datetime: post.datetime, - type: mime.lookup(resource), - original: post - }] - }; - }); -}; + const html = await res.text(); + const resource = $('img', html).attr('src'); + + return { + album: null, + items: [{ + id: host.id, + url: `https://vidble.com/${resource}`, + title: post ? post.title : null, + datetime: post ? post.datetime : null, + type: mime.lookup(resource), + original: post || null, + }], + }; +} module.exports = vidbleImage; diff --git a/src/methods/vidbleVideo.js b/src/methods/vidbleVideo.js index 33ff2ad..d8888e4 100644 --- a/src/methods/vidbleVideo.js +++ b/src/methods/vidbleVideo.js @@ -1,39 +1,37 @@ - 'use strict'; +'use strict'; -const util = require('util'); -const config = require('config'); const fetch = require('node-fetch'); const cheerio = require('cheerio'); -function vidbleVideo(post) { - return fetch(`https://www.vidble.com/watch?v=${post.host.id}`).then(res => { - if(res.status !== 200) { - throw new Error(`Could not fetch info for vidble video '${post.host.id}': '${res.error}'`); - } +async function vidbleVideo(host, post) { + const res = await fetch(`https://www.vidble.com/watch?v=${host.id}`); - return res.text(); - }).then(res => { - const $ = cheerio.load(res); - const resource = $('video source'); - const source = resource.attr('src'); - const type = resource.attr('type'); + if (res.status !== 200) { + throw new Error(`Could not fetch info for vidble video '${host.id}': '${res.error}'`); + } - if(!source || !type) { - throw new Error(`Failed to retrieve (likely removed) vidble video '${post.host.id}'`); - } + const html = await res.text(); - return { - album: null, - items: [{ - id: post.host.id, - url: `https://vidble.com/${source}`, - title: post.title, - datetime: post.datetime, - type: type, - original: post - }] - }; - }); -}; + const $ = cheerio.load(html); + const resource = $('video source'); + const source = resource.attr('src'); + const type = resource.attr('type'); + + if (!source || !type) { + throw new Error(`Failed to retrieve (likely removed) vidble video '${host.id}'`); + } + + return { + album: null, + items: [{ + id: host.id, + url: `https://vidble.com/${source}`, + title: post ? post.title : null, + datetime: post ? post.datetime : null, + type, + original: post || null, + }], + }; +} module.exports = vidbleVideo; diff --git a/src/save/profileDetails.js b/src/save/profileDetails.js index 44097ce..f8ad4e0 100644 --- a/src/save/profileDetails.js +++ b/src/save/profileDetails.js @@ -25,10 +25,17 @@ async function saveProfileImage(user, args) { return null; } - const filepath = interpolate(config.library.profile.image, user, null, { + const filepath = interpolate( + config.library.profile.image, + { // pass profile image as item to interpolate extension variable - url: image, - }); + url: image, + }, + null, + null, + null, + user, + ); try { const stream = await fetchItem(image, 0, { permalink: `https://reddit.com/user/${user.name}` }); @@ -54,7 +61,7 @@ async function saveProfileDescription(user, args) { if (config.library.profile.description && !user.fallback && !user.deleted) { if (user.profile && user.profile.description) { - const filepath = interpolate(config.library.profile.description, user); + const filepath = interpolate(config.library.profile.description, null, null, null, null, user); const stream = textToStream(user.profile.description); try { diff --git a/src/save/writeToIndex.js b/src/save/writeToIndex.js index 1aff509..b724b27 100644 --- a/src/save/writeToIndex.js +++ b/src/save/writeToIndex.js @@ -8,7 +8,7 @@ const textToStream = require('./textToStream'); const save = require('./save'); async function writeToIndex(posts, profilePaths, user, args) { - const filepath = interpolate(config.library.index.file, user, null, false); + const filepath = interpolate(config.library.index.file, null, null, null, null, user, false); const now = new Date(); const newAndUpdatedEntries = posts.concat(user.indexed.updated, args.indexIgnored ? user.indexed.ignored : []).map((post) => { diff --git a/src/sources/getIndex.js b/src/sources/getIndex.js index c9351f8..60796eb 100644 --- a/src/sources/getIndex.js +++ b/src/sources/getIndex.js @@ -7,7 +7,7 @@ const yaml = require('js-yaml'); const interpolate = require('../interpolate.js'); async function getIndex(user) { - const indexFilePath = interpolate(config.library.index.file, user, null, null, false); + const indexFilePath = interpolate(config.library.index.file, null, null, null, null, user, false); try { const indexFile = await fs.readFile(indexFilePath, 'utf8');