diff --git a/README.md b/README.md index ff5f7df..31016fc 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,9 @@ Most features are optional and can easily be disabled! * Reddit text/self, images and videos[\*](#reddit-videos) * Imgur (requires API key as of late 2019) * Gfycat -* PornHub (videos) +* YouTube +* PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz +* Twitter, Facebook, Instagram * Erome * Vidble * Eroshare archive diff --git a/src/app.js b/src/app.js index e1c2502..e80ed78 100644 --- a/src/app.js +++ b/src/app.js @@ -27,7 +27,7 @@ async function getFileContents(location, label) { try { const fileContents = await fs.readFile(location, 'utf8'); - return fileContents.split('\n').filter(entry => entry); + return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#'); } catch (error) { console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`); @@ -73,9 +73,13 @@ async function getCompletePosts() { async function getDirectContent(links, ep) { return Promise.map(links, async (link) => { const host = dissectLink(link); - const info = await getInfo(host); + const info = await getInfo(host, reddit, link); - return fetchSaveDirectContent(info, host, ep); + if (info) { + return fetchSaveDirectContent(info, host, ep); + } + + return null; }, { concurrency: 5, }); @@ -83,7 +87,7 @@ async function getDirectContent(links, ep) { async function getCompleteContents(ep) { if (args.fetch) { - return getDirectContent([args.fetch], ep); + return getDirectContent(args.fetch, ep); } if (args.fileDirect) { diff --git a/src/cli.js b/src/cli.js index b8b1fe7..1fa7f57 100644 --- a/src/cli.js +++ b/src/cli.js @@ -26,7 +26,7 @@ function getArgs() { }) .option('direct', { describe: 'Get content directly from imgur and other hosts', - type: 'string', + type: 'array', alias: 'fetch', }) .option('file-direct', { diff --git a/src/dissectLink.js b/src/dissectLink.js index 2f19030..ea46619 100644 --- a/src/dissectLink.js +++ b/src/dissectLink.js @@ -2,66 +2,108 @@ const UrlPattern = require('url-pattern'); -const hosts = [{ - method: 'self', - label: 'self', - pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'), -}, { - method: 'redditImage', - label: 'reddit', - pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'), -}, { - method: 'redditImage', - label: 'reddit', - pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'), -}, { - method: 'redditVideo', - label: 'reddit', - pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'), -}, { - method: 'imgurImage', - label: 'imgur', - pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'), -}, { - method: 'imgurAlbum', - label: 'imgur', - pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'), -}, { - method: 'vidbleImage', - label: 'vidble', - pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'), -}, { - method: 'vidbleVideo', - label: 'vidble', - pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'), -}, { - method: 'vidbleAlbum', - label: 'vidble', - pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'), -}, { - method: 'gfycat', - label: 'gfycat', - pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'), -}, { - method: 'erome', - label: 'erome', - pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'), -}, { - method: 'eroshareAlbum', - label: 'eroshare', - pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'), -}, { - method: 'eroshareItem', - label: 'eroshare', - pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'), -}, { - method: 'pornhub', - label: 'pornhub', - pattern: new UrlPattern('http(s)\\://(www.)pornhub.com/view_video.php?viewkey=(:id)(?*)'), -}]; +const hosts = [ + { + method: 'self', + label: 'self', + pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'), + }, + { + method: 'redditImage', + label: 'reddit', + pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'), + }, + { + method: 'redditImage', + label: 'reddit', + pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'), + }, + { + method: 'redditVideo', + label: 'reddit', + pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'), + }, + { + method: 'imgurImage', + label: 'imgur', + pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'), + }, + { + method: 'imgurAlbum', + label: 'imgur', + pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'), + }, + { + method: 'vidbleImage', + label: 'vidble', + pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'), + }, + { + method: 'vidbleVideo', + label: 'vidble', + pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'), + }, + { + method: 'vidbleAlbum', + label: 'vidble', + pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'), + }, + { + method: 'gfycat', + label: 'gfycat', + pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'), + }, + { + method: 'erome', + label: 'erome', + pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'), + }, + { + method: 'eroshareAlbum', + label: 'eroshare', + pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'), + }, + { + method: 'eroshareItem', + label: 'eroshare', + pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'), + }, +]; + +const fallbacks = new Set([ + 'bbc', + 'biqle', + 'buzzfeed', + 'chaturbate', + 'dailymotion', + 'eporner', + 'instagram', + 'keezmovies', + 'liveleak', + 'mixcloud', + 'pornhd', + 'pornhub', + 'redtube', + 'soundcloud', + 'soundgasm', + 'spankbang', + 'spankwire', + 'streamable', + 'tiktok', + 'tube8', + 'tweakers', + 'twitch', + 'twitter', + 'vimeo', + 'xhamster', + 'xnxx', + 'youjizz', + 'youporn', + 'youtube', +]); module.exports = function dissectLink(url) { - return hosts.reduce((acc, host) => { + const hostMethod = hosts.reduce((acc, host) => { if (acc) { return acc; } @@ -78,4 +120,20 @@ module.exports = function dissectLink(url) { return null; }, null); + + if (hostMethod) { + return hostMethod; + } + + const { hostname } = new UrlPattern('http(s)\\://(www.):hostname(*)').match(url); + + if (hostname && fallbacks.has(hostname)) { + return { + url, + method: 'tube', + label: hostname, + }; + } + + return null; }; diff --git a/src/fetch/content.js b/src/fetch/content.js index 97cc5ed..d81f4d5 100644 --- a/src/fetch/content.js +++ b/src/fetch/content.js @@ -48,7 +48,7 @@ function selfPostToText(item, post) { return yaml.safeDump(curatedPost); } -async function getBuffers(item, post) { +async function getBuffers(item, post, host) { if (item.self) { return [{ ...Buffer.from(selfPostToText(item, post), 'utf8'), @@ -57,7 +57,7 @@ async function getBuffers(item, post) { } const sources = item.mux ? [item.url].concat(item.mux) : [item.url]; - const buffers = await Promise.map(sources, source => fetchItem(source, 0, post)); + const buffers = await Promise.map(sources, source => fetchItem(source, 0, post, host)); if (buffers.filter(buffer => buffer).length > 0) { return buffers; @@ -109,7 +109,7 @@ async function fetchSaveUserContent(user, ep, args) { const posts = await Promise.map(user.posts, async (post) => { const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => { const item = { ...originalItem, index }; - const buffers = await getBuffers(item, post); + const buffers = await getBuffers(item, post, post.host); // no buffers, ignore item if (!buffers || buffers.length === 0) { @@ -153,8 +153,10 @@ async function fetchSaveUserContent(user, ep, args) { async function fetchSaveDirectContent(content, host, ep) { return Promise.reduce(content.items, async (accItems, originalItem, index) => { + console.log(`Fetching and saving '${host.url}'`); + const item = { ...originalItem, index }; - const buffers = await getBuffers(item, null); + const buffers = await getBuffers(item, null, host); // no buffers, ignore item if (!buffers || buffers.length === 0) { diff --git a/src/fetch/info.js b/src/fetch/info.js index ad05a1e..14b5a6b 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -46,7 +46,19 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users) }, }), {}); -async function getInfo(host, reddit) { +async function getInfo(host, reddit, url) { + if (host === null) { + try { + const info = await methods.tube(host, null, reddit); + + return info; + } catch (error) { + console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`); + + return null; + } + } + return methods[host.method](host, null, reddit); } diff --git a/src/fetch/item.js b/src/fetch/item.js index 5e42064..eec1bae 100644 --- a/src/fetch/item.js +++ b/src/fetch/item.js @@ -4,7 +4,7 @@ const config = require('config'); const bhttp = require('bhttp'); const blake2 = require('blake2'); -async function fetchItem(url, attempt, post) { +async function fetchItem(url, attempt, post, host) { async function retry(error) { console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`); @@ -24,7 +24,7 @@ async function fetchItem(url, attempt, post) { throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`); } - console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`); + console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`); const hash = blake2.createHash('blake2b', { digestLength: 24 }); hash.update(res.body); diff --git a/src/methods/imgurAlbum.js b/src/methods/imgurAlbum.js index 75e7fe9..6ad4ecf 100644 --- a/src/methods/imgurAlbum.js +++ b/src/methods/imgurAlbum.js @@ -11,11 +11,12 @@ async function imgurAlbumApi(host, post) { }, }); + const { data } = await res.json(); + if (res.status !== 200) { throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`); } - const { data } = await res.json(); const extract = config.library.extractSingleAlbumItem && data.images.length === 1; if (extract) { diff --git a/src/methods/imgurImage.js b/src/methods/imgurImage.js index 7d8d598..7956239 100644 --- a/src/methods/imgurImage.js +++ b/src/methods/imgurImage.js @@ -13,7 +13,7 @@ async function imgurImageApi(host) { const { data } = await res.json(); if (res.status !== 200) { - throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`); + throw new Error(`Could not fetch info for imgur image '${host.id}': ${data.error}`); } return { diff --git a/src/methods/methods.js b/src/methods/methods.js index 28bc1b3..ce42426 100644 --- a/src/methods/methods.js +++ b/src/methods/methods.js @@ -6,7 +6,6 @@ const eroshareItem = require('./eroshareItem'); const gfycat = require('./gfycat'); const imgurAlbum = require('./imgurAlbum'); const imgurImage = require('./imgurImage'); -const pornhub = require('./pornhub'); const redditImage = require('./redditImage'); const redditPreview = require('./redditPreview'); const redditVideo = require('./redditVideo'); @@ -14,6 +13,7 @@ const self = require('./self'); const vidbleAlbum = require('./vidbleAlbum'); const vidbleImage = require('./vidbleImage'); const vidbleVideo = require('./vidbleVideo'); +const tube = require('./tube'); module.exports = { erome, @@ -22,11 +22,11 @@ module.exports = { gfycat, imgurAlbum, imgurImage, - pornhub, redditImage, redditPreview, redditVideo, self, + tube, vidbleAlbum, vidbleImage, vidbleVideo, diff --git a/src/methods/pornhub.js b/src/methods/tube.js similarity index 78% rename from src/methods/pornhub.js rename to src/methods/tube.js index 2d5dfc1..16b3e42 100644 --- a/src/methods/pornhub.js +++ b/src/methods/tube.js @@ -3,9 +3,9 @@ const youtubedl = require('youtube-dl'); const dateFns = require('date-fns'); -async function pornhub(host) { +async function tube(host) { const data = await new Promise((resolve, reject) => { - youtubedl.getInfo(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`, null, (error, info) => { + youtubedl.getInfo(host.url, null, (error, info) => { if (error) { reject(error); } @@ -21,6 +21,7 @@ async function pornhub(host) { id: data.id, url: data.url, title: data.fulltitle || data.title, + description: data.description, type: `video/${data.ext}`, datetime: dateFns.format(data.upload_date, 'YYYYMMDD'), original: data, @@ -29,4 +30,4 @@ async function pornhub(host) { }; } -module.exports = pornhub; +module.exports = tube; diff --git a/src/sources/getPosts.js b/src/sources/getPosts.js index 10f47ca..295c198 100644 --- a/src/sources/getPosts.js +++ b/src/sources/getPosts.js @@ -42,6 +42,11 @@ const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) { name: '[deleted]', deleted: true, posts: [post], + indexed: { + profile: {}, + original: [], + updated: [], + }, }, }; } diff --git a/src/sources/getUserPosts.js b/src/sources/getUserPosts.js index f9ddc83..26ec152 100644 --- a/src/sources/getUserPosts.js +++ b/src/sources/getUserPosts.js @@ -50,26 +50,57 @@ async function getArchivedPosts(username, posts, reddit) { function getUserPostsWrap(reddit, args) { return async function getUserPosts(usernames) { - const users = await Promise.map(usernames, async (username) => { - const [user, posts] = await Promise.all([ - getUser(username, reddit), - getPosts(username, reddit, args), - ]); + try { + const users = await Promise.map(usernames, async (username) => { + try { + const [user, posts] = await Promise.all([ + getUser(username, reddit), + getPosts(username, reddit, args), + ]); - const { profile, posts: indexed } = await getIndex(user); + const { profile, posts: indexed } = await getIndex(user); - if (args.archives) { - posts.push(...await getArchivedPosts(username, posts, reddit)); - } + if (args.archives) { + posts.push(...await getArchivedPosts(username, posts, reddit)); + } - if (posts.length) { - return { ...user, posts, indexed: { profile, original: indexed, updated: [] } }; - } + if (posts.length) { + return { + ...user, + posts, + indexed: { + profile, + original: indexed, + updated: [], + }, + }; + } - return null; - }); + return null; + } catch (error) { + console.log(username, error); - return users.reduce((userPosts, user) => (user ? { ...userPosts, [user.name]: user } : userPosts), {}); + return null; + } + }, { + concurrency: 5, + }); + + return users.reduce( + (userPosts, user) => (user + ? { + ...userPosts, + [user.name]: user, + } + : userPosts + ), + {}, + ); + } catch (error) { + console.log(error); + + throw error; + } }; }