Added support for various tube and social media sites via youtube-dl.
This commit is contained in:
parent
17c7acd6a2
commit
6e3bfdd175
|
@ -15,7 +15,9 @@ Most features are optional and can easily be disabled!
|
|||
* Reddit text/self, images and videos[\*](#reddit-videos)
|
||||
* Imgur (requires API key as of late 2019)
|
||||
* Gfycat
|
||||
* PornHub (videos)
|
||||
* YouTube
|
||||
* PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz
|
||||
* Twitter, Facebook, Instagram
|
||||
* Erome
|
||||
* Vidble
|
||||
* Eroshare archive
|
||||
|
|
12
src/app.js
12
src/app.js
|
@ -27,7 +27,7 @@ async function getFileContents(location, label) {
|
|||
try {
|
||||
const fileContents = await fs.readFile(location, 'utf8');
|
||||
|
||||
return fileContents.split('\n').filter(entry => entry);
|
||||
return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#');
|
||||
} catch (error) {
|
||||
console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
|
||||
|
||||
|
@ -73,9 +73,13 @@ async function getCompletePosts() {
|
|||
async function getDirectContent(links, ep) {
|
||||
return Promise.map(links, async (link) => {
|
||||
const host = dissectLink(link);
|
||||
const info = await getInfo(host);
|
||||
const info = await getInfo(host, reddit, link);
|
||||
|
||||
return fetchSaveDirectContent(info, host, ep);
|
||||
if (info) {
|
||||
return fetchSaveDirectContent(info, host, ep);
|
||||
}
|
||||
|
||||
return null;
|
||||
}, {
|
||||
concurrency: 5,
|
||||
});
|
||||
|
@ -83,7 +87,7 @@ async function getDirectContent(links, ep) {
|
|||
|
||||
async function getCompleteContents(ep) {
|
||||
if (args.fetch) {
|
||||
return getDirectContent([args.fetch], ep);
|
||||
return getDirectContent(args.fetch, ep);
|
||||
}
|
||||
|
||||
if (args.fileDirect) {
|
||||
|
|
|
@ -26,7 +26,7 @@ function getArgs() {
|
|||
})
|
||||
.option('direct', {
|
||||
describe: 'Get content directly from imgur and other hosts',
|
||||
type: 'string',
|
||||
type: 'array',
|
||||
alias: 'fetch',
|
||||
})
|
||||
.option('file-direct', {
|
||||
|
|
|
@ -2,66 +2,108 @@
|
|||
|
||||
const UrlPattern = require('url-pattern');
|
||||
|
||||
const hosts = [{
|
||||
method: 'self',
|
||||
label: 'self',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'),
|
||||
}, {
|
||||
method: 'redditImage',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'),
|
||||
}, {
|
||||
method: 'redditImage',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
|
||||
}, {
|
||||
method: 'redditVideo',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'),
|
||||
}, {
|
||||
method: 'imgurImage',
|
||||
label: 'imgur',
|
||||
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'),
|
||||
}, {
|
||||
method: 'imgurAlbum',
|
||||
label: 'imgur',
|
||||
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'),
|
||||
}, {
|
||||
method: 'vidbleImage',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'),
|
||||
}, {
|
||||
method: 'vidbleVideo',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'),
|
||||
}, {
|
||||
method: 'vidbleAlbum',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'),
|
||||
}, {
|
||||
method: 'gfycat',
|
||||
label: 'gfycat',
|
||||
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
|
||||
}, {
|
||||
method: 'erome',
|
||||
label: 'erome',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'),
|
||||
}, {
|
||||
method: 'eroshareAlbum',
|
||||
label: 'eroshare',
|
||||
pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'),
|
||||
}, {
|
||||
method: 'eroshareItem',
|
||||
label: 'eroshare',
|
||||
pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'),
|
||||
}, {
|
||||
method: 'pornhub',
|
||||
label: 'pornhub',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)pornhub.com/view_video.php?viewkey=(:id)(?*)'),
|
||||
}];
|
||||
const hosts = [
|
||||
{
|
||||
method: 'self',
|
||||
label: 'self',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'),
|
||||
},
|
||||
{
|
||||
method: 'redditImage',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'redditImage',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'redditVideo',
|
||||
label: 'reddit',
|
||||
pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'imgurImage',
|
||||
label: 'imgur',
|
||||
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'imgurAlbum',
|
||||
label: 'imgur',
|
||||
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'vidbleImage',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'vidbleVideo',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'vidbleAlbum',
|
||||
label: 'vidble',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'gfycat',
|
||||
label: 'gfycat',
|
||||
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'erome',
|
||||
label: 'erome',
|
||||
pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'),
|
||||
},
|
||||
{
|
||||
method: 'eroshareAlbum',
|
||||
label: 'eroshare',
|
||||
pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'),
|
||||
},
|
||||
{
|
||||
method: 'eroshareItem',
|
||||
label: 'eroshare',
|
||||
pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'),
|
||||
},
|
||||
];
|
||||
|
||||
const fallbacks = new Set([
|
||||
'bbc',
|
||||
'biqle',
|
||||
'buzzfeed',
|
||||
'chaturbate',
|
||||
'dailymotion',
|
||||
'eporner',
|
||||
'instagram',
|
||||
'keezmovies',
|
||||
'liveleak',
|
||||
'mixcloud',
|
||||
'pornhd',
|
||||
'pornhub',
|
||||
'redtube',
|
||||
'soundcloud',
|
||||
'soundgasm',
|
||||
'spankbang',
|
||||
'spankwire',
|
||||
'streamable',
|
||||
'tiktok',
|
||||
'tube8',
|
||||
'tweakers',
|
||||
'twitch',
|
||||
'twitter',
|
||||
'vimeo',
|
||||
'xhamster',
|
||||
'xnxx',
|
||||
'youjizz',
|
||||
'youporn',
|
||||
'youtube',
|
||||
]);
|
||||
|
||||
module.exports = function dissectLink(url) {
|
||||
return hosts.reduce((acc, host) => {
|
||||
const hostMethod = hosts.reduce((acc, host) => {
|
||||
if (acc) {
|
||||
return acc;
|
||||
}
|
||||
|
@ -78,4 +120,20 @@ module.exports = function dissectLink(url) {
|
|||
|
||||
return null;
|
||||
}, null);
|
||||
|
||||
if (hostMethod) {
|
||||
return hostMethod;
|
||||
}
|
||||
|
||||
const { hostname } = new UrlPattern('http(s)\\://(www.):hostname(*)').match(url);
|
||||
|
||||
if (hostname && fallbacks.has(hostname)) {
|
||||
return {
|
||||
url,
|
||||
method: 'tube',
|
||||
label: hostname,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
|
|
|
@ -48,7 +48,7 @@ function selfPostToText(item, post) {
|
|||
return yaml.safeDump(curatedPost);
|
||||
}
|
||||
|
||||
async function getBuffers(item, post) {
|
||||
async function getBuffers(item, post, host) {
|
||||
if (item.self) {
|
||||
return [{
|
||||
...Buffer.from(selfPostToText(item, post), 'utf8'),
|
||||
|
@ -57,7 +57,7 @@ async function getBuffers(item, post) {
|
|||
}
|
||||
|
||||
const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
|
||||
const buffers = await Promise.map(sources, source => fetchItem(source, 0, post));
|
||||
const buffers = await Promise.map(sources, source => fetchItem(source, 0, post, host));
|
||||
|
||||
if (buffers.filter(buffer => buffer).length > 0) {
|
||||
return buffers;
|
||||
|
@ -109,7 +109,7 @@ async function fetchSaveUserContent(user, ep, args) {
|
|||
const posts = await Promise.map(user.posts, async (post) => {
|
||||
const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
|
||||
const item = { ...originalItem, index };
|
||||
const buffers = await getBuffers(item, post);
|
||||
const buffers = await getBuffers(item, post, post.host);
|
||||
|
||||
// no buffers, ignore item
|
||||
if (!buffers || buffers.length === 0) {
|
||||
|
@ -153,8 +153,10 @@ async function fetchSaveUserContent(user, ep, args) {
|
|||
|
||||
async function fetchSaveDirectContent(content, host, ep) {
|
||||
return Promise.reduce(content.items, async (accItems, originalItem, index) => {
|
||||
console.log(`Fetching and saving '${host.url}'`);
|
||||
|
||||
const item = { ...originalItem, index };
|
||||
const buffers = await getBuffers(item, null);
|
||||
const buffers = await getBuffers(item, null, host);
|
||||
|
||||
// no buffers, ignore item
|
||||
if (!buffers || buffers.length === 0) {
|
||||
|
|
|
@ -46,7 +46,19 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
|
|||
},
|
||||
}), {});
|
||||
|
||||
async function getInfo(host, reddit) {
|
||||
async function getInfo(host, reddit, url) {
|
||||
if (host === null) {
|
||||
try {
|
||||
const info = await methods.tube(host, null, reddit);
|
||||
|
||||
return info;
|
||||
} catch (error) {
|
||||
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`);
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return methods[host.method](host, null, reddit);
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ const config = require('config');
|
|||
const bhttp = require('bhttp');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
async function fetchItem(url, attempt, post) {
|
||||
async function fetchItem(url, attempt, post, host) {
|
||||
async function retry(error) {
|
||||
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
|
||||
|
||||
|
@ -24,7 +24,7 @@ async function fetchItem(url, attempt, post) {
|
|||
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
|
||||
}
|
||||
|
||||
console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`);
|
||||
console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`);
|
||||
|
||||
const hash = blake2.createHash('blake2b', { digestLength: 24 });
|
||||
hash.update(res.body);
|
||||
|
|
|
@ -11,11 +11,12 @@ async function imgurAlbumApi(host, post) {
|
|||
},
|
||||
});
|
||||
|
||||
const { data } = await res.json();
|
||||
|
||||
if (res.status !== 200) {
|
||||
throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`);
|
||||
}
|
||||
|
||||
const { data } = await res.json();
|
||||
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
|
||||
|
||||
if (extract) {
|
||||
|
|
|
@ -13,7 +13,7 @@ async function imgurImageApi(host) {
|
|||
const { data } = await res.json();
|
||||
|
||||
if (res.status !== 200) {
|
||||
throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`);
|
||||
throw new Error(`Could not fetch info for imgur image '${host.id}': ${data.error}`);
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
|
@ -6,7 +6,6 @@ const eroshareItem = require('./eroshareItem');
|
|||
const gfycat = require('./gfycat');
|
||||
const imgurAlbum = require('./imgurAlbum');
|
||||
const imgurImage = require('./imgurImage');
|
||||
const pornhub = require('./pornhub');
|
||||
const redditImage = require('./redditImage');
|
||||
const redditPreview = require('./redditPreview');
|
||||
const redditVideo = require('./redditVideo');
|
||||
|
@ -14,6 +13,7 @@ const self = require('./self');
|
|||
const vidbleAlbum = require('./vidbleAlbum');
|
||||
const vidbleImage = require('./vidbleImage');
|
||||
const vidbleVideo = require('./vidbleVideo');
|
||||
const tube = require('./tube');
|
||||
|
||||
module.exports = {
|
||||
erome,
|
||||
|
@ -22,11 +22,11 @@ module.exports = {
|
|||
gfycat,
|
||||
imgurAlbum,
|
||||
imgurImage,
|
||||
pornhub,
|
||||
redditImage,
|
||||
redditPreview,
|
||||
redditVideo,
|
||||
self,
|
||||
tube,
|
||||
vidbleAlbum,
|
||||
vidbleImage,
|
||||
vidbleVideo,
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
const youtubedl = require('youtube-dl');
|
||||
const dateFns = require('date-fns');
|
||||
|
||||
async function pornhub(host) {
|
||||
async function tube(host) {
|
||||
const data = await new Promise((resolve, reject) => {
|
||||
youtubedl.getInfo(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`, null, (error, info) => {
|
||||
youtubedl.getInfo(host.url, null, (error, info) => {
|
||||
if (error) {
|
||||
reject(error);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ async function pornhub(host) {
|
|||
id: data.id,
|
||||
url: data.url,
|
||||
title: data.fulltitle || data.title,
|
||||
description: data.description,
|
||||
type: `video/${data.ext}`,
|
||||
datetime: dateFns.format(data.upload_date, 'YYYYMMDD'),
|
||||
original: data,
|
||||
|
@ -29,4 +30,4 @@ async function pornhub(host) {
|
|||
};
|
||||
}
|
||||
|
||||
module.exports = pornhub;
|
||||
module.exports = tube;
|
|
@ -42,6 +42,11 @@ const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
|
|||
name: '[deleted]',
|
||||
deleted: true,
|
||||
posts: [post],
|
||||
indexed: {
|
||||
profile: {},
|
||||
original: [],
|
||||
updated: [],
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
@ -50,26 +50,57 @@ async function getArchivedPosts(username, posts, reddit) {
|
|||
|
||||
function getUserPostsWrap(reddit, args) {
|
||||
return async function getUserPosts(usernames) {
|
||||
const users = await Promise.map(usernames, async (username) => {
|
||||
const [user, posts] = await Promise.all([
|
||||
getUser(username, reddit),
|
||||
getPosts(username, reddit, args),
|
||||
]);
|
||||
try {
|
||||
const users = await Promise.map(usernames, async (username) => {
|
||||
try {
|
||||
const [user, posts] = await Promise.all([
|
||||
getUser(username, reddit),
|
||||
getPosts(username, reddit, args),
|
||||
]);
|
||||
|
||||
const { profile, posts: indexed } = await getIndex(user);
|
||||
const { profile, posts: indexed } = await getIndex(user);
|
||||
|
||||
if (args.archives) {
|
||||
posts.push(...await getArchivedPosts(username, posts, reddit));
|
||||
}
|
||||
if (args.archives) {
|
||||
posts.push(...await getArchivedPosts(username, posts, reddit));
|
||||
}
|
||||
|
||||
if (posts.length) {
|
||||
return { ...user, posts, indexed: { profile, original: indexed, updated: [] } };
|
||||
}
|
||||
if (posts.length) {
|
||||
return {
|
||||
...user,
|
||||
posts,
|
||||
indexed: {
|
||||
profile,
|
||||
original: indexed,
|
||||
updated: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.log(username, error);
|
||||
|
||||
return users.reduce((userPosts, user) => (user ? { ...userPosts, [user.name]: user } : userPosts), {});
|
||||
return null;
|
||||
}
|
||||
}, {
|
||||
concurrency: 5,
|
||||
});
|
||||
|
||||
return users.reduce(
|
||||
(userPosts, user) => (user
|
||||
? {
|
||||
...userPosts,
|
||||
[user.name]: user,
|
||||
}
|
||||
: userPosts
|
||||
),
|
||||
{},
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue