Added support for various tube and social media sites via youtube-dl.

This commit is contained in:
ThePendulum 2019-11-12 03:38:26 +01:00
parent 0838607996
commit e1cc615a99
13 changed files with 209 additions and 93 deletions

View File

@ -15,7 +15,9 @@ Most features are optional and can easily be disabled!
* Reddit text/self, images and videos[\*](#reddit-videos)
* Imgur (requires API key as of late 2019)
* Gfycat
* PornHub (videos)
* YouTube
* PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz
* Twitter, Facebook, Instagram
* Erome
* Vidble
* Eroshare archive

View File

@ -27,7 +27,7 @@ async function getFileContents(location, label) {
try {
const fileContents = await fs.readFile(location, 'utf8');
return fileContents.split('\n').filter(entry => entry);
return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#');
} catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
@ -73,9 +73,13 @@ async function getCompletePosts() {
async function getDirectContent(links, ep) {
return Promise.map(links, async (link) => {
const host = dissectLink(link);
const info = await getInfo(host);
const info = await getInfo(host, reddit, link);
return fetchSaveDirectContent(info, host, ep);
if (info) {
return fetchSaveDirectContent(info, host, ep);
}
return null;
}, {
concurrency: 5,
});
@ -83,7 +87,7 @@ async function getDirectContent(links, ep) {
async function getCompleteContents(ep) {
if (args.fetch) {
return getDirectContent([args.fetch], ep);
return getDirectContent(args.fetch, ep);
}
if (args.fileDirect) {

View File

@ -26,7 +26,7 @@ function getArgs() {
})
.option('direct', {
describe: 'Get content directly from imgur and other hosts',
type: 'string',
type: 'array',
alias: 'fetch',
})
.option('file-direct', {

View File

@ -2,66 +2,108 @@
const UrlPattern = require('url-pattern');
/*
 * Table of hosts that have a dedicated handler.
 *
 * Each entry carries:
 *   - method:  key of the handler that is looked up for a matched link
 *   - label:   short host name
 *   - pattern: UrlPattern the link is tested against
 *
 * This form of the table still carries a dedicated 'pornhub' entry.
 */
const hosts = [{
method: 'self',
label: 'self',
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'),
}, {
method: 'redditImage',
label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'),
}, {
method: 'redditImage',
label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
}, {
method: 'redditVideo',
label: 'reddit',
pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'),
}, {
method: 'imgurImage',
label: 'imgur',
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'),
}, {
method: 'imgurAlbum',
label: 'imgur',
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'),
}, {
method: 'vidbleImage',
label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'),
}, {
method: 'vidbleVideo',
label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'),
}, {
method: 'vidbleAlbum',
label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'),
}, {
method: 'gfycat',
label: 'gfycat',
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
}, {
method: 'erome',
label: 'erome',
pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'),
}, {
method: 'eroshareAlbum',
label: 'eroshare',
pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'),
}, {
method: 'eroshareItem',
label: 'eroshare',
pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'),
}, {
// Dedicated PornHub handler; the video id is taken from the viewkey query parameter.
method: 'pornhub',
label: 'pornhub',
pattern: new UrlPattern('http(s)\\://(www.)pornhub.com/view_video.php?viewkey=(:id)(?*)'),
}];
/*
 * Hosts that have a dedicated handler, written as
 * [method, label, url pattern] rows and expanded into
 * { method, label, pattern } objects below.
 *
 *   - method:  key of the handler looked up for a matched link
 *   - label:   short host name
 *   - pattern: UrlPattern instance built from the row's pattern string
 *
 * Row order is preserved; dissectLink reduces over this list and keeps the
 * first non-null result, so earlier rows take precedence.
 */
const hosts = [
    ['self', 'self', 'http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'],
    ['redditImage', 'reddit', 'http(s)\\://i.redd.it/:id.:ext(?*)'],
    ['redditImage', 'reddit', 'http(s)\\://i.reddituploads.com/:id(?*)'],
    ['redditVideo', 'reddit', 'http(s)\\://v.redd.it/:id(?*)'],
    ['imgurImage', 'imgur', 'http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'],
    ['imgurAlbum', 'imgur', 'http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'],
    ['vidbleImage', 'vidble', 'http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'],
    ['vidbleVideo', 'vidble', 'http(s)\\://(www.)vidble.com/watch?v=:id(?*)'],
    ['vidbleAlbum', 'vidble', 'http(s)\\://(www.)vidble.com/album/:id(?*)'],
    ['gfycat', 'gfycat', 'http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'],
    ['erome', 'erome', 'http(s)\\://(www.)erome.com/a/:id(?*)'],
    ['eroshareAlbum', 'eroshare', 'http(s)\\://eroshare.com/:id(#)(:query)'],
    ['eroshareItem', 'eroshare', 'http(s)\\://eroshare.com/i/:id(#)(:query)'],
].map(([method, label, route]) => ({
    method,
    label,
    pattern: new UrlPattern(route),
}));
/*
 * Host labels without a dedicated handler. dissectLink checks the hostname
 * segment of a link against this set and, on a hit, returns a host with
 * method 'tube' and the hostname as its label.
 *
 * Entries are kept in alphabetical order, one initial letter per row.
 */
const fallbacks = new Set([
    'bbc', 'biqle', 'buzzfeed',
    'chaturbate',
    'dailymotion',
    'eporner',
    'instagram',
    'keezmovies',
    'liveleak',
    'mixcloud',
    'pornhd', 'pornhub',
    'redtube',
    'soundcloud', 'soundgasm', 'spankbang', 'spankwire', 'streamable',
    'tiktok', 'tube8', 'tweakers', 'twitch', 'twitter',
    'vimeo',
    'xhamster', 'xnxx',
    'youjizz', 'youporn', 'youtube',
]);
module.exports = function dissectLink(url) {
return hosts.reduce((acc, host) => {
const hostMethod = hosts.reduce((acc, host) => {
if (acc) {
return acc;
}
@ -78,4 +120,20 @@ module.exports = function dissectLink(url) {
return null;
}, null);
if (hostMethod) {
return hostMethod;
}
const { hostname } = new UrlPattern('http(s)\\://(www.):hostname(*)').match(url);
if (hostname && fallbacks.has(hostname)) {
return {
url,
method: 'tube',
label: hostname,
};
}
return null;
};

View File

@ -48,7 +48,7 @@ function selfPostToText(item, post) {
return yaml.safeDump(curatedPost);
}
async function getBuffers(item, post) {
async function getBuffers(item, post, host) {
if (item.self) {
return [{
...Buffer.from(selfPostToText(item, post), 'utf8'),
@ -57,7 +57,7 @@ async function getBuffers(item, post) {
}
const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
const buffers = await Promise.map(sources, source => fetchItem(source, 0, post));
const buffers = await Promise.map(sources, source => fetchItem(source, 0, post, host));
if (buffers.filter(buffer => buffer).length > 0) {
return buffers;
@ -109,7 +109,7 @@ async function fetchSaveUserContent(user, ep, args) {
const posts = await Promise.map(user.posts, async (post) => {
const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index };
const buffers = await getBuffers(item, post);
const buffers = await getBuffers(item, post, post.host);
// no buffers, ignore item
if (!buffers || buffers.length === 0) {
@ -153,8 +153,10 @@ async function fetchSaveUserContent(user, ep, args) {
async function fetchSaveDirectContent(content, host, ep) {
return Promise.reduce(content.items, async (accItems, originalItem, index) => {
console.log(`Fetching and saving '${host.url}'`);
const item = { ...originalItem, index };
const buffers = await getBuffers(item, null);
const buffers = await getBuffers(item, null, host);
// no buffers, ignore item
if (!buffers || buffers.length === 0) {

View File

@ -46,7 +46,19 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
},
}), {});
async function getInfo(host, reddit) {
/**
 * Resolve content info for a dissected link.
 *
 * @param {?Object} host - Result of dissecting the link; null when no host matched.
 * @param {*} reddit - Passed through to the handler as its third argument.
 * @param {string} url - Original link; only used in the "unsupported" log message.
 * @returns {Promise<?Object>} Whatever the handler resolves with, or null when
 *   the link had no host and the tube attempt failed.
 */
async function getInfo(host, reddit, url) {
    // Known host: dispatch straight to the handler named by the host.
    if (host !== null) {
        return methods[host.method](host, null, reddit);
    }

    // Unknown host: try the generic tube handler and skip the link if it fails.
    // NOTE(review): the null host itself is handed to the tube handler, which reads
    // host.url, so this attempt looks like it always ends in the catch below — confirm.
    try {
        return await methods.tube(host, null, reddit);
    } catch (error) {
        console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`);

        return null;
    }
}

View File

@ -4,7 +4,7 @@ const config = require('config');
const bhttp = require('bhttp');
const blake2 = require('blake2');
async function fetchItem(url, attempt, post) {
async function fetchItem(url, attempt, post, host) {
async function retry(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
@ -24,7 +24,7 @@ async function fetchItem(url, attempt, post) {
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
}
console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`);
console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`);
const hash = blake2.createHash('blake2b', { digestLength: 24 });
hash.update(res.body);

View File

@ -11,11 +11,12 @@ async function imgurAlbumApi(host, post) {
},
});
const { data } = await res.json();
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`);
}
const { data } = await res.json();
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) {

View File

@ -13,7 +13,7 @@ async function imgurImageApi(host) {
const { data } = await res.json();
if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`);
throw new Error(`Could not fetch info for imgur image '${host.id}': ${data.error}`);
}
return {

View File

@ -6,7 +6,6 @@ const eroshareItem = require('./eroshareItem');
const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage');
const pornhub = require('./pornhub');
const redditImage = require('./redditImage');
const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo');
@ -14,6 +13,7 @@ const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage');
const vidbleVideo = require('./vidbleVideo');
const tube = require('./tube');
module.exports = {
erome,
@ -22,11 +22,11 @@ module.exports = {
gfycat,
imgurAlbum,
imgurImage,
pornhub,
redditImage,
redditPreview,
redditVideo,
self,
tube,
vidbleAlbum,
vidbleImage,
vidbleVideo,

View File

@ -3,9 +3,9 @@
const youtubedl = require('youtube-dl');
const dateFns = require('date-fns');
async function pornhub(host) {
async function tube(host) {
const data = await new Promise((resolve, reject) => {
youtubedl.getInfo(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`, null, (error, info) => {
youtubedl.getInfo(host.url, null, (error, info) => {
if (error) {
reject(error);
}
@ -21,6 +21,7 @@ async function pornhub(host) {
id: data.id,
url: data.url,
title: data.fulltitle || data.title,
description: data.description,
type: `video/${data.ext}`,
datetime: dateFns.format(data.upload_date, 'YYYYMMDD'),
original: data,
@ -29,4 +30,4 @@ async function pornhub(host) {
};
}
module.exports = pornhub;
module.exports = tube;

View File

@ -42,6 +42,11 @@ const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
name: '[deleted]',
deleted: true,
posts: [post],
indexed: {
profile: {},
original: [],
updated: [],
},
},
};
}

View File

@ -50,26 +50,57 @@ async function getArchivedPosts(username, posts, reddit) {
/**
 * Build a getUserPosts function bound to a reddit client and parsed arguments.
 *
 * The returned function takes a list of usernames and, for each one, fetches
 * the user and their posts together, reads the user's index, optionally
 * appends archived posts (args.archives), and finally folds the users that
 * have posts into an object keyed by user.name.
 *
 * NOTE(review): this span shows the previous and the revised body of the
 * function together (several statements appear twice), so it does not read as
 * a single valid program as-is.
 *
 * @param {*} reddit - Passed to getUser, getPosts and getArchivedPosts.
 * @param {Object} args - Passed to getPosts; args.archives toggles archived posts.
 * @returns {Function} async getUserPosts(usernames)
 */
function getUserPostsWrap(reddit, args) {
return async function getUserPosts(usernames) {
const users = await Promise.map(usernames, async (username) => {
const [user, posts] = await Promise.all([
getUser(username, reddit),
getPosts(username, reddit, args),
]);
try {
const users = await Promise.map(usernames, async (username) => {
// A failure for one username is logged and mapped to null (see catch below),
// so it is dropped by the final reduce instead of rejecting the whole map.
try {
const [user, posts] = await Promise.all([
getUser(username, reddit),
getPosts(username, reddit, args),
]);
const { profile, posts: indexed } = await getIndex(user);
const { profile, posts: indexed } = await getIndex(user);
if (args.archives) {
posts.push(...await getArchivedPosts(username, posts, reddit));
}
if (args.archives) {
posts.push(...await getArchivedPosts(username, posts, reddit));
}
if (posts.length) {
return { ...user, posts, indexed: { profile, original: indexed, updated: [] } };
}
// Only users with at least one post are returned; others fall through to null.
if (posts.length) {
return {
...user,
posts,
indexed: {
profile,
original: indexed,
updated: [],
},
};
}
return null;
});
return null;
} catch (error) {
console.log(username, error);
return users.reduce((userPosts, user) => (user ? { ...userPosts, [user.name]: user } : userPosts), {});
return null;
}
}, {
// Options object for Promise.map; presumably limits parallel lookups to 5 — confirm against the Promise library in use.
concurrency: 5,
});
// Skip null entries and key the remaining users by name.
return users.reduce(
(userPosts, user) => (user
? {
...userPosts,
[user.name]: user,
}
: userPosts
),
{},
);
} catch (error) {
// Log and rethrow so the caller still sees the failure.
console.log(error);
throw error;
}
};
}