Added support for various tube and social media sites via youtube-dl.

This commit is contained in:
DebaucheryLibrarian 2024-09-11 05:16:58 +02:00
parent 17c7acd6a2
commit 6e3bfdd175
13 changed files with 209 additions and 93 deletions

View File

@ -15,7 +15,9 @@ Most features are optional and can easily be disabled!
* Reddit text/self, images and videos[\*](#reddit-videos) * Reddit text/self, images and videos[\*](#reddit-videos)
* Imgur (requires API key as of late 2019) * Imgur (requires API key as of late 2019)
* Gfycat * Gfycat
* PornHub (videos) * YouTube
* PornHub, YouPorn, xHamster, RedTube, xnxx, YouJizz
* Twitter, Facebook, Instagram
* Erome * Erome
* Vidble * Vidble
* Eroshare archive * Eroshare archive

View File

@ -27,7 +27,7 @@ async function getFileContents(location, label) {
try { try {
const fileContents = await fs.readFile(location, 'utf8'); const fileContents = await fs.readFile(location, 'utf8');
return fileContents.split('\n').filter(entry => entry); return fileContents.split('\n').filter(entry => entry && entry.slice(0, 1) !== '#');
} catch (error) { } catch (error) {
console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`); console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
@ -73,9 +73,13 @@ async function getCompletePosts() {
async function getDirectContent(links, ep) { async function getDirectContent(links, ep) {
return Promise.map(links, async (link) => { return Promise.map(links, async (link) => {
const host = dissectLink(link); const host = dissectLink(link);
const info = await getInfo(host); const info = await getInfo(host, reddit, link);
if (info) {
return fetchSaveDirectContent(info, host, ep); return fetchSaveDirectContent(info, host, ep);
}
return null;
}, { }, {
concurrency: 5, concurrency: 5,
}); });
@ -83,7 +87,7 @@ async function getDirectContent(links, ep) {
async function getCompleteContents(ep) { async function getCompleteContents(ep) {
if (args.fetch) { if (args.fetch) {
return getDirectContent([args.fetch], ep); return getDirectContent(args.fetch, ep);
} }
if (args.fileDirect) { if (args.fileDirect) {

View File

@ -26,7 +26,7 @@ function getArgs() {
}) })
.option('direct', { .option('direct', {
describe: 'Get content directly from imgur and other hosts', describe: 'Get content directly from imgur and other hosts',
type: 'string', type: 'array',
alias: 'fetch', alias: 'fetch',
}) })
.option('file-direct', { .option('file-direct', {

View File

@ -2,66 +2,108 @@
const UrlPattern = require('url-pattern'); const UrlPattern = require('url-pattern');
const hosts = [{ const hosts = [
{
method: 'self', method: 'self',
label: 'self', label: 'self',
pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'), pattern: new UrlPattern('http(s)\\://(www.)reddit.com/r/:subreddit/comments/:id/:uri/'),
}, { },
{
method: 'redditImage', method: 'redditImage',
label: 'reddit', label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'), pattern: new UrlPattern('http(s)\\://i.redd.it/:id.:ext(?*)'),
}, { },
{
method: 'redditImage', method: 'redditImage',
label: 'reddit', label: 'reddit',
pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'), pattern: new UrlPattern('http(s)\\://i.reddituploads.com/:id(?*)'),
}, { },
{
method: 'redditVideo', method: 'redditVideo',
label: 'reddit', label: 'reddit',
pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'), pattern: new UrlPattern('http(s)\\://v.redd.it/:id(?*)'),
}, { },
{
method: 'imgurImage', method: 'imgurImage',
label: 'imgur', label: 'imgur',
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'), pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/(:id_d)(:id)(.:ext)(?*)'),
}, { },
{
method: 'imgurAlbum', method: 'imgurAlbum',
label: 'imgur', label: 'imgur',
pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'), pattern: new UrlPattern('http(s)\\://(:subdomain.)imgur.com/:type/:id(#:focus)(?*)'),
}, { },
{
method: 'vidbleImage', method: 'vidbleImage',
label: 'vidble', label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'), pattern: new UrlPattern('http(s)\\://(www.)vidble.com/(show/):id(.:ext)(?*)'),
}, { },
{
method: 'vidbleVideo', method: 'vidbleVideo',
label: 'vidble', label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'), pattern: new UrlPattern('http(s)\\://(www.)vidble.com/watch?v=:id(?*)'),
}, { },
{
method: 'vidbleAlbum', method: 'vidbleAlbum',
label: 'vidble', label: 'vidble',
pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'), pattern: new UrlPattern('http(s)\\://(www.)vidble.com/album/:id(?*)'),
}, { },
{
method: 'gfycat', method: 'gfycat',
label: 'gfycat', label: 'gfycat',
pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'), pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'),
}, { },
{
method: 'erome', method: 'erome',
label: 'erome', label: 'erome',
pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'), pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'),
}, { },
{
method: 'eroshareAlbum', method: 'eroshareAlbum',
label: 'eroshare', label: 'eroshare',
pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'), pattern: new UrlPattern('http(s)\\://eroshare.com/:id(#)(:query)'),
}, { },
{
method: 'eroshareItem', method: 'eroshareItem',
label: 'eroshare', label: 'eroshare',
pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'), pattern: new UrlPattern('http(s)\\://eroshare.com/i/:id(#)(:query)'),
}, { },
method: 'pornhub', ];
label: 'pornhub',
pattern: new UrlPattern('http(s)\\://(www.)pornhub.com/view_video.php?viewkey=(:id)(?*)'), const fallbacks = new Set([
}]; 'bbc',
'biqle',
'buzzfeed',
'chaturbate',
'dailymotion',
'eporner',
'instagram',
'keezmovies',
'liveleak',
'mixcloud',
'pornhd',
'pornhub',
'redtube',
'soundcloud',
'soundgasm',
'spankbang',
'spankwire',
'streamable',
'tiktok',
'tube8',
'tweakers',
'twitch',
'twitter',
'vimeo',
'xhamster',
'xnxx',
'youjizz',
'youporn',
'youtube',
]);
module.exports = function dissectLink(url) { module.exports = function dissectLink(url) {
return hosts.reduce((acc, host) => { const hostMethod = hosts.reduce((acc, host) => {
if (acc) { if (acc) {
return acc; return acc;
} }
@ -78,4 +120,20 @@ module.exports = function dissectLink(url) {
return null; return null;
}, null); }, null);
if (hostMethod) {
return hostMethod;
}
const { hostname } = new UrlPattern('http(s)\\://(www.):hostname(*)').match(url);
if (hostname && fallbacks.has(hostname)) {
return {
url,
method: 'tube',
label: hostname,
};
}
return null;
}; };

View File

@ -48,7 +48,7 @@ function selfPostToText(item, post) {
return yaml.safeDump(curatedPost); return yaml.safeDump(curatedPost);
} }
async function getBuffers(item, post) { async function getBuffers(item, post, host) {
if (item.self) { if (item.self) {
return [{ return [{
...Buffer.from(selfPostToText(item, post), 'utf8'), ...Buffer.from(selfPostToText(item, post), 'utf8'),
@ -57,7 +57,7 @@ async function getBuffers(item, post) {
} }
const sources = item.mux ? [item.url].concat(item.mux) : [item.url]; const sources = item.mux ? [item.url].concat(item.mux) : [item.url];
const buffers = await Promise.map(sources, source => fetchItem(source, 0, post)); const buffers = await Promise.map(sources, source => fetchItem(source, 0, post, host));
if (buffers.filter(buffer => buffer).length > 0) { if (buffers.filter(buffer => buffer).length > 0) {
return buffers; return buffers;
@ -109,7 +109,7 @@ async function fetchSaveUserContent(user, ep, args) {
const posts = await Promise.map(user.posts, async (post) => { const posts = await Promise.map(user.posts, async (post) => {
const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => { const hash = await Promise.reduce(post.content.items, async (accItems, originalItem, index) => {
const item = { ...originalItem, index }; const item = { ...originalItem, index };
const buffers = await getBuffers(item, post); const buffers = await getBuffers(item, post, post.host);
// no buffers, ignore item // no buffers, ignore item
if (!buffers || buffers.length === 0) { if (!buffers || buffers.length === 0) {
@ -153,8 +153,10 @@ async function fetchSaveUserContent(user, ep, args) {
async function fetchSaveDirectContent(content, host, ep) { async function fetchSaveDirectContent(content, host, ep) {
return Promise.reduce(content.items, async (accItems, originalItem, index) => { return Promise.reduce(content.items, async (accItems, originalItem, index) => {
console.log(`Fetching and saving '${host.url}'`);
const item = { ...originalItem, index }; const item = { ...originalItem, index };
const buffers = await getBuffers(item, null); const buffers = await getBuffers(item, null, host);
// no buffers, ignore item // no buffers, ignore item
if (!buffers || buffers.length === 0) { if (!buffers || buffers.length === 0) {

View File

@ -46,7 +46,19 @@ const attachContentInfo = (users, reddit) => Promise.reduce(Object.values(users)
}, },
}), {}); }), {});
async function getInfo(host, reddit) { async function getInfo(host, reddit, url) {
if (host === null) {
try {
const info = await methods.tube(host, null, reddit);
return info;
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring unsupported content '${url}'`);
return null;
}
}
return methods[host.method](host, null, reddit); return methods[host.method](host, null, reddit);
} }

View File

@ -4,7 +4,7 @@ const config = require('config');
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const blake2 = require('blake2'); const blake2 = require('blake2');
async function fetchItem(url, attempt, post) { async function fetchItem(url, attempt, post, host) {
async function retry(error) { async function retry(error) {
console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`); console.log('\x1b[31m%s\x1b[0m', `Failed to fetch '${url}': ${error.message} (${post ? post.permalink : 'no post'})`);
@ -24,7 +24,7 @@ async function fetchItem(url, attempt, post) {
throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`); throw new Error(`Response not OK for '${url}', HTTP code '${res.status}'`);
} }
console.log(`Fetched '${url}' (${post ? post.permalink : 'no post'})`); console.log(`Fetched '${host.url}' (${post ? post.permalink : 'no post'})`);
const hash = blake2.createHash('blake2b', { digestLength: 24 }); const hash = blake2.createHash('blake2b', { digestLength: 24 });
hash.update(res.body); hash.update(res.body);

View File

@ -11,11 +11,12 @@ async function imgurAlbumApi(host, post) {
}, },
}); });
const { data } = await res.json();
if (res.status !== 200) { if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`); throw new Error(`Could not fetch info for imgur album '${host.id}': ${res.data.error}`);
} }
const { data } = await res.json();
const extract = config.library.extractSingleAlbumItem && data.images.length === 1; const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) { if (extract) {

View File

@ -13,7 +13,7 @@ async function imgurImageApi(host) {
const { data } = await res.json(); const { data } = await res.json();
if (res.status !== 200) { if (res.status !== 200) {
throw new Error(`Could not fetch info for imgur image '${host.id}': ${res.data.error}`); throw new Error(`Could not fetch info for imgur image '${host.id}': ${data.error}`);
} }
return { return {

View File

@ -6,7 +6,6 @@ const eroshareItem = require('./eroshareItem');
const gfycat = require('./gfycat'); const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum'); const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage'); const imgurImage = require('./imgurImage');
const pornhub = require('./pornhub');
const redditImage = require('./redditImage'); const redditImage = require('./redditImage');
const redditPreview = require('./redditPreview'); const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo'); const redditVideo = require('./redditVideo');
@ -14,6 +13,7 @@ const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum'); const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage'); const vidbleImage = require('./vidbleImage');
const vidbleVideo = require('./vidbleVideo'); const vidbleVideo = require('./vidbleVideo');
const tube = require('./tube');
module.exports = { module.exports = {
erome, erome,
@ -22,11 +22,11 @@ module.exports = {
gfycat, gfycat,
imgurAlbum, imgurAlbum,
imgurImage, imgurImage,
pornhub,
redditImage, redditImage,
redditPreview, redditPreview,
redditVideo, redditVideo,
self, self,
tube,
vidbleAlbum, vidbleAlbum,
vidbleImage, vidbleImage,
vidbleVideo, vidbleVideo,

View File

@ -3,9 +3,9 @@
const youtubedl = require('youtube-dl'); const youtubedl = require('youtube-dl');
const dateFns = require('date-fns'); const dateFns = require('date-fns');
async function pornhub(host) { async function tube(host) {
const data = await new Promise((resolve, reject) => { const data = await new Promise((resolve, reject) => {
youtubedl.getInfo(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`, null, (error, info) => { youtubedl.getInfo(host.url, null, (error, info) => {
if (error) { if (error) {
reject(error); reject(error);
} }
@ -21,6 +21,7 @@ async function pornhub(host) {
id: data.id, id: data.id,
url: data.url, url: data.url,
title: data.fulltitle || data.title, title: data.fulltitle || data.title,
description: data.description,
type: `video/${data.ext}`, type: `video/${data.ext}`,
datetime: dateFns.format(data.upload_date, 'YYYYMMDD'), datetime: dateFns.format(data.upload_date, 'YYYYMMDD'),
original: data, original: data,
@ -29,4 +30,4 @@ async function pornhub(host) {
}; };
} }
module.exports = pornhub; module.exports = tube;

View File

@ -42,6 +42,11 @@ const getPostsWrap = reddit => function getPosts(postIds, userPosts = {}) {
name: '[deleted]', name: '[deleted]',
deleted: true, deleted: true,
posts: [post], posts: [post],
indexed: {
profile: {},
original: [],
updated: [],
},
}, },
}; };
} }

View File

@ -50,7 +50,9 @@ async function getArchivedPosts(username, posts, reddit) {
function getUserPostsWrap(reddit, args) { function getUserPostsWrap(reddit, args) {
return async function getUserPosts(usernames) { return async function getUserPosts(usernames) {
try {
const users = await Promise.map(usernames, async (username) => { const users = await Promise.map(usernames, async (username) => {
try {
const [user, posts] = await Promise.all([ const [user, posts] = await Promise.all([
getUser(username, reddit), getUser(username, reddit),
getPosts(username, reddit, args), getPosts(username, reddit, args),
@ -63,13 +65,42 @@ function getUserPostsWrap(reddit, args) {
} }
if (posts.length) { if (posts.length) {
return { ...user, posts, indexed: { profile, original: indexed, updated: [] } }; return {
...user,
posts,
indexed: {
profile,
original: indexed,
updated: [],
},
};
} }
return null; return null;
} catch (error) {
console.log(username, error);
return null;
}
}, {
concurrency: 5,
}); });
return users.reduce((userPosts, user) => (user ? { ...userPosts, [user.name]: user } : userPosts), {}); return users.reduce(
(userPosts, user) => (user
? {
...userPosts,
[user.name]: user,
}
: userPosts
),
{},
);
} catch (error) {
console.log(error);
throw error;
}
}; };
} }