Added support for RedGifs and Reddit albums. Improved command line logger. Added rate limiters for Reddit and host URLs.

This commit is contained in:
2024-09-11 05:16:58 +02:00
parent bb06fe9763
commit de50d609f3
33 changed files with 586 additions and 256 deletions

View File

@@ -4,6 +4,8 @@ const config = require('config');
const fetch = require('node-fetch');
const cheerio = require('cheerio');
const logger = require('../logger')(__filename);
const base = 'https://www.erome.com/';
async function erome(host, post) {
@@ -50,7 +52,7 @@ async function erome(host, post) {
const extract = config.library.extractSingleAlbumItem && (items.length === 1);
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${url}' (${post ? post.url : 'no post'})`);
}
return {

View File

@@ -1,13 +1,28 @@
'use strict';
const fetch = require('node-fetch');
const bhttp = require('bhttp');
const redgifs = require('./redgifs');
async function gfycat(host) {
const res = await fetch(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.json();
const res = await bhttp.get(`https://api.gfycat.com/v1/gfycats/${host.id}`);
const data = await res.body;
if (data.error) {
throw new Error(data.error);
if (data.errorMessage) {
const redirectRes = await bhttp.head(host.url, {
followRedirects: false,
});
if (redirectRes.statusCode === 301) {
// Gfycat redirects all NSFW gifs to RedGifs, likely the case
return redgifs({
...host,
url: `https://www.redgifs.com/watch/${host.id}`,
method: 'redgifs',
label: 'redgifs',
});
}
throw new Error(`Gfycat API returned error for source '${host.url}' (${res.status}): ${data.errorMessage}`);
}
return {

View File

@@ -2,7 +2,8 @@
const config = require('config');
const fetch = require('node-fetch');
// const mime = require('mime-types');
const logger = require('../logger')(__filename);
async function imgurAlbumApi(host, post) {
const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, {
@@ -20,7 +21,7 @@ async function imgurAlbumApi(host, post) {
const extract = config.library.extractSingleAlbumItem && data.images.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
logger.verbose(`Extracting single item from album '${data.link}' (${post ? post.url : 'no post'})`);
}
return {
@@ -45,46 +46,4 @@ async function imgurAlbumApi(host, post) {
};
}
/*
* as of late 2019, imgur requires log in to view albums and gallery images
async function imgurAlbum(host, post) {
const res = await fetch(`https://imgur.com/a/${post.host.id}`);
const html = await res.text();
if (res.status !== 200) {
if (config.methods.imgur.clientId) {
console.log('\x1b[31m%s\x1b[0m', `Could not fetch info for direct imgur album '${post.host.id}' (${res.statusText}), trying API fallback (${post.permalink})`);
return imgurAlbumApi(post);
}
throw new Error(`Could not fetch info for imgur album '${post.host.id}' (${res.statusText}) no API fallback configured`);
}
const dataString = html.replace(/\s+/g, ' ').match(/}}, item:(.*)}; var PREBID_TIMEOUT/)[1];
const data = JSON.parse(dataString);
const extract = config.library.album.extractSingleItem && data.album_images.images.length === 1;
return {
album: extract ? null : {
id: data.id,
url: `https://imgur.com/a/${post.host.id}`,
title: data.title,
description: data.description,
datetime: new Date(data.datetime),
},
items: data.album_images.images.map(item => ({
extracted: extract,
id: item.hash,
url: data.animated ? `https://i.imgur.com/${item.hash}.mp4` : `https://i.imgur.com/${item.hash}${item.ext}`,
title: item.title || (extract ? data.title : null),
description: item.description || (extract ? data.description : null),
type: item.animated ? 'video/mp4' : mime.lookup(item.ext.split('?')[0]),
datetime: new Date(item.datetime),
})),
};
}
*/
module.exports = imgurAlbumApi;

View File

@@ -10,6 +10,10 @@ async function imgurImageApi(host) {
},
});
if (res.status !== 200) {
throw new Error(`Imgur API returned HTTP ${res.status} for source '${host.url}'`);
}
const { data } = await res.json();
if (res.status !== 200) {
@@ -32,37 +36,6 @@ async function imgurImageApi(host) {
/**
 * Resolve an imgur image source into album/item data.
 *
 * @param {object} host - Host descriptor for the imgur image.
 * @param {object} [post] - Post the source belongs to; forwarded unchanged.
 * @returns {Promise<*>} Whatever imgurImageApi resolves with.
 */
async function imgurImage(host, post) {
  // As of late 2019, imgur requires log in to view albums and gallery images,
  // so page scraping was dropped and every lookup goes through the API.
  const result = await imgurImageApi(host, post);

  return result;
}
module.exports = imgurImage;

View File

@@ -7,8 +7,10 @@ const gfycat = require('./gfycat');
const imgurAlbum = require('./imgurAlbum');
const imgurImage = require('./imgurImage');
const redditImage = require('./redditImage');
const redditAlbum = require('./redditAlbum');
const redditPreview = require('./redditPreview');
const redditVideo = require('./redditVideo');
const redgifs = require('./redgifs');
const self = require('./self');
const vidbleAlbum = require('./vidbleAlbum');
const vidbleImage = require('./vidbleImage');
@@ -23,8 +25,10 @@ module.exports = {
imgurAlbum,
imgurImage,
redditImage,
redditAlbum,
redditPreview,
redditVideo,
redgifs,
self,
tube,
vidbleAlbum,

View File

@@ -0,0 +1,32 @@
'use strict';
const mime = require('mime');
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
/**
 * Derive an item ID from an image URL: the URL path without its leading
 * slash and without its file extension, whatever that extension is.
 *
 * @param {string} url - Absolute URL of a gallery image.
 * @returns {string} Path-based identifier, e.g. 'abc123' for '/abc123.png'.
 */
function getRedditAlbumItemId(url) {
  return new URL(url).pathname
    .replace(/^\//, '')
    .replace(/\.[^./]+$/, '');
}

/**
 * Fetch a Reddit album page and list the images it links to.
 *
 * @param {object} host - Host descriptor; `host.url` is fetched, `host.id` and
 *   `host.url` are copied into the album.
 * @param {object} [post] - Post the album belongs to; supplies title and
 *   datetime. When absent, those fields are null.
 * @returns {Promise<{album: object, items: object[]}>} Album info and one item
 *   per `li a` link found in the page.
 * @throws {Error} When the page does not respond with HTTP 200; the message is
 *   the response body.
 */
async function redditAlbum(host, post) {
  const res = await bhttp.get(host.url);

  if (res.statusCode !== 200) {
    throw new Error(res.body.toString());
  }

  // NOTE(review): runScripts 'dangerously' executes the scripts of the fetched
  // page inside this process; confirm this is required to render the links.
  const { document } = new JSDOM(res.body.toString(), { runScripts: 'dangerously' }).window;
  const urls = Array.from(document.querySelectorAll('li a'), (el) => el.href);

  return {
    album: {
      id: host.id,
      url: host.url,
      title: post ? post.title : null,
    },
    items: urls.map((url) => ({
      // Not every item is a .jpg; strip any extension instead of matching one.
      id: getRedditAlbumItemId(url),
      url,
      datetime: post ? post.datetime : null,
      // Look up the type from the path only, a query string hides the extension.
      type: mime.getType(new URL(url).pathname) || 'image/jpeg',
    })),
  };
}
module.exports = redditAlbum;

View File

@@ -1,6 +1,6 @@
'use strict';
const mime = require('mime-types');
const mime = require('mime');
async function redditImage(host, post) {
return {
@@ -10,7 +10,7 @@ async function redditImage(host, post) {
url: post.url,
title: post.title,
datetime: post.datetime,
type: mime.lookup(post.url.split('/.')[0]) || 'image/jpeg',
type: mime.getType(post.url) || 'image/jpeg',
original: post,
}],
};

69
src/methods/redgifs.js Normal file
View File

@@ -0,0 +1,69 @@
'use strict';
const fetch = require('node-fetch');
const mime = require('mime');
/**
 * Convert a RedGifs gallery API response into album/item data.
 *
 * @param {object} data - Gallery response; reads `data.id` and `data.gifs`,
 *   and per gif `id`, `urls.hd`, `tags` and `createDate` (seconds, multiplied
 *   by 1000 to build a Date).
 * @returns {{album: {id: *, datetime: (Date|null)}, items: object[]}} The album
 *   dated by its oldest gif, plus one item per gif. The album datetime is null
 *   when no finite creation date is available (e.g. an empty gallery).
 */
function scrapeGallery(data) {
  // Fold instead of spreading into Math.min, so large galleries do not turn
  // into an equally large argument list.
  const oldestDate = data.gifs.reduce(
    (oldest, gif) => Math.min(oldest, gif.createDate),
    Infinity,
  );

  return {
    album: {
      id: data.id,
      // An empty gallery leaves Infinity here, which would become an Invalid Date.
      datetime: Number.isFinite(oldestDate) ? new Date(oldestDate * 1000) : null,
    },
    items: data.gifs.map((gif) => ({
      id: gif.id,
      url: gif.urls.hd,
      description: gif.tags.join(', '),
      type: mime.getType(gif.urls.hd),
      datetime: new Date(gif.createDate * 1000),
      original: gif,
    })),
  };
}
/**
 * Fetch a RedGifs gallery by ID and convert it with scrapeGallery.
 *
 * @param {string} galleryId - Gallery ID, interpolated into the API URL as is.
 * @returns {Promise<(object|null)>} The scraped gallery, or null when the
 *   response carries no `gifs`.
 */
async function fetchGallery(galleryId) {
  const response = await fetch(`https://api.redgifs.com/v2/gallery/${galleryId}`);
  const gallery = await response.json();

  return gallery.gifs ? scrapeGallery(gallery) : null;
}
/**
 * Look up a RedGifs source through the v2 API.
 *
 * @param {object} host - Host descriptor; `host.id` is lower-cased for the
 *   request and `host.url` is only used in the error message.
 * @returns {Promise<(object|null)>} One of:
 *   - the scraped gallery, when the response itself is a gallery (`id` + `gifs`);
 *   - the result of fetchGallery, when the gif belongs to a gallery;
 *   - a single-item result with `album: null`;
 *   - null, when the response has no `gif`.
 * @throws {Error} When the response contains an `errorMessage`.
 */
async function redgifs(host) {
  const gifId = host.id.toLowerCase();
  const res = await fetch(`https://api.redgifs.com/v2/gifs/${gifId}`);
  const data = await res.json();

  const { errorMessage, gif } = data;

  if (errorMessage) {
    throw new Error(`RedGifs API returned error for source '${host.url}' (${res.status}): ${errorMessage.description}`);
  }

  // The gif endpoint answered with a whole gallery.
  if (data.id && data.gifs) {
    return scrapeGallery(data);
  }

  if (!gif) {
    return null;
  }

  // The gif is part of a gallery; return the gallery instead of the single gif.
  if (gif.gallery) {
    return fetchGallery(gif.gallery);
  }

  const item = {
    id: gif.id,
    url: gif.urls.hd,
    description: gif.tags.join(', '),
    type: mime.getType(gif.urls.hd),
    datetime: new Date(gif.createDate * 1000),
    original: gif,
  };

  return {
    album: null,
    items: [item],
  };
}
module.exports = redgifs;

View File

@@ -3,6 +3,8 @@
const youtubedl = require('youtube-dl');
const dateFns = require('date-fns');
const logger = require('../logger')(__filename);
async function tube(host, post) {
try {
const data = await new Promise((resolve, reject) => {
@@ -15,7 +17,7 @@ async function tube(host, post) {
});
});
host.id = data.display_id;
host.id = data.display_id; // eslint-disable-line no-param-reassign
return {
album: null,
@@ -32,7 +34,7 @@ async function tube(host, post) {
],
};
} catch (error) {
console.log('\x1b[33m%s\x1b[0m', `Ignoring possible profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
logger.warn(`Ignoring possible image or profile page '${host.url}' (${post ? post.permalink : 'no post'})`);
return null;
}

View File

@@ -6,6 +6,8 @@ const UrlPattern = require('url-pattern');
const cheerio = require('cheerio');
const mime = require('mime-types');
const logger = require('../logger')(__filename);
const pattern = new UrlPattern('https\\://(www.)vidble.com/:id(_med)(.:ext)');
async function vidbleAlbum(host, post) {
@@ -23,7 +25,7 @@ async function vidbleAlbum(host, post) {
const extract = config.library.extractSingleAlbumItem && imgUrls.length === 1;
if (extract) {
console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${post.title}' - ${res.link}`);
logger.verbose(`Extracting single item from album '${post.title}' - ${res.link}`);
}
return {
@@ -43,7 +45,7 @@ async function vidbleAlbum(host, post) {
id,
url: `https://vidble.com/${id}.${components.ext}`,
type: mimetype,
datetime: post.datetime,
datetime: post ? post.datetime : null,
};
}),
};