From 7d633c31b421533ed1ecca84d4ad1ac66ca58cd8 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 11 Sep 2024 05:16:58 +0200 Subject: [PATCH] Using Imgur API rate limit feedback to prevent exceeding it. --- src/app.js | 6 ++-- src/cli.js | 2 ++ src/fetch/info.js | 76 ++++++++++++++++++++------------------- src/methods/imgurAlbum.js | 20 +++++++++-- src/methods/imgurImage.js | 51 ++++++++++++++++++++++---- src/methods/redgifs.js | 44 +++++++++++------------ 6 files changed, 130 insertions(+), 69 deletions(-) diff --git a/src/app.js b/src/app.js index daf326f..6e4a075 100644 --- a/src/app.js +++ b/src/app.js @@ -106,10 +106,12 @@ async function getDirectContent(links, ep) { }; }); - // const predata = await fetchPredata(hosts.map(({ host }) => host)); + const predata = await fetchPredata(hosts.map(({ host }) => host)); + + console.log('app predata', predata); return Promise.map(hosts, async ({ link, host }) => { - const info = await getInfo(host, reddit, link); + const info = await getInfo(host, { reddit, link, predata }); if (info) { return fetchSaveDirectContent(info, host, ep); diff --git a/src/cli.js b/src/cli.js index d5cc6fa..e2db831 100644 --- a/src/cli.js +++ b/src/cli.js @@ -102,10 +102,12 @@ function getArgs() { choices: ['oldest', 'latest'], }) .option('redownload', { + alias: 'force', describe: 'Ignore index file and force a redownload of everything in the selection. Does not affect [before|after]-indexed', type: 'boolean', }) .option('redownload-profile', { + alias: 'force-profile', describe: 'Ignore index file and force a redownload of the profile image and description', type: 'boolean', }) diff --git a/src/fetch/info.js b/src/fetch/info.js index bd5db0d..57da353 100644 --- a/src/fetch/info.js +++ b/src/fetch/info.js @@ -6,54 +6,58 @@ const Promise = require('bluebird'); const logger = require('../logger')(__filename); const methods = require('../methods/methods'); -const attachContentInfo = (users, { reddit, predata }) => Promise.reduce(Object.values(users), async (accUsers, user) => ({ - ...accUsers, - [user.name]: { - ...user, - posts: await Promise.reduce(user.posts, async (accPosts, post) => { - if (!post.host || !methods[post.host.method]) { - logger.warn(`Ignoring unsupported content '${post.url}' (${post.permalink})`); +async function attachContentInfo(users, { reddit, predata }) { + return Promise.reduce(Object.values(users), async (accUsers, user) => ({ + ...accUsers, + [user.name]: { + ...user, + posts: await Promise.reduce(user.posts, async (accPosts, post) => { + if (!post.host || !methods[post.host.method]) { + logger.warn(`Ignoring unsupported content '${post.url}' (${post.permalink})`); - return accPosts; - } + return accPosts; + } - try { - return [ - ...accPosts, - { - ...post, - content: await (methods[post.host.method].fetchInfo || methods[post.host.method])(post.host, post, { - predata: predata[post.host.method], - reddit, - }), - }, - ]; - } catch (error) { - logger.warn(`${error.message} (${post.permalink})`); - - if (config.fetch.archives.preview && post.preview) { - logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); + console.log('attach predata', predata[post.host.method]); + try { return [ ...accPosts, { ...post, - previewFallback: true, - content: await methods.redditPreview(post.host, post, { - predata: predata.redditPreview, + content: await (methods[post.host.method].fetchInfo || methods[post.host.method])(post.host, post, { + predata: predata[post.host.method], reddit, }), }, ]; + } catch (error) { + logger.warn(`${error.message} (${post.permalink})`); + + if (config.fetch.archives.preview && post.preview) { + logger.info(`Found preview images for unavailable source '${post.url}' (${post.permalink})`); + + return [ + ...accPosts, + { + ...post, + previewFallback: true, + content: await methods.redditPreview(post.host, post, { + predata: predata.redditPreview, + reddit, + }), + }, + ]; + } + + return accPosts; } + }, []), + }, + }), {}); +} - return accPosts; - } - }, []), - }, -}), {}); - -async function getInfo(host, reddit, url) { +async function getInfo(host, { reddit, url, predata }) { if (host === null) { try { const info = await methods.tube(host, null, reddit); @@ -66,7 +70,7 @@ async function getInfo(host, reddit, url) { } } - return (methods[host.method].fetchInfo || methods[host.method])(host, null, reddit); + return (methods[host.method].fetchInfo || methods[host.method])(host, null, { reddit, predata }); } module.exports = { diff --git a/src/methods/imgurAlbum.js b/src/methods/imgurAlbum.js index 03e6262..bcbae07 100644 --- a/src/methods/imgurAlbum.js +++ b/src/methods/imgurAlbum.js @@ -4,14 +4,25 @@ const config = require('config'); const fetch = require('node-fetch'); const logger = require('../logger')(__filename); +const { fetchPredata } = require('./imgurImage'); + +async function imgurAlbumApi(host, post, { predata }) { + if (predata.remaining === 10) { // keep a buffer + throw new Error(`Reached Imgur API rate limit with source '${host.url}'`); + } -async function imgurAlbumApi(host, post) { const res = await fetch(`https://api.imgur.com/3/album/${host.id}`, { headers: { Authorization: `Client-ID ${config.methods.imgur.clientId}`, }, }); + const rateRemaining = Number(res.headers.get('x-ratelimit-userremaining')); + + if (rateRemaining) { + predata.setRemaining(rateRemaining); + } + const { data } = await res.json(); if (res.status !== 200) { @@ -33,7 +44,7 @@ async function imgurAlbumApi(host, post) { datetime: new Date(data.datetime * 1000), original: data, }, - items: data.images.map(item => ({ + items: data.images.map((item) => ({ extracted: extract, id: item.id, url: item.animated ? item.mp4 : item.link, @@ -46,4 +57,7 @@ async function imgurAlbumApi(host, post) { }; } -module.exports = imgurAlbumApi; +module.exports = { + fetchInfo: imgurAlbumApi, + fetchPredata, +}; diff --git a/src/methods/imgurImage.js b/src/methods/imgurImage.js index f130666..8cb2c2f 100644 --- a/src/methods/imgurImage.js +++ b/src/methods/imgurImage.js @@ -3,16 +3,52 @@ const config = require('config'); const fetch = require('node-fetch'); -async function imgurImageApi(host) { +async function fetchPredata() { + const data = { + limit: 0, + remaining: 0, + }; + + data.setRemaining = (remaining) => { + data.remaining = remaining; + }; + + const res = await fetch('https://api.imgur.com/3/credits', { + headers: { + Authorization: `Client-ID ${config.methods.imgur.clientId}`, + }, + }); + + if (res.ok) { + const body = await res.json(); + + if (body.success) { + data.limit = body.data.UserLimit; + data.remaining = body.data.UserRemaining; + } + } + + return data; +} + +async function imgurImageApi(host, post, { predata } = {}) { + if (predata.remaining === 10) { // keep a buffer + throw new Error(`Reached Imgur API rate limit with source '${host.url}'`); + } + const res = await fetch(`https://api.imgur.com/3/image/${host.id}`, { headers: { Authorization: `Client-ID ${config.methods.imgur.clientId}`, }, }); - console.log('imgur headers', res.headers); + const rateRemaining = Number(res.headers.get('x-ratelimit-userremaining')); - if (res.status !== 200) { + if (rateRemaining) { + predata.setRemaining(rateRemaining); + } + + if (!res.ok) { throw new Error(`Imgur API returned HTTP ${res.status} for source '${host.url}'`); } @@ -36,8 +72,11 @@ async function imgurImageApi(host) { }; } -async function imgurImage(host, post) { - return imgurImageApi(host, post); +async function imgurImage(host, post, context) { + return imgurImageApi(host, post, context); } -module.exports = imgurImage; +module.exports = { + fetchInfo: imgurImage, + fetchPredata, +}; diff --git a/src/methods/redgifs.js b/src/methods/redgifs.js index d267566..8cf80eb 100644 --- a/src/methods/redgifs.js +++ b/src/methods/redgifs.js @@ -6,6 +6,28 @@ const mime = require('mime'); const { version } = require('../../package.json'); +async function fetchPredata() { + const userAgent = `ripunzel/${version}`; + const res = await fetch('https://api.redgifs.com/v2/auth/temporary', { + headers: { + 'user-agent': userAgent, + }, + }); + + const data = await res.json(); + + if (res.ok) { + return { + address: data.addr, + agent: data.agent, + token: data.token, + userAgent, + }; + } + + return null; +} + function scrapeGallery(data) { const oldestDate = Math.min(...data.gifs.map((gif) => gif.createDate)); @@ -125,28 +147,6 @@ async function redgifs(host, post, { predata }) { */ } -async function fetchPredata() { - const userAgent = `ripunzel/${version}`; - const res = await fetch('https://api.redgifs.com/v2/auth/temporary', { - headers: { - 'user-agent': userAgent, - }, - }); - - const data = await res.json(); - - if (res.ok) { - return { - address: data.addr, - agent: data.agent, - token: data.token, - userAgent, - }; - } - - return null; -} - module.exports = { fetchInfo: redgifs, fetchPredata,