ripunzel/src/fetch/item.js

56 lines
1.7 KiB
JavaScript

'use strict';
const config = require('config');
const bhttp = require('bhttp');
const blake2 = require('blake2');
const phash = require('sharp-phash');
// const phashDistance = require('sharp-phash/distance');
const logger = require('../logger')(__filename);
const limiter = require('../limiter').items;
/**
 * Download a single item and fingerprint its content.
 *
 * The request is scheduled through the shared item limiter. Any failure inside
 * the attempt (a rejected request, a status other than 200, a body that is not
 * a Buffer, or an error while hashing) is logged as a warning and retried by
 * calling this function again with `attempt + 1`, for as long as `attempt` is
 * below `config.fetch.retries`. Once retries are exhausted the function
 * resolves to `null` instead of rejecting.
 *
 * @param {string} url - Address to request.
 * @param {number} [attempt=0] - Attempt counter for this call, compared against `config.fetch.retries`.
 * @param {object} [context={}] - Optional request context.
 * @param {object} [context.headers] - Headers forwarded to the HTTP request.
 * @param {object} [context.post] - Its `permalink` is included in log messages.
 * @param {object} [context.host] - Its `url`, when set, is logged instead of `url` on success.
 * @param {object} [context.item] - A perceptual hash is computed when its `type` includes 'image/'.
 * @returns {Promise<?{buffer: Buffer, hash: string, phash: *}>} The response body, its hex
 *   BLAKE2b digest (24-byte digest length) and the perceptual hash (`null` for non-images),
 *   or `null` when every attempt failed.
 */
async function fetchItem(url, attempt = 0, context = {}) {
  // Same fallback for the success and failure logs, so both read alike.
  const postLabel = context.post?.permalink || 'no post';

  // Log the failure, then either start the next attempt or give up with null.
  async function retry(error) {
    const canRetry = attempt < config.fetch.retries;

    logger.warn(`Failed to fetch '${url}', ${canRetry ? 'retrying' : 'giving up'}: ${error.message} (${postLabel})`);

    if (canRetry) {
      return fetchItem(url, attempt + 1, context);
    }

    return null;
  }

  try {
    const res = await limiter.schedule(async () => bhttp.get(url, { headers: context.headers }));

    if (res.statusCode !== 200) {
      throw new Error(`Response not OK for ${url} (${res.statusCode}): ${res.body.toString()}`);
    }

    if (!Buffer.isBuffer(res.body)) {
      throw new Error(`Unexpected response for ${url} (${res.statusCode}): ${res.body}`);
    }

    logger.debug(`Fetched '${context.host?.url || url}' (${postLabel})`);

    const hash = blake2.createHash('blake2b', { digestLength: 24 });
    hash.update(res.body);
    const contentHash = hash.digest('hex');

    // Only image items get a perceptual hash; everything else reports null.
    const phashResult = context.item?.type?.includes('image/')
      ? await phash(res.body)
      : null;

    return {
      buffer: res.body,
      hash: contentHash,
      phash: phashResult,
    };
  } catch (error) {
    // Errors from hashing land here too, so they trigger a re-fetch as well.
    return retry(error);
  }
}
// Expose fetchItem as this module's single export.
module.exports = fetchItem;