Separated media request limits.

This commit is contained in:
DebaucheryLibrarian
2024-10-29 22:42:30 +01:00
parent 070ef182db
commit ea02ec3943
8 changed files with 75 additions and 13 deletions

View File

@@ -36,10 +36,11 @@ unprint.options({
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
},
limits: config.limits,
proxyAddress: `http://${config.proxy.host}:${config.proxy.port}`,
});
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p) ${event.url}`));
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.method} ${event.url} (${event.status}): ${event.statusText}`));
// Debug-log every outgoing request with its effective interval/concurrency, tagging proxied requests.
// NOTE(review): this handler reads `event.proxy` while the error handler below reads `event.proxied` —
// confirm which property unprint actually emits and align the two.
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${event.proxy ? ' proxied' : ''}) ${event.url}`));
// Error-log failed requests; the 'proxied ' tag carries its own trailing space so the message stays well-formed either way.
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.proxied ? 'proxied ' : ''}${event.method} ${event.url} (${event.status}): ${event.statusText}`));
function logActive() {
setTimeout(() => {

View File

@@ -189,6 +189,16 @@ const { argv } = yargs
type: 'number',
// don't set default, because argument has to override config, but config has to override default
})
.option('media-interval', {
describe: 'Minimum wait time between HTTP media requests',
type: 'number',
// don't set default, because argument has to override config, but config has to override default
})
.option('media-concurrency', {
describe: 'Maximum amount of parallel HTTP media requests',
type: 'number',
// don't set default, because argument has to override config, but config has to override default
})
.option('save', {
describe: 'Save fetched releases to database',
type: 'boolean',

View File

@@ -619,6 +619,7 @@ async function storeFile(media, options) {
async function fetchHttpSource(source, tempFileTarget, hashStream) {
const res = await http.get(source.src, {
limits: 'media',
headers: {
...(source.referer && { referer: source.referer }),
...(source.host && { host: source.host }),

View File

@@ -91,7 +91,9 @@ function scrapeProfile({ query }, url, channel) {
async function fetchLatest(channel, page) {
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
const res = await unprint.get(`${channel.url}/latest/${page}`, {
selectAll: '.card-scene', // studios as channels
});
if (res.ok) {
return scrapeAll(res.context, channel);
@@ -100,6 +102,39 @@ async function fetchLatest(channel, page) {
return res.status;
}
/*
async function fetchLatest(channel, page) {
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
const url = `${channel.url}/latest/${page}`; // studios as channels
const { tab } = await http.getBrowserSession('analvids', {
bypass: {
headless: false,
},
});
const res = await tab.goto(url);
const status = res.status();
console.log('STATUS', status);
if (status === 200) {
const html = await tab.content();
const context = unprint.initAll(html, '.card-scene'); // studios as channels
const scenes = scrapeAll(context, channel);
tab.close();
return scenes;
}
return res.status;
}
*/
async function getActorUrl(actor, channel) {
if (actor.url) {
return actor.url;

View File

@@ -108,20 +108,29 @@ function useCloudflareBypass(url, options) {
return null;
}
// Per limit group (the `limits` request option), maps a generic limiter property
// to the property name looked up on the parsed command line arguments instead.
const propMap = {
	media: { interval: 'mediaInterval', concurrency: 'mediaConcurrency' },
};
/**
 * Resolves the effective value of a rate limiter property for a request.
 *
 * Precedence, highest first:
 * 1. command line argument (the limit group's own argument when `propMap` defines one, e.g. `mediaInterval`),
 * 2. explicit request option,
 * 3. hostname-specific config, unless that entry is disabled with `enable: false`,
 * 4. config of the request's limit group (`options.limits`), falling back to `config.limits.default`.
 *
 * @param {string} prop - Limiter property name, e.g. `interval` or `concurrency`.
 * @param {Object} options - Request options; `options.limits` optionally names a limit group such as `media`.
 * @param {string} hostname - Hostname used to look up host-specific limits in the config.
 * @returns {*} The resolved limiter value.
 */
function getLimiterValue(prop, options, hostname) {
	// Limit groups may be controlled by their own command line argument instead of the generic one.
	const mappedProp = propMap[options.limits]?.[prop] || prop;

	if (typeof argv[mappedProp] !== 'undefined') {
		// Return the same argument that was checked; reading argv[prop] here would ignore the group-specific argument.
		return argv[mappedProp];
	}

	if (typeof options[prop] !== 'undefined') {
		return options[prop];
	}

	const hostLimits = config.limits[hostname];

	if (hostLimits?.enable !== false && typeof hostLimits?.[prop] !== 'undefined') {
		return hostLimits[prop];
	}

	// A limit group that is missing from the config (or lacks this property) uses the default limits rather than throwing.
	const groupLimits = config.limits[options.limits || 'default'];

	return groupLimits?.[prop] ?? config.limits.default[prop];
}
function getLimiter(options = {}, url) {