Separated media request limits.

This commit is contained in:
DebaucheryLibrarian 2024-10-29 22:42:30 +01:00
parent 070ef182db
commit ea02ec3943
8 changed files with 75 additions and 13 deletions

View File

@ -301,6 +301,7 @@ module.exports = {
}, },
proxy: { proxy: {
enable: false, enable: false,
protocol: 'http',
host: '', host: '',
port: 8888, port: 8888,
hostnames: [ hostnames: [
@ -376,6 +377,10 @@ module.exports = {
interval: 50, interval: 50,
concurrency: 20, concurrency: 20,
}, },
media: {
interval: 50,
concurrency: 20,
},
'www.kink.com': { 'www.kink.com': {
interval: 1000, interval: 1000,
concurrency: 1, concurrency: 1,

11
package-lock.json generated
View File

@ -89,7 +89,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.37", "ua-parser-js": "^1.0.37",
"undici": "^5.28.1", "undici": "^5.28.1",
"unprint": "^0.13.3", "unprint": "^0.14.1",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3", "v-tooltip": "^2.1.3",
"video.js": "^8.6.1", "video.js": "^8.6.1",
@ -18312,9 +18312,9 @@
} }
}, },
"node_modules/unprint": { "node_modules/unprint": {
"version": "0.13.3", "version": "0.14.1",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.13.3.tgz", "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.14.1.tgz",
"integrity": "sha512-HRpW+OdKmtW+cLnvLqYNVL2voH3aGvene8fxzAQzw2O0zPQrgv2iz5YivfQpxNyKsF+2jeUUma2ttWH8IttkHg==", "integrity": "sha512-LpsktR7NK3iDaYfy1HpNOiYoKGzLSq6wDhQN7RcwTQVJMz9kE0qQ8DS+ru2L76j52lq4v6oBktpnghbe//s3Mw==",
"dependencies": { "dependencies": {
"axios": "^0.27.2", "axios": "^0.27.2",
"bottleneck": "^2.19.5", "bottleneck": "^2.19.5",
@ -18323,7 +18323,8 @@
"eslint-config-airbnb": "^19.0.4", "eslint-config-airbnb": "^19.0.4",
"eslint-config-airbnb-base": "^15.0.0", "eslint-config-airbnb-base": "^15.0.0",
"jsdom": "^17.0.0", "jsdom": "^17.0.0",
"moment-timezone": "^0.5.34" "moment-timezone": "^0.5.34",
"tunnel": "^0.0.6"
} }
}, },
"node_modules/unprint/node_modules/@tootallnate/once": { "node_modules/unprint/node_modules/@tootallnate/once": {

View File

@ -148,7 +148,7 @@
"tunnel": "0.0.6", "tunnel": "0.0.6",
"ua-parser-js": "^1.0.37", "ua-parser-js": "^1.0.37",
"undici": "^5.28.1", "undici": "^5.28.1",
"unprint": "^0.13.3", "unprint": "^0.14.1",
"url-pattern": "^1.0.3", "url-pattern": "^1.0.3",
"v-tooltip": "^2.1.3", "v-tooltip": "^2.1.3",
"video.js": "^8.6.1", "video.js": "^8.6.1",

View File

@ -36,10 +36,11 @@ unprint.options({
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
}, },
limits: config.limits, limits: config.limits,
proxyAddress: `http://${config.proxy.host}:${config.proxy.port}`,
}); });
// Log every request unprint starts, with the limiter settings that applied to it and
// a ' proxied' marker when the request goes through the proxy.
// NOTE(review): the two handlers originally read different flags (`event.proxy` here,
// `event.proxied` below); both are accepted until the event payload is confirmed against unprint.
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${event.proxied || event.proxy ? ' proxied' : ''}) ${event.url}`));

// Log failed requests; the optional 'proxied ' marker is placed before the method so
// the words stay separated ("Unprint failed proxied GET ...").
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.proxied || event.proxy ? 'proxied ' : ''}${event.method} ${event.url} (${event.status}): ${event.statusText}`));
function logActive() { function logActive() {
setTimeout(() => { setTimeout(() => {

View File

@ -189,6 +189,16 @@ const { argv } = yargs
type: 'number', type: 'number',
// don't set default, because argument has to override config, but config has to override default // don't set default, because argument has to override config, but config has to override default
}) })
.option('media-interval', {
describe: 'Minimum wait time between HTTP media requests',
type: 'number',
// don't set default, because argument has to override config, but config has to override default
})
.option('media-concurrency', {
describe: 'Maximum amount of parallel HTTP media requests',
type: 'number',
// don't set default, because argument has to override config, but config has to override default
})
.option('save', { .option('save', {
describe: 'Save fetched releases to database', describe: 'Save fetched releases to database',
type: 'boolean', type: 'boolean',

View File

@ -619,6 +619,7 @@ async function storeFile(media, options) {
async function fetchHttpSource(source, tempFileTarget, hashStream) { async function fetchHttpSource(source, tempFileTarget, hashStream) {
const res = await http.get(source.src, { const res = await http.get(source.src, {
limits: 'media',
headers: { headers: {
...(source.referer && { referer: source.referer }), ...(source.referer && { referer: source.referer }),
...(source.host && { host: source.host }), ...(source.host && { host: source.host }),

View File

@ -91,7 +91,9 @@ function scrapeProfile({ query }, url, channel) {
async function fetchLatest(channel, page) { async function fetchLatest(channel, page) {
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel // const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels const res = await unprint.get(`${channel.url}/latest/${page}`, {
selectAll: '.card-scene', // studios as channels
});
if (res.ok) { if (res.ok) {
return scrapeAll(res.context, channel); return scrapeAll(res.context, channel);
@ -100,6 +102,39 @@ async function fetchLatest(channel, page) {
return res.status; return res.status;
} }
/*
async function fetchLatest(channel, page) {
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
const url = `${channel.url}/latest/${page}`; // studios as channels
const { tab } = await http.getBrowserSession('analvids', {
bypass: {
headless: false,
},
});
const res = await tab.goto(url);
const status = res.status();
console.log('STATUS', status);
if (status === 200) {
const html = await tab.content();
const context = unprint.initAll(html, '.card-scene'); // studios as channels
const scenes = scrapeAll(context, channel);
tab.close();
return scenes;
}
return res.status;
}
*/
async function getActorUrl(actor, channel) { async function getActorUrl(actor, channel) {
if (actor.url) { if (actor.url) {
return actor.url; return actor.url;

View File

@ -108,20 +108,29 @@ function useCloudflareBypass(url, options) {
return null; return null;
} }
// Per limit group, maps a limiter property to the CLI argument that overrides it.
// The names are the camel-cased forms of the `--media-interval` and
// `--media-concurrency` options; groups without an entry use the plain property name.
const propMap = {
	media: { interval: 'mediaInterval', concurrency: 'mediaConcurrency' },
};
/**
 * Resolve a single rate limiter setting for a request.
 *
 * Sources are consulted in order of precedence:
 *   1. the command line argument (group-specific when `options.limits` has an entry in `propMap`),
 *   2. the value passed directly in the request options,
 *   3. the per-hostname entry in `config.limits`, unless that entry has `enable: false`,
 *   4. the `config.limits` group named by `options.limits`, falling back to `config.limits.default`.
 *
 * @param {string} prop - Limiter property to resolve, e.g. 'interval' or 'concurrency'.
 * @param {object} options - Request options; may carry `limits` (group name) and an explicit `prop` value.
 * @param {string} hostname - Hostname of the request, used as key into `config.limits`.
 * @returns {*} The resolved value, or undefined when no source defines it.
 */
function getLimiterValue(prop, options, hostname) {
	// e.g. 'interval' -> 'mediaInterval' for the media group; other groups use the plain name
	const mappedProp = propMap[options.limits]?.[prop] ?? prop;

	if (typeof argv[mappedProp] !== 'undefined') {
		// return the argument that was actually checked; reading argv[prop] here would make
		// --media-interval yield undefined or the general --interval value
		return argv[mappedProp];
	}

	if (typeof options[prop] !== 'undefined') {
		return options[prop];
	}

	const hostLimits = config.limits[hostname];

	if (hostLimits?.enable !== false && typeof hostLimits?.[prop] !== 'undefined') {
		return hostLimits[prop];
	}

	// a group missing from the configuration falls back to the defaults instead of throwing
	const groupLimits = config.limits[options.limits || 'default'];

	if (typeof groupLimits?.[prop] !== 'undefined') {
		return groupLimits[prop];
	}

	return config.limits.default?.[prop];
}
function getLimiter(options = {}, url) { function getLimiter(options = {}, url) {