Separated media request limits.
This commit is contained in:
parent
070ef182db
commit
ea02ec3943
|
@ -301,6 +301,7 @@ module.exports = {
|
|||
},
|
||||
proxy: {
|
||||
enable: false,
|
||||
protocol: 'http',
|
||||
host: '',
|
||||
port: 8888,
|
||||
hostnames: [
|
||||
|
@ -376,6 +377,10 @@ module.exports = {
|
|||
interval: 50,
|
||||
concurrency: 20,
|
||||
},
|
||||
media: {
|
||||
interval: 50,
|
||||
concurrency: 20,
|
||||
},
|
||||
'www.kink.com': {
|
||||
interval: 1000,
|
||||
concurrency: 1,
|
||||
|
|
|
@ -89,7 +89,7 @@
|
|||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.13.3",
|
||||
"unprint": "^0.14.1",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
|
@ -18312,9 +18312,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/unprint": {
|
||||
"version": "0.13.3",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.13.3.tgz",
|
||||
"integrity": "sha512-HRpW+OdKmtW+cLnvLqYNVL2voH3aGvene8fxzAQzw2O0zPQrgv2iz5YivfQpxNyKsF+2jeUUma2ttWH8IttkHg==",
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.14.1.tgz",
|
||||
"integrity": "sha512-LpsktR7NK3iDaYfy1HpNOiYoKGzLSq6wDhQN7RcwTQVJMz9kE0qQ8DS+ru2L76j52lq4v6oBktpnghbe//s3Mw==",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"bottleneck": "^2.19.5",
|
||||
|
@ -18323,7 +18323,8 @@
|
|||
"eslint-config-airbnb": "^19.0.4",
|
||||
"eslint-config-airbnb-base": "^15.0.0",
|
||||
"jsdom": "^17.0.0",
|
||||
"moment-timezone": "^0.5.34"
|
||||
"moment-timezone": "^0.5.34",
|
||||
"tunnel": "^0.0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/unprint/node_modules/@tootallnate/once": {
|
||||
|
|
|
@ -148,7 +148,7 @@
|
|||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.13.3",
|
||||
"unprint": "^0.14.1",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
|
|
|
@ -36,10 +36,11 @@ unprint.options({
|
|||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||
},
|
||||
limits: config.limits,
|
||||
proxyAddress: `http://${config.proxy.host}:${config.proxy.port}`,
|
||||
});
|
||||
|
||||
// Log every outgoing unprint request together with the rate limits applied to it.
// The ' proxied' marker is appended only when the event's proxy flag is truthy.
// NOTE(review): this handler reads `event.proxy` while the error handler below reads
// `event.proxied` - confirm which property unprint actually emits and align both.
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${event.proxy ? ' proxied' : ''}) ${event.url}`));

// Log failed unprint requests; the trailing space belongs to the optional 'proxied '
// marker so the message reads "Unprint failed proxied GET ..." or "Unprint failed GET ...".
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.proxied ? 'proxied ' : ''}${event.method} ${event.url} (${event.status}): ${event.statusText}`));
|
||||
|
||||
function logActive() {
|
||||
setTimeout(() => {
|
||||
|
|
10
src/argv.js
10
src/argv.js
|
@ -189,6 +189,16 @@ const { argv } = yargs
|
|||
type: 'number',
|
||||
// don't set default, because argument has to override config, but config has to override default
|
||||
})
|
||||
.option('media-interval', {
|
||||
describe: 'Minimum wait time between HTTP media requests',
|
||||
type: 'number',
|
||||
// don't set default, because argument has to override config, but config has to override default
|
||||
})
|
||||
.option('media-concurrency', {
|
||||
describe: 'Maximum amount of parallel HTTP media requests',
|
||||
type: 'number',
|
||||
// don't set default, because argument has to override config, but config has to override default
|
||||
})
|
||||
.option('save', {
|
||||
describe: 'Save fetched releases to database',
|
||||
type: 'boolean',
|
||||
|
|
|
@ -619,6 +619,7 @@ async function storeFile(media, options) {
|
|||
|
||||
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
const res = await http.get(source.src, {
|
||||
limits: 'media',
|
||||
headers: {
|
||||
...(source.referer && { referer: source.referer }),
|
||||
...(source.host && { host: source.host }),
|
||||
|
|
|
@ -91,7 +91,9 @@ function scrapeProfile({ query }, url, channel) {
|
|||
|
||||
async function fetchLatest(channel, page) {
|
||||
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
||||
const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
|
||||
const res = await unprint.get(`${channel.url}/latest/${page}`, {
|
||||
selectAll: '.card-scene', // studios as channels
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
|
@ -100,6 +102,39 @@ async function fetchLatest(channel, page) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
/*
|
||||
async function fetchLatest(channel, page) {
|
||||
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
||||
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
|
||||
const url = `${channel.url}/latest/${page}`; // studios as channels
|
||||
|
||||
const { tab } = await http.getBrowserSession('analvids', {
|
||||
bypass: {
|
||||
headless: false,
|
||||
},
|
||||
});
|
||||
|
||||
const res = await tab.goto(url);
|
||||
|
||||
const status = res.status();
|
||||
|
||||
console.log('STATUS', status);
|
||||
|
||||
if (status === 200) {
|
||||
const html = await tab.content();
|
||||
const context = unprint.initAll(html, '.card-scene'); // studios as channels
|
||||
|
||||
const scenes = scrapeAll(context, channel);
|
||||
|
||||
tab.close();
|
||||
|
||||
return scenes;
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
*/
|
||||
|
||||
async function getActorUrl(actor, channel) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
|
|
|
@ -108,20 +108,29 @@ function useCloudflareBypass(url, options) {
|
|||
return null;
|
||||
}
|
||||
|
||||
// Maps a named limit group (the `limits` request option) to the argv keys that
// override that group's values. Groups not listed here fall back to the plain
// `interval` / `concurrency` argv keys.
const propMap = {
	media: {
		interval: 'mediaInterval',
		concurrency: 'mediaConcurrency',
	},
};

/**
 * Resolve a single rate limiter setting for a request.
 *
 * Precedence, highest first:
 * 1. command line argument (group-specific key from propMap when one exists, otherwise `prop`)
 * 2. per-request option (`options[prop]`)
 * 3. per-hostname config (`config.limits[hostname]`, unless its `enable` is false)
 * 4. limit group config (`config.limits[options.limits]`)
 * 5. default config (`config.limits.default`)
 *
 * @param {string} prop - Setting to resolve, e.g. 'interval' or 'concurrency'.
 * @param {object} options - Request options; `options.limits` optionally names a limit group such as 'media'.
 * @param {string} hostname - Hostname of the request URL, used as key into `config.limits`.
 * @returns {*} The resolved value for `prop`.
 */
function getLimiterValue(prop, options, hostname) {
	const mappedProp = propMap[options.limits]?.[prop] || prop;

	if (typeof argv[mappedProp] !== 'undefined') {
		// return the value of the key that was checked; returning argv[prop] here
		// would hand back the non-media argument for media requests
		return argv[mappedProp];
	}

	if (typeof options[prop] !== 'undefined') {
		return options[prop];
	}

	const hostLimits = config.limits[hostname];

	if (hostLimits?.enable !== false && typeof hostLimits?.[prop] !== 'undefined') {
		return hostLimits[prop];
	}

	// an unknown group, or a group that does not define this property, uses the defaults
	return config.limits[options.limits]?.[prop] ?? config.limits.default[prop];
}
|
||||
|
||||
function getLimiter(options = {}, url) {
|
||||
|
|
Loading…
Reference in New Issue