Separated media request limits.
This commit is contained in:
parent
070ef182db
commit
ea02ec3943
|
@ -301,6 +301,7 @@ module.exports = {
|
||||||
},
|
},
|
||||||
proxy: {
|
proxy: {
|
||||||
enable: false,
|
enable: false,
|
||||||
|
protocol: 'http',
|
||||||
host: '',
|
host: '',
|
||||||
port: 8888,
|
port: 8888,
|
||||||
hostnames: [
|
hostnames: [
|
||||||
|
@ -376,6 +377,10 @@ module.exports = {
|
||||||
interval: 50,
|
interval: 50,
|
||||||
concurrency: 20,
|
concurrency: 20,
|
||||||
},
|
},
|
||||||
|
media: {
|
||||||
|
interval: 50,
|
||||||
|
concurrency: 20,
|
||||||
|
},
|
||||||
'www.kink.com': {
|
'www.kink.com': {
|
||||||
interval: 1000,
|
interval: 1000,
|
||||||
concurrency: 1,
|
concurrency: 1,
|
||||||
|
|
|
@ -89,7 +89,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.13.3",
|
"unprint": "^0.14.1",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
@ -18312,9 +18312,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/unprint": {
|
"node_modules/unprint": {
|
||||||
"version": "0.13.3",
|
"version": "0.14.1",
|
||||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.13.3.tgz",
|
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.14.1.tgz",
|
||||||
"integrity": "sha512-HRpW+OdKmtW+cLnvLqYNVL2voH3aGvene8fxzAQzw2O0zPQrgv2iz5YivfQpxNyKsF+2jeUUma2ttWH8IttkHg==",
|
"integrity": "sha512-LpsktR7NK3iDaYfy1HpNOiYoKGzLSq6wDhQN7RcwTQVJMz9kE0qQ8DS+ru2L76j52lq4v6oBktpnghbe//s3Mw==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^0.27.2",
|
"axios": "^0.27.2",
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
|
@ -18323,7 +18323,8 @@
|
||||||
"eslint-config-airbnb": "^19.0.4",
|
"eslint-config-airbnb": "^19.0.4",
|
||||||
"eslint-config-airbnb-base": "^15.0.0",
|
"eslint-config-airbnb-base": "^15.0.0",
|
||||||
"jsdom": "^17.0.0",
|
"jsdom": "^17.0.0",
|
||||||
"moment-timezone": "^0.5.34"
|
"moment-timezone": "^0.5.34",
|
||||||
|
"tunnel": "^0.0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/unprint/node_modules/@tootallnate/once": {
|
"node_modules/unprint/node_modules/@tootallnate/once": {
|
||||||
|
|
|
@ -148,7 +148,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.13.3",
|
"unprint": "^0.14.1",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
|
@ -36,10 +36,11 @@ unprint.options({
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||||
},
|
},
|
||||||
limits: config.limits,
|
limits: config.limits,
|
||||||
|
proxyAddress: `http://${config.proxy.host}:${config.proxy.port}`,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Log every outgoing unprint request together with the rate limits that were applied to it,
// and mark requests that were routed through the proxy.
// NOTE(review): the original handlers read `event.proxy` here and `event.proxied` in the error
// handler; both are accepted until the field name emitted by unprint is confirmed.
unprint.on('requestInit', (event) => {
	// Only label the request when it actually is proxied (the original ternary was inverted).
	const proxyLabel = (event.proxied ?? event.proxy) ? ' proxied' : '';

	logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${proxyLabel}) ${event.url}`);
});

// Log failed unprint requests, including whether the failing request went through the proxy.
unprint.on('requestError', (event) => {
	// Trailing space keeps the label separated from the method: "Unprint failed proxied GET ...".
	const proxyLabel = (event.proxied ?? event.proxy) ? 'proxied ' : '';

	logger.error(`Unprint failed ${proxyLabel}${event.method} ${event.url} (${event.status}): ${event.statusText}`);
});
|
||||||
|
|
||||||
function logActive() {
|
function logActive() {
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
|
|
10
src/argv.js
10
src/argv.js
|
@ -189,6 +189,16 @@ const { argv } = yargs
|
||||||
type: 'number',
|
type: 'number',
|
||||||
// don't set default, because argument has to override config, but config has to override default
|
// don't set default, because argument has to override config, but config has to override default
|
||||||
})
|
})
|
||||||
|
.option('media-interval', {
|
||||||
|
describe: 'Minimum wait time between HTTP media requests',
|
||||||
|
type: 'number',
|
||||||
|
// don't set default, because argument has to override config, but config has to override default
|
||||||
|
})
|
||||||
|
.option('media-concurrency', {
|
||||||
|
describe: 'Maximum amount of parallel HTTP media requests',
|
||||||
|
type: 'number',
|
||||||
|
// don't set default, because argument has to override config, but config has to override default
|
||||||
|
})
|
||||||
.option('save', {
|
.option('save', {
|
||||||
describe: 'Save fetched releases to database',
|
describe: 'Save fetched releases to database',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
|
|
|
@ -619,6 +619,7 @@ async function storeFile(media, options) {
|
||||||
|
|
||||||
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||||
const res = await http.get(source.src, {
|
const res = await http.get(source.src, {
|
||||||
|
limits: 'media',
|
||||||
headers: {
|
headers: {
|
||||||
...(source.referer && { referer: source.referer }),
|
...(source.referer && { referer: source.referer }),
|
||||||
...(source.host && { host: source.host }),
|
...(source.host && { host: source.host }),
|
||||||
|
|
|
@ -91,7 +91,9 @@ function scrapeProfile({ query }, url, channel) {
|
||||||
|
|
||||||
async function fetchLatest(channel, page) {
|
async function fetchLatest(channel, page) {
|
||||||
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
||||||
const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
|
const res = await unprint.get(`${channel.url}/latest/${page}`, {
|
||||||
|
selectAll: '.card-scene', // studios as channels
|
||||||
|
});
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeAll(res.context, channel);
|
return scrapeAll(res.context, channel);
|
||||||
|
@ -100,6 +102,39 @@ async function fetchLatest(channel, page) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
async function fetchLatest(channel, page) {
|
||||||
|
// const res = await unprint.get(`https://www.analvids.com/new-videos/${page}`, { selectAll: '.card-scene' }); // analvids as channel
|
||||||
|
// const res = await unprint.get(`${channel.url}/latest/${page}`, { selectAll: '.card-scene' }); // studios as channels
|
||||||
|
const url = `${channel.url}/latest/${page}`; // studios as channels
|
||||||
|
|
||||||
|
const { tab } = await http.getBrowserSession('analvids', {
|
||||||
|
bypass: {
|
||||||
|
headless: false,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const res = await tab.goto(url);
|
||||||
|
|
||||||
|
const status = res.status();
|
||||||
|
|
||||||
|
console.log('STATUS', status);
|
||||||
|
|
||||||
|
if (status === 200) {
|
||||||
|
const html = await tab.content();
|
||||||
|
const context = unprint.initAll(html, '.card-scene'); // studios as channels
|
||||||
|
|
||||||
|
const scenes = scrapeAll(context, channel);
|
||||||
|
|
||||||
|
tab.close();
|
||||||
|
|
||||||
|
return scenes;
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
async function getActorUrl(actor, channel) {
|
async function getActorUrl(actor, channel) {
|
||||||
if (actor.url) {
|
if (actor.url) {
|
||||||
return actor.url;
|
return actor.url;
|
||||||
|
|
|
@ -108,20 +108,29 @@ function useCloudflareBypass(url, options) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps a named limit profile (passed as `options.limits`) to the command line arguments
// that override that profile's values, e.g. --media-interval for the 'media' profile.
const propMap = {
	media: {
		interval: 'mediaInterval',
		concurrency: 'mediaConcurrency',
	},
};

/**
 * Resolve a single rate limiter setting for a request.
 *
 * Sources are consulted in order of precedence:
 *   1. the command line argument (profile-specific when `options.limits` names a mapped profile),
 *   2. the per-request `options`,
 *   3. the hostname-specific entry in `config.limits` (unless it has `enable: false`),
 *   4. the `config.limits` profile named by `options.limits`, falling back to `default`.
 *
 * @param {string} prop - Setting to resolve, e.g. 'interval' or 'concurrency'.
 * @param {object} options - Request options; may carry `limits` (profile name) and direct overrides.
 * @param {string} hostname - Hostname of the request, used to look up host-specific limits.
 * @returns {*} The resolved value, or undefined when no source defines it.
 */
function getLimiterValue(prop, options, hostname) {
	const mappedProp = propMap[options.limits]?.[prop] || prop;

	if (typeof argv[mappedProp] !== 'undefined') {
		// Return the same argument that was checked; reading argv[prop] here would ignore
		// the profile-specific argument and hand back the generic one (or undefined).
		return argv[mappedProp];
	}

	if (typeof options[prop] !== 'undefined') {
		return options[prop];
	}

	const hostLimits = config.limits[hostname];

	if (hostLimits?.enable !== false && typeof hostLimits?.[prop] !== 'undefined') {
		return hostLimits[prop];
	}

	// A profile that is missing from the config (or does not define this setting)
	// falls back to the default profile instead of throwing.
	const profileValue = config.limits[options.limits || 'default']?.[prop];

	return typeof profileValue !== 'undefined' ? profileValue : config.limits.default[prop];
}
|
||||||
|
|
||||||
function getLimiter(options = {}, url) {
|
function getLimiter(options = {}, url) {
|
||||||
|
|
Loading…
Reference in New Issue