diff --git a/config/default.js b/config/default.js index da36d7fc..8054830e 100755 --- a/config/default.js +++ b/config/default.js @@ -30,6 +30,12 @@ module.exports = { }, }, }, + redis: { + host: 'localhost', + port: 6379, + username: null, + password: null, + }, location: { userAgent: 'contact via https://traxxx.me/', }, diff --git a/package-lock.json b/package-lock.json index 5f9984a7..97fc3600 100755 --- a/package-lock.json +++ b/package-lock.json @@ -71,6 +71,7 @@ "puppeteer": "^20.5.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", + "redis": "^4.6.7", "sharp": "^0.29.2", "showdown": "^1.9.1", "source-map-support": "^0.5.16", @@ -3228,6 +3229,64 @@ } } }, + "node_modules/@redis/bloom": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.2.0.tgz", + "integrity": "sha512-HG2DFjYKbpNmVXsa0keLHp/3leGJz1mjh09f2RLGGLQZzSHpkmZWuwJbAvo3QcRY8p80m5+ZdXZdYOSBLlp7Cg==", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/client": { + "version": "1.5.8", + "resolved": "https://registry.npmjs.org/@redis/client/-/client-1.5.8.tgz", + "integrity": "sha512-xzElwHIO6rBAqzPeVnCzgvrnBEcFL1P0w8P65VNLRkdVW8rOE58f52hdj0BDgmsdOm4f1EoXPZtH4Fh7M/qUpw==", + "dependencies": { + "cluster-key-slot": "1.1.2", + "generic-pool": "3.9.0", + "yallist": "4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@redis/client/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + }, + "node_modules/@redis/graph": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@redis/graph/-/graph-1.1.0.tgz", + "integrity": "sha512-16yZWngxyXPd+MJxeSr0dqh2AIOi8j9yXKcKCwVaKDbH3HTuETpDVPcLujhFYVPtYrngSco31BUcSa9TH31Gqg==", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/json": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@redis/json/-/json-1.0.4.tgz", + "integrity": "sha512-LUZE2Gdrhg0Rx7AN+cZkb1e6HjoSKaeeW8rYnt89Tly13GBI5eP4CwDVr+MY8BAYfCg4/N15OUrtLoona9uSgw==", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/search": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@redis/search/-/search-1.1.3.tgz", + "integrity": "sha512-4Dg1JjvCevdiCBTZqjhKkGoC5/BcB7k9j99kdMnaXFXg8x4eyOIVg9487CMv7/BUVkFLZCaIh8ead9mU15DNng==", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/time-series": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@redis/time-series/-/time-series-1.0.4.tgz", + "integrity": "sha512-ThUIgo2U/g7cCuZavucQTQzA9g9JbDDY2f64u3AbAoz/8vE2lt2U37LamDUVChhaDA3IRT9R6VvJwqnUfTJzng==", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, "node_modules/@tensorflow/tfjs-core": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-core/-/tfjs-core-1.7.0.tgz", @@ -5412,6 +5471,14 @@ "request": "^2.88.0" } }, + "node_modules/cluster-key-slot": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz", + "integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/cmake-js": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/cmake-js/-/cmake-js-6.1.0.tgz", @@ -9112,6 +9179,14 @@ "node": ">= 4.0.0" } }, + "node_modules/generic-pool": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/generic-pool/-/generic-pool-3.9.0.tgz", + "integrity": "sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==", + "engines": { + "node": ">= 4" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -14804,6 +14879,19 @@ "node": ">=8" } }, + "node_modules/redis": { + "version": "4.6.7", + "resolved": "https://registry.npmjs.org/redis/-/redis-4.6.7.tgz", + "integrity": "sha512-KrkuNJNpCwRm5vFJh0tteMxW8SaUzkm5fBH7eL5hd/D0fAkzvapxbfGPP/r+4JAXdQuX7nebsBkBqA2RHB7Usw==", + "dependencies": { + "@redis/bloom": "1.2.0", + "@redis/client": "1.5.8", + "@redis/graph": "1.1.0", + "@redis/json": "1.0.4", + "@redis/search": "1.1.3", + "@redis/time-series": "1.0.4" + } + }, "node_modules/regenerate": { "version": "1.4.2", "resolved": "https://registry.npmjs.org/regenerate/-/regenerate-1.4.2.tgz", @@ -21448,6 +21536,53 @@ } } }, + "@redis/bloom": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.2.0.tgz", + "integrity": "sha512-HG2DFjYKbpNmVXsa0keLHp/3leGJz1mjh09f2RLGGLQZzSHpkmZWuwJbAvo3QcRY8p80m5+ZdXZdYOSBLlp7Cg==", + "requires": {} + }, + "@redis/client": { + "version": "1.5.8", + "resolved": "https://registry.npmjs.org/@redis/client/-/client-1.5.8.tgz", + "integrity": "sha512-xzElwHIO6rBAqzPeVnCzgvrnBEcFL1P0w8P65VNLRkdVW8rOE58f52hdj0BDgmsdOm4f1EoXPZtH4Fh7M/qUpw==", + "requires": { + "cluster-key-slot": "1.1.2", + "generic-pool": "3.9.0", + "yallist": "4.0.0" + }, + "dependencies": { + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + } + } + }, + "@redis/graph": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@redis/graph/-/graph-1.1.0.tgz", + "integrity": "sha512-16yZWngxyXPd+MJxeSr0dqh2AIOi8j9yXKcKCwVaKDbH3HTuETpDVPcLujhFYVPtYrngSco31BUcSa9TH31Gqg==", + "requires": {} + }, + "@redis/json": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@redis/json/-/json-1.0.4.tgz", + "integrity": "sha512-LUZE2Gdrhg0Rx7AN+cZkb1e6HjoSKaeeW8rYnt89Tly13GBI5eP4CwDVr+MY8BAYfCg4/N15OUrtLoona9uSgw==", + "requires": {} + }, + "@redis/search": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@redis/search/-/search-1.1.3.tgz", + "integrity": "sha512-4Dg1JjvCevdiCBTZqjhKkGoC5/BcB7k9j99kdMnaXFXg8x4eyOIVg9487CMv7/BUVkFLZCaIh8ead9mU15DNng==", + "requires": {} + }, + "@redis/time-series": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@redis/time-series/-/time-series-1.0.4.tgz", + "integrity": "sha512-ThUIgo2U/g7cCuZavucQTQzA9g9JbDDY2f64u3AbAoz/8vE2lt2U37LamDUVChhaDA3IRT9R6VvJwqnUfTJzng==", + "requires": {} + }, "@tensorflow/tfjs-core": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/@tensorflow/tfjs-core/-/tfjs-core-1.7.0.tgz", @@ -23213,6 +23348,11 @@ "request-promise": "^4.2.4" } }, + "cluster-key-slot": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz", + "integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==" + }, "cmake-js": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/cmake-js/-/cmake-js-6.1.0.tgz", @@ -26051,6 +26191,11 @@ "globule": "^1.0.0" } }, + "generic-pool": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/generic-pool/-/generic-pool-3.9.0.tgz", + "integrity": "sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==" + }, "gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -30274,6 +30419,19 @@ "strip-indent": "^3.0.0" } }, + "redis": { + "version": "4.6.7", + "resolved": "https://registry.npmjs.org/redis/-/redis-4.6.7.tgz", + "integrity": "sha512-KrkuNJNpCwRm5vFJh0tteMxW8SaUzkm5fBH7eL5hd/D0fAkzvapxbfGPP/r+4JAXdQuX7nebsBkBqA2RHB7Usw==", + "requires": { + "@redis/bloom": "1.2.0", + "@redis/client": "1.5.8", + "@redis/graph": "1.1.0", + "@redis/json": "1.0.4", + "@redis/search": "1.1.3", + "@redis/time-series": "1.0.4" + } + }, "regenerate": { "version": "1.4.2", "resolved": "https://registry.npmjs.org/regenerate/-/regenerate-1.4.2.tgz", diff --git a/package.json b/package.json index 1dd63df7..6bcdfaab 100755 --- a/package.json +++ b/package.json @@ -130,6 +130,7 @@ "puppeteer": "^20.5.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", + "redis": "^4.6.7", "sharp": "^0.29.2", "showdown": "^1.9.1", "source-map-support": "^0.5.16", diff --git a/src/redis.js b/src/redis.js new file mode 100644 index 00000000..ef932be8 --- /dev/null +++ b/src/redis.js @@ -0,0 +1,15 @@ +'use strict'; + +const config = require('config'); +const redis = require('redis'); +const logger = require('./logger')(__filename); + +logger.verbose('Redis module initialized'); + +const redisClient = redis.createClient({ + socket: config.redis, +}); + +redisClient.connect(); + +module.exports = redisClient; diff --git a/src/scrapers/modelmedia.js b/src/scrapers/modelmedia.js index 76249511..556250a7 100644 --- a/src/scrapers/modelmedia.js +++ b/src/scrapers/modelmedia.js @@ -14,7 +14,7 @@ function scrapeAll(scenes) { const { origin, pathname, searchParams } = new URL(url); release.url = `${origin}${pathname}`; - release.shootId = pathname.match(/((LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))\w*-\w+((EP)?\d+)?/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives + release.shootId = pathname.match(/((HP)|(LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))\w*-\w+((EP)?\d+)?/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives release.actors = searchParams.get('models_name')?.split(',').map((actor) => { const [han, english] = actor.split('/').map((name) => name.trim()); diff --git a/src/utils/resolve-place.js b/src/utils/resolve-place.js index 2e1bf970..ee788a03 100755 --- a/src/utils/resolve-place.js +++ b/src/utils/resolve-place.js @@ -5,12 +5,26 @@ const config = require('config'); const knex = require('../knex'); const logger = require('../logger')(__filename); const http = require('./http'); +const slugify = require('./slugify'); +const argv = require('../argv'); +const redis = require('../redis'); async function resolvePlace(query) { if (!query) { return null; } + const cacheKey = `place-${slugify(query)}`; + const cachedPlace = await redis.hGetAll(cacheKey); + + if (cachedPlace && argv.placeCache !== false) { + await redis.expire(cacheKey, 3600 * 24 * 30); + + logger.debug(`Using cached place '${cacheKey}' for query '${query}': ${JSON.stringify(cachedPlace)}`); + + return cachedPlace; + } + // query is a nationality, lookup would get weird results (British resolves to British, Northern Ireland) const country = await knex('countries') .where('nationality', 'ilike', `%${query}%`) @@ -27,7 +41,7 @@ async function resolvePlace(query) { try { // https://operations.osmfoundation.org/policies/nominatim/ - const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, { + const res = await http.get(`https://nominatim.openstreetmap.org/search?q=${encodeURI(query)}&format=json&accept-language=en&addressdetails=1`, { headers: { 'User-Agent': config.location.userAgent, }, @@ -54,6 +68,11 @@ async function resolvePlace(query) { if (rawPlace.country_code) place.country = rawPlace.country_code.toUpperCase(); if (rawPlace.continent) place.continent = rawPlace.continent; + logger.debug(`Resolved place '${query}' to ${JSON.stringify(place)}`); + + await redis.hSet(cacheKey, place); + await redis.expire(cacheKey, 3600 * 24 * 30); + return place; } } catch (error) {