Refactored media fetch with undici for http/2 support.
This commit is contained in:
29
package-lock.json
generated
29
package-lock.json
generated
@@ -93,7 +93,7 @@
|
||||
"tough-cookie": "^4.1.3",
|
||||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"undici": "^7.24.7",
|
||||
"unprint": "^0.19.13",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
@@ -3064,14 +3064,6 @@
|
||||
"npm": ">=6.14.13"
|
||||
}
|
||||
},
|
||||
"node_modules/@fastify/busboy": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.0.tgz",
|
||||
"integrity": "sha512-+KpH+QxZU7O4675t3mnkQKcZZg56u+K/Ct2K+N2AZYNVK8kyeo/bI18tI8aPm3tvNNRyTWfj6s5tnGNlcbQRsA==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@gar/promisify": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@gar/promisify/-/promisify-1.1.3.tgz",
|
||||
@@ -20576,14 +20568,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/undici": {
|
||||
"version": "5.28.1",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-5.28.1.tgz",
|
||||
"integrity": "sha512-xcIIvj1LOQH9zAL54iWFkuDEaIVEjLrru7qRpa3GrEEHk6OBhb/LycuUY2m7VCcTuDeLziXCxobQVyKExyGeIA==",
|
||||
"dependencies": {
|
||||
"@fastify/busboy": "^2.0.0"
|
||||
},
|
||||
"version": "7.24.7",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-7.24.7.tgz",
|
||||
"integrity": "sha512-H/nlJ/h0ggGC+uRL3ovD+G0i4bqhvsDOpbDv7At5eFLlj2b41L8QliGbnl2H7SnDiYhENphh1tQFJZf+MyfLsQ==",
|
||||
"engines": {
|
||||
"node": ">=14.0"
|
||||
"node": ">=20.18.1"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
@@ -21305,14 +21294,6 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/unprint/node_modules/undici": {
|
||||
"version": "7.18.2",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-7.18.2.tgz",
|
||||
"integrity": "sha512-y+8YjDFzWdQlSE9N5nzKMT3g4a5UBX1HKowfdXh0uvAnTaqqwqB92Jt4UXBAeKekDs5IaDKyJFR4X1gYVCgXcw==",
|
||||
"engines": {
|
||||
"node": ">=20.18.1"
|
||||
}
|
||||
},
|
||||
"node_modules/unprint/node_modules/w3c-xmlserializer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-2.0.0.tgz",
|
||||
|
||||
@@ -152,7 +152,7 @@
|
||||
"tough-cookie": "^4.1.3",
|
||||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"undici": "^7.24.7",
|
||||
"unprint": "^0.19.13",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
|
||||
@@ -796,6 +796,9 @@ const networks = [
|
||||
slug: 'teencoreclub',
|
||||
name: 'Teen Core Club',
|
||||
url: 'https://teencoreclub.com',
|
||||
parameters: {
|
||||
studioId: 1624,
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'teenmegaworld',
|
||||
|
||||
@@ -13510,7 +13510,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 178,
|
||||
legacySiteId: 178,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13520,7 +13520,7 @@ const sites = [
|
||||
parent: 'teencoreclub',
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 482,
|
||||
legacySiteId: 482,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13537,7 +13537,8 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 180,
|
||||
legacySiteId: 180,
|
||||
siteId: 17,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13552,7 +13553,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 182,
|
||||
legacySiteId: 182,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13564,7 +13565,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 184,
|
||||
legacySiteId: 184,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13579,7 +13580,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 362,
|
||||
legacySiteId: 362,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13591,7 +13592,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 278,
|
||||
legacySiteId: 278,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13608,7 +13609,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 186,
|
||||
legacySiteId: 186,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13620,7 +13621,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 280,
|
||||
legacySiteId: 280,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13632,7 +13633,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 282,
|
||||
legacySiteId: 282,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13644,7 +13645,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 188,
|
||||
legacySiteId: 188,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13656,7 +13657,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 284,
|
||||
legacySiteId: 284,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13672,7 +13673,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 190,
|
||||
legacySiteId: 190,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13689,7 +13690,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 192,
|
||||
legacySiteId: 192,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13706,7 +13707,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 288,
|
||||
legacySiteId: 288,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13718,7 +13719,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 290,
|
||||
legacySiteId: 290,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13733,7 +13734,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 292,
|
||||
legacySiteId: 292,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13745,7 +13746,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 194,
|
||||
legacySiteId: 194,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13757,7 +13758,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 196,
|
||||
legacySiteId: 196,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13769,7 +13770,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 198,
|
||||
legacySiteId: 198,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13784,7 +13785,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 294,
|
||||
legacySiteId: 294,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13795,7 +13796,7 @@ const sites = [
|
||||
visible: false,
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 566,
|
||||
legacySiteId: 566,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13815,7 +13816,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 200,
|
||||
legacySiteId: 200,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13830,7 +13831,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 296,
|
||||
legacySiteId: 296,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13842,7 +13843,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 298,
|
||||
legacySiteId: 298,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13857,7 +13858,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 300,
|
||||
legacySiteId: 300,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13872,7 +13873,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 302,
|
||||
legacySiteId: 302,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13888,7 +13889,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 304,
|
||||
legacySiteId: 304,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13903,7 +13904,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 306,
|
||||
legacySiteId: 306,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13915,7 +13916,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 308,
|
||||
legacySiteId: 308,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13926,7 +13927,7 @@ const sites = [
|
||||
visible: false,
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 568,
|
||||
legacySiteId: 568,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13937,7 +13938,7 @@ const sites = [
|
||||
visible: false,
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 570,
|
||||
legacySiteId: 570,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13950,7 +13951,7 @@ const sites = [
|
||||
parent: 'teencoreclub',
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 360,
|
||||
legacySiteId: 360,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13962,7 +13963,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 310,
|
||||
legacySiteId: 310,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13976,7 +13977,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 202,
|
||||
legacySiteId: 202,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -13988,7 +13989,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 312,
|
||||
legacySiteId: 312,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14003,7 +14004,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 314,
|
||||
legacySiteId: 314,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14014,7 +14015,7 @@ const sites = [
|
||||
visible: false,
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 556,
|
||||
legacySiteId: 556,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14029,7 +14030,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 316,
|
||||
legacySiteId: 316,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14043,7 +14044,7 @@ const sites = [
|
||||
parent: 'teencoreclub',
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 418,
|
||||
legacySiteId: 418,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14055,7 +14056,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 318,
|
||||
legacySiteId: 318,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14067,7 +14068,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 204,
|
||||
legacySiteId: 204,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14083,7 +14084,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 320,
|
||||
legacySiteId: 320,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14095,7 +14096,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 322,
|
||||
legacySiteId: 322,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14107,7 +14108,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 324,
|
||||
legacySiteId: 324,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14119,7 +14120,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 366,
|
||||
legacySiteId: 366,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14132,7 +14133,7 @@ const sites = [
|
||||
parent: 'teencoreclub',
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 176,
|
||||
legacySiteId: 176,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14147,7 +14148,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 368,
|
||||
legacySiteId: 368,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14162,7 +14163,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 326,
|
||||
legacySiteId: 326,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14180,7 +14181,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 206,
|
||||
legacySiteId: 206,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14195,7 +14196,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 208,
|
||||
legacySiteId: 208,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14210,7 +14211,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 210,
|
||||
legacySiteId: 210,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14227,7 +14228,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 328,
|
||||
legacySiteId: 328,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14242,7 +14243,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 212,
|
||||
legacySiteId: 212,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14258,7 +14259,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 330,
|
||||
legacySiteId: 330,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14275,7 +14276,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 214,
|
||||
legacySiteId: 214,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14291,7 +14292,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 332,
|
||||
legacySiteId: 332,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14310,7 +14311,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 216,
|
||||
legacySiteId: 216,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14325,7 +14326,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 334,
|
||||
legacySiteId: 334,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14336,7 +14337,7 @@ const sites = [
|
||||
visible: false,
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
siteId: 558,
|
||||
legacySiteId: 558,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14352,7 +14353,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 336,
|
||||
legacySiteId: 336,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14369,7 +14370,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 218,
|
||||
legacySiteId: 218,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -14386,7 +14387,7 @@ const sites = [
|
||||
],
|
||||
parent: 'teencoreclub',
|
||||
parameters: {
|
||||
siteId: 220,
|
||||
legacySiteId: 220,
|
||||
},
|
||||
},
|
||||
/* TCC VOD services and unused brands
|
||||
|
||||
@@ -651,9 +651,10 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
const res = await http.get(source.src, {
|
||||
limits: 'media',
|
||||
headers: {
|
||||
host: new URL(source.src).hostname,
|
||||
// explicit host not allowed in HTTP/2
|
||||
// host: new URL(source.src).hostname,
|
||||
// ...(source.host && { host: source.host }),
|
||||
...(source.referer && { referer: source.referer }),
|
||||
...(source.host && { host: source.host }),
|
||||
},
|
||||
stream: true, // sources are fetched in parallel, don't gobble up memory
|
||||
followRedirects: source.followRedirects,
|
||||
|
||||
@@ -85,7 +85,7 @@ async function scrapeScene({ query: pageQuery, html }, { url, entity, include })
|
||||
}
|
||||
|
||||
if (include.photos && capsUrl) {
|
||||
release.caps = await fetchCaps(capsUrl);
|
||||
release.caps = await fetchCaps(capsUrl, entity);
|
||||
}
|
||||
|
||||
release.trailer = pageQuery.video('#download_select option[value*=".mp4"]', { attribute: 'value' });
|
||||
|
||||
@@ -1,155 +1,84 @@
|
||||
'use strict';
|
||||
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { prefixUrl } = require('../utils/qu');
|
||||
|
||||
function scrapeAll(scenes, entity) {
|
||||
return scenes.map((scene) => {
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.entryId = scene.id;
|
||||
release.url = `${new URL(entity.url).origin}/video/${scene.id}/${scene.slug}`;
|
||||
release.url = query.url('.title a');
|
||||
release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)[1];
|
||||
|
||||
if (/bic/i.test(scene.title)) {
|
||||
release.shootId = scene.title.toUpperCase().replace('-', '_');
|
||||
} else {
|
||||
release.title = scene.title;
|
||||
}
|
||||
release.title = query.content('.title a');
|
||||
|
||||
release.description = scene.description;
|
||||
release.date = moment.utc(scene.year, 'YYYY').toDate();
|
||||
release.datePrecision = 'year';
|
||||
release.date = query.date('.date', 'MMM DD, YYYY');
|
||||
release.duration = query.duration('.duration');
|
||||
|
||||
release.actors = scene.actors.map((actor) => ({
|
||||
name: actor.name.trim(),
|
||||
avatar: actor.image || null,
|
||||
})).filter((actor) => actor.name && slugify(actor.name) !== 'amateur-girl');
|
||||
release.actors = query.all('.models a.model').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null),
|
||||
}));
|
||||
|
||||
release.duration = scene.duration;
|
||||
release.stars = scene.video_rating_score;
|
||||
release.poster = query.img('img.poster');
|
||||
release.teaser = query.video('.teaser video');
|
||||
|
||||
[release.poster, ...release.photos] = scene.screenshots.map((url) => prefixUrl(url));
|
||||
|
||||
if (scene.is_gay) {
|
||||
release.tags = ['gay'];
|
||||
}
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene({ query }, url) {
|
||||
const release = {};
|
||||
const { pathname, origin, host } = new URL(url);
|
||||
|
||||
const entryId = pathname.match(/\/video\/(\d+)/)[1];
|
||||
release.entryId = entryId;
|
||||
|
||||
const title = query.meta('name=title');
|
||||
|
||||
if (/bic/i.test(title)) {
|
||||
release.shootId = title.toUpperCase().replace('-', '_');
|
||||
} else {
|
||||
release.title = title;
|
||||
}
|
||||
|
||||
release.date = query.date('.detail-meta li:nth-child(2)', 'YYYY');
|
||||
release.datePrecision = 'year';
|
||||
|
||||
release.description = query.q('.detail-description', true);
|
||||
release.duration = query.dur('.detail-meta li:first-child');
|
||||
|
||||
const actors = [query.q('.detail-hero-title h1', true)?.trim()].filter((name) => name && slugify(name) !== 'amateur-girl');
|
||||
|
||||
if (actors.length > 0) {
|
||||
release.actors = actors;
|
||||
}
|
||||
|
||||
release.poster = query.q('.detail-hero').style['background-image'].match(/url\((.+)\)/)[1];
|
||||
release.photos = query.imgs('.detail-grabs img');
|
||||
|
||||
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
|
||||
headers: {
|
||||
host,
|
||||
referer: url,
|
||||
},
|
||||
}, {
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
});
|
||||
|
||||
if (streamData.ok && streamData.body.status === 'success') {
|
||||
release.trailer = {
|
||||
stream: streamData.body.link,
|
||||
};
|
||||
} else {
|
||||
logger.warn(`Failed to fetch trailer for ${url}: ${streamData.ok ? streamData.body.status : streamData.status }`);
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function scrapeProfile(actor, entity, include) {
|
||||
const profile = {};
|
||||
|
||||
if (actor.image) {
|
||||
profile.avatar = `https://teencoreclub.com${actor.image}`;
|
||||
}
|
||||
|
||||
if (include.releases) {
|
||||
const res = await http.get(`https://teencoreclub.com/browsevideos/api/all?actor=${actor.id}`);
|
||||
|
||||
if (res.ok) {
|
||||
profile.releases = scrapeAll(res.body.data, entity);
|
||||
}
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(entity, page = 1) {
|
||||
// console.log(entity, page);
|
||||
|
||||
if (entity.parameters?.siteId) {
|
||||
const res = await http.get(`https://teencoreclub.com/browsevideos/api/all?resType=latest&page=${page}&label=${entity.parameters.siteId}`);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.body.data, entity);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, entity) {
|
||||
const { pathname } = new URL(url);
|
||||
const res = await qu.get(`https://teencoreclub.com${pathname}`);
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/${page}`;
|
||||
const res = await unprint.get(url, { selectAll: '.scene' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, entity);
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, { entity }, include) {
|
||||
const res = await http.get(`https://teencoreclub.com/api/actors?query=${actorName}`);
|
||||
/**
 * Build a release from a scene detail page.
 *
 * @param {object} page - Page context; only its `query` helper is used.
 * @param {object} page.query - Element query helper exposing `content`, `date`,
 *   `duration`, `imgs` and `video` (presumably an unprint context — confirm against caller).
 * @param {object} context - Scrape context; only `url` is read.
 * @param {string} context.url - Scene URL, must contain `/scene/<digits>`.
 * @returns {object} Release with entryId, title, description, date, duration,
 *   poster, photos and trailer.
 * @throws {TypeError} When the URL path does not contain `/scene/<digits>`.
 */
function scrapeScene({ query }, { url }) {
	const release = {};

	// the numeric path segment is the only identifier read for the scene
	release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)[1];

	release.title = query.content('h3.title');
	release.description = query.content('p.description');

	release.date = query.date('.date', 'MMMM D, YYYY');
	release.duration = query.duration('.duration');

	// first preview thumbnail doubles as the poster, the remainder are photos
	const [poster, ...photos] = query.imgs('.preview-thumb');

	release.poster = poster;
	release.photos = photos;

	release.trailer = query.video('.trailer video');

	// leftover debug dump of the release removed; it printed every scraped scene to stdout
	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a profile page.
 *
 * @param {object} page - Page context; only its `query` helper is used.
 * @param {object} page.query - Element query helper exposing `content` and `img`
 *   (presumably an unprint context — confirm against caller).
 * @returns {object} Profile with description, birthPlace and avatar.
 */
function scrapeProfile({ query }) {
	const profile = {};

	profile.description = query.content('.bio-text');
	profile.birthPlace = query.content('.birth-place span');

	profile.avatar = query.img('.actor-photo img');

	// leftover debug dump of the profile removed; it printed every scraped profile to stdout
	return profile;
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, entity) {
|
||||
const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
const actor = res.body.data.find((item) => slugify(item.name) === slugify(actorName));
|
||||
|
||||
if (actor) {
|
||||
return scrapeProfile(actor, entity, include);
|
||||
}
|
||||
|
||||
return null;
|
||||
return scrapeProfile(res.context, entity);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -157,6 +86,6 @@ async function fetchProfile({ name: actorName }, { entity }, include) {
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
scrapeScene,
|
||||
};
|
||||
|
||||
@@ -1,655 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const moment = require('moment');
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const { nanoid } = require('nanoid/non-secure');
|
||||
const { Upload } = require('@aws-sdk/lib-storage');
|
||||
const { S3Client } = require('@aws-sdk/client-s3');
|
||||
|
||||
const { graphql } = require('../web/graphql');
|
||||
const knex = require('../knex');
|
||||
const args = require('../argv');
|
||||
|
||||
// S3 client pointed at the Wasabi eu-central-1 endpoint; used as the upload
// client when media is transferred. Keys come from the `s3` section of the app config.
const s3 = new S3Client({
	credentials: {
		secretAccessKey: config.s3.secretKey,
		accessKeyId: config.s3.accessKey,
	},
	endpoint: 'https://s3.eu-central-1.wasabisys.com',
	region: 'eu-central-1',
});
|
||||
|
||||
// NOT TRANSFERRED, unutilized on old server: production location, available qualities, actor alias for, actor entry id, chapter posters, chapter photos
|
||||
|
||||
// Selection requested for every media relation (poster, photos, covers, trailer,
// teaser); kept in one place so the scene and movie exports cannot drift apart.
const mediaSelectionFields = `
	hash
	path
	thumbnail
	lazy
	s3: isS3
	mime
	index
	width
	height
	size
	source
	sourcePage
`;

// GraphQL selection for one exported scene, interpolated into the releases query.
const sceneFields = `
	entryId
	shootId
	title
	url
	date
	datePrecision
	productionDate
	description
	duration
	entity {
		slug
		type
	}
	studio {
		slug
	}
	movies: moviesScenesBySceneId {
		movie {
			title
			entryId
			entity {
				slug
				type
			}
		}
	}
	actors: releasesActors {
		actor {
			name
			slug
			entryId
			entity {
				slug
				type
			}
		}
	}
	directors: releasesDirectors {
		director {
			name
			slug
			entryId
			entity {
				slug
				type
			}
		}
	}
	tags: releasesTags {
		tag {
			slug
		}
	}
	chapters(orderBy: TIME_ASC) {
		index
		time
		duration
		title
		description
		tags: chaptersTags {
			tag {
				slug
			}
		}
	}
	poster: releasesPoster {
		media {
			${mediaSelectionFields}
		}
	}
	photos: releasesPhotos {
		media {
			${mediaSelectionFields}
		}
	}
	covers: releasesCovers {
		media {
			${mediaSelectionFields}
		}
	}
	trailer: releasesTrailer {
		media {
			${mediaSelectionFields}
		}
	}
	teaser: releasesTeaser {
		media {
			${mediaSelectionFields}
		}
	}
	createdAt
`;

// GraphQL selection for one exported movie, interpolated into the movies query.
const movieFields = `
	entryId
	title
	url
	date
	datePrecision
	entity {
		slug
		type
	}
	poster: moviesPoster {
		media {
			${mediaSelectionFields}
		}
	}
	covers: moviesCovers {
		media {
			${mediaSelectionFields}
		}
	}
	createdAt
`;
|
||||
|
||||
/**
 * Export a window of scenes and movies to a newline-delimited JSON file in the
 * working directory, then exit the process.
 *
 * The window is `args.limit` entries (default 1000) starting at `args.start`
 * (default 0), newest first. Every line of the export is one JSON object tagged
 * with `type: 'release'` or `type: 'movie'`; join rows are flattened so each
 * relation holds the related record (or its slug) directly.
 *
 * @returns {Promise<void>} Never settles in practice: the process exits once the file is written.
 */
async function save() {
	const limit = args.limit || 1000;
	const offset = args.start || 0;

	// NOTE(review): the third argument looks like the role the query runs as — confirm in ../web/graphql
	const { releases } = await graphql(`
		query SearchScenes(
			$limit: Int = 20
			$offset: Int = 0
		) {
			releases(
				first: $limit
				offset: $offset
				orderBy: DATE_DESC
			) {
				${sceneFields}
			}
		}
	`, {
		limit,
		offset,
	}, 'owner');

	const { movies } = await graphql(`
		query SearchScenes(
			$limit: Int = 20
			$offset: Int = 0
		) {
			movies(
				first: $limit
				offset: $offset
				orderBy: DATE_DESC
			) {
				${movieFields}
			}
		}
	`, {
		limit,
		offset,
	}, 'owner');

	// 24-hour clock (HH): with the 12-hour token an export at 03:00 and one at 15:00
	// got the same filename, and appendFile would silently merge them into one file
	const filename = `export-${offset}-${offset + limit}-${moment().format('YYYY-MM-DD_HH_mm_ss')}.json`;

	let savedScenes = 0;
	let savedMovies = 0;

	// entries are appended strictly one after another so the line order matches the query order
	await releases.reduce(async (chain, release) => {
		await chain;

		const entry = JSON.stringify({
			...release,
			type: 'release',
			actors: release.actors.filter(Boolean).map(({ actor }) => actor),
			directors: release.directors.filter(Boolean).map(({ director }) => director),
			studio: release.studio?.slug,
			tags: release.tags.map(({ tag }) => tag?.slug).filter(Boolean),
			movies: release.movies?.map(({ movie }) => movie) || [],
			chapters: release.chapters.filter(Boolean).map((chapter) => ({
				...chapter,
				tags: chapter.tags.map(({ tag }) => tag?.slug).filter(Boolean),
			})),
			poster: release.poster?.media,
			trailer: release.trailer?.media,
			teaser: release.teaser?.media,
			photos: release.photos.filter(Boolean).map(({ media }) => media),
			covers: release.covers.filter(Boolean).map(({ media }) => media),
		});

		await fs.promises.appendFile(filename, `${entry}\n`);

		savedScenes += 1;
	}, Promise.resolve());

	await movies.reduce(async (chain, movie) => {
		await chain;

		const entry = JSON.stringify({
			...movie,
			type: 'movie',
			poster: movie.poster?.media,
			covers: movie.covers.filter(Boolean).map(({ media }) => media),
		});

		await fs.promises.appendFile(filename, `${entry}\n`);

		savedMovies += 1;
	}, Promise.resolve());

	console.log(`Saved ${savedScenes} scenes and ${savedMovies} movies to ${filename}`);

	process.exit();
}
|
||||
|
||||
/**
 * Link an imported release to its tags in `releases_tags`.
 *
 * @param {object} release - Imported release; reads `id` and `tags` (tag slugs).
 * @param {object} context - Import context; `tagIdsBySlug` maps a tag slug to its ID.
 * @returns {Promise<void>} Resolves once the rows are inserted; no query is made for a tagless release.
 */
async function addReleaseTags(release, context) {
	const { tags, id: releaseId } = release;

	if (tags.length > 0) {
		// the slug is stored alongside the resolved ID as the original tag
		const rows = tags.map((slug) => ({
			tag_id: context.tagIdsBySlug[slug],
			release_id: releaseId,
			original_tag: slug,
		}));

		await knex('releases_tags').insert(rows);
	}
}
|
||||
|
||||
/**
 * Insert an actor that does not exist yet and return its new ID.
 *
 * @param {object} actor - Exported actor; reads `name` and `slug`.
 * @param {object|null} entity - Entity row the actor belongs to, if any; only `id` is read.
 * @param {object} context - Import context; `batchId` is stored on the new row.
 * @returns {Promise<*>} The `id` of the inserted row.
 */
async function addNewActor(actor, entity, context) {
	const actorRow = {
		name: actor.name,
		slug: actor.slug,
		entity_id: entity?.id,
		batch_id: context.batchId,
	};

	const insertedRows = await knex('actors')
		.insert(actorRow)
		.returning('id');

	return insertedRows[0].id;
}
|
||||
|
||||
/**
 * Link a release to its actors (or directors), creating missing actor rows.
 *
 * People are processed strictly one after another. Each is matched on slug plus
 * entity: a person exported with an entity only matches a row of that entity,
 * a person without one only matches a row whose `entity_id` is NULL.
 *
 * @param {object} release - Imported release; reads `id` and the `${target}s` list.
 * @param {object} context - Import context, passed through when a new actor is created.
 * @param {string} [target='actor'] - Relation to fill: reads `release[`${target}s`]`
 *   and writes `${target}_id` into `releases_${target}s`.
 * @returns {Promise<void>}
 * @throws {Error} When a person references an entity that does not exist.
 */
async function addReleaseActors(release, context, target = 'actor') {
	await release[`${target}s`].reduce(async (chain, actor) => {
		await chain;

		const entity = actor.entity
			? await knex('entities').where(actor.entity).first()
			: null;

		if (actor.entity && !entity) {
			// report the entity that was actually looked up; the release's own entity
			// is unrelated to this lookup and may not even be set
			throw new Error(`Actor ${actor.slug} contains non-existent ${actor.entity.type} '${actor.entity.slug}'`);
		}

		const existingActor = await knex('actors')
			.where('slug', actor.slug)
			.where((builder) => {
				if (entity) {
					builder.where('entity_id', entity.id);
					return;
				}

				builder.whereNull('entity_id');
			})
			.first();

		const actorId = existingActor?.id
			|| await addNewActor(actor, entity, context);

		await knex(`releases_${target}s`).insert({
			release_id: release.id,
			[`${target}_id`]: actorId,
		});
	}, Promise.resolve());
}
|
||||
|
||||
/**
 * Link a release to its directors; directors go through the same path as actors,
 * with the relation switched to `director`.
 *
 * @param {object} release - Imported release.
 * @param {object} context - Import context.
 * @returns {Promise<*>} Whatever the actor import resolves with.
 */
async function addReleaseDirectors(release, context) {
	const relation = 'director';

	return addReleaseActors(release, context, relation);
}
|
||||
|
||||
/**
 * Insert the chapters of an imported release together with their tags.
 *
 * Chapters are inserted strictly one after another; each chapter's tags are
 * linked through `chapters_tags` once the chapter row has an ID.
 *
 * @param {object} release - Imported release; reads `id` and `chapters`.
 * @param {object} context - Import context; `tagIdsBySlug` maps a tag slug to its ID.
 * @returns {Promise<void>}
 */
async function addReleaseChapters(release, context) {
	await release.chapters.reduce(async (chain, chapter) => {
		await chain;

		const [{ id: chapterId }] = await knex('chapters')
			.insert({
				release_id: release.id,
				index: chapter.index,
				time: chapter.time,
				duration: chapter.duration,
				// the export selects the chapter title, but it used to be dropped here
				// NOTE(review): assumes the target `chapters` table has a `title` column like the source — confirm
				title: chapter.title,
				description: chapter.description,
			})
			.returning('id');

		if (chapter.tags.length > 0) {
			await knex('chapters_tags').insert(chapter.tags.map((tag) => ({
				tag_id: context.tagIdsBySlug[tag],
				chapter_id: chapterId,
				original_tag: tag,
			})));
		}
	}, Promise.resolve());
}
|
||||
|
||||
// Sub-directory under the target prefix for each media variant; the full-size
// file sits directly in the target directory.
const dirs = {
	path: '',
	thumbnail: 'thumbs',
	lazy: 'lazy',
};

/**
 * Copy the three variants of a media item (full size, thumbnail, lazy) from the
 * old server to the S3 bucket.
 *
 * Variants are handled one after another. Each is downloaded to a temporary
 * file under `media/temp`, uploaded under `<target>/<variant dir>/<hash><ext>`,
 * and the temporary file is removed again. A variant that does not answer with
 * HTTP 200 is reported with a warning and skipped.
 *
 * @param {object} media - Exported media record; reads `hash`, `mime`, `s3`,
 *   `path`, `thumbnail` and `lazy`.
 * @param {string} target - Key prefix in the bucket, also used in the warning message.
 * @returns {Promise<void>} Rejects with the underlying error when a download, write or upload fails.
 */
async function transferMedia(media, target) {
	return ['path', 'thumbnail', 'lazy'].reduce(async (chain, type) => {
		await chain;

		const filename = `${media.hash}${path.extname(media[type])}`;
		const filepath = path.join(target, dirs[type], filename);
		const temp = path.join('media/temp', filepath);
		// the source host depends on whether the old server kept this item on S3 or on local disk
		const url = new URL(media[type], `${media.s3 ? config.media.transferSources.s3 : config.media.transferSources.local}/`).href;

		if (args.logLevel === 'debug') {
			console.log('Transferring media', url);
		}

		const res = await bhttp.get(url, { stream: true });

		if (res.statusCode !== 200) {
			console.warn(`Missing ${target} ${url}`);
			return;
		}

		await fs.promises.mkdir(path.dirname(temp), { recursive: true });

		try {
			await new Promise((resolve, reject) => {
				const fileStream = fs.createWriteStream(temp);

				// pass the actual error on; rejecting with nothing hid why a transfer failed
				res.on('error', reject);
				fileStream.on('error', reject);
				fileStream.on('finish', () => { resolve(); });

				res.pipe(fileStream);
			});

			await new Upload({
				client: s3,
				params: {
					Bucket: config.s3.bucket,
					Body: fs.createReadStream(temp),
					Key: filepath,
					ContentType: media.mime,
				},
			}).done();
		} finally {
			// also clean up after a failed download or upload; `force` keeps a file that
			// was never created from masking the original error
			await fs.promises.rm(temp, { force: true });
		}
	}, Promise.resolve());
}
|
||||
|
||||
/**
 * Store one media item and associate it with a release.
 *
 * An existing media row is reused when its hash matches, or when the item has
 * a source and that source matches. New media is transferred first and only
 * then recorded, so a failed transfer does not leave a row behind that would
 * make later runs skip the transfer.
 *
 * @param {object} media - media record from the import file
 * @param {object} release - inserted release; reads `id`, `type` and `title`
 * @param {string} target - media target, e.g. 'posters', 'photos' or 'covers'
 * @returns {Promise<void>}
 */
async function addReleaseMediaItem(media, release, target) {
	const existingQuery = knex('media').where('hash', media.hash);

	// knex turns a null value into IS NULL (and throws on undefined), which
	// would match any unrelated source-less media; only compare real sources
	if (media.source) {
		existingQuery.orWhere('source', media.source);
	}

	const existingMedia = await existingQuery.first();
	const id = existingMedia?.id || nanoid();

	// stored location of a variant, or null when the item does not have that variant
	const storedPath = (dir, source) => (source
		? path.join(target, dir, `${media.hash}${path.extname(source)}`)
		: null);

	if (!existingMedia) {
		await transferMedia(media, target);

		await knex('media').insert({
			id,
			hash: media.hash,
			path: storedPath('', media.path),
			thumbnail: storedPath('thumbs', media.thumbnail),
			lazy: storedPath('lazy', media.lazy),
			// always true rather than media.s3: transferMedia uploads every file to the S3 bucket
			is_s3: true,
			index: media.index,
			mime: media.mime,
			size: media.size,
			width: media.width,
			height: media.height,
			source: media.source,
			source_page: media.sourcePage,
		});
	}

	try {
		await knex(`${release.type}s_${target}`).insert({
			[`${release.type}_id`]: release.id,
			media_id: id,
		});
	} catch (error) {
		// best effort: an association failure must not abort the import, but report the real reason
		console.warn(`Ignored duplicate ${release.type} ${target} association ${media.hash} with ${release.id} "${release.title}": ${error.message}`);
	}
}

/**
 * Store a list of media items for a release, one after another.
 *
 * @param {Array<object|null>} [medias] - media records; empty entries are skipped,
 *   a missing list is treated as empty
 * @param {object} release - inserted release the media belongs to
 * @param {string} target - media target, e.g. 'posters', 'photos' or 'covers'
 * @returns {Promise<void>}
 */
async function addReleaseMedia(medias, release, target) {
	await (medias || []).filter(Boolean).reduce(
		(chain, media) => chain.then(() => addReleaseMediaItem(media, release, target)),
		Promise.resolve(),
	);
}
|
||||
|
||||
/**
 * Link a scene to each movie it belongs to by inserting rows in `movies_scenes`.
 *
 * A linked movie is resolved against the movies stored earlier in this run
 * (`context.movies`) by entry ID, entity slug and entity type.
 *
 * @param {object} release - inserted scene; reads `id`, `title` and `movies`
 * @param {object} context - import context; reads `movies`
 * @returns {Promise<void>}
 * @throws {Error} when a linked movie is not among the stored movies
 */
async function linkMovieScenes(release, context) {
	const isSameMovie = (storedMovie, linkedMovie) => storedMovie.entryId === linkedMovie.entryId
		&& storedMovie.entity.slug === linkedMovie.entity.slug
		&& storedMovie.entity.type === linkedMovie.entity.type;

	const linkMovie = async (linkedMovie) => {
		const movie = context.movies.find((storedMovie) => isSameMovie(storedMovie, linkedMovie));

		if (!movie) {
			throw new Error(`Missing ${linkedMovie.entity.slug} movie '${linkedMovie.title}' in '${release.title}'`);
		}

		await knex('movies_scenes').insert({
			movie_id: movie.id,
			scene_id: release.id,
		});
	};

	// sequential promise chain: movies are linked one at a time, in array order
	await release.movies.reduce(
		(chain, linkedMovie) => chain.then(() => linkMovie(linkedMovie)),
		Promise.resolve(),
	);
}
|
||||
|
||||
/**
 * Insert a release (scene or movie) with its media and relations, unless a
 * release with the same entry ID already exists for the same entity.
 *
 * @param {object} release - release record from the import file; `type` selects
 *   the table (`${type}s`) and which relations are stored
 * @param {object} context - import context; reads `batchId` and `studioIdsBySlug`
 *   here, and is passed on to the relation helpers
 * @returns {Promise<object>} the release extended with `id` and `entityName`;
 *   `skipped: true` is added when it was already in the database
 * @throws {Error} when the release's entity does not exist
 */
async function addRelease(release, context) {
	const table = `${release.type}s`;

	const existingRelease = await knex(table)
		.select(`${table}.*`, 'entities.name as entity_name')
		.leftJoin('entities', 'entities.id', `${table}.entity_id`)
		.where('entry_id', release.entryId)
		.where('entities.slug', release.entity.slug)
		.where('entities.type', release.entity.type)
		.first();

	if (existingRelease) {
		console.log(`Skipping ${release.entity.slug} release "${release.title}", already in database`);

		return {
			...release,
			skipped: true,
			id: existingRelease.id,
			entityName: existingRelease.entity_name,
		};
	}

	const entity = await knex('entities')
		.select(['id', 'name'])
		.where(release.entity)
		.first();

	if (!entity) {
		throw new Error(`Release "${release.title}" contains non-existent ${release.entity.type} '${release.entity.slug}'`);
	}

	// Scenes carry the type 'release' everywhere else in this script (table names,
	// the relation branch below), so the scene-only columns must be written for
	// that type; 'scene' is still accepted for backward compatibility.
	const isScene = release.type === 'release' || release.type === 'scene';

	const [releaseEntry] = await knex(table)
		.insert({
			entry_id: release.entryId,
			entity_id: entity.id,
			url: release.url,
			title: release.title,
			slug: release.slug,
			date: release.date,
			date_precision: release.datePrecision,
			created_batch_id: context.batchId,
			updated_batch_id: context.batchId,
			...(isScene && {
				shoot_id: release.shootId,
				studio_id: context.studioIdsBySlug[release.studio],
				production_date: release.productionDate,
				description: release.description,
				duration: release.duration,
			}),
		})
		.returning(['id', 'entry_id']);

	const releaseWithId = {
		...release,
		id: releaseEntry.id,
		entityName: entity.name,
	};

	await addReleaseMedia([releaseWithId.poster], releaseWithId, 'posters', context);

	if (release.type === 'release') {
		await Promise.all([
			addReleaseTags(releaseWithId, context),
			addReleaseActors(releaseWithId, context),
			addReleaseDirectors(releaseWithId, context),
			addReleaseChapters(releaseWithId, context),
			linkMovieScenes(releaseWithId, context),
			addReleaseMedia(releaseWithId.photos, releaseWithId, 'photos', context),
		]);
	}

	if (release.type === 'movie') {
		await addReleaseMedia(releaseWithId.covers, releaseWithId, 'covers', context);
	}

	return releaseWithId;
}
|
||||
|
||||
/**
 * Import releases from a newline-delimited JSON file (`args.file`).
 *
 * Reads one release per line, optionally restricted to one entity
 * (`args.entity`) and to a window (`args.start`, `args.limit`), registers a
 * batch, then stores all movies followed by all scenes, one at a time. Movies
 * go first so scenes can be linked to them. Exits the process when done.
 *
 * @returns {Promise<void>}
 */
async function load() {
	const file = await fs.promises.readFile(args.file, 'utf8');
	const start = args.start || 0;
	const end = args.limit ? start + args.limit : Infinity;

	const releases = file.split('\n')
		// trim so blank or whitespace-only lines (e.g. from CRLF files) do not reach JSON.parse
		.map((line) => line.trim())
		.filter(Boolean)
		.map((data) => JSON.parse(data))
		.filter((release) => (args.entity ? release.entity.slug === args.entity : true))
		.slice(start, end);

	if (releases.length === 0) {
		console.log('Nothing to load');
		return;
	}

	const movieReleases = releases.filter((release) => release.type === 'movie');
	const sceneReleases = releases.filter((release) => release.type === 'release');

	const [{ id: batchId }] = await knex('batches').insert({ comment: `import ${args.file}` }).returning('id');

	// collect every tag and studio slug up front, so each is resolved with a single query
	const aggTags = Array.from(new Set(sceneReleases.flatMap((release) => [...release.tags, ...release.chapters.flatMap((chapter) => chapter.tags)]).filter(Boolean)));
	const aggStudios = Array.from(new Set(releases.map((release) => release.studio).filter(Boolean)));

	const tags = await knex('tags')
		.select('id', 'slug')
		.whereIn('slug', aggTags);

	const studios = await knex('entities')
		.select('id', 'slug')
		.where('type', 'studio')
		.whereIn('slug', aggStudios);

	const tagIdsBySlug = Object.fromEntries(tags.map((tag) => [tag.slug, tag.id]));
	const studioIdsBySlug = Object.fromEntries(studios.map((studio) => [studio.slug, studio.id]));

	const addedMovies = await movieReleases.reduce(async (chain, release, index, array) => {
		const acc = await chain;
		const movie = await addRelease(release, { batchId, tagIdsBySlug, studioIdsBySlug });

		if (!movie.skipped) {
			// index is zero-based; report a one-based position
			console.log(`Loaded ${index + 1}/${array.length} '${movie.entityName}' movie "${movie.title}"`);
		}

		return [...acc, movie];
	}, Promise.resolve([]));

	const addedScenes = await sceneReleases.reduce(async (chain, release, index, array) => {
		const acc = await chain;
		const scene = await addRelease(release, { batchId, movies: addedMovies, tagIdsBySlug, studioIdsBySlug });

		if (!scene.skipped) {
			console.log(`Loaded ${index + 1}/${array.length} '${scene.entityName}' scene "${scene.title}"`);
		}

		// keep the scene itself, so the summary below filters real records
		return [...acc, scene];
	}, Promise.resolve([]));

	console.log(`Loaded ${addedMovies.filter((movie) => movie && !movie.skipped).length}/${movieReleases.length} movies in batch ${batchId}`);
	console.log(`Loaded ${addedScenes.filter((scene) => scene && !scene.skipped).length}/${sceneReleases.length} scenes in batch ${batchId}`);

	process.exit();
}
|
||||
|
||||
// Entry point: run the command named on the command line (`args._`).
// Scoped in a block so the helper names do not leak into the module.
{
	const commands = { save, load };
	const command = String(args._);

	// own-property check: reject unknown commands, and inherited keys such as
	// 'constructor', with a readable message instead of an opaque TypeError
	if (!Object.hasOwn(commands, command)) {
		throw new Error(`Unknown command '${command}', expected one of: ${Object.keys(commands).join(', ')}`);
	}

	commands[command]();
}
|
||||
@@ -3,9 +3,11 @@
|
||||
const config = require('config');
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const undici = require('undici');
|
||||
const fs = require('fs').promises;
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
// const util = require('util');
|
||||
// const stream = require('stream');
|
||||
const { pipeline } = require('stream/promises');
|
||||
const tunnel = require('tunnel');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { JSDOM, toughCookie } = require('jsdom');
|
||||
@@ -18,7 +20,7 @@ const logger = require('../logger')(__filename);
|
||||
const virtualConsole = require('./virtual-console')(__filename);
|
||||
const argv = require('../argv');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
// const pipeline = util.promisify(stream.pipeline);
|
||||
|
||||
const limiters = {
|
||||
bypass: new Bottleneck({
|
||||
@@ -47,13 +49,6 @@ const defaultOptions = {
|
||||
},
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
function useProxy(url) {
|
||||
if (!config.proxy.enable) {
|
||||
return false;
|
||||
@@ -326,87 +321,103 @@ async function bypassCloudflareRequest(url, method, body, cloudflareBypass, opti
|
||||
};
|
||||
}
|
||||
|
||||
async function request(method = 'get', url, body, requestOptions = {}, limiter) {
|
||||
const http = requestOptions.session || bhttp;
|
||||
const defaultAgent = new undici.Agent({
|
||||
allowH2: true,
|
||||
connect: {
|
||||
rejectUnauthorized: false,
|
||||
},
|
||||
});
|
||||
|
||||
const options = {
|
||||
...requestOptions,
|
||||
session: null,
|
||||
};
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
async function request(method = 'get', url, body, requestOptions = {}, limiter, redirects = 0) {
|
||||
const withProxy = useProxy(url);
|
||||
const withBrowserBypass = useBrowserBypass(url, options);
|
||||
const withCloudflareBypass = useCloudflareBypass(url, options);
|
||||
const withBrowserBypass = useBrowserBypass(url, requestOptions);
|
||||
const withCloudflareBypass = useCloudflareBypass(url, requestOptions);
|
||||
|
||||
if (withProxy) {
|
||||
options.agent = proxyAgent;
|
||||
}
|
||||
|
||||
logger.debug(`${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}${withBrowserBypass || withCloudflareBypass ? ' bypass' : ''}) ${url}`);
|
||||
logger.debug(`${redirects > 0 ? 'REDIRECT ' : ''}${method.toUpperCase()} (${limiter._store.storeOptions.minTime}ms/${limiter._store.storeOptions.maxConcurrent}p${withProxy ? ' proxy' : ''}${withBrowserBypass || withCloudflareBypass ? ' bypass' : ''}) ${url}`);
|
||||
|
||||
if (withBrowserBypass) {
|
||||
if (method !== 'get') {
|
||||
throw new Error('Browser bypass only supports GET');
|
||||
}
|
||||
|
||||
return bypassBrowserRequest(url, options);
|
||||
if (method !== 'get') throw new Error('Browser bypass only supports GET');
|
||||
return bypassBrowserRequest(url, requestOptions);
|
||||
}
|
||||
|
||||
if (withCloudflareBypass) {
|
||||
return bypassCloudflareRequest(url, method, body, withCloudflareBypass, options);
|
||||
return bypassCloudflareRequest(url, method, body, withCloudflareBypass, requestOptions);
|
||||
}
|
||||
|
||||
const res = await (body
|
||||
? http[method](url, body, options)
|
||||
: http[method](url, options));
|
||||
const headers = {
|
||||
...requestOptions.headers,
|
||||
};
|
||||
|
||||
const res = await undici.request(url, {
|
||||
method: method.toUpperCase(),
|
||||
headers,
|
||||
body: body ?? null,
|
||||
dispatcher: withProxy
|
||||
? proxyAgent
|
||||
: defaultAgent,
|
||||
maxRedirections: 0, // handle manually
|
||||
});
|
||||
|
||||
if (res.headers.location && redirects < 3) {
|
||||
// Drain the body to free the socket before redirecting
|
||||
await res.body.dump();
|
||||
const nextUrl = new URL(res.headers.location, url).href;
|
||||
return request(method, nextUrl, body, requestOptions, limiter, redirects + 1);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
async function finalizeResult(res, options) {
|
||||
async function finalizeResult(res, url, options) {
|
||||
if (options.destination) {
|
||||
// res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`));
|
||||
|
||||
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||
}
|
||||
|
||||
if (Buffer.isBuffer(res.body)) {
|
||||
const html = res.body.toString();
|
||||
const window = options?.parse ? new JSDOM(html, { virtualConsole, ...options.extract }).window : null;
|
||||
const pathname = new URL(res.request.url).pathname.replace(/\//g, '_');
|
||||
|
||||
// allow window.close to be called after scraping is done, only for deep scrapes where the URL is known outside the scraper
|
||||
if (window && /fetchScene|fetchMovie/.test(new Error().stack)) {
|
||||
windows.set(pathname, window);
|
||||
}
|
||||
|
||||
if (argv.saveHtml) {
|
||||
await fs.writeFile(`./html/${pathname}.html`, html);
|
||||
}
|
||||
await pipeline(
|
||||
res.body,
|
||||
...(options.transforms || []),
|
||||
options.destination,
|
||||
);
|
||||
|
||||
return {
|
||||
...res,
|
||||
body: html,
|
||||
html,
|
||||
status: res.statusCode,
|
||||
statusCode: res.statusCode,
|
||||
headers: res.headers,
|
||||
document: window?.document || null,
|
||||
window,
|
||||
status: res.statusCode,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
};
|
||||
}
|
||||
|
||||
const buffer = await res.body.arrayBuffer();
|
||||
const html = Buffer.from(buffer).toString();
|
||||
const window = options?.parse ? new JSDOM(html, { virtualConsole, ...options.extract }).window : null;
|
||||
const pathname = new URL(url).pathname.replace(/\//g, '_');
|
||||
|
||||
if (window && /fetchScene|fetchMovie/.test(new Error().stack)) {
|
||||
windows.set(pathname, window);
|
||||
}
|
||||
|
||||
if (argv.saveHtml) {
|
||||
await fs.writeFile(`./html/${pathname}.html`, html);
|
||||
}
|
||||
|
||||
return {
|
||||
...res,
|
||||
body: res.body,
|
||||
statusCode: res.statusCode,
|
||||
status: res.statusCode,
|
||||
headers: res.headers,
|
||||
body: html,
|
||||
html,
|
||||
document: window?.document || null,
|
||||
window,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
};
|
||||
}
|
||||
|
||||
function getTimeout(options, url) {
|
||||
return new Promise((resolve, reject, onCancel) => {
|
||||
return new Promise((_resolve, reject, onCancel) => {
|
||||
const timeout = setTimeout(() => {
|
||||
logger.debug(`Canceled timed out request to ${url}`);
|
||||
reject(new Error(`URL ${url} timed out`));
|
||||
@@ -441,7 +452,7 @@ async function scheduleRequest(method = 'get', url, body, requestOptions = {}) {
|
||||
|
||||
timeout.cancel();
|
||||
|
||||
const curatedResult = await finalizeResult(result, options);
|
||||
const curatedResult = await finalizeResult(result, url, options);
|
||||
|
||||
logger.silly(`Response ${curatedResult.status} for ${method.toUpperCase()} ${url}`);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user