Compare commits
225 Commits
07c30108d0
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0560fac1ff | ||
|
|
108bf3b168 | ||
|
|
155e235246 | ||
|
|
bff665c6ec | ||
|
|
c7111329dc | ||
|
|
d7c1c0ae5c | ||
|
|
ea298d7edb | ||
|
|
99dfcae920 | ||
|
|
24cba1e1fa | ||
|
|
076bdad310 | ||
|
|
d432d291dd | ||
|
|
220f7e787d | ||
|
|
f1caa77e4b | ||
|
|
ff633436cb | ||
|
|
6860072a51 | ||
|
|
2c7b4cfc22 | ||
|
|
7d9e1be8d4 | ||
|
|
00db4b1b5b | ||
|
|
9f1cf1575a | ||
|
|
4f13e4ed28 | ||
|
|
9805aa7b5b | ||
|
|
0cc6ebc305 | ||
|
|
016c24af28 | ||
|
|
2158550091 | ||
|
|
68ddc8cb78 | ||
|
|
bc5693e44a | ||
|
|
7276d90629 | ||
|
|
1a1af95a10 | ||
|
|
bcb7a56588 | ||
|
|
16648d50f6 | ||
|
|
062dc0e75e | ||
|
|
42effd53fc | ||
|
|
3a3403bb1f | ||
|
|
6fb4989256 | ||
|
|
9750ca4b79 | ||
|
|
0500f7eda8 | ||
|
|
19beff7dbc | ||
|
|
dfe1b84992 | ||
|
|
3d3b544cb4 | ||
|
|
65fa6027ee | ||
|
|
b3a0ba72eb | ||
|
|
f3e2143b45 | ||
|
|
d289f95d3d | ||
|
|
d8b41ec9b5 | ||
|
|
05f7d8b814 | ||
|
|
c2fc09fdaa | ||
|
|
8a7210a3b9 | ||
|
|
e029ca7fd0 | ||
|
|
ffcfae69d5 | ||
|
|
dcaee01ce8 | ||
|
|
7561a4577e | ||
|
|
98b735dbae | ||
|
|
d2daed788c | ||
|
|
23257745a7 | ||
|
|
156954553d | ||
|
|
eb20af14a6 | ||
|
|
ae247c7a91 | ||
|
|
d49e6ef488 | ||
|
|
2b20d98ee0 | ||
|
|
b8cf6a3e71 | ||
|
|
af57f412c9 | ||
|
|
3696b81e69 | ||
|
|
5b6fefd43b | ||
|
|
a863ab888d | ||
|
|
209a81ef71 | ||
|
|
bd91dcbc77 | ||
|
|
b89f25405a | ||
|
|
198f08cb3a | ||
|
|
febaac3865 | ||
|
|
f82167656b | ||
|
|
6e20d7d216 | ||
|
|
612a489cdf | ||
|
|
db2e5b2da4 | ||
|
|
d81310ed25 | ||
|
|
ec86aa9286 | ||
|
|
5d58ddcd49 | ||
|
|
c515c8aeb3 | ||
|
|
debf92afd7 | ||
|
|
601f930324 | ||
|
|
e77ced44c7 | ||
|
|
9f37f54634 | ||
|
|
dc7f325d13 | ||
|
|
35c941488e | ||
|
|
fc32843c5a | ||
|
|
26b31fb10a | ||
|
|
9aa6c9c6c5 | ||
|
|
855a15bc73 | ||
|
|
3329661135 | ||
|
|
791bd6bf27 | ||
|
|
d6be985c4b | ||
|
|
7286846308 | ||
|
|
81dfce8b3d | ||
|
|
aff0e27c55 | ||
|
|
68fe786cb7 | ||
|
|
9a0b0a8989 | ||
|
|
60b8271e4f | ||
|
|
a52042b56c | ||
|
|
7a3dac865e | ||
|
|
74e0fb721d | ||
|
|
ba366df7a5 | ||
|
|
d4e6082d2e | ||
|
|
ea325b8ec5 | ||
|
|
41b1f39752 | ||
|
|
c75c3e3ed9 | ||
|
|
ee495a5cde | ||
|
|
b52e871cfe | ||
|
|
0fc725873e | ||
|
|
db14657101 | ||
|
|
5524efb3ba | ||
|
|
1397349058 | ||
|
|
25cac4d0ab | ||
|
|
5a282cc372 | ||
|
|
3e6592d1f3 | ||
|
|
b95e2fadf7 | ||
|
|
e3b922da6c | ||
|
|
9bf42ff6c0 | ||
|
|
ba127ee53d | ||
|
|
3ee73d2f77 | ||
|
|
4e68a69e02 | ||
|
|
570be66a91 | ||
|
|
0092aab579 | ||
|
|
78e9088ee5 | ||
|
|
cdb4644b42 | ||
|
|
f80f73d0d4 | ||
|
|
e70acdfe79 | ||
|
|
75c5a37567 | ||
|
|
9aa7fd77c4 | ||
|
|
6b768b6828 | ||
|
|
0f44b4d410 | ||
|
|
3368e2b343 | ||
|
|
8e77a5ef46 | ||
|
|
29dc94d77d | ||
|
|
03b039c937 | ||
|
|
f4f1afdf3b | ||
|
|
fefb165274 | ||
|
|
a3633c2f9f | ||
|
|
a007593390 | ||
|
|
f31c69f255 | ||
|
|
9a088599e6 | ||
|
|
01341b80d6 | ||
|
|
fec70db20b | ||
|
|
7bbb7cb43d | ||
|
|
e7a1ad42f5 | ||
|
|
be90982420 | ||
|
|
ce94456494 | ||
|
|
fe3fcd0741 | ||
|
|
5ca7537e24 | ||
|
|
35449ef906 | ||
|
|
bebebf8447 | ||
|
|
101c84763a | ||
|
|
36ca313a89 | ||
|
|
d07d3c2c8d | ||
|
|
4dfda71a94 | ||
|
|
75c2a77aea | ||
|
|
5bea829acb | ||
|
|
66acacc71a | ||
|
|
c646b1e29b | ||
|
|
4bd6e39a8c | ||
|
|
4154246ff2 | ||
|
|
e46607a5df | ||
|
|
ce6ad5c14a | ||
|
|
28ded0b4b9 | ||
|
|
f2c2c7856b | ||
|
|
74884a500e | ||
|
|
8b8f7fddd9 | ||
|
|
371e97f695 | ||
|
|
a77ce63548 | ||
|
|
85a5f13ec1 | ||
|
|
5bbfbc90a8 | ||
|
|
a6014a5d1a | ||
|
|
cc45502119 | ||
|
|
241e314c68 | ||
|
|
4843f0ebc7 | ||
|
|
79ff9eb58b | ||
|
|
e3126b1201 | ||
|
|
392d1daa2a | ||
|
|
d360854a6d | ||
|
|
786ef491ac | ||
|
|
f62e8d6bf6 | ||
|
|
b62c705b76 | ||
|
|
4dd236d5da | ||
|
|
d0f12c774d | ||
|
|
13745159c6 | ||
|
|
a41134555a | ||
|
|
7a7c6e92f6 | ||
|
|
9a8527a780 | ||
|
|
33179c0829 | ||
|
|
222be390ec | ||
|
|
3aa53e431d | ||
|
|
b79cf4fb78 | ||
|
|
93df88768c | ||
|
|
d4b73b6dd3 | ||
|
|
acb114012c | ||
|
|
e8d6345400 | ||
|
|
7062f04e78 | ||
|
|
d85e917935 | ||
|
|
5a210451e0 | ||
|
|
e91ff659e9 | ||
|
|
dc21571a6f | ||
|
|
b01913690e | ||
|
|
a05928e399 | ||
|
|
dfc9a88ba8 | ||
|
|
7562584830 | ||
|
|
26d8864659 | ||
|
|
cb0b0547cb | ||
|
|
7a70129b29 | ||
|
|
a373a899e3 | ||
|
|
ca925d4d7f | ||
|
|
43358550a9 | ||
|
|
506db492bc | ||
|
|
8d1b2975da | ||
|
|
ff4ac301c5 | ||
|
|
baa35b3c09 | ||
|
|
4f3b152b17 | ||
|
|
a7e2a2e5c3 | ||
|
|
b481ded6b3 | ||
|
|
fd205aed88 | ||
|
|
39cdea17f3 | ||
|
|
48af5939db | ||
|
|
a1be68366c | ||
|
|
37b92209f0 | ||
|
|
9754f9e9af | ||
|
|
ccd833665f | ||
|
|
a77e13c7de | ||
|
|
46dbbc75fc |
@@ -27,7 +27,7 @@
|
||||
"require-await": "off",
|
||||
"no-param-reassign": ["error", {
|
||||
"props": true,
|
||||
"ignorePropertyModificationsFor": ["state", "acc", "req"]
|
||||
"ignorePropertyModificationsFor": ["state", "acc", "req", "error"]
|
||||
}]
|
||||
},
|
||||
"globals": {
|
||||
|
||||
2
common
2
common
Submodule common updated: ec4b15ce33...1374f90397
@@ -186,6 +186,10 @@ module.exports = {
|
||||
'voyeurcams',
|
||||
'wifesluts',
|
||||
'wishescumtrue',
|
||||
// hentaied
|
||||
'somegore',
|
||||
// digital playground
|
||||
'digitalplayground', // no longer updates, produces a bunch of garbage for some reason
|
||||
],
|
||||
networks: [
|
||||
// dummy network for testing
|
||||
@@ -194,144 +198,17 @@ module.exports = {
|
||||
'forbondage',
|
||||
],
|
||||
},
|
||||
profiles: [
|
||||
[
|
||||
'evilangel',
|
||||
'famedigital',
|
||||
'devilsfilm',
|
||||
'roccosiffredi',
|
||||
profiles: null,
|
||||
interpolation: {
|
||||
excludeAvatarCredits: [ // never allow
|
||||
'Pierre Woodman',
|
||||
],
|
||||
[
|
||||
// Gamma; Evil Angel + Devil's Film, Pure Taboo (unavailable), (sometimes) Burning Angel and Wicked have their own assets
|
||||
'xempire',
|
||||
'blowpass',
|
||||
avoidAvatarCredits: [ // only allow as last resort
|
||||
'AnalVids',
|
||||
'Bang!',
|
||||
'Cherry Pimps',
|
||||
],
|
||||
[
|
||||
// MindGeek; Mile High Media has its own assets
|
||||
'brazzers',
|
||||
'realitykings',
|
||||
'mofos',
|
||||
'digitalplayground',
|
||||
'twistys',
|
||||
'babes',
|
||||
'fakehub',
|
||||
'sexyhub',
|
||||
'metrohd',
|
||||
'iconmale',
|
||||
'men',
|
||||
'transangels',
|
||||
],
|
||||
'wicked',
|
||||
'burningangel',
|
||||
'milehighmedia',
|
||||
[
|
||||
'vixen',
|
||||
'tushy',
|
||||
'blacked',
|
||||
'tushyraw',
|
||||
'blackedraw',
|
||||
'deeper',
|
||||
],
|
||||
[
|
||||
// Nubiles
|
||||
'nubiles',
|
||||
'nubilesporn',
|
||||
'deeplush',
|
||||
'brattysis',
|
||||
'nfbusty',
|
||||
'anilos',
|
||||
'hotcrazymess',
|
||||
'thatsitcomshow',
|
||||
],
|
||||
'21sextury',
|
||||
'dogfartnetwork',
|
||||
'adultempire',
|
||||
'julesjordan',
|
||||
'dorcelclub',
|
||||
'bang',
|
||||
'pervcity',
|
||||
'kink',
|
||||
'peternorth',
|
||||
'naughtyamerica',
|
||||
'cherrypimps',
|
||||
'pimpxxx',
|
||||
'18vr',
|
||||
'babevr',
|
||||
'badoinkvr',
|
||||
'realvr',
|
||||
'vrcosplayx',
|
||||
'teamskeet',
|
||||
'mylf',
|
||||
'spermmania',
|
||||
[
|
||||
'letsdoeit',
|
||||
'mamacitaz',
|
||||
'forbondage',
|
||||
'amateureuro',
|
||||
'vipsexvault',
|
||||
'transbella',
|
||||
],
|
||||
[
|
||||
'hussiepass',
|
||||
'hushpass',
|
||||
'interracialpass',
|
||||
'interracialpovs',
|
||||
'povpornstars',
|
||||
'seehimfuck',
|
||||
'eyeontheguy',
|
||||
],
|
||||
[
|
||||
// Full Porn Network
|
||||
'analized',
|
||||
'hergape',
|
||||
'jamesdeen',
|
||||
'dtfsluts',
|
||||
'analbbc',
|
||||
'analviolation',
|
||||
'baddaddypov',
|
||||
'girlfaction',
|
||||
'homemadeanalwhores',
|
||||
'mugfucked',
|
||||
'onlyprince',
|
||||
'pervertgallery',
|
||||
'povperverts',
|
||||
],
|
||||
'wankzvr',
|
||||
'milfvr',
|
||||
'tranzvr',
|
||||
'topwebmodels',
|
||||
'pascalssubsluts',
|
||||
'kellymadison',
|
||||
'5kporn',
|
||||
'private',
|
||||
'bangbros',
|
||||
'hitzefrei',
|
||||
'porncz',
|
||||
'czechav',
|
||||
'angelogodshackoriginal',
|
||||
'littlecapricedreams',
|
||||
'missyx',
|
||||
'gangbangcreampie',
|
||||
'gloryholesecrets',
|
||||
'aziani',
|
||||
[
|
||||
'firstanalquest',
|
||||
'doubleviewcasting',
|
||||
],
|
||||
[
|
||||
'silverstonedvd',
|
||||
'silviasaint',
|
||||
],
|
||||
[
|
||||
'analvids',
|
||||
'pornworld',
|
||||
],
|
||||
'pierrewoodman',
|
||||
'score',
|
||||
'boobpedia',
|
||||
'pornhub',
|
||||
'freeones',
|
||||
],
|
||||
},
|
||||
options: {
|
||||
traxxx: {
|
||||
// source: 'http://nsfw.unknown.name/random',
|
||||
@@ -339,6 +216,7 @@ module.exports = {
|
||||
},
|
||||
proxy: {
|
||||
enable: false,
|
||||
test: 'https://api.ipify.org?format=json',
|
||||
host: '',
|
||||
port: 8888,
|
||||
hostnames: [],
|
||||
@@ -346,6 +224,7 @@ module.exports = {
|
||||
bypass: {
|
||||
browser: {
|
||||
enable: false,
|
||||
clientRetirement: 20,
|
||||
hostnames: [ // these can run in the same browser session
|
||||
'www.kink.com',
|
||||
'store2.psmcdn.net', // Team Skeet API
|
||||
@@ -402,10 +281,14 @@ module.exports = {
|
||||
trailerQuality: [540, 720, 960, 480, 1080, 360, 320, 1440, 1600, 1920, 2160, 270, 240, 180],
|
||||
limit: 25, // max number of photos per release
|
||||
attempts: 2,
|
||||
fetchStreams: true,
|
||||
streamConcurrency: 2, // max number of video streams (m3u8 etc.) to fetch and process at once
|
||||
flushOrphaned: true,
|
||||
flushOrphaned: false,
|
||||
flushWindow: 1000,
|
||||
streams: {
|
||||
enabled: true, // fetch streams
|
||||
concurrency: 2,
|
||||
excludeHostnames: [],
|
||||
selectIndex: {},
|
||||
},
|
||||
},
|
||||
titleSlugLength: 50,
|
||||
};
|
||||
|
||||
19
migrations/20260203052449_scene_attributes.js
Normal file
19
migrations/20260203052449_scene_attributes.js
Normal file
@@ -0,0 +1,19 @@
|
||||
exports.up = async (knex) => {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.json('attributes');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('movies', (table) => {
|
||||
table.json('attributes');
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async (knex) => {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.dropColumn('attributes');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('movies', (table) => {
|
||||
table.dropColumn('attributes');
|
||||
});
|
||||
};
|
||||
11
migrations/20260206034715_raw_releases.js
Normal file
11
migrations/20260206034715_raw_releases.js
Normal file
@@ -0,0 +1,11 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.specificType('alt_descriptions', 'text ARRAY');
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.dropColumn('alt_descriptions');
|
||||
});
|
||||
};
|
||||
58
migrations/20260207034922_chapter_details.js
Normal file
58
migrations/20260207034922_chapter_details.js
Normal file
@@ -0,0 +1,58 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.alterTable('chapters', (table) => {
|
||||
table.datetime('date');
|
||||
});
|
||||
|
||||
await knex.schema.createTable('chapters_trailers', (table) => {
|
||||
table.integer('chapter_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('chapters')
|
||||
.onDelete('cascade');
|
||||
|
||||
table.text('media_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media')
|
||||
.onDelete('cascade');
|
||||
});
|
||||
|
||||
await knex.schema.createTable('chapters_teasers', (table) => {
|
||||
table.integer('chapter_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('chapters')
|
||||
.onDelete('cascade');
|
||||
|
||||
table.text('media_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media')
|
||||
.onDelete('cascade');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('entities', (table) => {
|
||||
table.string('name_stylized');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('movies', (table) => {
|
||||
table.specificType('alt_descriptions', 'text ARRAY');
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.schema.alterTable('chapters', (table) => {
|
||||
table.dropColumn('date');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('entities', (table) => {
|
||||
table.dropColumn('name_stylized');
|
||||
});
|
||||
|
||||
await knex.schema.dropTable('chapters_trailers');
|
||||
await knex.schema.dropTable('chapters_teasers');
|
||||
|
||||
await knex.schema.alterTable('movies', (table) => {
|
||||
table.dropColumn('alt_descriptions');
|
||||
});
|
||||
};
|
||||
28
migrations/20260208044729_random_sfw_improvements.js
Normal file
28
migrations/20260208044729_random_sfw_improvements.js
Normal file
@@ -0,0 +1,28 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.createMaterializedView('media_sfw', (view) => {
|
||||
view.as(knex('media').select('id').where('is_sfw', true));
|
||||
});
|
||||
|
||||
await knex.raw('CREATE UNIQUE INDEX media_sfw_id ON media_sfw(id)');
|
||||
|
||||
await knex.raw(`
|
||||
CREATE OR REPLACE FUNCTION get_random_sfw_media_id() RETURNS varchar AS $$
|
||||
SELECT id FROM media_sfw
|
||||
ORDER BY random()
|
||||
LIMIT 1;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
`);
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.raw(`
|
||||
CREATE OR REPLACE FUNCTION get_random_sfw_media_id() RETURNS varchar AS $$
|
||||
SELECT id FROM media
|
||||
WHERE is_sfw = true
|
||||
ORDER BY random()
|
||||
LIMIT 1;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
`);
|
||||
|
||||
await knex.schema.dropMaterializedView('media_sfw');
|
||||
};
|
||||
31
migrations/20260222055254_unique_origin_tags.js
Normal file
31
migrations/20260222055254_unique_origin_tags.js
Normal file
@@ -0,0 +1,31 @@
|
||||
exports.up = async (knex) => {
|
||||
// dedupe
|
||||
await knex.raw(`
|
||||
DELETE
|
||||
FROM releases_tags
|
||||
WHERE ctid IN
|
||||
(
|
||||
SELECT ctid
|
||||
FROM(
|
||||
SELECT
|
||||
*,
|
||||
ctid,
|
||||
row_number() OVER (PARTITION BY release_id, original_tag ORDER BY ctid)
|
||||
FROM releases_tags
|
||||
)s
|
||||
WHERE row_number >= 2
|
||||
)
|
||||
`);
|
||||
|
||||
await knex.schema.alterTable('releases_tags', (table) => {
|
||||
table.increments('id');
|
||||
table.unique(['release_id', 'original_tag']);
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async (knex) => {
|
||||
await knex.schema.alterTable('releases_tags', (table) => {
|
||||
table.dropColumn('id');
|
||||
table.dropUnique(['release_id', 'original_tag']);
|
||||
});
|
||||
};
|
||||
23
migrations/20260301042453_foot_float.js
Normal file
23
migrations/20260301042453_foot_float.js
Normal file
@@ -0,0 +1,23 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.alterTable('actors', (table) => {
|
||||
table.decimal('foot')
|
||||
.alter();
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('actors_profiles', (table) => {
|
||||
table.decimal('foot')
|
||||
.alter();
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.schema.alterTable('actors', (table) => {
|
||||
table.integer('foot')
|
||||
.alter();
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('actors_profiles', (table) => {
|
||||
table.integer('foot')
|
||||
.alter();
|
||||
});
|
||||
};
|
||||
13
migrations/20260302222545_series_alt_descriptions.js
Normal file
13
migrations/20260302222545_series_alt_descriptions.js
Normal file
@@ -0,0 +1,13 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.alterTable('series', (table) => {
|
||||
table.specificType('alt_descriptions', 'text ARRAY');
|
||||
table.json('attributes');
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.schema.alterTable('series', (table) => {
|
||||
table.dropColumn('alt_descriptions');
|
||||
table.dropColumn('attributes');
|
||||
});
|
||||
};
|
||||
21
migrations/20260304020542_scene_actor_tags.js
Normal file
21
migrations/20260304020542_scene_actor_tags.js
Normal file
@@ -0,0 +1,21 @@
|
||||
exports.up = async function(knex) {
|
||||
await knex.schema.alterTable('releases_tags', (table) => {
|
||||
table.integer('actor_id')
|
||||
.references('id')
|
||||
.inTable('actors');
|
||||
|
||||
table.dropUnique(['tag_id', 'release_id']);
|
||||
});
|
||||
|
||||
await knex.raw('CREATE UNIQUE INDEX releases_tags_tag_id_release_id_actor_id ON releases_tags (tag_id, release_id, COALESCE(actor_id, -1))');
|
||||
};
|
||||
|
||||
exports.down = async function(knex) {
|
||||
await knex.schema.alterTable('releases_tags', (table) => {
|
||||
table.dropColumn('actor_id');
|
||||
|
||||
table.unique(['tag_id', 'release_id']);
|
||||
});
|
||||
|
||||
await knex.raw('DROP INDEX IF EXISTS releases_tags_tag_id_release_id_actor_id');
|
||||
};
|
||||
19
package-lock.json
generated
19
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "traxxx",
|
||||
"version": "1.248.19",
|
||||
"version": "1.250.38",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "traxxx",
|
||||
"version": "1.248.19",
|
||||
"version": "1.250.38",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "^3.458.0",
|
||||
@@ -94,7 +94,7 @@
|
||||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.18.21",
|
||||
"unprint": "^0.18.35",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
@@ -18822,6 +18822,11 @@
|
||||
"resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
|
||||
"integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw=="
|
||||
},
|
||||
"node_modules/set-cookie-parser": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-3.0.1.tgz",
|
||||
"integrity": "sha512-n7Z7dXZhJbwuAHhNzkTti6Aw9QDDjZtm3JTpTGATIdNzdQz5GuFs22w90BcvF4INfnrL5xrX3oGsuqO5Dx3A1Q=="
|
||||
},
|
||||
"node_modules/set-function-length": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz",
|
||||
@@ -20380,9 +20385,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/unprint": {
|
||||
"version": "0.18.21",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.21.tgz",
|
||||
"integrity": "sha512-JPGhU0BWEBttZejVHLWb5mfUCX6yuEcTHdu8Vqsk+WfIhNlGkNgHeZ4N2yvOPlIH7AkXo+3pa0EslaqTREX1DA==",
|
||||
"version": "0.18.35",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.35.tgz",
|
||||
"integrity": "sha512-oTCBE8pGzfTFlSb0QbYv/ctICTmcU/K81gOPfchn+efLHu48hq1S3582JHvwXAXCjiRKZYatJlEFzUTXVtfuvA==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"bottleneck": "^2.19.5",
|
||||
"cookie": "^1.1.1",
|
||||
@@ -20394,6 +20400,7 @@
|
||||
"moment-timezone": "^0.5.34",
|
||||
"object-hash": "^3.0.0",
|
||||
"patchright": "^1.56.1",
|
||||
"set-cookie-parser": "^3.0.1",
|
||||
"srcset": "^4.0.0",
|
||||
"tunnel": "^0.0.6",
|
||||
"undici": "^7.18.2"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "traxxx",
|
||||
"version": "1.248.19",
|
||||
"version": "1.250.38",
|
||||
"description": "All the latest porn releases in one place",
|
||||
"main": "src/app.js",
|
||||
"scripts": {
|
||||
@@ -153,7 +153,7 @@
|
||||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.18.21",
|
||||
"unprint": "^0.18.35",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
|
||||
@@ -537,6 +537,10 @@ const tags = [
|
||||
name: 'enhanced butt',
|
||||
slug: 'enhanced-butt',
|
||||
},
|
||||
{
|
||||
name: 'extreme insertion',
|
||||
slug: 'extreme-insertion',
|
||||
},
|
||||
{
|
||||
name: 'facefucking',
|
||||
slug: 'facefucking',
|
||||
@@ -834,6 +838,11 @@ const tags = [
|
||||
slug: 'natural-boobs',
|
||||
group: 'body',
|
||||
},
|
||||
{
|
||||
name: 'natural butt',
|
||||
slug: 'natural-butt',
|
||||
group: 'body',
|
||||
},
|
||||
{
|
||||
name: 'nipple clamps',
|
||||
slug: 'nipple-clamps',
|
||||
@@ -954,6 +963,11 @@ const tags = [
|
||||
slug: 'pyjamas',
|
||||
group: 'clothing',
|
||||
},
|
||||
{
|
||||
name: 'rave',
|
||||
slug: 'rave',
|
||||
group: 'clothing',
|
||||
},
|
||||
{
|
||||
name: 'redhead',
|
||||
slug: 'redhead',
|
||||
@@ -1612,6 +1626,10 @@ const aliases = [
|
||||
name: 'big tits d-dd cup',
|
||||
for: 'big-boobs',
|
||||
},
|
||||
{
|
||||
name: 'busty',
|
||||
for: 'big-boobs',
|
||||
},
|
||||
{
|
||||
name: 'busty - big boobs',
|
||||
for: 'big-boobs',
|
||||
@@ -2155,6 +2173,18 @@ const aliases = [
|
||||
name: 'natural tits',
|
||||
for: 'natural-boobs',
|
||||
},
|
||||
{
|
||||
name: 'natural butt',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'natural ass',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'real ass',
|
||||
for: 'natural-butt',
|
||||
},
|
||||
{
|
||||
name: 'oiled',
|
||||
for: 'oil',
|
||||
@@ -2994,6 +3024,8 @@ const priorities = [ // higher index is higher priority
|
||||
['blowbang', 'orgy'],
|
||||
['gangbang'],
|
||||
['gay', 'transsexual', 'bisexual', 'hentai'],
|
||||
['pissing'],
|
||||
['compilation', 'bts'],
|
||||
].reduce((acc, slugs, index) => {
|
||||
slugs.forEach((slug) => { acc[slug] = index; });
|
||||
|
||||
|
||||
@@ -9,6 +9,9 @@ const grandParentNetworks = [
|
||||
name: 'Gamma Entertainment',
|
||||
url: 'https://www.gammaentertainment.com',
|
||||
alias: ['gammaentertainment'],
|
||||
options: {
|
||||
preferNetwork: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'hush',
|
||||
@@ -56,7 +59,6 @@ const parentNetworks = [
|
||||
url: 'https://www.21sextury.com',
|
||||
description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
mobile: 'https://m.dpfanatics.com/en/video',
|
||||
},
|
||||
parent: 'gamma',
|
||||
@@ -105,20 +107,87 @@ const parentNetworks = [
|
||||
url: 'https://www.asgmax.com',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
scene: 'https://www.asgmax.com/en/video/asgmax',
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const networks = [
|
||||
// paper street media
|
||||
{
|
||||
slug: 'teamskeet',
|
||||
name: 'Team Skeet',
|
||||
url: 'https://www.teamskeet.com',
|
||||
description: 'Welcome to teamskeet.com, the largest collection of exclusive teen porn sites and videos on the web. Check out our TeamSkeet porn sites now.',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'ts_network',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'mylf',
|
||||
name: 'MYLF',
|
||||
tags: ['milf'],
|
||||
url: 'https://www.mylf.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'mylf_bundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'pervz',
|
||||
name: 'Pervz',
|
||||
url: 'https://www.pervz.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'pervbundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'swappz',
|
||||
name: 'Swappz',
|
||||
url: 'https://www.swappz.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'swap_bundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'freeuse',
|
||||
name: 'Free Use',
|
||||
url: 'https://www.freeuse.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'freeusebundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'familystrokes',
|
||||
name: 'Family Strokes',
|
||||
url: 'https://www.familystrokes.com/',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
endpoint: 'familybundle',
|
||||
},
|
||||
},
|
||||
// charged, paper street media sister brand
|
||||
{
|
||||
slug: 'sayuncle',
|
||||
name: 'Say Uncle',
|
||||
url: 'https://www.sayuncle.com',
|
||||
parent: 'chargedmedia',
|
||||
tags: ['gay'],
|
||||
parameters: {
|
||||
endpoint: 'sau_network',
|
||||
},
|
||||
},
|
||||
// etc
|
||||
{
|
||||
slug: '21sextreme',
|
||||
name: '21Sextreme',
|
||||
url: 'https://www.21sextreme.com',
|
||||
description: 'Welcome to 21Sextreme.com, your portal to fisting porn, old and young lesbians, horny grannies & extreme BDSM featuring the best Euro & American Pornstars',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
mobile: 'https://m.dpfanatics.com/en/video',
|
||||
},
|
||||
parent: '21sextury',
|
||||
@@ -129,7 +198,6 @@ const networks = [
|
||||
url: 'https://www.21naturals.com',
|
||||
description: 'Welcome to 21Naturals.com, the porn network featuring the hottest pornstars from all over the world in all natural porn and erotic sex videos. Watch thousands of girls with natural tits',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
mobile: 'https://m.dpfanatics.com/en/video',
|
||||
},
|
||||
parent: '21sextury',
|
||||
@@ -159,7 +227,6 @@ const networks = [
|
||||
description: 'Adult Time is a premium streaming service for adults! Watch adult movies, series, and channels from the top names in the industry.',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
referer: 'https://freetour.adulttime.com/en/join',
|
||||
// scene: false,
|
||||
},
|
||||
@@ -176,9 +243,10 @@ const networks = [
|
||||
parent: 'porndoe',
|
||||
},
|
||||
{
|
||||
slug: 'amnesiac',
|
||||
name: 'Amnesiac',
|
||||
hasLogo: true,
|
||||
slug: 'hentaied',
|
||||
rename: 'amnesiac',
|
||||
name: 'Hentaied',
|
||||
url: 'https://hentaied.pro',
|
||||
},
|
||||
{
|
||||
slug: 'analvids',
|
||||
@@ -250,7 +318,6 @@ const networks = [
|
||||
url: 'https://www.blowpass.com',
|
||||
description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
referer: 'https://www.blowpass.com',
|
||||
},
|
||||
parent: 'gamma',
|
||||
@@ -303,9 +370,6 @@ const networks = [
|
||||
url: 'https://www.evilangel.com',
|
||||
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'exploitedx',
|
||||
@@ -349,7 +413,7 @@ const networks = [
|
||||
description: 'The world famous Dogfart Interracial series. Online since 1996, we have the largest collection of Interracial videos, pictures and content on the web.',
|
||||
parent: 'dfxtra',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
profileReferer: 'https://www.dogfartnetwork.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -362,9 +426,6 @@ const networks = [
|
||||
slug: 'fantasymassage',
|
||||
name: 'Fantasy Massage',
|
||||
url: 'https://www.fantasymassage.com',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
},
|
||||
parent: 'gamma',
|
||||
},
|
||||
{
|
||||
@@ -374,7 +435,6 @@ const networks = [
|
||||
description: 'Watch and download thousands of the best porn videos at FameDigital.com, the largest porn network on the web! The hottest teens, MILFs and more pornstars are all here!',
|
||||
parameters: {
|
||||
mobile: 'https://m.dpfanatics.com/en/video',
|
||||
layout: 'api',
|
||||
},
|
||||
parent: 'gamma',
|
||||
},
|
||||
@@ -394,7 +454,6 @@ const networks = [
|
||||
url: 'https://www.filthykings.com',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
queryChannel: true,
|
||||
scene: 'https://www.filthykings.com/en/video/filthykings',
|
||||
referer: 'https://www.filthykings.com',
|
||||
@@ -421,6 +480,7 @@ const networks = [
|
||||
slug: 'fullpornnetwork',
|
||||
name: 'Full Porn Network',
|
||||
url: 'https://www.fullpornnetwork.com',
|
||||
showcased: false,
|
||||
description: 'FullPornNetwork.com is the latest and greatest for one stop shop porn sites. Check out the expanding library of the multi-site network. All of fan\'s favorite content from ANALIZED.COM, DTFsluts.com, YourMomDoesPorn.com and many more. Give die hard porn fans access to an array of premium content available in 4k and 1080p. Full access included streaming hd and unlimited downloads. Be exclusive, be a member to FullPornNetwork.com Today.',
|
||||
},
|
||||
{
|
||||
@@ -434,9 +494,6 @@ const networks = [
|
||||
name: 'Girlsway',
|
||||
url: 'https://www.girlsway.com',
|
||||
description: 'Girlsway.com has the best lesbian porn videos online! The hottest pornstars & first time lesbians in real girl on girl sex, tribbing, squirting & pussy licking action right HERE!',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
},
|
||||
parent: 'gamma',
|
||||
},
|
||||
{
|
||||
@@ -484,7 +541,6 @@ const networks = [
|
||||
slug: 'jayrock',
|
||||
name: 'JayRock Productions',
|
||||
url: 'http://jayrockcontent.com',
|
||||
parent: 'gamma',
|
||||
},
|
||||
{
|
||||
slug: 'julesjordan',
|
||||
@@ -612,19 +668,6 @@ const networks = [
|
||||
description: 'Check out the Official Mofos Network of best amateur pornsites. Girlfriend – voyeur - college girls - first anal & more. Bonus Milf sites for wifey lovers.',
|
||||
parent: 'aylo',
|
||||
},
|
||||
{
|
||||
slug: 'mylf',
|
||||
name: 'MYLF',
|
||||
tags: ['milf'],
|
||||
url: 'https://www.mylf.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
// endpoint: 'mylf-elastic-hka5k7vyuw',
|
||||
fullEndpoint: 'mylf_bundle',
|
||||
modelPrefix: 'model_',
|
||||
avatars: 'https://images.mylfcdn.net/tsv4/model/profiles',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'nebraskacoeds',
|
||||
name: 'Nebraska Coeds',
|
||||
@@ -645,6 +688,9 @@ const networks = [
|
||||
name: 'Nubiles',
|
||||
url: 'https://www.nubiles.com',
|
||||
description: 'Welcome to the teen megasite that started it all! Browse our massive HD collection of fresh legal hotties at Nubiles.net.',
|
||||
options: {
|
||||
forceDeepUpdate: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'perfectgonzo',
|
||||
@@ -698,7 +744,6 @@ const networks = [
|
||||
description: 'PureTaboo.com is the ultimate site for family taboo porn, featuring submissive teens & virgins in rough sex videos in ultra 4k HD.',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
scene: 'https://www.puretaboo.com/en/video',
|
||||
referer: 'https://www.puretaboo.com',
|
||||
},
|
||||
@@ -714,22 +759,11 @@ const networks = [
|
||||
parentSession: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'sayuncle',
|
||||
name: 'Say Uncle',
|
||||
url: 'https://www.sayuncle.com',
|
||||
parent: 'chargedmedia',
|
||||
tags: ['gay'],
|
||||
parameters: {
|
||||
// endpoint: 'sau-elastic-00gy5fg5ra',
|
||||
fullEndpoint: 'sau_network',
|
||||
modelPrefix: 'model_',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'score',
|
||||
name: 'SCORE',
|
||||
url: 'https://www.scorepass.com',
|
||||
showcased: false,
|
||||
},
|
||||
{
|
||||
slug: 'sexyhub',
|
||||
@@ -750,58 +784,6 @@ const networks = [
|
||||
name: 'Spizoo',
|
||||
url: 'http://www.spizoo.com',
|
||||
},
|
||||
{
|
||||
slug: 'teamskeet',
|
||||
name: 'Team Skeet',
|
||||
url: 'https://www.teamskeet.com',
|
||||
description: 'Welcome to teamskeet.com, the largest collection of exclusive teen porn sites and videos on the web. Check out our TeamSkeet porn sites now.',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
// endpoint: 'ts-elastic-d5cat0jl5o',
|
||||
fullEndpoint: 'ts_network',
|
||||
modelPrefix: 'model_',
|
||||
avatars: 'https://images.mylfcdn.net/tsv4/model/profiles',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'pervz',
|
||||
name: 'Pervz',
|
||||
url: 'https://www.pervz.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
layout: 'search',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'swappz',
|
||||
name: 'Swappz',
|
||||
url: 'https://www.swappz.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
layout: 'search',
|
||||
endpoint: 'swap_bundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'freeuse',
|
||||
name: 'Free Use',
|
||||
url: 'https://www.freeuse.com',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
layout: 'search',
|
||||
endpoint: 'freeusebundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'familystrokes',
|
||||
name: 'Family Strokes',
|
||||
url: 'https://www.familystrokes.com/',
|
||||
parent: 'paperstreetmedia',
|
||||
parameters: {
|
||||
layout: 'search',
|
||||
endpoint: 'familybundle',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'teencoreclub',
|
||||
name: 'Teen Core Club',
|
||||
@@ -859,7 +841,7 @@ const networks = [
|
||||
description: 'Home of the Kim Kardashian Sex Tape, Porn Parodies, and over 30,000 XXX Movies from The World Leader In Adult Entertainment.',
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
sceneMovies: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -883,21 +865,17 @@ const networks = [
|
||||
name: 'XEmpire',
|
||||
url: 'https://www.xempire.com',
|
||||
description: 'XEmpire.com brings you today\'s top pornstars in beautifully shot, HD sex scenes across 4 unique porn sites of gonzo porn, interracial, lesbian & erotica!',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
actorScenes: 'https://www.xempire.com/en/videos/xempire/latest/{page}/All-Categories/0{actorPath}',
|
||||
sceneMovies: false,
|
||||
},
|
||||
parent: 'gamma',
|
||||
parameters: {
|
||||
sceneMovies: false,
|
||||
actorScenes: 'https://www.xempire.com/en/videos/xempire/latest/{page}/All-Categories/0{actorPath}',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'zerotolerance',
|
||||
name: 'Zero Tolerance',
|
||||
alias: ['ztod'],
|
||||
url: 'https://www.zerotolerancefilms.com',
|
||||
parameters: {
|
||||
layout: 'api',
|
||||
},
|
||||
parent: 'gamma',
|
||||
},
|
||||
// ASG MAX
|
||||
@@ -952,9 +930,10 @@ exports.seed = (knex) => Promise.resolve()
|
||||
alias: network.alias,
|
||||
url: network.url,
|
||||
description: network.description,
|
||||
has_logo: network.hasLogo,
|
||||
has_logo: network.hasLogo ?? true,
|
||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||
parameters: network.parameters,
|
||||
parameters: network.parameters || null,
|
||||
options: network.options,
|
||||
parent_id: grandParentNetworksBySlug[network.parent] || null,
|
||||
}));
|
||||
|
||||
@@ -968,9 +947,10 @@ exports.seed = (knex) => Promise.resolve()
|
||||
alias: network.alias,
|
||||
url: network.url,
|
||||
description: network.description,
|
||||
has_logo: network.hasLogo,
|
||||
has_logo: network.hasLogo ?? true,
|
||||
showcased: typeof network.showcased === 'boolean' ? network.showcased : true,
|
||||
parameters: network.parameters,
|
||||
parameters: network.parameters || null,
|
||||
options: network.options,
|
||||
parent_id: parentNetworksBySlug[network.parent] || grandParentNetworksBySlug[network.parent] || null,
|
||||
}));
|
||||
|
||||
|
||||
2581
seeds/02_sites.js
2581
seeds/02_sites.js
File diff suppressed because it is too large
Load Diff
@@ -573,6 +573,20 @@ const sfw = Object.entries({
|
||||
['A1v0-iH3T5A', 'Patrick Hendry'],
|
||||
['iFBIdX54BOk', 'Keagan Henman'],
|
||||
],
|
||||
tags: [
|
||||
['dp', 'Mr. Pugo'],
|
||||
['anal', 'Andrew Teoh'],
|
||||
['creampie', 'Sunil Lama'],
|
||||
['lesbian', 'Kelsey Curtis'],
|
||||
['facial', 'Jagoda Kondratiuk'],
|
||||
['squirting', 'Camerauthor'],
|
||||
['blowjob', 'Christine Tan'],
|
||||
['blowbang', 'William Warby'],
|
||||
['mfm', 'Seriously Low Carb'],
|
||||
['mff', 'Deon Black'],
|
||||
['gay', 'David Brooke Martin'],
|
||||
['gangbang', 'Mohamed Hassouna'],
|
||||
],
|
||||
})
|
||||
.map(([category, photos]) => photos.map(([photo, credit], index) => ({
|
||||
id: photo,
|
||||
@@ -1104,11 +1118,13 @@ exports.seed = (knex) => Promise.resolve()
|
||||
|
||||
const tagMediaWithDimensions = await Promise.map(tagMedia, async (media) => {
|
||||
const { width, height } = await sharp(path.join('public/img', media.path)).metadata(); // size not available from filepath
|
||||
const sfwMediaId = sfw.find((image) => image.id === media.tagSlug)?.id;
|
||||
|
||||
return {
|
||||
...media,
|
||||
width,
|
||||
height,
|
||||
sfwMediaId,
|
||||
};
|
||||
}, {
|
||||
concurrency: 20,
|
||||
@@ -1124,6 +1140,7 @@ exports.seed = (knex) => Promise.resolve()
|
||||
width: media.width,
|
||||
height: media.height,
|
||||
comment: media.comment,
|
||||
sfw_media_id: media.sfwMediaId,
|
||||
entity_id: entitiesBySlug[media.entitySlug]?.id,
|
||||
})), 'path', knex);
|
||||
|
||||
@@ -1148,8 +1165,6 @@ exports.seed = (knex) => Promise.resolve()
|
||||
media_id: mediaIdsByPath[photo.path],
|
||||
}));
|
||||
|
||||
console.log(tagPosterEntries);
|
||||
|
||||
await Promise.all([
|
||||
upsert('tags_posters', tagPosterEntries, 'tag_id', knex),
|
||||
upsert('tags_photos', tagPhotoEntries, ['tag_id', 'media_id'], knex),
|
||||
@@ -1164,4 +1179,6 @@ exports.seed = (knex) => Promise.resolve()
|
||||
.whereNotIn('media_id', tagPhotos.map((photo) => photo.id))
|
||||
.delete(),
|
||||
]);
|
||||
|
||||
await knex.raw('REFRESH MATERIALIZED VIEW media_sfw');
|
||||
});
|
||||
|
||||
@@ -204,10 +204,15 @@ const affiliates = [
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'disruptivefilms',
|
||||
network: 'disruptivefilms',
|
||||
url: 'https://www.g2buddy.com/disruptivefilms/go.php?pr=9&su=2&si=119&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'sodomysquad',
|
||||
url: 'https://www.g2buddy.com/sodomysquad/go.php?pr=9&su=2&si=137&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
// gamma > asg max > next door studios
|
||||
// excluded affiliate links that link back to main site and don't seem to track properly
|
||||
{
|
||||
@@ -494,6 +499,92 @@ const affiliates = [
|
||||
scene: false, // redirects to Adult Time
|
||||
},
|
||||
},
|
||||
// gamma > vivid
|
||||
{
|
||||
network: 'vivid',
|
||||
url: 'https://www.g2fame.com/vivid/go.php?pr=8&su=2&si=330&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
parameters: {
|
||||
scene: false, // redirects to homepage
|
||||
},
|
||||
},
|
||||
{
|
||||
channel: 'wheretheboysarent',
|
||||
url: 'https://www.g2fame.com/wheretheboysarent/go.php?pr=8&su=2&si=368&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'thebrats',
|
||||
url: 'https://www.g2fame.com/thebrats/go.php?pr=8&su=2&si=369&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
// gamma > zero tolerance
|
||||
{
|
||||
network: 'zerotolerance',
|
||||
url: 'https://www.g2fame.com/zerotolerancefilms/go.php?pr=8&su=2&si=507&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'zerotolerancefilms',
|
||||
url: 'https://www.g2fame.com/zerotolerancefilms/go.php?pr=8&su=2&si=507&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: '3rddegreefilms',
|
||||
url: 'https://www.g2fame.com/3rddegreefilms/go.php?pr=8&su=2&si=537&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'addicted2girls',
|
||||
url: 'https://www.g2fame.com/addicted2girls/go.php?pr=8&su=2&si=477&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'genderxfilms',
|
||||
url: 'https://www.g2fame.com/genderxfilms/go.php?pr=8&su=2&si=397&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'gangbangcreampie',
|
||||
url: 'https://www.g2fame.com/gangbangcreampie/go.php?pr=8&su=2&si=656&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'gloryholesecrets',
|
||||
url: 'https://www.g2fame.com/gloryholesecrets/go.php?pr=8&su=2&si=655&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'tabooheat',
|
||||
url: 'https://www.g2fame.com/tabooheat/go.php?pr=8&su=2&si=552&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'wicked',
|
||||
url: 'https://www.g2fame.com/wicked/go.php?pr=8&su=2&si=371&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
// gamma > independent channels
|
||||
{
|
||||
channel: 'biphoria',
|
||||
url: 'https://www.g2fame.com/biphoria/go.php?pr=8&su=2&si=418&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'burningangel',
|
||||
url: 'https://www.g2fame.com/burningangel/go.php?pr=8&su=2&si=174&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'chaosmen',
|
||||
url: 'https://www.g2fame.com/chaosmen/go.php?pr=8&su=2&si=608&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
{
|
||||
channel: 'diabolic',
|
||||
url: 'https://www.g2fame.com/diabolic/go.php?pr=8&su=2&si=523&ad=277470&pa=index&ar=&buffer=',
|
||||
comment: 'per signup',
|
||||
},
|
||||
// kelly madison / 8k
|
||||
{
|
||||
network: 'kellymadison',
|
||||
@@ -728,14 +819,185 @@ const affiliates = [
|
||||
},
|
||||
comment: '50% rev share',
|
||||
},
|
||||
// hentaied / amnesiac
|
||||
{
|
||||
network: 'hentaied',
|
||||
url: 'https://hentaied.pro/amember/aff/go/npjyjuekbvehqzpyqukrgqoogmwbkz',
|
||||
parameters: {
|
||||
query: 'ref=npjyjuekbvehqzpyqukrgqoogmwbkz',
|
||||
dynamicScene: 'https://hentaied.pro{scenePath}?ref=npjyjuekbvehqzpyqukrgqoogmwbkz',
|
||||
global: false, // banners might be a bit too disturbing for front-page
|
||||
},
|
||||
},
|
||||
{
|
||||
channel: 'defeated',
|
||||
url: 'https://defeatedsexfight.com/amember/aff/go/jlfowppdazbfucxrgfmgpno',
|
||||
parameters: {
|
||||
query: 'ref=jlfowppdazbfucxrgfmgpno', // not currently functional
|
||||
},
|
||||
},
|
||||
// perv city / bam visions
|
||||
{
|
||||
channel: 'bamvisions',
|
||||
url: 'https://secure.bamvisions.com/track/MTQ5OS4xLjEuMS4wLjAuMC4wLjA',
|
||||
},
|
||||
// teen mega world
|
||||
{
|
||||
network: 'teenmegaworld',
|
||||
url: 'https://secure.teenmegaworld.net/track/MzAxNjcxLjUuMS4xLjAuMC4wLjAuMA',
|
||||
},
|
||||
{
|
||||
channel: 'analangels',
|
||||
url: 'https://secure.anal-angels.com/track/MzAxNjcxLjUuMzMuNjYuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'analbeauty',
|
||||
url: 'https://secure.anal-beauty.com/track/MzAxNjcxLjUuNDAuNjcuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'beautyangels',
|
||||
url: 'https://secure.beauty-angels.com/track/MzAxNjcxLjUuMjcuNjguMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'beauty4k',
|
||||
url: 'https://secure.beauty4k.com/track/MzAxNjcxLjUuNDIuNjkuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'creampieangels',
|
||||
url: 'https://secure.creampie-angels.com/track/MzAxNjcxLjUuMzAuNzAuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'dirtycoach',
|
||||
url: 'https://secure.dirty-coach.com/track/MzAxNjcxLjUuMzkuMzkuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'dirtydoctor',
|
||||
url: 'https://secure.dirty-doctor.com/track/MzAxNjcxLjUuMzYuMzYuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'firstbgg',
|
||||
url: 'https://secure.firstbgg.com/track/MzAxNjcxLjUuMzIuNzEuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'fuckstudies',
|
||||
url: 'https://secure.fuckstudies.com/track/MzAxNjcxLjUuMjIuNzIuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'gagngape',
|
||||
url: 'https://secure.gag-n-gape.com/track/MzAxNjcxLjUuNDUuNDUuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'noboring',
|
||||
url: 'https://secure.noboring.com/track/MzAxNjcxLjUuNTEuNTEuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'ohmyholes',
|
||||
url: 'https://ohmyholes.com/track/MzAxNjcxLjUuNTguNzMuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'oldnyoung',
|
||||
url: 'https://secure.old-n-young.com/track/MzAxNjcxLjUuMTAuNzQuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'rawcouples',
|
||||
url: 'https://secure.rawcouples.com/track/MzAxNjcxLjUuNTUuNzUuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'soloteengirls',
|
||||
url: 'https://secure.soloteengirls.net/track/MzAxNjcxLjUuMi4yLjAuMC4wLjAuMA',
|
||||
},
|
||||
{
|
||||
channel: 'squirtingvirgin',
|
||||
url: 'https://secure.squirtingvirgin.com/track/MzAxNjcxLjUuMjguMjguMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'teensexmania',
|
||||
url: 'https://secure.teensexmania.com/track/MzAxNjcxLjUuMTMuNzYuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'teensexmovs',
|
||||
url: 'https://secure.teensexmovs.com/track/MzAxNjcxLjUuMy43Ny4wLjAuMC4wLjA',
|
||||
},
|
||||
{
|
||||
channel: 'tmwpov',
|
||||
url: 'https://tmwpov.com/track/MzAxNjcxLjUuNTcuNzguMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'tmwvrnet',
|
||||
url: 'https://secure.tmwvrnet.com/track/MzAxNjcxLjUuNDQuNzkuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'trickymasseur',
|
||||
url: 'https://secure.trickymasseur.com/track/MzAxNjcxLjUuMjYuODAuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'vogov',
|
||||
url: 'https://secure.vogov.com/track/MzAxNjcxLjUuNTkuODUuMC4wLjAuMC4w',
|
||||
},
|
||||
{
|
||||
channel: 'xangels',
|
||||
url: 'https://secure.x-angels.com/track/MzAxNjcxLjUuMzguODEuMC4wLjAuMC4w',
|
||||
},
|
||||
// Snow Valley / Tsunami
|
||||
{
|
||||
network: 'snowvalley',
|
||||
parameters: {
|
||||
query: 'ref=4c331ef6',
|
||||
},
|
||||
},
|
||||
// POV Porn Cash / HussiePass
|
||||
{
|
||||
network: 'hussiepass',
|
||||
url: 'https://secure.hussiepass.com/track/MTk0NS4xLjUuNy4wLjAuMC4wLjA',
|
||||
comment: '50% revshare',
|
||||
parameters: {
|
||||
// hussiepass website does not show network scenes
|
||||
channelScenes: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
channel: 'povpornstars',
|
||||
url: 'https://join.povpornstars.com/track/MTk0NS4xLjMuNS4wLjAuMC4wLjA',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
{
|
||||
channel: 'interracialpovs',
|
||||
url: 'https://join.interracialpovs.com/track/MTk0NS4xLjYuOC4wLjAuMC4wLjA',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
{
|
||||
channel: 'ravebunnys',
|
||||
url: 'https://secure.ravebunnys.com/track/MTk0NS4xLjExLjI5LjAuMC4wLjAuMA',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
{
|
||||
channel: 'hotandtatted',
|
||||
url: 'https://join.hotandtatted.com/track/MTk0NS4xLjEwLjEyLjAuMC4wLjAuMA',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
{
|
||||
channel: 'seehimfuck',
|
||||
url: 'https://join.seehimfuck.com/track/MTk0NS4xLjcuOS4wLjAuMC4wLjA',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
{
|
||||
channel: 'seehimsolo',
|
||||
url: 'https://join.seehimsolo.com/track/MTk0NS4xLjguMTAuMC4wLjAuMC4w',
|
||||
comment: '50% revshare',
|
||||
},
|
||||
// etc
|
||||
{
|
||||
network: 'bang',
|
||||
parameters: {
|
||||
query: 'aff=eyJ3IjoiMTQ1ODYiLCJsIjoiMzE2NDYiLCJ0IjoiNDU4NjEifQ==', // supported
|
||||
query: 'aff=eyJ3IjoiMTQ1ODYiLCJsIjoiMzE2NDYiLCJ0IjoiNDU4NjEifQ==',
|
||||
},
|
||||
comment: '$25 per signup',
|
||||
},
|
||||
{
|
||||
channel: 'sicflics',
|
||||
url: 'http://refer.ccbill.com/cgi-bin/clicks.cgi?CA=921613-0002&PA=2647683',
|
||||
comment: '40% rebill',
|
||||
},
|
||||
];
|
||||
|
||||
const bannerTags = {
|
||||
@@ -1048,16 +1310,19 @@ exports.seed = async (knex) => {
|
||||
|
||||
const affiliate = affiliates.find((aff) => aff.id === affiliateId)
|
||||
|| affiliates.find((aff) => aff.channel === resolvedChannel)
|
||||
|| affiliates.find((aff) => aff.network === resolvedNetwork);
|
||||
|| affiliates.find((aff) => aff.network === resolvedNetwork)
|
||||
|| affiliates.find((aff) => aff.channel === resolvedNetwork); // independent channel
|
||||
|
||||
if (!affiliate) {
|
||||
console.warn('UNMATCHED AFFILIATE', file);
|
||||
}
|
||||
|
||||
const isIndependent = affiliate?.channel && affiliate.channel === resolvedNetwork;
|
||||
|
||||
return {
|
||||
file,
|
||||
network: resolvedNetwork,
|
||||
channel: resolvedChannel,
|
||||
network: isIndependent ? null : resolvedNetwork,
|
||||
channel: isIndependent ? resolvedNetwork : resolvedChannel,
|
||||
affiliateId,
|
||||
affiliate: getAffiliateId(affiliate),
|
||||
banner,
|
||||
@@ -1098,6 +1363,16 @@ exports.seed = async (knex) => {
|
||||
const channelsBySlug = channels.reduce((acc, channel) => ({ ...acc, [channel.slug]: channel }), {});
|
||||
const tagsBySlug = tags.reduce((acc, tag) => ({ ...acc, [tag.slug]: tag }), {});
|
||||
|
||||
affiliates.forEach((affiliate) => {
|
||||
if (affiliate.channel && !channelsBySlug[affiliate.channel]) {
|
||||
throw new Error(`Unmatched affiliate channel ${affiliate.channel}`);
|
||||
}
|
||||
|
||||
if (affiliate.network && !networksBySlug[affiliate.network]) {
|
||||
throw new Error(`Unmatched affiliate network ${affiliate.network}`);
|
||||
}
|
||||
});
|
||||
|
||||
const affiliatesWithEntityId = affiliates.map((affiliate) => ({
|
||||
id: getAffiliateId(affiliate),
|
||||
entity_id: networksBySlug[affiliate.network]?.id || channelsBySlug[affiliate.channel]?.id || null,
|
||||
|
||||
@@ -17,11 +17,12 @@ const domPurify = DOMPurify(window);
|
||||
// const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const redis = require('./redis');
|
||||
const scrapers = require('./scrapers/scrapers').actors;
|
||||
const actorScrapers = require('./scrapers/scrapers').actors;
|
||||
|
||||
const argv = require('./argv');
|
||||
const include = require('./utils/argv-include')(argv);
|
||||
const bulkInsert = require('./utils/bulk-insert');
|
||||
const batchInsert = require('./utils/batch-insert');
|
||||
const chunk = require('./utils/chunk');
|
||||
const logger = require('./logger')(__filename);
|
||||
|
||||
@@ -46,6 +47,7 @@ const commonContext = {
|
||||
slugify,
|
||||
omit,
|
||||
unprint,
|
||||
batchInsert,
|
||||
};
|
||||
|
||||
const hairColors = {
|
||||
@@ -349,6 +351,7 @@ function curateProfileEntry(profile) {
|
||||
tattoos: profile.tattoos,
|
||||
blood_type: profile.bloodType,
|
||||
avatar_media_id: profile.avatarMediaId || null,
|
||||
updated_at: knex.raw('DEFAULT'), // default should be NOW(), this will update the column
|
||||
};
|
||||
|
||||
return curatedProfileEntry;
|
||||
@@ -376,7 +379,10 @@ async function interpolateProfiles(actorIdsOrNames, refreshView) {
|
||||
const { interpolateProfiles: interpolateProfilesUtil } = await actorsCommon;
|
||||
|
||||
try {
|
||||
await interpolateProfilesUtil(actorIdsOrNames, commonContext, { refreshView });
|
||||
await interpolateProfilesUtil(actorIdsOrNames, commonContext, {
|
||||
refreshView,
|
||||
...config.interpolation,
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
@@ -435,35 +441,35 @@ async function curateProfile(profile, actor) {
|
||||
|| null;
|
||||
|
||||
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
|
||||
curatedProfile.age = Number(profile.age) || null;
|
||||
curatedProfile.age = Math.round(profile.age) || null;
|
||||
|
||||
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.shoeSize = Number(profile.shoeSize) || profile.shoeSize?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.height = Math.round(profile.height || profile.height?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.weight = Math.round(profile.weight || profile.weight?.match?.(/\d+/)?.[0]) || null;
|
||||
|
||||
// separate measurement values
|
||||
curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match?.(/[a-zA-Z]+/)?.[0]) || null;
|
||||
curatedProfile.bust = Number(profile.bust) || profile.bust?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.bust = Math.round(profile.bust || profile.bust?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.waist = Math.round(profile.waist || profile.waist?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.hip = Math.round(profile.hip || profile.hip?.match?.(/\d+/)?.[0]) || null;
|
||||
|
||||
curatedProfile.leg = Number(profile.leg) || profile.leg?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.thigh = Number(profile.thigh) || profile.thigh?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.foot = Number(profile.foot) || profile.foot?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.leg = Math.round(profile.leg || profile.leg?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.thigh = Math.round(profile.thigh || profile.thigh?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.foot = Number(profile.foot || profile.foot?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.shoeSize = Number(profile.shoeSize || profile.shoeSize?.match?.(/\d+/)?.[0]) || null;
|
||||
|
||||
// combined measurement value
|
||||
// ExCoGi uses x, Jules Jordan has spaces between the dashes, SpermMenia/Cum Buffet sometimes misses cup
|
||||
const measurements = profile.measurements?.match(/(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i);
|
||||
|
||||
if (measurements) {
|
||||
curatedProfile.bust = Number(measurements[1]) || null;
|
||||
curatedProfile.bust = Math.round(measurements[1]) || null;
|
||||
curatedProfile.cup = measurements[2] || null;
|
||||
curatedProfile.waist = Number(measurements[3]) || null;
|
||||
curatedProfile.hip = Number(measurements[4]) || null;
|
||||
curatedProfile.waist = Math.round(measurements[3]) || null;
|
||||
curatedProfile.hip = Math.round(measurements[4]) || null;
|
||||
}
|
||||
|
||||
curatedProfile.penisLength = Number(profile.penisLength) || profile.penisLength?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.penisGirth = Number(profile.penisGirth) || profile.penisGirth?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.penisLength = Math.round(profile.penisLength || profile.penisLength?.match?.(/\d+/)?.[0]) || null;
|
||||
curatedProfile.penisGirth = Math.round(profile.penisGirth || profile.penisGirth?.match?.(/\d+/)?.[0]) || null;
|
||||
|
||||
curatedProfile.isCircumcised = getBoolean(profile.isCircumcised);
|
||||
curatedProfile.naturalBoobs = getBoolean(profile.naturalBoobs);
|
||||
@@ -541,7 +547,7 @@ async function curateProfile(profile, actor) {
|
||||
|
||||
async function insertProfiles(newProfiles) {
|
||||
if (newProfiles.length > 0) {
|
||||
const entries = await bulkInsert('actors_profiles', newProfiles);
|
||||
const entries = await batchInsert('actors_profiles', newProfiles);
|
||||
|
||||
logger.info(`Saved ${newProfiles.length} actor profiles`);
|
||||
|
||||
@@ -603,10 +609,7 @@ async function upsertProfiles(profiles) {
|
||||
}));
|
||||
|
||||
if (avatars.length > 0) {
|
||||
await knex('actors_avatars')
|
||||
.insert(avatars)
|
||||
.onConflict()
|
||||
.ignore();
|
||||
await batchInsert('actors_avatars', avatars, { conflict: false });
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -621,7 +624,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
try {
|
||||
const entity = entitiesBySlug[scraperSlug] || null;
|
||||
|
||||
const scraper = scrapers[scraperSlug];
|
||||
const scraper = actorScrapers[scraperSlug];
|
||||
const layoutScraper = resolveLayoutScraper(entity, scraper);
|
||||
|
||||
if (!layoutScraper?.fetchProfile) {
|
||||
@@ -756,7 +759,8 @@ function curateSocials(socials, platformsByHostname) {
|
||||
|
||||
async function associateSocials(profiles) {
|
||||
const { platformsByHostname } = await actorsCommon;
|
||||
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
|
||||
const profileEntryChunks = await Promise.all(chunk(profiles).map((profilesChunk) => knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profilesChunk.map((profile) => [profile.actorId, profile.entity.id]))));
|
||||
const profileEntries = profileEntryChunks.flat();
|
||||
|
||||
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
|
||||
if (!acc[profileEntry.actor_id]) {
|
||||
@@ -781,16 +785,14 @@ async function associateSocials(profiles) {
|
||||
return;
|
||||
}
|
||||
|
||||
await knex('actors_socials')
|
||||
.insert(curateSocials(profile.social, platformsByHostname).map((social) => ({
|
||||
platform: social.platform,
|
||||
handle: social.handle,
|
||||
url: social.url,
|
||||
actor_id: profile.actorId,
|
||||
// profile_id: profileId,
|
||||
})))
|
||||
.onConflict()
|
||||
.ignore();
|
||||
await batchInsert('actors_socials', curateSocials(profile.social, platformsByHostname).map((social) => ({
|
||||
platform: social.platform,
|
||||
handle: social.handle,
|
||||
url: social.url,
|
||||
actor_id: profile.actorId,
|
||||
})), {
|
||||
conflict: false,
|
||||
});
|
||||
}, Promise.resolve());
|
||||
}
|
||||
|
||||
@@ -829,11 +831,11 @@ async function scrapeActors(argNames) {
|
||||
|
||||
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
||||
|
||||
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
||||
const sources = argv.profileSources || config.profiles || Object.keys(actorScrapers);
|
||||
const entitySlugs = sources.flat();
|
||||
|
||||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||
fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'], prefer: argv.prefer || 'channel' }),
|
||||
fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'], prefer: argv.prefer || 'options' }),
|
||||
knex('actors')
|
||||
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
|
||||
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
|
||||
|
||||
42
src/app.js
42
src/app.js
@@ -24,7 +24,8 @@ const { updateSceneSearch, updateMovieSearch } = require('./update-search');
|
||||
const { scrapeActors, deleteActors, flushActors, flushProfiles, interpolateProfiles } = require('./actors');
|
||||
const { flushEntities } = require('./entities');
|
||||
const { deleteScenes, deleteMovies, flushScenes, flushMovies, flushBatches } = require('./releases');
|
||||
const { flushOrphanedMedia } = require('./media');
|
||||
const { flushOrphanedMedia, detachReleaseMedia, detachEntityReleaseMedia } = require('./media');
|
||||
const { reassociateEntityReleaseTags, reassociateReleaseTags, reassociateOriginalTags } = require('./tags');
|
||||
const getFileEntries = require('./utils/file-entries');
|
||||
|
||||
const inspector = new Inspector();
|
||||
@@ -36,6 +37,7 @@ unprint.options({
|
||||
userAgent: 'traxxx',
|
||||
browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
|
||||
apiUserAgent: 'traxxx',
|
||||
clientRetirement: config.bypass.browser.clientRetirement,
|
||||
limits: {
|
||||
...config.limits,
|
||||
default: {
|
||||
@@ -50,7 +52,8 @@ unprint.options({
|
||||
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${event.isProxied ? ' proxied' : ''}${event.isBrowser ? ' browser' : ''}) ${event.url}`));
|
||||
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.isProxied ? 'proxied ' : ''}${event.isBrowser ? 'browser ' : ''}${event.method} ${event.url} (${event.status}): ${event.statusText}`));
|
||||
|
||||
unprint.on('browser', (event) => logger.debug(`Unprint ${event.action === 'open' ? 'opened' : 'closed'} browsers ${event.keys} (${event.active})`));
|
||||
unprint.on('browserOpen', (event) => logger.debug(`Unprint opened browsers ${event.keys} (${event.active}/${config.bypass.browser.clientRetirement} active, ${event.clients} clients)`));
|
||||
unprint.on('browserClose', (event) => logger.debug(`Unprint closed${event.retired ? ' retired' : ''} browsers ${event.keys} (${event.active}/${config.bypass.browser.clientRetirement} active, ${event.clients} clients)`));
|
||||
|
||||
function logActive() {
|
||||
setTimeout(() => {
|
||||
@@ -110,6 +113,16 @@ async function startMemorySample(snapshotTriggers = []) {
|
||||
}, config.memorySampling.sampleDuration);
|
||||
}
|
||||
|
||||
/**
 * Checks that the configured proxy is reachable before continuing.
 *
 * The check only runs when `config.proxy.enable` is truthy and a test URL is
 * available in `argv.testProxy`; otherwise the function resolves without doing anything.
 *
 * @returns {Promise<void>} resolves when the check passes or is skipped
 * @throws {Error} "Proxy is offline (<status>)" when the test response is not ok
 */
async function testProxy() {
|
||||
if (config.proxy.enable && argv.testProxy) {
|
||||
// presumably `useProxy: true` forces this request through the proxy — confirm against unprint's options
const res = await unprint.get(argv.testProxy, { useProxy: true });
|
||||
|
||||
if (!res.ok) {
|
||||
// include the response status in the message so the failure can be diagnosed from the error alone
throw new Error(`Proxy is offline (${res.status})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function init() {
|
||||
try {
|
||||
await redis.connect();
|
||||
@@ -147,7 +160,8 @@ async function init() {
|
||||
}
|
||||
|
||||
if (argv.flushNetworks || argv.flushChannels) {
|
||||
await flushEntities(argv.flushNetworks, argv.flushChannels);
|
||||
// inject flushOrphanedMedia to prevent circular dependency with entity media flush
|
||||
await flushEntities(argv.flushNetworks, argv.flushChannels, flushOrphanedMedia);
|
||||
}
|
||||
|
||||
if (argv.flushBatches) {
|
||||
@@ -174,16 +188,38 @@ async function init() {
|
||||
await deleteMovies(argv.deleteMovies);
|
||||
}
|
||||
|
||||
if (argv.originalTags) {
|
||||
await reassociateOriginalTags(argv.originalTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.releaseTags) {
|
||||
await reassociateReleaseTags(argv.releaseTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.networkReleaseTags || argv.channelReleaseTags) {
|
||||
await reassociateEntityReleaseTags(argv.networkReleaseTags, argv.channelReleaseTags, argv.rematchTags);
|
||||
}
|
||||
|
||||
if (argv.flushOrphanedMedia) {
|
||||
await flushOrphanedMedia();
|
||||
}
|
||||
|
||||
if (argv.detachReleaseMedia) {
|
||||
await detachReleaseMedia(argv.detachReleaseMedia);
|
||||
}
|
||||
|
||||
if (argv.detachNetworkMedia || argv.detachChannelMedia) {
|
||||
await detachEntityReleaseMedia(argv.detachNetworkMedia, argv.detachChannelMedia);
|
||||
}
|
||||
|
||||
if (argv.request) {
|
||||
const res = await http[argv.requestMethod](argv.request);
|
||||
|
||||
console.log(res.status, res.body);
|
||||
}
|
||||
|
||||
await testProxy();
|
||||
|
||||
const actorsFromFile = argv.actorsFile && await getFileEntries(argv.actorsFile);
|
||||
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
|
||||
|
||||
|
||||
73
src/argv.js
73
src/argv.js
@@ -40,6 +40,11 @@ const { argv } = yargs
|
||||
type: 'boolean',
|
||||
alias: 'web',
|
||||
})
|
||||
.option('test-proxy', {
|
||||
describe: 'Test URL to ensure proxy is online',
|
||||
type: 'string',
|
||||
default: config.proxy.test,
|
||||
})
|
||||
.option('include-networks', {
|
||||
describe: 'Network to scrape all channels from (overrides configuration)',
|
||||
type: 'array',
|
||||
@@ -100,6 +105,21 @@ const { argv } = yargs
|
||||
type: 'boolean',
|
||||
default: false,
|
||||
})
|
||||
.option('associate-actors', {
|
||||
describe: 'Associate scene actors and directors',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('associate-tags', {
|
||||
describe: 'Associate scene tags',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('associate-series', {
|
||||
describe: 'Associate scene series',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('scene', {
|
||||
describe: 'Scrape scene info from URL',
|
||||
type: 'array',
|
||||
@@ -329,7 +349,32 @@ const { argv } = yargs
|
||||
describe: 'Remove files from storage when flushing media.',
|
||||
type: 'boolean',
|
||||
alias: 'flush-files',
|
||||
default: true,
|
||||
})
|
||||
.option('detach-channel-media', {
|
||||
describe: 'Remove media files from channel scenes.',
|
||||
type: 'array',
|
||||
})
|
||||
.option('detach-network-media', {
|
||||
describe: 'Remove media files from network scenes.',
|
||||
type: 'array',
|
||||
})
|
||||
.option('detach-release-media', {
|
||||
describe: 'Remove media files from network scenes.',
|
||||
type: 'array',
|
||||
alias: ['detach-scene-media'],
|
||||
})
|
||||
.option('detach-media-domains', {
|
||||
describe: 'Only detach these types of media.',
|
||||
type: 'array',
|
||||
default: [
|
||||
'posters',
|
||||
'photos',
|
||||
'caps',
|
||||
'trailers',
|
||||
'teasers',
|
||||
'covers',
|
||||
],
|
||||
alias: ['detach-media'],
|
||||
})
|
||||
.option('flush-channels', {
|
||||
describe: 'Delete all scenes and movies from channels.',
|
||||
@@ -387,6 +432,32 @@ const { argv } = yargs
|
||||
type: 'array',
|
||||
alias: ['delete-movie', 'remove-movies', 'remove-movies'],
|
||||
})
|
||||
.option('original-tags', {
|
||||
describe: 'Reassociate original tag names',
|
||||
type: 'array',
|
||||
alias: ['tags'],
|
||||
})
|
||||
.option('release-tags', {
|
||||
describe: 'Reassociate tags for scene IDs',
|
||||
type: 'array',
|
||||
alias: ['scene-tags'],
|
||||
})
|
||||
.option('channel-release-tags', {
|
||||
describe: 'Reassociate tags for all channel releases',
|
||||
type: 'array',
|
||||
alias: ['channel-scene-tags', 'channel-tags'],
|
||||
})
|
||||
.option('network-release-tags', {
|
||||
describe: 'Reassociate tags for all network releases',
|
||||
type: 'array',
|
||||
alias: ['network-scene-tags', 'network-tags'],
|
||||
})
|
||||
.option('rematch-tags', {
|
||||
describe: 'Reassociate tags that are already associated',
|
||||
type: 'boolean',
|
||||
alias: 'rematch',
|
||||
default: false,
|
||||
})
|
||||
.option('request', {
|
||||
describe: 'Make an arbitrary HTTP request',
|
||||
type: 'string',
|
||||
|
||||
10
src/deep.js
10
src/deep.js
@@ -69,6 +69,7 @@ async function fetchUnprintScene(scraper, url, entity, baseRelease, options, typ
|
||||
entity,
|
||||
baseRelease,
|
||||
headers: res.headers,
|
||||
cookies: res.cookies,
|
||||
include,
|
||||
beforeFetchScenes: options.beforeFetchScenes,
|
||||
parameters: options.parameters,
|
||||
@@ -212,9 +213,10 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') {
|
||||
...[].concat(curatedScrapedRelease.poster),
|
||||
...[].concat(baseRelease.poster),
|
||||
])).filter(Boolean),
|
||||
photos: curatedScrapedRelease.photos?.length > 0
|
||||
? curatedScrapedRelease.photos
|
||||
: baseRelease.photos,
|
||||
photos: [
|
||||
...curatedScrapedRelease.photos || [],
|
||||
...baseRelease.photos || [],
|
||||
],
|
||||
deep: !!scrapedRelease,
|
||||
entity,
|
||||
};
|
||||
@@ -267,7 +269,7 @@ async function scrapeReleases(baseReleases, entitiesByHostname, type) {
|
||||
|
||||
async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
|
||||
const baseReleases = toBaseReleases(baseReleasesOrUrls);
|
||||
const entitiesByHostname = await fetchReleaseEntities(baseReleases);
|
||||
const entitiesByHostname = await fetchReleaseEntities(baseReleases, { appendBySlug: false });
|
||||
|
||||
const deepReleases = await scrapeReleases(baseReleases, entitiesByHostname, type);
|
||||
|
||||
|
||||
@@ -7,8 +7,9 @@ const logger = require('./logger')(__filename);
|
||||
const argv = require('./argv');
|
||||
const knex = require('./knex');
|
||||
const { deleteScenes, deleteMovies, deleteSeries } = require('./releases');
|
||||
const { flushOrphanedMedia } = require('./media');
|
||||
const { resolveScraper, resolveLayoutScraper } = require('./scrapers/resolve');
|
||||
const { fetchEntityReleaseIds } = require('./entity-releases');
|
||||
const getRecursiveParameters = require('./utils/get-recursive-parameters');
|
||||
|
||||
function getRecursiveParent(entity) {
|
||||
if (!entity) {
|
||||
@@ -123,7 +124,7 @@ function urlToHostname(url) {
|
||||
try {
|
||||
const hostname = new URL(url)
|
||||
.hostname
|
||||
.match(/(www\.)?(.*)/)?.at(-1);
|
||||
.match(/(www\.|m\.)?(.*)/)?.at(-1);
|
||||
|
||||
return hostname;
|
||||
} catch (error) {
|
||||
@@ -219,7 +220,7 @@ async function fetchIncludedEntities() {
|
||||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' }) {
|
||||
async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel', appendBySlug: true }) {
|
||||
const entities = await knex.raw(`
|
||||
WITH RECURSIVE entity_tree as (
|
||||
SELECT to_jsonb(entities) as entity,
|
||||
@@ -257,17 +258,22 @@ async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' })
|
||||
entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
|
||||
entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`),
|
||||
entityTypes: options.types || ['channel', 'network'],
|
||||
sort: knex.raw(options.prefer === 'channel' ? 'asc' : 'desc'),
|
||||
sort: knex.raw(options.prefer === 'channel' || options.prefer === 'options' ? 'asc' : 'desc'),
|
||||
});
|
||||
|
||||
// channel entity will overwrite network entity
|
||||
// by default channel entity will overwrite network entity
|
||||
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => {
|
||||
const host = urlToHostname(entity.url);
|
||||
const curatedEntity = accEntities[entity.slug] || accEntities[host] || curateEntity(entity, true);
|
||||
const entityOptions = getRecursiveParameters(entity, 'options');
|
||||
const accEntity = accEntities[entity.slug] || accEntities[host];
|
||||
|
||||
const curatedEntity = !accEntity || (options.prefer === 'options' && entity.type === 'network' && entityOptions.preferNetwork)
|
||||
? curateEntity(entity, true)
|
||||
: accEntity;
|
||||
|
||||
return {
|
||||
...accEntities,
|
||||
[entity.slug]: curatedEntity,
|
||||
...(options.appendBySlug !== false ? { [entity.slug]: curatedEntity } : null),
|
||||
[host]: curatedEntity,
|
||||
};
|
||||
}, {});
|
||||
@@ -368,81 +374,10 @@ async function searchEntities(query, type, limit) {
|
||||
return curateEntities(entities);
|
||||
}
|
||||
|
||||
async function flushEntities(networkSlugs = [], channelSlugs = []) {
|
||||
async function flushEntities(networkSlugs = [], channelSlugs = [], flushOrphanedMedia) {
|
||||
const { sceneIds, movieIds, serieIds } = await fetchEntityReleaseIds(networkSlugs, channelSlugs);
|
||||
const entitySlugs = networkSlugs.concat(channelSlugs).join(', ');
|
||||
|
||||
const entityQuery = knex
|
||||
.withRecursive('selected_entities', knex.raw(`
|
||||
SELECT entities.*
|
||||
FROM entities
|
||||
WHERE
|
||||
entities.slug = ANY(:networkSlugs)
|
||||
AND entities.type = 'network'
|
||||
OR (entities.slug = ANY(:channelSlugs)
|
||||
AND entities.type = 'channel')
|
||||
UNION ALL
|
||||
SELECT entities.*
|
||||
FROM entities
|
||||
INNER JOIN selected_entities ON selected_entities.id = entities.parent_id
|
||||
`, {
|
||||
networkSlugs,
|
||||
channelSlugs,
|
||||
}));
|
||||
|
||||
const sceneIds = await entityQuery
|
||||
.clone()
|
||||
.select('releases.id')
|
||||
.distinct('releases.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('releases', 'releases.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('releases.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('effective_date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('effective_date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('releases.id');
|
||||
|
||||
const movieIds = await entityQuery
|
||||
.clone()
|
||||
.select('movies.id')
|
||||
.distinct('movies.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('movies', 'movies.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('movies.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('effective_date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('effective_date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('movies.id');
|
||||
|
||||
const serieIds = await entityQuery
|
||||
.clone()
|
||||
.select('series.id')
|
||||
.distinct('series.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('series', 'series.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('series.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('series.id');
|
||||
|
||||
if (sceneIds.length === 0 && movieIds.length === 0 && serieIds.length === 0) {
|
||||
logger.info(`No scenes, movies or series found to remove for ${entitySlugs}`);
|
||||
return;
|
||||
@@ -479,6 +414,7 @@ module.exports = {
|
||||
fetchIncludedEntities,
|
||||
fetchReleaseEntities,
|
||||
fetchEntitiesBySlug,
|
||||
fetchEntityReleaseIds,
|
||||
fetchEntity,
|
||||
fetchEntities,
|
||||
getRecursiveParent,
|
||||
|
||||
88
src/entity-releases.js
Normal file
88
src/entity-releases.js
Normal file
@@ -0,0 +1,88 @@
|
||||
'use strict';
|
||||
|
||||
const knex = require('./knex');
|
||||
const argv = require('./argv');
|
||||
|
||||
async function fetchEntityReleaseIds(networkSlugs = [], channelSlugs = []) {
|
||||
const entityQuery = knex
|
||||
.withRecursive('selected_entities', knex.raw(`
|
||||
SELECT entities.*
|
||||
FROM entities
|
||||
WHERE
|
||||
entities.slug = ANY(:networkSlugs)
|
||||
AND entities.type = 'network'
|
||||
OR (entities.slug = ANY(:channelSlugs)
|
||||
AND entities.type = 'channel')
|
||||
UNION ALL
|
||||
SELECT entities.*
|
||||
FROM entities
|
||||
INNER JOIN selected_entities ON selected_entities.id = entities.parent_id
|
||||
`, {
|
||||
networkSlugs,
|
||||
channelSlugs,
|
||||
}));
|
||||
|
||||
const sceneIds = await entityQuery
|
||||
.clone()
|
||||
.select('releases.id')
|
||||
.distinct('releases.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('releases', 'releases.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('releases.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('effective_date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('effective_date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('releases.id');
|
||||
|
||||
const movieIds = await entityQuery
|
||||
.clone()
|
||||
.select('movies.id')
|
||||
.distinct('movies.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('movies', 'movies.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('movies.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('effective_date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('effective_date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('movies.id');
|
||||
|
||||
const serieIds = await entityQuery
|
||||
.clone()
|
||||
.select('series.id')
|
||||
.distinct('series.id')
|
||||
.from('selected_entities')
|
||||
.leftJoin('series', 'series.entity_id', 'selected_entities.id')
|
||||
.whereNotNull('series.id')
|
||||
.modify((builder) => {
|
||||
if (argv.flushAfter) {
|
||||
builder.where('date', '>=', argv.flushAfter);
|
||||
}
|
||||
|
||||
if (argv.flushBefore) {
|
||||
builder.where('date', '<=', argv.flushBefore);
|
||||
}
|
||||
})
|
||||
.pluck('series.id');
|
||||
|
||||
return {
|
||||
sceneIds,
|
||||
movieIds,
|
||||
serieIds,
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchEntityReleaseIds,
|
||||
};
|
||||
22
src/knex.js
22
src/knex.js
@@ -3,7 +3,7 @@
|
||||
const config = require('config');
|
||||
const knex = require('knex');
|
||||
|
||||
module.exports = knex({
|
||||
const knexInstance = knex({
|
||||
client: 'pg',
|
||||
connection: config.database.owner,
|
||||
pool: config.database.pool,
|
||||
@@ -11,3 +11,23 @@ module.exports = knex({
|
||||
asyncStackTraces: process.env.NODE_ENV === 'development',
|
||||
// debug: process.env.NODE_ENV === 'development',
|
||||
});
|
||||
|
||||
knexInstance.on('query', function onQuery(query) {
|
||||
const bindingCount = query.bindings?.length ?? 0;
|
||||
|
||||
if (bindingCount > 10000) {
|
||||
const error = new Error(`[knex] Dangerous query: ${bindingCount} bindings detected: ${query.sql?.slice(0, 200)}${query.sql?.length > 200 ? '...' : ''}`);
|
||||
|
||||
Error.captureStackTrace(error, onQuery);
|
||||
// console.error(error);
|
||||
|
||||
throw error; // optionally hard-fail so you get a real stack trace
|
||||
}
|
||||
});
|
||||
|
||||
knexInstance.on('query-error', (error, query) => {
|
||||
error.knexSql = `${query.sql?.slice(0, 200)}${query.sql?.length > 200 ? '...' : ''}`;
|
||||
error.knexBindingCount = query.bindings?.length;
|
||||
});
|
||||
|
||||
module.exports = knexInstance;
|
||||
|
||||
@@ -9,7 +9,7 @@ require('winston-daily-rotate-file');
|
||||
const args = require('./argv');
|
||||
|
||||
function logger(filepath) {
|
||||
const root = filepath.match(/src[/\\]|dist[/\\]/);
|
||||
const root = filepath.match(/src[/\\]|dist[/\\]|tests[/\\]/);
|
||||
const filename = filepath.slice(root.index + root[0].length)
|
||||
.replace(path.extname(filepath), '');
|
||||
|
||||
|
||||
46
src/media.js
46
src/media.js
@@ -26,6 +26,7 @@ const http = require('./utils/http');
|
||||
const bulkInsert = require('./utils/bulk-insert');
|
||||
const chunk = require('./utils/chunk');
|
||||
const { get } = require('./utils/qu');
|
||||
const { fetchEntityReleaseIds } = require('./entity-releases');
|
||||
|
||||
// const pipeline = util.promisify(stream.pipeline);
|
||||
const streamQueue = taskQueue();
|
||||
@@ -646,6 +647,7 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
const res = await http.get(source.src, {
|
||||
limits: 'media',
|
||||
headers: {
|
||||
host: new URL(source.src).hostname,
|
||||
...(source.referer && { referer: source.referer }),
|
||||
...(source.host && { host: source.host }),
|
||||
},
|
||||
@@ -671,10 +673,20 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
|
||||
streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStream }) => {
|
||||
const meta = { mimetype: 'video/mp4' };
|
||||
const { hostname } = new URL(source.stream);
|
||||
|
||||
if (config.media.streams.excludeHostnames.includes(hostname)) {
|
||||
throw new Error(`Stream source hostname ${hostname} is excluded by config`);
|
||||
}
|
||||
|
||||
const streamIndex = config.media.streams.selectIndex[hostname];
|
||||
|
||||
const command = ffmpeg(source.stream)
|
||||
.format('mp4')
|
||||
.outputOptions(['-movflags frag_keyframe+empty_moov'])
|
||||
.outputOptions([
|
||||
'-movflags frag_keyframe+empty_moov',
|
||||
...(typeof streamIndex === 'number' ? [`-map p:${streamIndex}`] : []),
|
||||
])
|
||||
.on('start', (cmd) => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`));
|
||||
|
||||
const video = command.pipe();
|
||||
@@ -700,7 +712,7 @@ streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStr
|
||||
|
||||
return meta;
|
||||
}, {
|
||||
concurrency: config.media.streamConcurrency,
|
||||
concurrency: config.media.streams.concurrency,
|
||||
});
|
||||
|
||||
async function fetchSource(source, baseMedia) {
|
||||
@@ -709,7 +721,7 @@ async function fetchSource(source, baseMedia) {
|
||||
logger.silly(`Fetching media from ${source.src}`);
|
||||
logger.debug(`Memory usage before media fetch: ${process.memoryUsage.rss() / 1000000} MB (${source.src})`);
|
||||
|
||||
if (source.stream && !config.media.fetchStreams) {
|
||||
if (source.stream && !config.media.streams.enabled) {
|
||||
throw new Error(`Stream fetching disabled, ignoring ${source.stream}`);
|
||||
}
|
||||
|
||||
@@ -1149,8 +1161,36 @@ async function flushOrphanedMedia(stage = 1) {
|
||||
}
|
||||
}
|
||||
|
||||
async function detachReleaseMedia(rawSceneIds) {
|
||||
const sceneIds = rawSceneIds.map((sceneId) => Number(sceneId)).filter(Boolean);
|
||||
|
||||
await argv.detachMediaDomains.reduce(async (chain, domain) => {
|
||||
await chain;
|
||||
|
||||
const mediaEntries = await knex(`releases_${domain}`).whereIn('release_id', sceneIds);
|
||||
|
||||
await knex(`releases_${domain}`)
|
||||
.whereIn('release_id', sceneIds)
|
||||
.delete();
|
||||
|
||||
logger.info(`Removed ${mediaEntries.length} ${domain} from ${new Set(mediaEntries.map((mediaEntry) => mediaEntry.release_id)).size} scenes`);
|
||||
}, Promise.resolve());
|
||||
|
||||
if (argv.flushOrphanedMedia !== false) {
|
||||
await flushOrphanedMedia();
|
||||
}
|
||||
}
|
||||
|
||||
async function detachEntityReleaseMedia(networkSlugs = [], channelSlugs = []) {
|
||||
const { sceneIds } = await fetchEntityReleaseIds(networkSlugs, channelSlugs);
|
||||
|
||||
await detachReleaseMedia(sceneIds);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
associateAvatars,
|
||||
associateReleaseMedia,
|
||||
flushOrphanedMedia,
|
||||
detachReleaseMedia,
|
||||
detachEntityReleaseMedia,
|
||||
};
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
const adultempire = require('./adultempire');
|
||||
const angelogodshackoriginal = require('./angelogodshackoriginal');
|
||||
const americanpornstar = require('./americanpornstar');
|
||||
// const americanpornstar = require('./americanpornstar'); // offline
|
||||
const aziani = require('./aziani');
|
||||
const badoink = require('./badoink');
|
||||
const bamvisions = require('./bamvisions');
|
||||
@@ -100,7 +100,6 @@ module.exports = {
|
||||
'3rddegreefilms': gamma,
|
||||
addicted2girls: gamma,
|
||||
biphoria: gamma,
|
||||
blakemason: gamma,
|
||||
blowpass: gamma,
|
||||
burningangel: gamma,
|
||||
chaosmen: gamma,
|
||||
@@ -149,6 +148,8 @@ module.exports = {
|
||||
interracialpovs: hush,
|
||||
povpornstars: hush,
|
||||
seehimfuck: hush,
|
||||
ravebunnys: hush,
|
||||
hotandtatted: hush,
|
||||
// wankzvr
|
||||
wankzvr,
|
||||
tranzvr: wankzvr,
|
||||
@@ -164,17 +165,14 @@ module.exports = {
|
||||
thatsitcomshow: nubiles,
|
||||
// porndoe
|
||||
amateureuro: porndoe,
|
||||
forbondage: porndoe,
|
||||
mamacitaz: porndoe,
|
||||
transbella: porndoe,
|
||||
vipsexvault: porndoe,
|
||||
// forbondage: porndoe,
|
||||
// aziani
|
||||
aziani,
|
||||
'2poles1hole': aziani,
|
||||
creampiled: aziani,
|
||||
// woodman
|
||||
pierrewoodman,
|
||||
wakeupnfuck: pierrewoodman,
|
||||
// naughty america
|
||||
naughtyamerica,
|
||||
tonightsgirlfriend: naughtyamerica,
|
||||
@@ -201,21 +199,24 @@ module.exports = {
|
||||
swappz: teamskeet,
|
||||
freeuse: teamskeet,
|
||||
familystrokes: teamskeet,
|
||||
// model media
|
||||
jerkaoke: modelmedia,
|
||||
modelmediaasia: modelmedia,
|
||||
delphine: modelmedia,
|
||||
// etc
|
||||
'18vr': badoink,
|
||||
theflourishxxx: theflourish,
|
||||
pierrewoodman,
|
||||
exploitedx, // only from known URL that will specify site
|
||||
fullpornnetwork,
|
||||
adultempire,
|
||||
allherluv: missax,
|
||||
americanpornstar,
|
||||
// americanpornstar,
|
||||
angelogodshackoriginal,
|
||||
asiam: modelmedia,
|
||||
babevr: badoink,
|
||||
badoinkvr: badoink,
|
||||
bamvisions,
|
||||
bang,
|
||||
// delphine: modelmedia,
|
||||
meidenvanholland: bluedonkeymedia, // Vurig Vlaanderen uses same database
|
||||
boobpedia,
|
||||
bradmontana,
|
||||
@@ -226,7 +227,6 @@ module.exports = {
|
||||
hitzefrei,
|
||||
hookuphotshot,
|
||||
inthecrack,
|
||||
jerkaoke: modelmedia,
|
||||
karups,
|
||||
boyfun: karups,
|
||||
kellymadison,
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes, channel, _options) {
|
||||
return scenes.map(({ query }) => {
|
||||
@@ -122,18 +121,18 @@ async function scrapeProfile({ query }) {
|
||||
const bioText = query.content('#profileModal .well');
|
||||
|
||||
profile.description = query.content('#profileModal .modal-body')
|
||||
.slice(bioText.length)
|
||||
?.slice(bioText?.length || 0)
|
||||
.replace(/Biography Text ©Adult DVD Empire/i, '')
|
||||
.trim();
|
||||
|
||||
profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
|
||||
|
||||
profile.hair = bio.hair;
|
||||
profile.eyes = bio.eyes;
|
||||
profile.eyes = bio.eyes?.replace(/eyes?/i, '').trim();
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.height = feetInchesToCm(bio.height);
|
||||
profile.weight = lbsToKg(bio.weight);
|
||||
profile.height = convert(bio.height, 'cm');
|
||||
profile.weight = convert(bio.weight, 'lb', 'kg');
|
||||
|
||||
const avatar = query.img('picture img, .performer-image-container img');
|
||||
|
||||
@@ -155,8 +154,8 @@ async function fetchLatest(channel, page, options) {
|
||||
? `${options.parameters.latest}?page=${page}&view=grid`
|
||||
: `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&view=grid`, {
|
||||
selectAll: '.item-grid-scene .grid-item',
|
||||
headers: {
|
||||
Cookie: 'ageConfirmed=true;',
|
||||
cookies: {
|
||||
ageConfirmed: true,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -171,6 +170,9 @@ async function fetchProfilePage(actorUrl) {
|
||||
const res = await unprint.get(actorUrl, {
|
||||
select: '#content',
|
||||
rejectUnauthorized: false,
|
||||
cookies: {
|
||||
ageConfirmed: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
@@ -189,10 +191,14 @@ async function fetchProfile(baseActor, channel, include) {
|
||||
}
|
||||
}
|
||||
|
||||
const searchRes = await http.get(`https://www.adultempire.com/search/SearchAutoComplete_Agg_EmpireDTRank?search_type=Pornstars&rows=9&name_startsWith=${slugify(baseActor.name, '+')}`);
|
||||
const searchRes = await unprint.get(`https://www.adultempire.com/search/SearchAutoComplete_Agg_EmpireDTRank?search_type=Pornstars&rows=9&name_startsWith=${slugify(baseActor.name, '+')}`, {
|
||||
cookies: {
|
||||
ageConfirmed: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (searchRes.ok && searchRes.body.Results) {
|
||||
const actorResult = searchRes.body.Results.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype) && new RegExp(baseActor.name, 'i').test(result.BasicResponseGroup?.description));
|
||||
if (searchRes.ok && searchRes.data.Results) {
|
||||
const actorResult = searchRes.data.Results.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype) && new RegExp(baseActor.name, 'i').test(result.BasicResponseGroup?.description));
|
||||
|
||||
if (actorResult) {
|
||||
const url = `https://www.adultempire.com/${actorResult.BasicResponseGroup.id}`;
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map(({ query }) => {
|
||||
if (query.exists('.crowdfunding-post')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('a');
|
||||
release.entryId = new URL(release.url).pathname.split('/')[1];
|
||||
|
||||
release.title = query.content('.allvideostitle p, .allvideostitle h2');
|
||||
|
||||
release.actors = query.contents('.tagsmodels a');
|
||||
|
||||
release.poster = query.sourceSet('.vidcont .attachment-thumbnail');
|
||||
release.teaser = query.video('.vidcont video', { attribute: 'data-src' });
|
||||
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, { url }) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = new URL(url).pathname.split('/')[1];
|
||||
|
||||
release.title = query.content('.title-fav h1');
|
||||
release.description = query.contents('.description #fullstory, .title-fav .cont > p').join(' ');
|
||||
|
||||
release.date = query.date('.datesingle', 'MMMM D, YYYY');
|
||||
release.duration = query.duration('.cont .duration');
|
||||
|
||||
release.actors = query.contents('.cont .tagsmodels:not(.director) a');
|
||||
release.director = query.content('.cont .director a');
|
||||
|
||||
release.poster = [
|
||||
query.img('.preview .attachment-full'),
|
||||
query.img('meta[property="og:image"]', { attribute: 'content' }),
|
||||
];
|
||||
|
||||
release.caps = query.all('.gallery a').map((photoEl) => [
|
||||
unprint.query.url(photoEl, null),
|
||||
unprint.query.img(photoEl, 'img'),
|
||||
]);
|
||||
|
||||
release.trailer = {
|
||||
src: query.video('.preview video'),
|
||||
referer: url,
|
||||
expectType: {
|
||||
'binary/octet-stream': 'video/mp4',
|
||||
},
|
||||
};
|
||||
|
||||
release.tags = query.contents('.post-categories a');
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/all-videos/page/${page}/`;
|
||||
const res = await unprint.get(url, { selectAll: '.catposts > div:not(.clearfix):not(.flexcont)' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
scrapeScene,
|
||||
};
|
||||
@@ -57,7 +57,7 @@ function getCovers(images, target = 'cover') {
|
||||
}
|
||||
|
||||
function getVideos(data) {
|
||||
const teaserSources = data.videos.mediabook?.files;
|
||||
const teaserSources = data.videos?.mediabook?.files;
|
||||
const trailerSources = data.children.find((child) => child.type === 'trailer')?.videos.full?.files;
|
||||
|
||||
const teaser = teaserSources && Object.values(teaserSources).map((source) => ({
|
||||
@@ -84,7 +84,7 @@ function scrapeLatestX(data, site, filterChannel, options) {
|
||||
|
||||
release.url = `${basepath}/${data.id}/${slugify(release.title)}`; // spartanId doesn't work in URLs
|
||||
release.date = new Date(data.dateReleased);
|
||||
release.duration = data.videos.mediabook?.length > 1 ? data.videos.mediabook.length : null;
|
||||
release.duration = data.videos?.mediabook?.length > 1 ? data.videos.mediabook.length : null;
|
||||
|
||||
release.actors = data.actors.map((actor) => ({ name: actor.name, gender: actor.gender }));
|
||||
release.tags = data.tags.map((tag) => tag.name);
|
||||
@@ -127,6 +127,10 @@ async function scrapeLatest(items, site, filterChannel, options) {
|
||||
}
|
||||
|
||||
function scrapeRelease(data, url, channel, networkName, options) {
|
||||
if (Array.isArray(data)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const release = {};
|
||||
|
||||
const { title, description } = data;
|
||||
@@ -136,7 +140,7 @@ function scrapeRelease(data, url, channel, networkName, options) {
|
||||
release.description = description;
|
||||
|
||||
release.date = new Date(data.dateReleased);
|
||||
release.duration = data.videos.mediabook?.length > 1 ? data.videos.mediabook.length : null;
|
||||
release.duration = data.videos?.mediabook?.length > 1 ? data.videos.mediabook.length : null;
|
||||
|
||||
release.actors = data.actors.map((actor) => ({ name: actor.name, gender: actor.gender }));
|
||||
release.tags = data.tags.map((tag) => tag.name);
|
||||
@@ -144,7 +148,6 @@ function scrapeRelease(data, url, channel, networkName, options) {
|
||||
[release.poster, ...release.photos] = getThumbs(data).map((src) => ({
|
||||
src,
|
||||
referer: url,
|
||||
host: 'mediavault-private-fl.project1content.com',
|
||||
}));
|
||||
|
||||
const { teaser, trailer } = getVideos(data);
|
||||
@@ -270,7 +273,7 @@ async function fetchLatest(site, page = 1, options) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const { instanceToken } = options.beforeNetwork?.instanceToken
|
||||
const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false)
|
||||
? options.beforeNetwork
|
||||
: await getSession(site, options.parameters, url);
|
||||
|
||||
|
||||
@@ -71,10 +71,32 @@ function scrapeScene(data, channel, parameters) {
|
||||
return release;
|
||||
}
|
||||
|
||||
async function getBlockId(slug, dataSource, entity, parameters) {
|
||||
const res = await unprint.get(`https://azianistudios.com/tour_api.php/content/page?slug=${slug}&data_source=${JSON.stringify(dataSource)}`, {
|
||||
headers: {
|
||||
Referer: entity.url,
|
||||
'x-nats-cms-area-id': parameters.areaId,
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok && res.data.success) {
|
||||
// unsure how the blocks differ exactly, but type set_view is missing directors for some reason
|
||||
return res.data.blocks?.find((block) => ['navigation', 'html'].includes(block.settings.type))?.cms_block_id || null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1, { parameters }) {
|
||||
const blockId = await getBlockId(parameters.videos || '/videos', { page }, channel, parameters);
|
||||
|
||||
if (!blockId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const query = new URLSearchParams({
|
||||
cms_area_id: parameters.areaId,
|
||||
cms_block_id: parameters.blockId,
|
||||
cms_block_id: blockId,
|
||||
count: 100,
|
||||
start: (page - 1) * 100,
|
||||
orderby: 'published_desc',
|
||||
@@ -106,10 +128,20 @@ async function fetchLatest(channel, page = 1, { parameters }) {
|
||||
async function fetchScene(url, entity, _baseRelease, { parameters }) {
|
||||
const entryId = new URL(url).pathname.match(/\/video\/(\w+)/)[1];
|
||||
|
||||
if (!entryId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const blockId = await getBlockId('/video/:id', entryId, entity, parameters);
|
||||
|
||||
if (!blockId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const query = new URLSearchParams({
|
||||
cms_set_ids: entryId,
|
||||
cms_area_id: parameters.areaId,
|
||||
cms_block_id: parameters.blockId,
|
||||
cms_block_id: blockId,
|
||||
content: 1,
|
||||
orderby: 'published_desc',
|
||||
content_type: 'video',
|
||||
@@ -172,7 +204,7 @@ function scrapeProfile(data, entity, parameters) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile({ url }, { entity, parameters }) {
|
||||
async function fetchProfile({ name, url }, { entity, parameters }) {
|
||||
if (!url) {
|
||||
// no easy search option
|
||||
return null;
|
||||
@@ -180,9 +212,19 @@ async function fetchProfile({ url }, { entity, parameters }) {
|
||||
|
||||
const actorId = new URL(url).pathname.match(/model\/(\d+)/)[1];
|
||||
|
||||
if (!actorId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const blockId = await getBlockId('/model/:id', { models: name, id: actorId }, entity, parameters);
|
||||
|
||||
if (!blockId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const query = new URLSearchParams({
|
||||
cms_data_value_ids: actorId,
|
||||
cms_block_id: entity.parameters.modelBlockId || entity.parameters.blockId,
|
||||
cms_block_id: blockId,
|
||||
cms_data_type_id: 4,
|
||||
}).toString();
|
||||
|
||||
|
||||
@@ -41,7 +41,12 @@ function getAvatarFallback(url) {
|
||||
return [
|
||||
`${origin}${pathname}`,
|
||||
url,
|
||||
];
|
||||
].map((src) => ({
|
||||
src,
|
||||
expectType: {
|
||||
'application/octet-stream': 'image/jpeg',
|
||||
},
|
||||
}));
|
||||
} catch (_error) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
'use strict';
|
||||
|
||||
const { ex } = require('../utils/q');
|
||||
const http = require('../utils/http');
|
||||
const unprint = require('unprint');
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map((detail) => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
const bio = query.all('.infobox tr[valign="top"]')
|
||||
.map((detail) => unprint.query.contents(detail, 'td'))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [slugify(key, '_')]: value }), {});
|
||||
|
||||
/* unreliable, see: Syren De Mer
|
||||
const catlinks = qa('#mw-normal-catlinks a', true);
|
||||
@@ -17,71 +17,51 @@ function scrapeProfile(html) {
|
||||
profile.gender = isTrans ? 'transsexual' : 'female';
|
||||
*/
|
||||
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
profile.dateOfBirth = query.date('.bday', 'YYYY-MM-DD');
|
||||
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
profile.description = query.content('.mw-parser-output > p');
|
||||
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
profile.placeOfBirth = bio.born;
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
profile.nationality = bio.nationality.split(',')[0];
|
||||
|
||||
if (bio.Measurements) {
|
||||
const measurements = bio.Measurements
|
||||
.match(/\d+(\w+)?-\d+-\d+/g)
|
||||
?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
|
||||
.split('-');
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
// account for measuemrents being just e.g. '32EE'
|
||||
if (measurements) {
|
||||
const [bust, waist, hip] = measurements;
|
||||
|
||||
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
|
||||
|
||||
profile.waist = Number(waist);
|
||||
profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
|
||||
if (bio.bra_cup_size) {
|
||||
[profile.bust] = bio.bra_cup_size.match(/^\d+\w+/) || [];
|
||||
}
|
||||
|
||||
if (bio.Bra_cup_size) {
|
||||
const bust = bio.Bra_cup_size.match(/^\d+\w+/);
|
||||
if (bust) [profile.bust] = bust;
|
||||
}
|
||||
if (/enhanced/i.test(bio.boobs)) profile.naturalBoobs = false;
|
||||
if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
|
||||
|
||||
if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
|
||||
if (bio.Boobs === 'Natural') profile.naturalBoobs = true;
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/\d+/g)[1]);
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\d+/g)[1]);
|
||||
if (bio.eye_color) profile.eyes = bio.eye_color;
|
||||
if (bio.hair) [profile.hairColor] = bio.hair.split(/(?=[A-Z])/); // field concatted to BrunetteLong
|
||||
|
||||
if (bio.Eye_color) profile.eyes = bio.Eye_color;
|
||||
if (bio.Hair) [profile.hair] = bio.Hair.split(',');
|
||||
if (bio.blood_group) profile.blood = bio.blood_group;
|
||||
if (bio.also_known_as) profile.aliases = bio.also_known_as.split(', ');
|
||||
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
const avatarThumbPath = query.img('.infobox .mw-file-description img');
|
||||
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
credit: null,
|
||||
};
|
||||
profile.avatar = unprint.prefixUrl(avatarPath, 'https://www.boobpedia.com');
|
||||
}
|
||||
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
profile.socials = query.urls('.infobox a.external');
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlug = actorName.replace(/\s+/, '_');
|
||||
const res = await http.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
const actorSlug = slugify(actorName, '_', { lower: false });
|
||||
const res = await unprint.get(`https://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString());
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
const { decode } = require('html-entities');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(items, _channel) {
|
||||
@@ -12,13 +12,13 @@ function scrapeAll(items, _channel) {
|
||||
const { date, precision } = query.dateAgo('.fecha');
|
||||
const poster = query.img('.thumb');
|
||||
|
||||
release.entryId = query.number(null, /\d+/, 'onclick');
|
||||
release.url = query.url(null, 'href', { origin: 'https://www.cumlouder.com' });
|
||||
release.url = query.url(null, { origin: 'https://www.cumlouder.com' });
|
||||
release.entryId = new URL(release.url).pathname.match(/video\/([\w-]+)/)?.[1];
|
||||
|
||||
release.date = date;
|
||||
release.datePrecision = precision;
|
||||
|
||||
release.title = query.cnt('h2');
|
||||
release.title = query.content('h2');
|
||||
release.duration = query.duration('.minutos');
|
||||
|
||||
release.poster = [
|
||||
@@ -30,26 +30,36 @@ function scrapeAll(items, _channel) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, channel, html) {
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await unprint.get(`${channel.url}/${page}/`, { selectAll: '.muestra-escena' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeScene({ query, html }, url) {
|
||||
const release = {};
|
||||
|
||||
const { date, precision } = query.dateAgo('.sub-video .added');
|
||||
|
||||
release.entryId = html.match(/cumlouder_(\d+)/)?.[1];
|
||||
release.entryId = new URL(url).pathname.match(/video\/([\w-]+)/)?.[1];
|
||||
|
||||
release.title = query.cnt('.video-top h1');
|
||||
release.title = query.content('.video-top h1');
|
||||
release.description = query.text('.sub-video p');
|
||||
|
||||
release.date = date;
|
||||
release.datePrecision = precision;
|
||||
|
||||
release.actors = query.all('.sub-video .pornstar-link').map((el) => ({
|
||||
name: query.cnt(el, null),
|
||||
url: query.url(el, null, 'href', { origin: 'https://www.cumlouder.com' }),
|
||||
name: unprint.query.content(el, null),
|
||||
url: unprint.query.url(el, null, { origin: 'https://www.cumlouder.com' }),
|
||||
}));
|
||||
|
||||
release.duration = query.duration('.video-top .duracion');
|
||||
release.tags = query.cnts('.video-top .tag-link');
|
||||
release.tags = query.contents('.video-top .tag-link');
|
||||
|
||||
release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*)';/)?.[1];
|
||||
release.video = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]); // no trailers but full-length videos
|
||||
@@ -59,55 +69,47 @@ function scrapeScene({ query }, channel, html) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query, el }, channel) {
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.context, url, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, channel) {
|
||||
const profile = {};
|
||||
|
||||
const bio = query.all('.data-bio li').reduce((acc, bioEl) => ({
|
||||
...acc,
|
||||
[slugify(query.cnt(bioEl, 'strong'), '_')]: query.text(bioEl),
|
||||
[slugify(unprint.query.content(bioEl, 'strong'), '_')]: unprint.query.text(bioEl),
|
||||
}), {});
|
||||
|
||||
profile.nationality = bio.nationality;
|
||||
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
|
||||
|
||||
profile.height = Number(bio.height) * 100;
|
||||
profile.weight = parseInt(bio.weight, 10);
|
||||
profile.eyes = bio.eye_color;
|
||||
profile.hairColor = bio.hair_color;
|
||||
|
||||
profile.description = query.cnt('.data-bio p:last-of-type');
|
||||
profile.description = query.content('.data-bio p:last-of-type');
|
||||
profile.avatar = query.img('.thumb-bio');
|
||||
|
||||
profile.scenes = scrapeAll(qu.initAll(el, '.muestra-escena'), channel);
|
||||
profile.socials = query.urls('a.twitter-timeline');
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.muestra-escena')), channel);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await qu.getAll(`${channel.url}/${page}/`, '.muestra-escena');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, channel, res.html);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, channel) {
|
||||
const res = await qu.get(`https://www.cumlouder.com/girl/${actor.slug}/`, '.listado-escenas');
|
||||
const res = await unprint.get(`https://www.cumlouder.com/girl/${actor.slug}/`, { select: '.listado-escenas' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item, channel);
|
||||
return scrapeProfile(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
@@ -81,7 +81,7 @@ function scrapeProfile({ query }, _entity) {
|
||||
|
||||
const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));
|
||||
|
||||
profile.age = Number(bio.age) || null;
|
||||
profile.age = unprint.extractNumber(bio.age);
|
||||
profile.height = convert(bio.height, 'cm');
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const {
|
||||
fetchLatest,
|
||||
fetchApiLatest,
|
||||
fetchUpcoming,
|
||||
fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
} = require('./gamma');
|
||||
|
||||
const { get } = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Derive the actor list of a release from its title.
 *
 * The standalone word "solo" is removed from the title, the remainder is split
 * on commas and on the word "and", and every fragment is trimmed. Fragments
 * that end up empty are dropped.
 *
 * @param {object} release - scraped release; `title` is read as a string
 * @returns {object} shallow copy of `release` with an `actors` array added, or
 *   `release` itself, untouched, when it has no string title to parse
 */
function extractLowArtActors(release) {
	if (typeof release?.title !== 'string') {
		return release;
	}

	const actors = release.title
		.replace(/\bsolo\b/i, '') // word-bounded so names that merely contain "solo" stay intact
		.split(/,|\band\b/ig)
		.map((actor) => actor.trim())
		.filter(Boolean); // a bare "Solo" title or a stray comma would otherwise yield ''

	return {
		...release,
		actors,
	};
}
|
||||
|
||||
/**
 * Fetch the latest releases for a site.
 *
 * Sites flagged with `parameters.api` are delegated to `fetchApiLatest`; all
 * others go through `fetchLatest`. For the `lowartfilms` site every release is
 * passed through `extractLowArtActors`.
 *
 * @param {object} site - site entity; `parameters` and `slug` are read
 * @param {number} [page=1] - page number forwarded to the fetcher
 * @returns {Promise<*>} whatever the underlying fetcher resolves to
 */
async function networkFetchLatest(site, page = 1) {
	if (site.parameters?.api) {
		return fetchApiLatest(site, page, false);
	}

	const releases = await fetchLatest(site, page);

	// NOTE(review): other scrapers resolve to a status code instead of a list on
	// failure; only map when a list actually came back so that value is passed through.
	if (site.slug === 'lowartfilms' && Array.isArray(releases)) {
		return releases.map((release) => extractLowArtActors(release));
	}

	return releases;
}
|
||||
|
||||
/**
 * Fetch a single scene and, for the `lowartfilms` site, derive its actors from
 * the title via `extractLowArtActors`.
 *
 * @param {string} url - scene URL forwarded to `fetchScene`
 * @param {object} site - site entity; `slug` is read
 * @returns {Promise<*>} the value resolved by `fetchScene`, with actors added
 *   for `lowartfilms` releases that carry a title
 */
async function networkFetchScene(url, site) {
	const release = await fetchScene(url, site);

	// Only parse the title when there is one: a failed fetch (null/status) or a
	// title-less release would otherwise throw inside the title parsing.
	if (site.slug === 'lowartfilms' && typeof release?.title === 'string') {
		return extractLowArtActors(release);
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch upcoming releases, using the API fetcher for sites flagged with
 * `parameters.api` and the regular fetcher otherwise.
 *
 * @param {object} site - site entity; `parameters.api` is read
 * @param {number} [page=1] - page number forwarded to the fetcher
 * @returns {Promise<*>} whatever the selected fetcher resolves to
 */
async function networkFetchUpcoming(site, page = 1) {
	return site.parameters?.api
		? fetchApiUpcoming(site, page, true)
		: fetchUpcoming(site, page);
}
|
||||
|
||||
/**
 * Build the listing URL for an actor's releases.
 *
 * @param {string} actorPath - actor path, appended directly after the leading `0`
 * @param {number} [page=1] - page number placed in the final URL segment
 * @returns {string} the assembled URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	const releasesUrl = `https://www.peternorth.com/en/videos/All-Categories/0${actorPath}/All-Dvds/0/latest/${page}`;

	return releasesUrl;
}
|
||||
|
||||
/**
 * Build a profile from a "classic" actor page: the avatar image plus the
 * releases scraped from the page HTML.
 *
 * @param {object} page - query context; `qu.img` and `html` are used
 * @param {object} site - site entity; `url` is forwarded to `scrapeAll`
 * @returns {{ avatar: *, releases: * }} the scraped profile
 */
function scrapeClassicProfile({ qu, html }, site) {
	const avatar = qu.img('.actorPicture');
	const releases = scrapeAll(html, null, site.url, false);

	return { avatar, releases };
}
|
||||
|
||||
/**
 * Look an actor up in the site's performer dropdown and scrape their classic
 * profile page.
 *
 * @param {string} actorName - actor name; compared by slug against option labels
 * @param {object} context - only `site` is read (`site.url` is the base URL)
 * @returns {Promise<object|null>} the scraped profile, or null when the
 *   overview page fails, the actor is not listed, or the actor page fails
 */
async function fetchClassicProfile(actorName, { site }) {
	const actorSlug = slugify(actorName);
	const overviewRes = await get(`${site.url}/en/pornstars`);

	if (!overviewRes.ok) {
		return null;
	}

	// the overview page lists every performer as an <option> whose value is the profile path
	const options = overviewRes.item.qa('option[value*="/pornstar"]');
	const match = options.find((optionEl) => slugify(optionEl.textContent) === actorSlug);
	const actorPath = match?.value;

	if (!actorPath) {
		return null;
	}

	const profileRes = await get(`${site.url}${actorPath}`);

	if (!profileRes.ok) {
		return null;
	}

	return scrapeClassicProfile(profileRes.item, site);
}
|
||||
|
||||
/**
 * Fetch an actor profile through the first applicable strategy: the API
 * fetcher, the classic (scenes-only) fetcher, or the default fetcher.
 *
 * @param {object} actor - only `name` is read
 * @param {object} context - `site.parameters` selects the strategy
 * @param {object} include - `include.scenes` gates the classic strategy
 * @returns {Promise<*>} the profile resolved by the selected fetcher
 */
async function networkFetchProfile({ name: actorName }, context, include) {
	// NOTE(review): the || chain picks on the truthiness of each call's return
	// value (presumably a promise), not on the resolved profile; confirm intent.
	const viaApi = context.site.parameters?.api && fetchApiProfile(actorName, context, include);

	// classic profiles only have scenes, no bio
	const pending = viaApi
		|| (context.site.parameters?.classic && include.scenes && fetchClassicProfile(actorName, context, include))
		|| fetchProfile({ name: actorName }, context, true, getActorReleasesUrl, include);

	const profile = await pending;

	return profile;
}
|
||||
|
||||
// Public scraper interface: the network-specific wrappers are exposed under the generic method names.
const scraper = {
	fetchLatest: networkFetchLatest,
	fetchProfile: networkFetchProfile,
	fetchScene: networkFetchScene,
	fetchUpcoming: networkFetchUpcoming,
};

module.exports = scraper;
|
||||
@@ -1,91 +1,115 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeProfile(html, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const profile = { name: actorName };
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), (el) => decodeURI(el.href)).reduce((acc, item) => {
|
||||
const keyMatch = item.match(/\[\w+\]/);
|
||||
const bio = Object.fromEntries(query.all('.profile-meta-list li').map((bioEl) => [
|
||||
slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
|
||||
unprint.query.content(bioEl, 'span:last-child'),
|
||||
]).filter(([_key, value]) => value?.toLowerCase() !== 'unknown'));
|
||||
|
||||
if (keyMatch) {
|
||||
const key = keyMatch[0].slice(1, -1);
|
||||
const [, value] = item.split('=');
|
||||
profile.description = query.content('#description div[data-test="biography"]');
|
||||
|
||||
// both hip and waist link to 'waist', assume biggest value is hip
|
||||
if (key === 'waist' && acc.waist) {
|
||||
if (acc.waist > value) {
|
||||
acc.hip = acc.waist;
|
||||
acc.waist = value;
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
profile.age = unprint.extractNumber(bio.age);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc.hip = value;
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc[key] = value;
|
||||
}
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();
|
||||
|
||||
if (profile.placeOfBirth && bio.country) profile.birthPlace = `${bio.placeOfBirth}, ${bio.country}`;
|
||||
else if (bio.country) profile.birthPlace = bio.country;
|
||||
|
||||
profile.eyes = bio.eyeColor;
|
||||
profile.hair = bio.hairColor;
|
||||
profile.birthPlace = bio.place_of_birth;
|
||||
profile.nationality = bio.nationality;
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.bust = bio.bra;
|
||||
if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
|
||||
if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);
|
||||
profile.eyes = bio.eye_color;
|
||||
profile.hairColor = bio.hair_color;
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.split(',')[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);
|
||||
[profile.bust, profile.cup] = bio.bra?.match(/(\d+)([a-z]+)/i)?.slice(1) || [];
|
||||
|
||||
profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), (el) => el.href);
|
||||
// TODO: differentiate between bust and bra band size
|
||||
if (!profile.bust) {
|
||||
profile.bust = bio.bust;
|
||||
}
|
||||
|
||||
const avatar = document.querySelector('.profile-image-large img').src;
|
||||
if (!avatar.match('placeholder')) profile.avatar = { src: avatar, credit: null };
|
||||
if (!profile.cup) {
|
||||
profile.cup = bio.cup;
|
||||
}
|
||||
|
||||
profile.bust = unprint.extractNumber(bio.bra);
|
||||
profile.cup = bio.cup;
|
||||
profile.waist = unprint.extractNumber(bio.waist);
|
||||
profile.hip = unprint.extractNumber(bio.hip);
|
||||
|
||||
profile.height = unprint.extractNumber(bio.height);
|
||||
profile.weight = unprint.extractNumber(bio.weight);
|
||||
|
||||
profile.foot = unprint.extractNumber(bio.shoe_size);
|
||||
|
||||
profile.socials = query.urls('.profile-meta-item .teaser__link');
|
||||
|
||||
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
|
||||
profile.tattoos = bio.tattoo_locations;
|
||||
|
||||
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
profile.piercings = bio.piercing_locations;
|
||||
|
||||
if (/natural/i.test(bio.boobs)) profile.naturalBoobs = true;
|
||||
if (/fake/i.test(bio.boobs)) profile.naturalBoobs = false;
|
||||
|
||||
if (/natural/i.test(bio.butt)) profile.naturalButt = true;
|
||||
if (/fake/i.test(bio.butt)) profile.naturalButt = false;
|
||||
|
||||
const avatar = query.img('.dashboard-image-container img');
|
||||
|
||||
if (!avatar?.match(/placeholder/i)) {
|
||||
profile.avatar = avatar;
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
function scrapeSearch(html) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
return document.querySelector('a.image-link')?.href || null;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), actorName);
|
||||
async function getActorUrl(actor) {
|
||||
if (actor.url) {
|
||||
return actor.url;
|
||||
}
|
||||
|
||||
const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`);
|
||||
const actorPath = scrapeSearch(searchRes.body.toString());
|
||||
const res = await unprint.post('https://www.freeones.com/xhr/search', {
|
||||
performerTypes: ['babe', 'male', 'trans'],
|
||||
query: actor.name,
|
||||
recipe: 'subject',
|
||||
size: 12,
|
||||
});
|
||||
|
||||
if (actorPath) {
|
||||
const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`);
|
||||
if (res.ok) {
|
||||
const model = res.data.hits?.find((result) => slugify(result.name) === actor.slug);
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString(), actorName);
|
||||
if (model?.url) {
|
||||
return `https://www.freeones.com${model.url}/bio`;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile(actor) {
|
||||
const res = await unprint.get(`https://freeones.com/${actor.slug}/bio`);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context);
|
||||
}
|
||||
|
||||
const actorUrl = await getActorUrl(actor);
|
||||
|
||||
if (actorUrl) {
|
||||
const actorRes = await unprint.get(actorUrl);
|
||||
|
||||
if (actorRes.ok) {
|
||||
return scrapeProfile(actorRes.context);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -91,11 +91,13 @@ function scrapeProfile({ query }, { url }) {
|
||||
profile.description = [query.content('.model-bio-text, #performer-description'), bio.funfact].filter(Boolean).join(' ');
|
||||
profile.aliases = bio.alias?.split(/[,\n]/).map((alias) => alias.trim());
|
||||
|
||||
profile.age = parseInt(bio.age, 10) || null;
|
||||
profile.age = unprint.extractNumber(bio.age);
|
||||
profile.dateOfBirth = unprint.extractDate(bio.age, 'MM/DD/YYYY');
|
||||
profile.measurements = bio.measurements;
|
||||
profile.height = Number(bio.height?.match(/(\d+)\s*cm/)?.[1]) || convert(bio.height, 'cm');
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
99
src/scrapers/hentaied.js
Normal file
99
src/scrapers/hentaied.js
Normal file
@@ -0,0 +1,99 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
/**
 * Scrape the release tiles of an overview page.
 *
 * Crowdfunding posts are skipped, and so are tiles without a link, because the
 * entry ID is taken from the first path segment of the tile URL.
 *
 * @param {Array<{ query: object }>} scenes - one query context per tile
 * @returns {object[]} scraped releases (url, entryId, title, actors, poster, teaser)
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		if (query.exists('.crowdfunding-post')) {
			return null;
		}

		const url = query.url('a');

		// new URL() throws on a missing link; drop the tile instead of failing the whole page
		if (!url) {
			return null;
		}

		const release = {};

		release.url = url;
		release.entryId = new URL(url).pathname.split('/')[1];

		release.title = query.content('.allvideostitle p, .allvideostitle h2');

		release.actors = query.all('.tagsmodels a').map((actorEl) => ({
			name: unprint.query.content(actorEl),
			url: unprint.query.url(actorEl, null),
		}));

		// fall back to the lazy-load attribute when the regular source set is absent
		release.poster = query.sourceSet('.vidcont .attachment-thumbnail')
			|| query.sourceSet('.vidcont .attachment-thumbnail', 'nitro-lazy-srcset');

		release.teaser = query.video('.vidcont video', { attribute: 'data-src' });

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Fetch one page of the latest releases for a channel.
 *
 * Channels whose URL path contains `/projects` are paginated directly; all
 * others are paginated under `/all-videos`.
 *
 * @param {object} channel - channel entity; `url` is read
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]|*>} scraped releases, or the response status on failure
 */
async function fetchLatest(channel, page = 1) {
	const isProjectsChannel = new URL(channel.url).pathname.includes('/projects');

	const url = isProjectsChannel
		? `${channel.url}/page/${page}/`
		: `${channel.url}/all-videos/page/${page}/`;

	// alternative selector, kept for reference: '.catposts > div:not(.clearfix):not(.flexcont)'
	const res = await unprint.get(url, { selectAll: '.catposts div[class*="post-id"], .catposts .half' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
/**
 * Scrape a scene page into a release.
 *
 * @param {object} context - query context; only `query` is used
 * @param {object} options - only `url` is read; it provides the entry ID
 *   (first path segment) and the trailer referer
 * @returns {object} the scraped release; `trailer` is only set when the page has a preview video
 */
function scrapeScene({ query }, { url }) {
	const [, entryId] = new URL(url).pathname.split('/');

	// html description difficult to extract, fall back to the truncated meta description
	const description = query.contents('.description #fullstory, .title-fav .cont > p').join(' ')
		|| query.attribute('meta[property="og:description"]', 'content')?.replace('[…]', '').replace(/\s*read more.*/i, '...');

	const release = {
		entryId,
		title: query.content('.title-fav h1'),
		description,
	};

	release.date = query.date('.datesingle', 'MMMM D, YYYY')
		|| query.date('meta[property="article:published_time"]', 'YYYY-MM-DDTHH:mm:ss', { attribute: 'content' });

	release.duration = query.duration('.cont .duration, .durationandtime .duration');

	const actorEls = query.all('.cont .tagsmodels:not(.director) a, .title-fav .tagsmodels:not(.director) a');

	release.actors = actorEls.map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.directors = query.contents('.cont .director a');
	release.tags = query.contents('.post-categories a');

	const posterCandidates = [
		query.img('.preview .attachment-full'),
		query.img('meta[property="og:image"]', { attribute: 'content' }),
	];

	release.poster = posterCandidates.filter(Boolean);

	// every gallery link yields a deduplicated list of candidate sources for the same photo
	release.caps = query.all('.gallery a').map((photoEl) => {
		const candidates = new Set([
			unprint.query.url(photoEl, null),
			unprint.query.img(photoEl),
			...unprint.query.sourceSet(photoEl, 'img') || [],
			...unprint.query.sourceSet(photoEl, 'img', 'nitro-lazy-srcset') || [],
		]);

		return [...candidates].filter(Boolean);
	});

	const trailer = query.video('.preview video');

	if (trailer) {
		release.trailer = {
			src: trailer,
			referer: url,
			expectType: {
				'binary/octet-stream': 'video/mp4',
			},
		};
	}

	return release;
}
|
||||
|
||||
// Public scraper interface of this module.
const scraper = {
	fetchLatest,
	scrapeScene,
};

module.exports = scraper;
|
||||
@@ -1,20 +1,21 @@
|
||||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const unprint = require('unprint');
|
||||
const format = require('template-format');
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, inchesToCm } = require('../utils/convert');
|
||||
const tryUrls = require('../utils/try-urls');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
function deriveEntryId(release) {
|
||||
if (release.date && release.url) {
|
||||
const slug = new URL(release.url).pathname.match(/\/trailers\/(.*).html/)[1];
|
||||
|
||||
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`;
|
||||
return `${slugify(unprint.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`;
|
||||
}
|
||||
|
||||
if (release.date && release.title) {
|
||||
return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
||||
return `${slugify(unprint.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -22,7 +23,7 @@ function deriveEntryId(release) {
|
||||
|
||||
function extractPoster(posterPath, site, baseRelease) {
|
||||
if (posterPath && !/400.jpg/.test(posterPath)) {
|
||||
const poster = `${site.parameters?.media || site.url}${posterPath}`;
|
||||
const poster = unprint.prefixUrl(posterPath, site.parameters?.media || site.url);
|
||||
const posterSources = [
|
||||
poster,
|
||||
// upscaled
|
||||
@@ -40,38 +41,38 @@ function extractPoster(posterPath, site, baseRelease) {
|
||||
return [baseRelease?.poster || null, []];
|
||||
}
|
||||
|
||||
function getImageWithFallbacks(q, selector, site, el) {
|
||||
function getImageWithFallbacks(query, selector, site, el) {
|
||||
const sources = el
|
||||
? [
|
||||
q(el, selector, 'src0_3x'),
|
||||
q(el, selector, 'src0_2x'),
|
||||
q(el, selector, 'src0_1x'),
|
||||
unprint.query.attribute(el, selector, 'src0_3x'),
|
||||
unprint.query.attribute(el, selector, 'src0_2x'),
|
||||
unprint.query.attribute(el, selector, 'src0_1x'),
|
||||
]
|
||||
: [
|
||||
q(selector, 'src0_3x'),
|
||||
q(selector, 'src0_2x'),
|
||||
q(selector, 'src0_1x'),
|
||||
query.attribute(selector, 'src0_3x'),
|
||||
query.attribute(selector, 'src0_2x'),
|
||||
query.attribute(selector, 'src0_1x'),
|
||||
];
|
||||
|
||||
return sources.filter(Boolean).map((src) => `${site.parameters?.media || site.url}${src}`);
|
||||
return sources.filter(Boolean).map((src) => unprint.prefixUrl(src, site.parameters?.media || site.url));
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = query.q('h4 a', true);
|
||||
release.title = query.content('h4 a');
|
||||
release.url = query.url('a');
|
||||
|
||||
release.date = query.date('.date', 'YYYY-MM-DD');
|
||||
release.duration = query.duration('.time');
|
||||
|
||||
const count = query.number('a img', null, 'cnt');
|
||||
const count = query.number('a img', { attribute: 'cnt' });
|
||||
|
||||
[release.poster, ...release.photos] = Array.from({ length: count }, (value, index) => [
|
||||
query.img('a img', `src${index}_3x`, { origin: channel.url }),
|
||||
query.img('a img', `src${index}_2x`, { origin: channel.url }),
|
||||
query.img('a img', `src${index}_1x`, { origin: channel.url }),
|
||||
[release.poster, ...release.photos] = Array.from({ length: count }, (_value, index) => [
|
||||
query.img('a img', { attribute: `src${index}_3x`, origin: channel.url }),
|
||||
query.img('a img', { attribute: `src${index}_2x`, origin: channel.url }),
|
||||
query.img('a img', { attribute: `src${index}_1x`, origin: channel.url }),
|
||||
]);
|
||||
|
||||
release.stars = query.count('img[src*="star_full"]') + (query.count('img[src*="star_half"]') * 0.5);
|
||||
@@ -85,18 +86,18 @@ function scrapeAllT1(scenes, site, accNetworkReleases) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.title = query.q('h4 a', 'title') || query.q('h4 a', true);
|
||||
release.title = query.attribute('h4 a', 'title') || query.content('h4 a');
|
||||
release.url = query.url('h4 a');
|
||||
|
||||
release.date = query.date('.more-info-div', 'MMM D, YYYY');
|
||||
release.duration = query.dur('.more-info-div');
|
||||
release.duration = query.duration('.more-info-div');
|
||||
|
||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
|
||||
const posterPath = query.q('.img-div img', 'src0_1x') || query.img('img.video_placeholder');
|
||||
const posterPath = query.attribute('.img-div img', 'src0_1x') || query.img('img.video_placeholder');
|
||||
|
||||
if (posterPath) {
|
||||
const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;
|
||||
const poster = unprint.prefixUrl(posterPath, site.parameters?.media || site.url);
|
||||
|
||||
release.poster = [
|
||||
poster.replace('-1x', '-3x'),
|
||||
@@ -117,19 +118,40 @@ function scrapeAllT1(scenes, site, accNetworkReleases) {
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the latest releases for a site.
 *
 * The URL comes from the `parameters.latest` template when set, otherwise from
 * the `t1` or default category listing. T1 sites are scraped with
 * `scrapeAllT1`, all others with `scrapeAll`.
 *
 * @param {object} site - site entity; `url` and `parameters` are read
 * @param {number} [page=1] - page number
 * @param {*} _include - unused
 * @param {object} [accumulated] - previously scraped releases
 * @param {Array} [accumulated.uniqueReleases=[]]
 * @param {Array} [accumulated.duplicateReleases=[]]
 * @returns {Promise<object[]|*>} scraped releases, or the response status on failure
 */
async function fetchLatest(site, page = 1, _include, { uniqueReleases = [], duplicateReleases = [] } = {}) {
	// the `= {}` default keeps the destructuring from throwing when the fourth argument is omitted
	const url = (site.parameters?.latest && format(site.parameters.latest, { page }))
		|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
		|| `${site.url}/categories/movies_${page}_d.html`;

	const res = await unprint.get(url, { selectAll: '.modelfeature, .item-video, .updateItem' });

	if (!res.ok) {
		return res.status;
	}

	if (site.parameters?.t1) {
		return scrapeAllT1(res.context, site, [...uniqueReleases, ...duplicateReleases]);
	}

	return scrapeAll(res.context, site, uniqueReleases);
}
|
||||
|
||||
function scrapeScene({ html, query }, channel, url) {
|
||||
const release = { url }; // url used for entry ID
|
||||
|
||||
release.title = query.cnt('.videoDetails h3');
|
||||
release.description = query.cnt('.videoDetails p');
|
||||
release.title = query.content('.videoDetails h3, .videoDetails h1');
|
||||
release.description = query.content('.videoDetails p');
|
||||
|
||||
release.date = query.date('.videoInfo p', ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
||||
release.duration = Number(query.cnt('.videoInfo p:nth-of-type(2)')?.match(/(\d+) min/i)?.[1]) * 60;
|
||||
release.duration = Number(query.content('.videoInfo p:nth-of-type(2)')?.match(/(\d+) min/i)?.[1]) * 60;
|
||||
|
||||
release.actors = query.cnts('.update_models a');
|
||||
release.actors = query.all('.update_models a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null, { origin: channel.origin }),
|
||||
}));
|
||||
|
||||
const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
|
||||
const poster = qu.prefixUrl(posterPath, channel.url) || query.img('.update_thumb', 'src0_1x', { origin: channel.url }); // latter used when trailer requires signup
|
||||
const poster = unprint.prefixUrl(posterPath, channel.url) || query.img('.update_thumb', 'src0_1x', { origin: channel.url }); // latter used when trailer requires signup
|
||||
|
||||
[release.poster, ...release.photos] = [poster, ...query.imgs('.item-thumb img', 'src0_1x', { origin: channel.url })]
|
||||
.map((src) => src && [
|
||||
@@ -141,10 +163,10 @@ function scrapeScene({ html, query }, channel, url) {
|
||||
const trailerPath = html.match(/\/trailers?\/.*.mp4/);
|
||||
|
||||
if (trailerPath) {
|
||||
release.trailer = qu.prefixUrl(trailerPath, channel.parameters?.media || channel.url);
|
||||
release.trailer = unprint.prefixUrl(trailerPath, channel.parameters?.media || channel.url);
|
||||
}
|
||||
|
||||
release.tags = query.cnts('.featuring a[href*="categories/"]');
|
||||
release.tags = query.contents('.featuring a[href*="categories/"]');
|
||||
release.stars = query.count('.stars img[src*="star_full"]') + (query.count('.stars img[src*="star_half"]') * 0.5);
|
||||
|
||||
release.entryId = deriveEntryId(release);
|
||||
@@ -155,29 +177,34 @@ function scrapeScene({ html, query }, channel, url) {
|
||||
function scrapeSceneT1({ html, query }, site, url, baseRelease) {
|
||||
const release = { url };
|
||||
|
||||
release.title = query.q('.trailer-section-head .section-title', true);
|
||||
release.title = query.content('.trailer-section-head .section-title');
|
||||
release.description = query.text('.row .update-info-block');
|
||||
|
||||
release.date = query.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
|
||||
release.duration = query.dur('.update-info-row:nth-child(2)');
|
||||
release.duration = query.duration('.update-info-row:nth-child(2)');
|
||||
|
||||
release.actors = query.all('.models-list-thumbs a').map((el) => ({
|
||||
name: query.q(el, 'span', true),
|
||||
avatar: getImageWithFallbacks(query.q, 'img', site, el),
|
||||
name: unprint.query.content(el, 'span'),
|
||||
url: unprint.query.url(el, null),
|
||||
avatar: getImageWithFallbacks(query, 'img', site, el),
|
||||
}));
|
||||
|
||||
release.tags = query.all('.tags a', true);
|
||||
release.tags = query.contents('.tags a');
|
||||
|
||||
// const posterPath = html.match(/poster="(.*\.jpg)/)?.[1];
|
||||
const posterPath = query.q('.player-thumb img', 'src0_1x');
|
||||
const posterPath = query.img('.player-thumb img', { attribute: 'src0_1x' });
|
||||
const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
|
||||
|
||||
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
|
||||
|
||||
const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
|
||||
if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url };
|
||||
else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url };
|
||||
if (trailer) {
|
||||
release.trailer = {
|
||||
src: unprint.prefixUrl(trailer, site.parameters?.media || site.url),
|
||||
referer: url,
|
||||
};
|
||||
}
|
||||
|
||||
const stars = query.q('.update-rating', true).match(/\d.\d/)?.[0];
|
||||
if (stars) release.stars = Number(stars);
|
||||
release.stars = query.number('.update-rating');
|
||||
|
||||
if (site.type === 'network') {
|
||||
const channelRegExp = new RegExp(site.children.map((channel) => channel.parameters?.match || channel.name).join('|'), 'i');
|
||||
@@ -188,16 +215,99 @@ function scrapeSceneT1({ html, query }, site, url, baseRelease) {
|
||||
}
|
||||
}
|
||||
|
||||
// release.entryId = q('.player-thumb img', 'id')?.match(/set-target-(\d+)/)[1];
|
||||
release.entryId = deriveEntryId(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfileT1({ el, query }, site) {
|
||||
const profile = {};
|
||||
async function fetchScene(url, site, baseRelease) {
|
||||
const res = await unprint.get(url);
|
||||
|
||||
const bio = query.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (site.parameters?.t1) {
|
||||
return scrapeSceneT1(res.context, site, url, baseRelease);
|
||||
}
|
||||
|
||||
return scrapeScene(res.context, site, url, baseRelease);
|
||||
}
|
||||
|
||||
/**
 * Collect an actor's scenes from the given profile page and every page
 * reachable through its `.next a` link.
 *
 * @param {object} context - page context; only its `query` is used
 * @param {object} channel - channel handed through to `scrapeAll`
 * @param {Array} [accScenes=[]] - scenes already gathered from earlier pages
 * @returns {Promise<Array>} scenes of all visited pages, in page order
 */
async function fetchActorScenes({ query }, channel, accScenes = []) {
	const scenes = scrapeAll(unprint.initAll(query.all('.item-video')), channel);
	// always append, so earlier pages stay ahead of later ones on every recursion level
	const allScenes = accScenes.concat(scenes);
	const nextPage = query.url('.next a');

	if (nextPage) {
		const res = await unprint.get(nextPage);

		if (res.ok) {
			return fetchActorScenes(res.context, channel, allScenes);
		}
	}

	// no further page, or it could not be fetched: return what we have so far
	return allScenes;
}
|
||||
|
||||
/**
 * Interpret a free-text bio value as a yes/no flag.
 *
 * @param {string} [value] - raw bio value
 * @param {RegExp} [positivePattern=/yes/i] - pattern that marks the value as affirmative
 * @returns {boolean|undefined} `false` for a negative value, `true` for an affirmative one, `undefined` when neither applies
 */
function parseBioFlag(value, positivePattern = /yes/i) {
	if (!value) {
		return undefined;
	}

	// whole words only, so values such as "Nose" or "Unknown" are not mistaken for "no"
	if (/\b(?:no|none|not)\b/i.test(value)) {
		return false;
	}

	if (positivePattern.test(value)) {
		return true;
	}

	return undefined;
}

/**
 * Read a length from a bio value, preferring a centimetre figure over an inch figure.
 *
 * @param {string} value - raw bio value, e.g. "18 cm" or "7.5 in"
 * @returns {number|null} the number found, with inch figures passed through `convert(inches, 'cm')`; `null` when nothing usable is found
 */
function parseBioLength(value) {
	// accept single digits and decimals alike ("7 in", "12.5 cm")
	const centimeters = value.match(/(\d+(?:\.\d+)?)\s*cm/i)?.[1];

	if (centimeters) {
		return Number(centimeters) || null;
	}

	const inches = value.match(/(\d+(?:\.\d+)?)\s*in/i)?.[1];

	if (inches) {
		return Number(convert(inches, 'cm')) || null;
	}

	return null;
}

/**
 * Build an actor profile from a profile page.
 *
 * The `.stats li` rows are folded into a key/value bio, keyed by the slugified
 * `strong` label; a row's link URL takes precedence over its text.
 *
 * @param {object} context - page context; only its `query` is used
 * @param {string} url - profile URL, stored on the profile
 * @param {object} channel - channel whose `url` is used as image origin
 * @param {object} [options] - when `includeActorScenes` is set, the actor's scenes are fetched as well
 * @returns {Promise<object>} the profile
 */
async function scrapeProfile({ query }, url, channel, options) {
	const profile = { url };

	const bio = query.all('.stats li').reduce((acc, bioEl) => {
		const key = unprint.query.content(bioEl, 'strong');
		const value = unprint.query.url(bioEl, null) || unprint.query.text(bioEl);

		return {
			...acc,
			[slugify(key, '_')]: value,
		};
	}, {});

	if (bio.date_of_birth) profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;
	if (bio.fun_fact) profile.description = bio.fun_fact;

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

	if (bio.height) profile.height = convert(bio.height, 'cm');
	if (bio.weight) profile.weight = convert(bio.weight.match(/(\d+)\s*lb/i)?.[1], 'lb', 'kg');
	if (bio.shoe_size && !/unknown/i.test(bio.shoe_size)) profile.foot = unprint.extractNumber(bio.shoe_size);

	profile.measurements = bio.measurements;

	if (bio.penis_length) profile.penisLength = parseBioLength(bio.penis_length);
	if (bio.penis_girth) profile.penisGirth = parseBioLength(bio.penis_girth);

	// flags are only set when the bio gives a recognisable answer
	const isCircumcised = parseBioFlag(bio.circumcised);
	if (isCircumcised !== undefined) profile.isCircumcised = isCircumcised;

	const naturalBoobs = parseBioFlag(bio.natural_breasts);
	if (naturalBoobs !== undefined) profile.naturalBoobs = naturalBoobs;

	const hasTattoos = parseBioFlag(bio.tattoos, /yes|some|many/i);
	if (hasTattoos !== undefined) profile.hasTattoos = hasTattoos;

	const hasPiercings = parseBioFlag(bio.piercings, /yes|some|many/i);
	if (hasPiercings !== undefined) profile.hasPiercings = hasPiercings;

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim()).filter((alias) => !/known/i.test(alias)); // filter out "No known aliases"

	profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);

	// largest variant first
	profile.avatar = [
		query.img('.profile-pic img', { attribute: 'src0_3x', origin: channel.url }),
		query.img('.profile-pic img', { attribute: 'src0_2x', origin: channel.url }),
		query.img('.profile-pic img', { attribute: 'src0_1x', origin: channel.url }),
	];

	// options may be omitted by the caller
	if (options?.includeActorScenes) {
		profile.releases = await fetchActorScenes({ query }, channel);
	}

	return profile;
}
|
||||
|
||||
function scrapeProfileT1({ query }, url, site) {
|
||||
const profile = { url };
|
||||
|
||||
const bio = query.contents('.detail-div + .detail-div p, .detail-div p').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
|
||||
if (!value) return acc;
|
||||
@@ -215,142 +325,49 @@ function scrapeProfileT1({ el, query }, site) {
|
||||
|
||||
const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
|
||||
const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);
|
||||
if (heightMetric) profile.height = Number(heightMetric[1]);
|
||||
if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
|
||||
|
||||
profile.avatar = getImageWithFallbacks(query.q, '.img-div img', site);
|
||||
if (heightMetric) {
|
||||
profile.height = Number(heightMetric[1]);
|
||||
}
|
||||
|
||||
const qReleases = qu.initAll(el, '.item-video');
|
||||
if (heightImperial) {
|
||||
profile.height = convert(`${heightImperial[0]}' ${heightImperial[1]}"`, 'cm');
|
||||
}
|
||||
|
||||
profile.avatar = getImageWithFallbacks(query, '.img-div img', site);
|
||||
|
||||
const qReleases = unprint.initAll(query.all('.item-video'));
|
||||
profile.releases = scrapeAllT1(qReleases, site);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchActorScenes({ query, el }, channel, accScenes = []) {
|
||||
const scenes = scrapeAll(qu.initAll(el, '.item-video'), channel);
|
||||
const nextPage = query.url('.next a');
|
||||
|
||||
if (nextPage) {
|
||||
const res = await qu.get(nextPage);
|
||||
|
||||
if (res.ok) {
|
||||
return fetchActorScenes(res.item, channel, scenes.concat(accScenes));
|
||||
}
|
||||
}
|
||||
|
||||
return accScenes.concat(scenes);
|
||||
}
|
||||
|
||||
async function scrapeProfile({ query, el }, channel, options) {
|
||||
const profile = {};
|
||||
|
||||
const bio = query.all('.stats li').reduce((acc, bioEl) => {
|
||||
const key = query.cnt(bioEl, 'strong');
|
||||
const value = query.url(bioEl) || query.text(bioEl);
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value,
|
||||
};
|
||||
}, {});
|
||||
|
||||
if (bio.date_of_birth) profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
if (bio.fun_fact) profile.description = bio.fun_fact;
|
||||
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.shoe_size) profile.foot = Number(bio.shoe_size);
|
||||
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||
if (bio.circumcised && /yes/i.test(bio.circumcised)) profile.isCircumcised = true;
|
||||
if (bio.circumcised && /no/i.test(bio.circumcised)) profile.isCircumcised = false;
|
||||
|
||||
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||
|
||||
if (bio.tattoos && /(yes)|(some)|(many)/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
if (bio.piercings && /(yes)|(some)|(many)/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
||||
|
||||
profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
|
||||
|
||||
profile.avatar = [
|
||||
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
||||
query.img('.profile-pic img', 'src0_2x', { origin: channel.url }),
|
||||
query.img('.profile-pic img', 'src0_1x', { origin: channel.url }),
|
||||
];
|
||||
|
||||
if (options.includeActorScenes) {
|
||||
profile.releases = await fetchActorScenes({ query, el }, channel);
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, include, { uniqueReleases = [], duplicateReleases = [] }) {
|
||||
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|
||||
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|
||||
|| `${site.url}/categories/movies_${page}_d.html`;
|
||||
|
||||
const res = await qu.getAll(url, '.modelfeature, .item-video, .updateItem');
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (site.parameters?.t1) {
|
||||
return scrapeAllT1(res.items, site, [...uniqueReleases, ...duplicateReleases]);
|
||||
}
|
||||
|
||||
return scrapeAll(res.items, site, uniqueReleases);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease) {
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (site.parameters?.t1) {
|
||||
return scrapeSceneT1(res.item, site, url, baseRelease);
|
||||
}
|
||||
|
||||
return scrapeScene(res.item, site, url, baseRelease);
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, { channel }, options) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
async function fetchProfile({ name: actorName, url: actorUrl }, { channel }, options) {
|
||||
const actorSlugA = slugify(actorName, '', { lower: false });
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const t1 = channel.parameters?.t1 ? 't1/' : '';
|
||||
|
||||
const res1 = channel.parameters?.profile
|
||||
? await qu.get(util.format(channel.parameters.profile, actorSlugA))
|
||||
: await qu.get(`${channel.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false });
|
||||
|
||||
const res = (res1.ok && res1)
|
||||
|| (channel.parameters?.profile && await qu.get(util.format(channel.parameters.profile, actorSlugB)))
|
||||
|| await qu.get(`${channel.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false });
|
||||
const { res, url } = await tryUrls([
|
||||
actorUrl,
|
||||
...channel.parameters?.profile ? [
|
||||
format(channel.parameters.profile, { actor: actorSlugA }),
|
||||
format(channel.parameters.profile, { actor: actorSlugB }),
|
||||
] : [
|
||||
`${channel.url}/${t1}models/${actorSlugA}.html`,
|
||||
`${channel.url}/${t1}models/${actorSlugB}.html`,
|
||||
],
|
||||
], { followRedirects: false });
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
if (channel.parameters?.t1) {
|
||||
return scrapeProfileT1(res.item, channel);
|
||||
return scrapeProfileT1(res.context, url, channel);
|
||||
}
|
||||
|
||||
return scrapeProfile(res.item, channel, options);
|
||||
return scrapeProfile(res.context, url, channel, options);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -1,268 +1,254 @@
|
||||
'use strict';
|
||||
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('a', 'href', { origin: channel.url });
|
||||
// release.entryId = new URL(release.url).pathname.match(/\/Collection\/(\d+)/)[1]; can't be matched with upcoming scenes
|
||||
|
||||
release.shootId = query.cnt('a span:nth-of-type(1)').match(/^\d+/)?.[0];
|
||||
release.entryId = release.shootId;
|
||||
|
||||
release.date = query.date('a span:nth-of-type(2)', 'YYYY-MM-DD');
|
||||
release.actors = (query.q('a img', 'alt') || query.cnt('a span:nth-of-type(1)'))?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
||||
|
||||
release.poster = release.shootId
|
||||
? `https://inthecrack.com/assets/images/posters/collections/${release.shootId}.jpg`
|
||||
: query.img('a img', 'src', { origin: channel.url });
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeUpcoming(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
const title = query.cnt('span');
|
||||
|
||||
release.entryId = title.match(/^\d+/)[0];
|
||||
release.actors = title.slice(0, title.indexOf('-')).match(/[a-zA-Z]+(\s[a-zA-Z]+)*/g);
|
||||
|
||||
const date = moment.utc(title.match(/\w+ \d+\w+$/)[0], 'MMM Do');
|
||||
|
||||
if (date.isBefore()) {
|
||||
// date is next year
|
||||
release.date = date.add(1, 'year').toDate();
|
||||
} else {
|
||||
release.date = date.toDate();
|
||||
}
|
||||
|
||||
release.poster = [
|
||||
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
||||
query.img('img', 'src', { origin: channel.url }),
|
||||
];
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeProfileScenes(items, actorName, channel) {
|
||||
return items.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
if (slugify(query.cnt()) === 'no-other-collections') {
|
||||
return null;
|
||||
}
|
||||
|
||||
const details = query.cnts('figure p').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value?.trim(),
|
||||
};
|
||||
}, {});
|
||||
|
||||
release.url = query.url('a', 'href', { origin: channel.url });
|
||||
|
||||
release.shootId = details.collection.match(/\d+/)[0];
|
||||
release.entryId = release.shootId;
|
||||
|
||||
release.date = qu.parseDate(details.release_date, 'YYYY-MM-DD');
|
||||
release.actors = [actorName];
|
||||
|
||||
/* rely on clip length
|
||||
const durationString = Object.keys(details).find(info => /\d+_min_video/.test(info));
|
||||
release.duration = durationString && Number(durationString.match(/^\d+/)?.[0]) * 60;
|
||||
*/
|
||||
|
||||
release.productionLocation = details.shoot_location;
|
||||
|
||||
release.poster = [
|
||||
`https://inthecrack.com/assets/images/posters/collections/${release.entryId}.jpg`,
|
||||
query.img('img', 'src', { origin: channel.url }),
|
||||
];
|
||||
|
||||
return release;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, actorName, actorAvatar, channel, releasesFromScene) {
|
||||
function scrapeProfile(model, channel, isProfile = false) {
|
||||
const profile = {};
|
||||
|
||||
const bio = query.cnts(releasesFromScene ? 'ul li' : 'div.modelInfo li').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
profile.name = model.name; // used by shallow scrape
|
||||
profile.entryId = model.id;
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
profile.dateOfBirth = unprint.extractDate(model.birthdate, 'YYYY-MM-DD');
|
||||
|
||||
profile.name = actorName || bio.name;
|
||||
profile.gender = 'female';
|
||||
profile.birthPlace = bio.nationality;
|
||||
profile.height = model.height;
|
||||
profile.weight = model.weight;
|
||||
|
||||
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||
if (bio.weight) profile.weight = lbsToKg(bio.weight);
|
||||
const ethnicity = model.ethnicity?.title || model.ethnicity;
|
||||
|
||||
profile.releases = releasesFromScene?.[profile.name] || scrapeProfileScenes(qu.initAll(query.all('.Models li')), actorName, channel);
|
||||
if (!/none/i.test(ethnicity)) {
|
||||
profile.ethnicity = ethnicity;
|
||||
}
|
||||
|
||||
// avatar is the poster of a scene, find scene and use its high quality poster instead
|
||||
const avatarRelease = profile.releases.find((release) => new URL(release.poster[1]).pathname === new URL(actorAvatar).pathname);
|
||||
profile.avatar = avatarRelease?.poster[0];
|
||||
if (model.id) {
|
||||
profile.url = `${channel.origin}/modelcollections/${model.id}`;
|
||||
}
|
||||
|
||||
if (isProfile) {
|
||||
// exclude from scene actor data, as this will trigger a metric ton of untimely location lookups
|
||||
profile.birthPlace = model.countries?.map((country) => {
|
||||
if (country.name) {
|
||||
return country.name;
|
||||
}
|
||||
|
||||
if (country.isO2 || country.iso2) { // sic
|
||||
return country.isO2 || country.iso2;
|
||||
}
|
||||
|
||||
if (typeof country === 'string') {
|
||||
return country;
|
||||
}
|
||||
|
||||
return null;
|
||||
}).filter(Boolean)[0];
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchSceneActors(entryId, _release, channel) {
|
||||
const url = `https://inthecrack.com/Collection/Biography/${entryId}`;
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
const actorTabs = qu.initAll(res.item.query.all('#ModelTabs li')).map(({ query }) => ({
|
||||
name: query.cnt('a'),
|
||||
id: query.q('a', 'data-model'),
|
||||
}));
|
||||
|
||||
const actorReleasesByActorName = actorTabs.reduce((acc, { name, id }) => {
|
||||
const releaseEls = qu.initAll(res.item.query.all(`#Model-${id} li`));
|
||||
const releases = scrapeProfileScenes(releaseEls, name, channel);
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[name]: releases,
|
||||
};
|
||||
}, {});
|
||||
|
||||
const actors = qu.initAll(res.item.query.all('.modelInfo > li')).map((item) => {
|
||||
const avatar = item.query.img('img', 'src', { origin: channel.url });
|
||||
const profile = scrapeProfile(item, null, avatar, channel, actorReleasesByActorName);
|
||||
|
||||
return profile;
|
||||
});
|
||||
|
||||
return actors;
|
||||
function mergeModels(sceneModels, models, channel) {
|
||||
if (!Array.isArray(sceneModels) || !models) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return null;
|
||||
return sceneModels.map((modelId) => {
|
||||
const model = models[modelId?.id || modelId];
|
||||
|
||||
if (!model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return scrapeProfile(model, channel);
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
async function scrapeScene({ query, html }, url, channel) {
|
||||
const release = {};
|
||||
function scrapeAll(scenes, channel, models = {}, isUpcoming = false) {
|
||||
return scenes.map((scene) => {
|
||||
const release = {};
|
||||
|
||||
const entryId = new URL(url).pathname.match(/\/Collection\/(\d+)/)[1];
|
||||
release.entryId = scene.id;
|
||||
release.shootId = scene.id;
|
||||
|
||||
release.shootId = query.cnt('h2 span').match(/^\d+/)?.[0];
|
||||
release.entryId = release.shootId; // site entry ID can't be matched with upcoming scenes
|
||||
release.title = scene.title;
|
||||
release.date = unprint.extractDate(scene.releaseDate, 'YYYY-MM-DD');
|
||||
|
||||
const actors = await fetchSceneActors(entryId, release, channel);
|
||||
release.actors = actors || query.cnt('h2 span')?.match(/[a-zA-Z]+(\s[A-Za-z]+)*/g);
|
||||
release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${String(scene.id).padStart(3, '0')}.jpg?w=1400`;
|
||||
|
||||
release.description = query.cnt('p#CollectionDescription');
|
||||
release.productionLocation = query.cnt('.modelCollectionHeader p')?.match(/Shoot Location: (.*)/)?.[1];
|
||||
// coming soon photo remains available after release date
|
||||
release.photos = [`https://api.inthecrack.com/FileStore/images/coming_soon/${String(scene.id).padStart(3, '0')}.jpg`];
|
||||
|
||||
release.poster = qu.prefixUrl(html.match(/background-image: url\('(.*)'\)/)?.[1], channel.url);
|
||||
|
||||
release.chapters = query.all('.ClipOuter').map((el) => {
|
||||
const chapter = {};
|
||||
|
||||
chapter.title = query.text(el, 'h4');
|
||||
chapter.description = query.cnt(el, 'p');
|
||||
chapter.duration = query.dur(el, '.InlineDuration');
|
||||
|
||||
const posterStyle = query.style(el, '.clipImage', 'background-image');
|
||||
const poster = qu.prefixUrl(posterStyle.match(/url\((.*)\)/)?.[1], channel.url);
|
||||
|
||||
if (poster) {
|
||||
const { origin, pathname } = new URL(poster);
|
||||
|
||||
chapter.poster = [
|
||||
`${origin}${pathname}`, // full size
|
||||
poster,
|
||||
];
|
||||
if (isUpcoming) {
|
||||
return release;
|
||||
}
|
||||
|
||||
if (query.exists(el, '.ThreeDInfo')) {
|
||||
chapter.tags = ['3d'];
|
||||
}
|
||||
release.url = `${channel.origin}/collection/${scene.id}`;
|
||||
|
||||
return chapter;
|
||||
release.duration = scene.clipMinutesTotal * 60 || null;
|
||||
release.actors = mergeModels(scene.models, models, channel);
|
||||
|
||||
release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
|
||||
release.photoCount = scene.picTotal;
|
||||
|
||||
release.productionLocation = scene.shootLocation;
|
||||
|
||||
return release;
|
||||
});
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const year = moment().subtract(page - 1, ' year').year();
|
||||
|
||||
const url = `${channel.url}/Collections/Date/${year}`;
|
||||
const res = await qu.getAll(url, '.collectionGridLayout li');
|
||||
async function fetchLatest(channel, page, context) {
|
||||
const res = await unprint.get('https://api.inthecrack.com/Collection/');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
// API has no pagination, simulate so it doesn't blow up the rest of the guts
|
||||
return scrapeAll(res.data.slice((page - 1) * 100, page * 100), channel, context.beforeFetchLatest);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(channel) {
|
||||
const res = await qu.getAll(channel.url, '#ComingSoon li');
|
||||
const res = await unprint.get('https://api.inthecrack.com/Home/coming_soon');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeUpcoming(res.items, channel);
|
||||
// API has no pagination, simulate so it doesn't blow up the rest of the guts
|
||||
return scrapeAll(res.data, channel, null, true);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await qu.get(url);
|
||||
const qualityMap = {
|
||||
// unsure about 2 and 5
|
||||
1: 360,
|
||||
3: 720,
|
||||
4: 1080,
|
||||
6: 2160,
|
||||
};
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, channel);
|
||||
function scrapeScene(scene, channel, baseRelease, models = {}) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = scene.id;
|
||||
release.shootId = scene.id;
|
||||
|
||||
release.url = `${channel.origin}/collection/${scene.id}`;
|
||||
|
||||
release.title = scene.title;
|
||||
release.description = scene.description;
|
||||
|
||||
release.actors = mergeModels(scene.models, models, channel);
|
||||
|
||||
release.productionDate = unprint.extractDate(scene.shootDate, 'YYYY-MM-DD');
|
||||
release.productionLocation = scene.shootLocation;
|
||||
|
||||
release.poster = `https://api.inthecrack.com/image/resize/images/posters/collections/${String(scene.id).padStart(3, '0')}.jpg?w=1400`;
|
||||
|
||||
release.photos = scene.galleryImages
|
||||
?.filter((image) => image.imageType === 1) // type 1 and 2 are dupes as far as thumbs are concerned
|
||||
.slice(0, 15) // only first 15 photos have a free thumb
|
||||
.map((image) => image.filename && `https://api.inthecrack.com/FileStore/images/gallerysamples/${scene.id}/${image.filename}`).filter(Boolean);
|
||||
|
||||
release.chapters = scene.clips?.map((clip) => ({
|
||||
entryId: clip.id,
|
||||
title: clip.title,
|
||||
description: clip.description,
|
||||
date: unprint.extractDate(clip.releaseDate, 'YYYY-MM-DD'),
|
||||
duration: clip.length,
|
||||
// this is how the site itself renders the thumbnails, I shit you not. does not return valid image without ?w parameter
|
||||
poster: `https://api.inthecrack.com/image/resize/images/posters/clips/${clip.videos?.[0]?.filename.match(/^(.*?)(?=\d+x\d+\.mp4)/)[0]}.jpg?w=1400`,
|
||||
}));
|
||||
|
||||
release.qualities = scene.clips?.[0]?.videos?.map((video) => qualityMap[video.videoResolutionId]).filter(Boolean);
|
||||
|
||||
if (!baseRelease.date) {
|
||||
// base release has 'official' release date, deep data only has chapter dates
|
||||
// though, this is probably how they calculate the collection date, too
|
||||
release.date = release.chapters
|
||||
?.map((chapter) => chapter.date)
|
||||
.filter(Boolean)
|
||||
.toSorted((dateA, dateB) => dateA - dateB)[0];
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, channel, _include) {
|
||||
const firstLetter = actorName.charAt(0).toUpperCase();
|
||||
const url = `${channel.url}/Collections/Name/${firstLetter}`;
|
||||
const res = await qu.getAll(url, '.collectionGridLayout li');
|
||||
|
||||
if (res.ok) {
|
||||
const actorItem = res.items.find(({ query }) => slugify(query.cnt('span')) === slugify(actorName));
|
||||
|
||||
if (actorItem) {
|
||||
const actorUrl = actorItem.query.url('a', 'href', { origin: channel.url });
|
||||
const actorAvatar = actorItem.query.img('img', 'src', { origin: channel.url });
|
||||
const actorRes = await qu.get(actorUrl);
|
||||
|
||||
if (actorRes.ok) {
|
||||
return scrapeProfile(actorRes.item, actorName, actorAvatar, channel);
|
||||
}
|
||||
|
||||
return actorRes.status;
|
||||
}
|
||||
/**
 * Fetch a collection from the API and scrape it into a release.
 *
 * @param {string} url - scene URL containing `/collection/<id>`
 * @param {object} channel - channel handed through to `scrapeScene`
 * @param {object} baseRelease - shallow release handed through to `scrapeScene`
 * @param {object} [context] - its `beforeFetchScenes` value is handed to `scrapeScene` as the models
 * @returns {Promise<object|number|null>} the scraped release, the response status when the request fails, or `null` when no collection ID can be read from the URL
 */
async function fetchScene(url, channel, baseRelease, context) {
	let pathname;

	try {
		({ pathname } = new URL(url));
	} catch (error) {
		// an unparseable URL cannot carry a collection ID; treat it like any other URL without one
		return null;
	}

	const entryId = pathname.match(/\/collection\/(\d+)/)?.[1];

	if (!entryId) {
		return null;
	}

	const res = await unprint.get(`https://api.inthecrack.com/Collection/${entryId}`);

	if (res.ok) {
		// context may be omitted by the caller
		return scrapeScene(res.data, channel, baseRelease, context?.beforeFetchScenes);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch the model list from the API and index it by model ID.
 *
 * @returns {Promise<object>} models keyed by `id`; an empty object when the
 * request fails or the response cannot be indexed
 */
async function fetchModels() {
	const res = await unprint.get('https://api.inthecrack.com/Model/');

	if (!res.ok) {
		return {};
	}

	try {
		const entries = res.data.map((model) => [model.id, model]);

		return Object.fromEntries(entries);
	} catch (error) {
		// we can continue, we just won't have model names
		return {};
	}
}
|
||||
|
||||
/**
 * Resolve the API model ID for an actor.
 *
 * Tried in order: the actor's `entryId`, the ID embedded in the actor's
 * profile URL, and finally a name lookup against the full model list.
 *
 * @param {object} actor - actor with optional `entryId`, `url` and `name`
 * @returns {Promise<number|string|null>} the model ID, or `null` when the actor cannot be resolved
 */
async function getModelId(actor) {
	if (actor.entryId) {
		return actor.entryId;
	}

	if (actor.url) {
		// profile URLs in this scraper are built as /modelcollections/<id>; the singular form is accepted as well
		const modelId = new URL(actor.url).pathname.match(/\/modelcollections?\/(\d+)/)?.[1];

		if (modelId) {
			return modelId;
		}
	}

	// last resort: fetch the full model list and match on the slugified name
	const modelsById = await fetchModels();
	const model = Object.values(modelsById).find((searchModel) => slugify(searchModel.name) === slugify(actor.name));

	if (model) {
		return model.id;
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a model from the API and scrape it into a full profile.
 *
 * @param {object} actor - actor handed to `getModelId` to resolve the model ID
 * @param {object} channel - channel handed through to `scrapeProfile`
 * @returns {Promise<object|null>} the profile, or `null` when the model cannot
 * be resolved or the request fails
 */
async function fetchProfile(actor, channel) {
	const modelId = await getModelId(actor);

	if (modelId) {
		const res = await unprint.get(`https://api.inthecrack.com/Model/${modelId}`);

		// third argument marks this as a profile scrape rather than scene actor data
		return res.ok
			? scrapeProfile(res.data, channel, true)
			: null;
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
beforeFetchLatest: fetchModels,
|
||||
beforeFetchScenes: fetchModels,
|
||||
};
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
|
||||
const logger = require('../logger');
|
||||
const { fetchApiLatest } = require('./gamma');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
async function fetchActors(entryId, channel, { token, time }) {
|
||||
const url = `${channel.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status === true) {
|
||||
return Object.values(res.body.response.collection).map((actor) => Object.values(actor.modelId.collection)[0].stageName);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
 * Resolve the trailer location by requesting the 1080p stream endpoint without
 * following redirects and reading the Location header of a 302 response.
 *
 * @param {string|number} entryId - Content ID used in the download path.
 * @param {object} channel - Channel entity; `url` is the origin, `name` is used in the warning.
 * @returns {Promise<string|null>} The redirect target, or null when the response
 *   is not a 302 or the request failed (failures are logged as warnings).
 */
async function fetchTrailerLocation(entryId, channel) {
	const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;

	try {
		const { statusCode, headers } = await http.get(url, { followRedirects: false });

		if (statusCode === 302) {
			return headers.location;
		}
	} catch (error) {
		logger.warn(`${channel.name}: Unable to fetch trailer at '${url}': ${error.message}`);
	}

	return null;
}
|
||||
|
||||
/**
 * Scrape the releases from the update list page.
 *
 * @param {Array<{ query: object }>} items - Query contexts, one per list item.
 * @param {object} channel - Channel entity; `url` is used as origin for relative links.
 * @returns {object[]} Releases with url, entryId, title, poster and photos.
 *   Poster and photos are arrays of candidate sources, largest first.
 */
function scrapeLatest(items, channel) {
	// screenshots larger than 1500 are available, but unnecessarily large
	const toSources = (src) => [
		src.replace(/\/\d+/, '/1500'),
		src.replace(/\/\d+/, '/1000'),
		src,
	];

	return items.map(({ query }) => {
		const url = query.url('h5 a', null, { origin: channel.url });
		const [, entryId] = new URL(url).pathname.match(/\/(\d+)/);
		const title = query.cnt('h5 a');

		// the first screenshot serves as the poster, the rest as photos
		const [poster, ...photos] = query.imgs('.screenshot').map(toSources);

		return {
			url,
			entryId,
			title,
			poster,
			photos,
		};
	});
}
|
||||
|
||||
/**
 * Scrape a scene page.
 *
 * @param {{ query: object, html: string }} context - Query context and raw HTML of the scene page.
 * @param {string} url - Scene URL; its first numeric path segment becomes the entry ID.
 * @param {object} channel - Channel entity; `url` is used as origin for relative links.
 * @returns {object} The scraped release.
 */
function scrapeScene({ query, html }, url, channel) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];

	release.title = query.cnt('h1.description');
	release.actors = query
		.all('.video-performer')
		.map((actorEl) => {
			const actorUrl = query.url(actorEl, 'a', 'href', { origin: channel.url });
			// derive the performer ID from the performer's own URL, not from the scene URL
			const entryId = actorUrl
				? new URL(actorUrl).pathname.match(/\/(\d+)/)?.[1]
				: undefined;
			const avatar = query.img(actorEl, 'img:not([data-bgsrc*="not-available"])', 'data-bgsrc');

			return {
				name: query.cnt(actorEl, '.video-performer-name'),
				gender: 'female',
				avatar: avatar && [
					avatar.replace(/\/actor\/(\d+)/, '/actor/500'),
					avatar,
				],
				url: actorUrl,
				entryId,
			};
		})
		// a fixed male performer entry is appended to every scene
		.concat({ name: 'Jay Rock', gender: 'male' });

	release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = query.number('.release-date:last-child') * 60;

	release.studio = query.cnt('.studio span:nth-child(2)');
	release.director = query.text('.director');

	release.tags = query.cnts('.tags a');

	const poster = html.match(/url\((https.+\.jpg)\)/)?.[1];
	const photos = query.imgs('#moreScreenshots img');

	// poster first, then screenshots; each becomes a list of candidate sizes, largest first
	[release.poster, ...release.photos] = [poster]
		.concat(photos)
		.filter(Boolean)
		.map((src) => [
			src.replace(/\/(\d+)\/\d+/, '/$1/1500'),
			src.replace(/\/(\d+)\/\d+/, '/$1/1000'),
			src,
		]);

	const videoId = html.match(/item: (\d+)/)?.[1];

	if (videoId) {
		release.trailer = { stream: `https://trailer.adultempire.com/hls/trailer/${videoId}/master.m3u8` };
	}

	return release;
}
|
||||
|
||||
/**
 * Convert a scene object from the content API into a release.
 *
 * @param {object} scene - Scene as returned by the content.load endpoint.
 * @param {object} channel - Channel entity; `url` is the site origin.
 * @param {{ time: string, token: string }} tokens - Session tokens, attached to
 *   the release meta and used for the deep requests.
 * @param {boolean} deep - When truthy, actors and the trailer are fetched with
 *   additional requests.
 * @returns {Promise<object>} The release.
 */
async function scrapeSceneApi(scene, channel, tokens, deep) {
	const { _resources: resources } = scene;

	const release = {
		entryId: scene.id,
		title: scene.title,
		duration: scene.length,
		meta: {
			tokens, // attach tokens to reduce number of requests required for deep fetching
		},
	};

	release.url = `${channel.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
	release.date = new Date(scene.sites.collection[scene.id].publishDate);
	release.poster = resources.primary[0].url;

	if (scene.tags) {
		release.tags = Object.values(scene.tags.collection).map(({ alias }) => alias);
	}

	if (resources.base) {
		release.photos = resources.base.map(({ url }) => url);
	}

	// don't make external requests during update scraping, as this would happen for every scene on the page
	if (!deep) {
		return release;
	}

	const [actors, trailer] = await Promise.all([
		fetchActors(release.entryId, channel, tokens),
		fetchTrailerLocation(release.entryId, channel),
	]);

	release.actors = actors;

	if (trailer) {
		release.trailer = { src: trailer, quality: 1080 };
	}

	return release;
}
|
||||
|
||||
/**
 * Convert a collection of API scenes into shallow releases (no deep requests),
 * processing at most ten scenes concurrently.
 *
 * @param {object[]} scenes - Scene collection from the content.load endpoint.
 * @param {object} site - Channel entity passed on to scrapeSceneApi.
 * @param {{ time: string, token: string }} tokens - Session tokens attached to each release.
 * @returns {Promise<object[]>} The releases.
 */
function scrapeLatestApi(scenes, site, tokens) {
	const toShallowRelease = async (scene) => scrapeSceneApi(scene, site, tokens, false);

	// relies on the `map` helper of the Promise implementation in scope
	return Promise.map(scenes, toShallowRelease, { concurrency: 10 });
}
|
||||
|
||||
/**
 * Extract the API session tokens from the channel's front page.
 *
 * @param {object} channel - Channel entity; `url` is the page to fetch.
 * @returns {Promise<{ time: string, token: string }>} `time` is the "aet" value
 *   found in the page, `token` is the "ah" value reversed.
 * @throws {TypeError} When either value cannot be found in the page.
 */
async function fetchToken(channel) {
	const res = await http.get(channel.url);
	const html = res.body.toString();

	const [, time] = html.match(/"aet":(\d+)/);
	const [, ah] = html.match(/"ah":"([\w-]+)"/);

	// the API path expects the hash in reverse character order
	const token = Array.from(ah).reverse().join('');

	return { time, token };
}
|
||||
|
||||
/**
 * Fetch a page of the latest scenes through the content API.
 *
 * @param {object} channel - Channel entity; `url` is the API origin.
 * @param {number} [page=1] - 1-based page number, 50 scenes per page.
 * @returns {Promise<object[]|*>} The releases; otherwise the falsy API status
 *   flag when the request was OK, or the response status when it was not.
 */
async function fetchLatestApi(channel, page = 1) {
	const { time, token } = await fetchToken(channel);
	const offset = (page - 1) * 50;

	// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
	const url = `${channel.url}/sapi/${token}/${time}/content.load?limit=50&offset=${offset}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
	const res = await http.get(url);

	if (!res.ok) {
		return res.status;
	}

	if (!res.body.status) {
		return res.body.status;
	}

	return scrapeLatestApi(res.body.response.collection, channel, { time, token });
}
|
||||
|
||||
/**
 * Fetch a page of the latest releases, either through the Gamma API scraper
 * (when the channel sets `parameters.useGamma`) or from the update list page.
 *
 * @param {object} channel - Channel entity.
 * @param {number} [page=1] - Page number.
 * @param {object} options - Passed on to the Gamma scraper.
 * @param {object} preData - Passed on to the Gamma scraper.
 * @returns {Promise<object[]|*>} The releases, or the response status when the
 *   list page request was not OK.
 */
async function fetchLatest(channel, page = 1, options, preData) {
	const useGamma = channel.parameters?.useGamma;

	if (useGamma) {
		return fetchApiLatest(channel, page, preData, options, false);
	}

	const listUrl = `https://jayspov.net/jays-pov-updates.html?view=list&page=${page}`;
	const res = await qu.getAll(listUrl, '.item-grid-list-view > .grid-item');

	return res.ok
		? scrapeLatest(res.items, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Fetch a single scene through the content API, including actors and trailer.
 *
 * @param {string} url - Scene URL; the second path segment is used as entry ID.
 * @param {object} channel - Channel entity; `url` is the API origin.
 * @param {object} [baseRelease] - Shallow release; its `meta.tokens` are reused when present.
 * @returns {Promise<object|*>} The release; otherwise the falsy API status flag
 *   when the request was OK, or the response status when it was not.
 */
async function fetchSceneApi(url, channel, baseRelease) {
	// use attached tokens when deep fetching
	const tokens = baseRelease?.meta.tokens || await fetchToken(channel);
	const { time, token } = tokens;

	const entryId = new URL(url).pathname.split('/')[2];

	const apiUrl = `${channel.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
	const res = await http.get(apiUrl);

	if (!res.ok) {
		return res.status;
	}

	if (!res.body.status) {
		return res.body.status;
	}

	return scrapeSceneApi(res.body.response.collection[0], channel, { time, token }, true);
}
|
||||
|
||||
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} channel - Channel entity passed on to scrapeScene.
 * @returns {Promise<object|*>} The release, or the response status when the request was not OK.
 */
async function fetchScene(url, channel) {
	const page = await qu.get(url);

	return page.ok
		? scrapeScene(page.item, url, channel)
		: page.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
api: {
|
||||
fetchLatest: fetchLatestApi,
|
||||
fetchScene: fetchSceneApi,
|
||||
},
|
||||
};
|
||||
@@ -125,12 +125,12 @@ async function fetchScene(sceneUrl, entity, _baseRelease, { parameters }) {
|
||||
function scrapeProfile({ query }, url) {
|
||||
const profile = { url };
|
||||
|
||||
const bio = Object.fromEntries(query.all('ul[class*="HeroModel_list"] li').map((bioEl) => [
|
||||
const bio = Object.fromEntries(query.all('ul[class*=HeroModel][class*="__list"] li').map((bioEl) => [
|
||||
slugify(unprint.query.content(bioEl, 'p:first-child'), '_'),
|
||||
unprint.query.content(bioEl, 'p:last-child'),
|
||||
]));
|
||||
|
||||
profile.description = query.content('p[class*="HeroModel_text"]');
|
||||
profile.description = query.content('p[class*=HeroModel][class*="__text"]');
|
||||
profile.birthPlace = bio.place_of_birth;
|
||||
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, ['MMMM DD, YYYY', 'MM/DD/YYYY']);
|
||||
@@ -163,7 +163,7 @@ function scrapeProfile({ query }, url) {
|
||||
profile.piercings = profile.hasPiercings ? bio.piercings : null;
|
||||
}
|
||||
|
||||
profile.avatar = query.sourceSet('picture[class*="modelImage"] img');
|
||||
profile.avatar = query.sourceSet('picture[class*="__modelImage"] img');
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -96,7 +96,7 @@ function scrapeProfile(data) {
|
||||
|
||||
profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
|
||||
profile.age = bio.age;
|
||||
profile.placeOfBirth = bio.born;
|
||||
profile.birthPlace = bio.born;
|
||||
|
||||
profile.measurements = bio.measurements;
|
||||
profile.height = convert(bio.height, 'cm');
|
||||
|
||||
@@ -2,6 +2,71 @@
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Convert a scene object from the API into a release.
 *
 * @param {object} scene - Scene as returned by the videos endpoints.
 * @param {object} channel - Channel entity; `origin` is used to build the scene URL.
 * @param {object} parameters - Scraper parameters; an optional `basePath` is
 *   inserted between the origin and `/videos/`.
 * @returns {object} The release. `actors` and `tags` are undefined when the
 *   scene has no `models` or `tags` respectively.
 */
function scrapeSceneApi(scene, channel, parameters) {
	const release = {};

	release.entryId = scene.id;
	release.shootId = scene.serial_number;

	release.url = `${channel.origin}${parameters.basePath || ''}/videos/${release.shootId}`;

	release.title = scene.title;
	release.altTitles = [scene.title_cn].filter(Boolean);

	release.description = scene.description;
	release.altDescriptions = [scene.description_cn].filter(Boolean);

	release.date = new Date(scene.published_at);
	release.duration = scene.duration;

	release.actors = scene.models
		?.map((model) => ({
			name: model.name,
			alias: [model.name_cn].filter(Boolean),
			gender: model.gender,
			entryId: model.id,
			avatar: Array.from(new Set([
				model.avatar,
				model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed
			])).filter(Boolean),
		}))
		// 'Amateur' is a generic name for various amateur models, drop it and keep the real performers
		.filter((actor) => actor.name?.toLowerCase() !== 'amateur');

	release.tags = scene.tags?.map((tag) => tag.name);

	release.poster = scene.cover;
	release.trailer = scene.preview_video;

	return release;
}
|
||||
|
||||
/**
 * Fetch a page of the latest scenes from the API, 12 per page, sorted by publish date.
 *
 * @param {object} channel - Channel entity passed on to scrapeSceneApi.
 * @param {number} page - Page number.
 * @param {{ parameters: object }} context - `parameters.api` is the API base URL.
 * @returns {Promise<object[]|*>} The releases, or the response status when the
 *   request was not OK or the payload's `status` flag is falsy.
 */
async function fetchLatestApi(channel, page, { parameters }) {
	const res = await unprint.get(`${parameters.api}/videos?page=${page}&pageSize=12&sort=published_at`);

	if (!res.ok || !res.data?.status) {
		return res.status;
	}

	const { list } = res.data.data;

	return list.map((scene) => scrapeSceneApi(scene, channel, parameters));
}
|
||||
|
||||
/**
 * Fetch a single scene from the API. The list endpoint returns shallow data
 * missing actors and tags, so the scene is requested by its shoot ID.
 *
 * @param {string} url - Scene URL containing `/videos/<shootId>`.
 * @param {object} channel - Channel entity passed on to scrapeSceneApi.
 * @param {object} _baseRelease - Unused.
 * @param {{ parameters: object }} context - `parameters.api` is the API base URL.
 * @returns {Promise<object|null|*>} The release, null when the URL holds no
 *   shoot ID, or the response status when the request was not OK.
 */
async function fetchSceneApi(url, channel, _baseRelease, { parameters }) {
	const { pathname } = new URL(url);
	const [, shootId] = pathname.match(/\/videos\/([\w-]+)/) ?? [];

	if (!shootId) {
		return null;
	}

	const res = await unprint.get(`${parameters.api}/videos/${shootId}`);

	return res.ok
		? scrapeSceneApi(res.data.data, channel, parameters)
		: res.status;
}
|
||||
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
@@ -10,7 +75,7 @@ function scrapeAll(scenes) {
|
||||
|
||||
const url = query.url(null);
|
||||
|
||||
if (url) {
|
||||
if (url && !url.includes('/plans')) {
|
||||
const { origin, pathname, searchParams } = new URL(url);
|
||||
|
||||
release.url = `${origin}${pathname}`;
|
||||
@@ -63,8 +128,91 @@ function scrapeAll(scenes) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }) {
|
||||
/**
 * Convert a model object from the API into a profile.
 *
 * @param {object} model - Model as returned by the models endpoint.
 * @param {object} channel - Channel entity; `origin` is used to build the profile URL.
 * @param {object} parameters - Scraper parameters; an optional `basePath` is
 *   inserted between the origin and `/models/`.
 * @returns {object|null} The profile, or null for the generic 'Amateur' model.
 *   `scenes` is undefined when the model carries no `videos`.
 */
function scrapeProfileApi(model, channel, parameters) {
	const profile = {};

	if (model.name?.toLowerCase() === 'amateur') {
		return null; // generic profile for various amateur models
	}

	profile.entryId = model.id;
	profile.url = `${channel.origin}${parameters.basePath || ''}/models/${model.id}`;

	profile.description = model.description || null;

	profile.gender = model.gender;
	profile.alias = [model.name_cn].filter(Boolean);

	// dates containing 0001 are skipped — presumably a placeholder for an unknown birthday
	if (!model.birth_day?.includes('0001')) {
		profile.dateOfBirth = unprint.extractDate(model.birth_day, 'YYYY-MM-DD');
	}

	profile.birthPlace = model.birth_place || null;

	profile.height = model.height_cm || null;
	profile.weight = model.weight_kg || null;

	profile.bust = model.measurements_chest;
	profile.waist = model.measurements_waist;
	profile.hip = model.measurements_hips;

	profile.avatar = Array.from(new Set([
		model.avatar,
		model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed
	])).filter(Boolean);

	profile.socials = model.socialmedia;

	// tolerate payloads without a video list instead of throwing
	profile.scenes = model.videos?.map((scene) => scrapeSceneApi(scene, channel, parameters));

	return profile;
}
|
||||
|
||||
/**
 * Resolve the API model ID for an actor, from the actor's URL when it contains
 * `/models/<id>`, otherwise by searching the API for a model whose slugified
 * name equals the actor's slug.
 *
 * @param {object} actor - Actor with optional `url`, plus `name` and `slug` for the search.
 * @param {object} parameters - Scraper parameters; `api` is the API base URL.
 * @returns {Promise<number|*|null>} The ID as a number when taken from the URL,
 *   the search result's `id` as-is, or null when nothing matched.
 */
async function getModelId(actor, parameters) {
	if (actor.url) {
		// the capture group is required, without it the ID is never extracted
		const modelId = new URL(actor.url).pathname.match(/\/models\/(\d+)/)?.[1];

		if (modelId) {
			return Number(modelId);
		}
	}

	const res = await unprint.get(`${parameters.api}/search?keyword=${slugify(actor.name, '+')}`);

	if (res.ok) {
		const model = res.data.data?.models?.find((modelResult) => slugify(modelResult.name) === actor.slug);

		if (model) {
			return model.id;
		}
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch an actor profile from the API.
 *
 * @param {object} actor - Actor to look up; handed to getModelId.
 * @param {{ entity: object, parameters: object }} context - Entity passed on to
 *   scrapeProfileApi, and parameters whose `api` is the API base URL.
 * @returns {Promise<object|null>} The profile, or null when no model ID was
 *   found, the request was not OK or the payload held no data.
 */
async function fetchProfileApi(actor, { entity, parameters }) {
	const modelId = await getModelId(actor, parameters);

	if (!modelId) {
		return null;
	}

	const res = await unprint.get(`${parameters.api}/models/${modelId}`);

	if (!res.ok || !res.data.data) {
		return null;
	}

	return scrapeProfileApi(res.data.data, entity, parameters);
}
|
||||
|
||||
/**
 * Build the XPath candidates for the value following a bio label: first the
 * sibling span, then the sibling text node.
 *
 * @param {string} field - Exact text of the label span, e.g. 'Height'.
 * @returns {string[]} Two XPath expressions, span first.
 */
function getBioXPath(field) {
	const siblingAxis = `//span[text()="${field}"]/following-sibling::`;

	return ['span', 'text()'].map((nodeTest) => `${siblingAxis}${nodeTest}`);
}
|
||||
|
||||
function scrapeProfile({ query }, url) {
|
||||
const profile = { url };
|
||||
const avatar = query.img('div[class*="prof-pic"] > img');
|
||||
|
||||
if (avatar) {
|
||||
@@ -75,12 +223,13 @@ function scrapeProfile({ query }) {
|
||||
}
|
||||
|
||||
profile.description = query.content('h2') || null;
|
||||
profile.height = query.number('//span[text()="Height"]/following-sibling::span', { match: /(\d+) cm/, matchIndex: 1 });
|
||||
profile.weight = query.number('//span[text()="Weight"]/following-sibling::span', { match: /(\d+) kg/, matchIndex: 1 });
|
||||
|
||||
// can't find a single profile with this information available, but add for good measure
|
||||
profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::span');
|
||||
profile.birthPlace = query.number('//span[text()="Birth Place"]/following-sibling::span');
|
||||
// ::node()[self::span or self::text()] not supported by unprint/JSDOM
|
||||
profile.height = query.number(getBioXPath('Height'), { match: /(\d+) cm/, matchIndex: 1 }) || null;
|
||||
profile.weight = query.number(getBioXPath('Weight'), { match: /(\d+) kg/, matchIndex: 1 }) || null;
|
||||
|
||||
profile.measurements = query.content(getBioXPath('Measurements')) || null;
|
||||
profile.birthPlace = query.content(getBioXPath('Birth Place')) || null;
|
||||
|
||||
profile.banner = query.img('div[class*="banner"] > img');
|
||||
profile.photos = query.imgs('#MusModelSwiper img');
|
||||
@@ -88,7 +237,7 @@ function scrapeProfile({ query }) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function getCookie(channel) {
|
||||
async function getCookie(channel, _parameters) {
|
||||
const tokenRes = await unprint.get(channel.url);
|
||||
|
||||
if (!tokenRes.ok) {
|
||||
@@ -116,8 +265,8 @@ async function getCookie(channel) {
|
||||
return cookie;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const cookie = await getCookie(channel);
|
||||
async function fetchLatest(channel, page, context) {
|
||||
const cookie = await getCookie(channel, context.parameters);
|
||||
|
||||
const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, {
|
||||
selectAll: '.row a[video-id]',
|
||||
@@ -136,7 +285,7 @@ async function fetchLatest(channel, page) {
|
||||
// deep pages are paywalled
|
||||
|
||||
async function searchProfile(actor, context, cookie) {
|
||||
const searchRes = await unprint.get(`${context.channel.url}/livesearch?keyword=${actor.name}`, {
|
||||
const searchRes = await unprint.get(`${context.channel.url}${context.parameters.searchPath || '/livesearch'}?${context.parameters.searchParameter || 'keyword'}=${actor.name}`, {
|
||||
headers: {
|
||||
cookie,
|
||||
},
|
||||
@@ -150,7 +299,7 @@ async function searchProfile(actor, context, cookie) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, context) {
|
||||
const cookie = await getCookie(context.entity);
|
||||
const cookie = await getCookie(context.entity, context.parameters);
|
||||
const actorUrl = actor.url || await searchProfile(actor, context, cookie);
|
||||
|
||||
if (!actorUrl) {
|
||||
@@ -173,4 +322,9 @@ async function fetchProfile(actor, context) {
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
api: {
|
||||
fetchLatest: fetchLatestApi,
|
||||
fetchScene: fetchSceneApi,
|
||||
fetchProfile: fetchProfileApi,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -196,7 +196,7 @@ async function findModel(actor, entity) {
|
||||
const modelEl = resModels.context.query.all('.content-grid-item').find((el) => slugify(unprint.query.content(el, 'a.title')) === slugify(actor.name));
|
||||
|
||||
if (modelEl) {
|
||||
const modelUrl = `${origin}${unprint.query.url(modelEl, 'a.title')}`;
|
||||
const modelUrl = unprint.query.url(modelEl, 'a.title', { origin: entity.origin });
|
||||
const modelAvatar = unprint.query.sourceSet(modelEl, 'a picture img', 'data-srcset');
|
||||
|
||||
return {
|
||||
|
||||
@@ -99,7 +99,7 @@ async function scrapeProfile({ query }, url, include) {
|
||||
`);
|
||||
|
||||
profile.nationality = bio.nationality;
|
||||
profile.placeOfBirth = bio.birth_place;
|
||||
profile.birthPlace = bio.birth_place;
|
||||
profile.age = unprint.extractNumber(bio.age);
|
||||
|
||||
profile.dateOfBirth = unprint.extractDate(bio.birth_date, 'MMM D, YYYY');
|
||||
|
||||
@@ -1,31 +1,22 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
async function scrapeProfile({ query }, _url) {
|
||||
const profile = {};
|
||||
|
||||
const entries = Array.from(document.querySelectorAll('.infoPiece'), (el) => el.textContent.replace(/\n|\t/g, '').split(':'));
|
||||
const entries = query.contents('.infoPiece').map((content) => content.split(':'));
|
||||
const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [slugify(key, '_')]: value.trim() } : acc), {});
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
|
||||
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
|
||||
profile.description = query.content('div[itemprop="description"]') || query.content('.longBio');
|
||||
|
||||
if (bio.gender) profile.gender = bio.gender;
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
|
||||
if (descriptionString) profile.description = descriptionString.textContent;
|
||||
|
||||
if (bio.birthday && !/-0001/.test(bio.birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
|
||||
if (bio.born) profile.birthdate = moment.utc(bio.born, 'YYYY-MM-DD').toDate();
|
||||
if (bio.birthday && !/-0001/.test(bio.birthday)) profile.dateOfBirth = unprint.extractDate(bio.Birthday, 'MMM D, YYYY'); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
|
||||
if (bio.born) profile.dateOfBirth = unprint.extractDate(bio.born, 'YYYY-MM-DD');
|
||||
|
||||
profile.birthPlace = bio.birth_place || bio.birthplace;
|
||||
profile.residencePlace = bio.city_and_country;
|
||||
@@ -33,46 +24,33 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
if (bio.measurements && bio.measurements !== '--') profile.measurements = bio.measurements;
|
||||
if (bio.fake_boobs) profile.naturalBoobs = bio.fake_boobs.toLowerCase() === 'no';
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/\(\d+/)[0].slice(1));
|
||||
if (bio.height) profile.height = unprint.extractNumber(bio.height, { match: /\((\d+)\s*cm\)/, matchIndex: 1 });
|
||||
if (bio.weight) profile.weight = unprint.extractNumber(bio.weight, { match: /\((\d+)\s*kg\)/, matchIndex: 1 });
|
||||
if (bio.hair_color) profile.hairColor = bio.hair_color;
|
||||
if (bio.eyes) profile.eyeColor = bio.eye_color;
|
||||
if (bio.piercings) profile.hasPiercings = bio.piercings.toLowerCase() === 'yes';
|
||||
if (bio.tattoos) profile.hasTattoos = bio.tattoos.toLowerCase() === 'yes';
|
||||
|
||||
if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
|
||||
profile.social = Array.from(document.querySelectorAll('.socialList a'), (el) => el.href).filter((link) => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
|
||||
const avatar = query.img('#getAvatar') || query.img('.thumbImage img');
|
||||
|
||||
if (avatar && !/default\//.test(avatar)) {
|
||||
profile.avatar = avatar;
|
||||
}
|
||||
|
||||
profile.socials = query.urls('.socialList a').filter((link) => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
async function fetchProfile(actor) {
|
||||
const url = `https://www.pornhub.com/pornstar/${actor.slug}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
/* Model pages are not reliably associated with actual porn stars
|
||||
const modelUrl = `https://pornhub.com/model/${actorSlug}`;
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
|
||||
const [modelRes, pornstarRes] = await Promise.all([
|
||||
http.get(modelUrl),
|
||||
http.get(pornstarUrl),
|
||||
]);
|
||||
|
||||
const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName);
|
||||
const pornstar = pornstarRes.statusCode === 200 && await scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
|
||||
if (model && pornstar) {
|
||||
return {
|
||||
...model,
|
||||
...pornstar,
|
||||
};
|
||||
}
|
||||
*/
|
||||
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
const pornstarRes = await http.get(pornstarUrl);
|
||||
|
||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
return scrapeProfile(res.context, url);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -5,7 +5,7 @@ const angelogodshackoriginal = require('./angelogodshackoriginal');
|
||||
// const archangel = require('./archangel');
|
||||
const assylum = require('./assylum');
|
||||
const americanpornstar = require('./americanpornstar');
|
||||
const amnesiac = require('./amnesiac');
|
||||
const hentaied = require('./hentaied');
|
||||
const aziani = require('./aziani');
|
||||
const badoink = require('./badoink');
|
||||
const bamvisions = require('./bamvisions');
|
||||
@@ -19,7 +19,6 @@ const czechav = require('./czechav');
|
||||
const modelmedia = require('./modelmedia');
|
||||
const dorcel = require('./dorcel');
|
||||
const fabulouscash = require('./fabulouscash');
|
||||
// const famedigital = require('./famedigital');
|
||||
const firstanalquest = require('./firstanalquest');
|
||||
const elevatedx = require('./elevatedx');
|
||||
const exploitedx = require('./exploitedx');
|
||||
@@ -31,7 +30,6 @@ const hush = require('./hush');
|
||||
const innofsin = require('./innofsin');
|
||||
const insex = require('./insex');
|
||||
const inthecrack = require('./inthecrack');
|
||||
const jayrock = require('./jayrock');
|
||||
const jesseloadsmonsterfacials = require('./jesseloadsmonsterfacials');
|
||||
const julesjordan = require('./julesjordan');
|
||||
const karups = require('./karups');
|
||||
@@ -62,6 +60,7 @@ const radical = require('./radical');
|
||||
const rickysroom = require('./rickysroom');
|
||||
const sexlikereal = require('./sexlikereal');
|
||||
const score = require('./score');
|
||||
const sicflics = require('./sicflics');
|
||||
const snowvalley = require('./snowvalley');
|
||||
const spizoo = require('./spizoo');
|
||||
const teamskeet = require('./teamskeet');
|
||||
@@ -73,9 +72,7 @@ const tokyohot = require('./tokyohot');
|
||||
// const topwebmodels = require('./topwebmodels');
|
||||
const traxxx = require('./traxxx');
|
||||
const virtualtaboo = require('./virtualtaboo');
|
||||
const vivid = require('./vivid');
|
||||
const vixen = require('./vixen');
|
||||
const vogov = require('./vogov');
|
||||
const wankzvr = require('./wankzvr');
|
||||
const whalemember = require('./whalemember');
|
||||
// const xempire = require('./xempire');
|
||||
@@ -105,9 +102,9 @@ module.exports = {
|
||||
amateurallure: julesjordan,
|
||||
americanpornstar,
|
||||
amateureuro: porndoe,
|
||||
amnesiac,
|
||||
hentaied,
|
||||
angelogodshackoriginal,
|
||||
asiam: modelmedia,
|
||||
modelmediaasia: modelmedia,
|
||||
assylum,
|
||||
aziani,
|
||||
badoink,
|
||||
@@ -121,7 +118,7 @@ module.exports = {
|
||||
cumlouder,
|
||||
czechav,
|
||||
pornworld,
|
||||
// delphine: modelmedia,
|
||||
delphine: modelmedia,
|
||||
dorcel,
|
||||
elegantangel: adultempire,
|
||||
exploitedx,
|
||||
@@ -138,7 +135,6 @@ module.exports = {
|
||||
insex,
|
||||
interracialpass: hush,
|
||||
inthecrack,
|
||||
jayrock,
|
||||
jerkaoke: modelmedia,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
@@ -167,6 +163,7 @@ module.exports = {
|
||||
rickysroom,
|
||||
score,
|
||||
sexlikereal,
|
||||
sicflics,
|
||||
snowvalley,
|
||||
spizoo,
|
||||
swallowsalon: julesjordan,
|
||||
@@ -179,9 +176,7 @@ module.exports = {
|
||||
traxxx,
|
||||
vipsexvault: porndoe,
|
||||
virtualtaboo,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
wankzvr,
|
||||
westcoastproductions: adultempire,
|
||||
whalemember,
|
||||
|
||||
@@ -164,7 +164,8 @@ function scrapeScene({ query }, url) {
|
||||
release.tags = query.contents('.p-desc a[href*="tag/"], .desc a[href*="tag/"]');
|
||||
|
||||
const style = query.content('.vp style');
|
||||
const poster = query.img('#videos_page-page .item-img img') || style?.match(/background-image: url\('(http[\w.:/_-]+)'\);/)?.[1];
|
||||
// const poster = query.img('#videos_page-page .item-img img') || style?.match(/background-image: url\('(http[\w.:/_-]+)'\);/)?.[1];
|
||||
const poster = style?.match(/background-image: url\('(http[\w.:/_-]+)'\);/)?.[1];
|
||||
const fallbackPoster = resizeSrc(query.img('meta[itemprop="image"]', { attribute: 'content' })); // usually a different image
|
||||
|
||||
const photos = query.all('.gallery .thumb').map((imgEl) => {
|
||||
@@ -174,9 +175,9 @@ function scrapeScene({ query }, url) {
|
||||
|
||||
return Array.from(new Set([
|
||||
...isJoin ? [] : [link],
|
||||
img.replace('_tn', ''),
|
||||
img?.replace('_tn', ''),
|
||||
img,
|
||||
]));
|
||||
])).filter(Boolean);
|
||||
});
|
||||
|
||||
if (poster) {
|
||||
|
||||
137
src/scrapers/sicflics.js
Normal file
137
src/scrapers/sicflics.js
Normal file
@@ -0,0 +1,137 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Scrape the releases from the desktop update list.
 *
 * @param {Array<{ query: object }>} scenes - Query contexts, one per update.
 * @returns {object[]} Releases; the movie ID serves as both entry and shoot ID,
 *   and every release sets `forceDeep`.
 */
function scrapeLatest(scenes) {
	return scenes.map(({ query }) => {
		const movieId = query.dataset('a[data-movie]', 'movie');

		return {
			entryId: movieId,
			shootId: movieId,
			title: query.content('.vidtitle p:first-child'),
			description: query.text('.collapse p'),
			date: query.date('.vidtitle p:last-child', 'MMMM Do, YYYY'),
			poster: query.img('.vidthumb img'),
			forceDeep: true,
		};
	});
}
|
||||
|
||||
/**
 * Fetch a page of the chronological update list from the desktop site.
 *
 * @param {object} channel - Channel entity, passed along to scrapeLatest.
 * @param {number} page - Page number.
 * @returns {Promise<object[]|*>} The releases, or the response status when the request was not OK.
 */
async function fetchLatest(channel, page) {
	const listUrl = `https://www.sicflics.com/cinema/12-chronological-order/page${page}.html`;
	const res = await unprint.get(listUrl, { selectAll: '.update-wrap' });

	return res.ok
		? scrapeLatest(res.context, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Scrape the releases from the mobile update list.
 *
 * @param {Array<{ query: object }>} scenes - Query contexts, one per episode box.
 * @returns {object[]} Releases; the ID from `/movie/c/<id>` in the URL serves as
 *   both entry and shoot ID.
 */
function scrapeLatestMobile(scenes) {
	return scenes.map(({ query }) => {
		const url = query.url('a[href*="movie/"]');
		const movieId = new URL(url).pathname.match(/\/movie\/c\/(\d+)/)?.[1];

		return {
			url,
			entryId: movieId,
			shootId: movieId,
			title: query.content('.titlebg h4'),
			date: query.date('.date span', 'MMMM Do, YYYY'),
			poster: query.img('a img.thumb'),
		};
	});
}
|
||||
|
||||
/**
 * Fetch a page of the chronological update list from the mobile site.
 *
 * @param {object} channel - Channel entity, passed along to scrapeLatestMobile.
 * @param {number} page - Page number.
 * @returns {Promise<object[]|*>} The releases, or the response status when the request was not OK.
 */
async function fetchLatestMobile(channel, page) {
	const listUrl = `https://m.sicflics.com/cinema/12-chronological-order/page${page}.html`;
	const res = await unprint.get(listUrl, { selectAll: '.episode-box' });

	return res.ok
		? scrapeLatestMobile(res.context, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Scrape the desktop scene pop-up.
 *
 * @param {{ query: object }} context - Query context of the pop-up.
 * @param {string} entryId - Entry ID of the scene, copied onto the release.
 * @param {object} [parameters] - Scraper parameters. When `actorTags` is set,
 *   tags whose underscore slug is a key in it are reported as actors (using the
 *   mapped value) instead of tags.
 * @returns {object} The release.
 */
function scrapeScene({ query }, entryId, parameters) {
	const release = { entryId };

	release.title = query.content('h4');
	release.date = query.date('span[title="Date Added"]', 'MMMM Do, YYYY');

	// tags are hashtag links, drop the first '#'
	const tags = query.contents('.vidwrap a[href="#"]').map((tag) => tag.replace('#', ''));
	const actorTags = parameters?.actorTags;

	if (!actorTags) {
		release.tags = tags;

		return release;
	}

	const findActor = (tag) => actorTags[slugify(tag, '_')];

	release.actors = tags.map((tag) => findActor(tag)).filter(Boolean);
	release.tags = tags.filter((tag) => !findActor(tag));

	return release;
}
|
||||
|
||||
/**
 * Fetch the desktop scene pop-up for a release found on the update list.
 *
 * @param {string} _url - Unused.
 * @param {object} _channel - Unused.
 * @param {object} baseRelease - Shallow release; its `entryId` identifies the pop-up.
 * @param {{ parameters: object }} context - Parameters passed on to scrapeScene.
 * @returns {Promise<object|null|*>} The release, null when there is no base
 *   release with an entry ID, or the response status when the request was not OK.
 */
async function fetchScene(_url, _channel, baseRelease, { parameters }) {
	const entryId = baseRelease?.entryId;

	if (!entryId) {
		return null;
	}

	const res = await unprint.get(`https://www2.sicflics.com/v6/v6.pop.php?id=${entryId}`);

	return res.ok
		? scrapeScene(res.context, baseRelease.entryId, parameters)
		: res.status;
}
|
||||
|
||||
/**
 * Scrape a scene page of the mobile site.
 *
 * @param {{ query: object }} context - Query context of the player block.
 * @param {string} url - Scene URL; the ID in `/movie/c/<id>` becomes the entry ID.
 * @param {object} [parameters] - Scraper parameters. When `actorTags` is set,
 *   tags whose underscore slug is a key in it are reported as actors (using the
 *   mapped value) instead of tags.
 * @returns {object} The release.
 */
function scrapeSceneMobile({ query }, url, parameters) {
	const release = {};
	const { pathname } = new URL(url);

	release.entryId = pathname.match(/\/movie\/c\/(\d+)/)?.[1];

	release.title = query.content('.single-title');
	release.description = query.text('.moviedesc');

	release.date = query.date('//strong[contains(text(), "Uploaded on")]/following-sibling::text()', 'MMMM Do, YYYY');

	// tags are hashtag links, drop the first '#'
	const tags = query.contents('.player-block a[href*="/tag"]').map((tag) => tag.replace('#', ''));
	const actorTags = parameters?.actorTags;

	if (actorTags) {
		const findActor = (tag) => actorTags[slugify(tag, '_')];

		release.actors = tags.map((tag) => findActor(tag)).filter(Boolean);
		release.tags = tags.filter((tag) => !findActor(tag));
	} else {
		release.tags = tags;
	}

	release.poster = query.img('.thumb-pad img');

	return release;
}
|
||||
|
||||
// Fetch a mobile-site scene page, scoped to its player block, and scrape it.
// Resolves to the scraped release, or the response status on failure.
async function fetchSceneMobile(url, _channel, _baseRelease, { parameters }) {
	const res = await unprint.get(url, { select: '.player-block' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeSceneMobile(res.context, url, parameters);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
mobile: {
|
||||
fetchLatest: fetchLatestMobile,
|
||||
fetchScene: fetchSceneMobile,
|
||||
},
|
||||
};
|
||||
@@ -41,20 +41,22 @@ async function fetchTrailerUrl(videoId, entity) {
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeScene(scene, channel, parameters, includeTrailers) {
|
||||
async function scrapeScene(scene, channel, _parameters, includeTrailers) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = scene.id;
|
||||
// release.entryId = scene.id; // legacy
|
||||
release.entryId = scene.itemId;
|
||||
|
||||
release.url = `${channel.type === 'network' || channel.parameters?.layout === 'organic' ? channel.url : channel.parent.url}/movies/${release.entryId}`;
|
||||
|
||||
release.title = scene.title;
|
||||
release.description = scene.description;
|
||||
release.date = unprint.extractDate(scene.publishedDate, 'YYYY-MM-DD');
|
||||
|
||||
// release.actors = scene.models?.map((model) => model.modelName) || [];
|
||||
release.actors = scene.models?.map((model) => ({
|
||||
name: model.modelName || model.name || model.title,
|
||||
avatar: model.img || (parameters.avatars && `${parameters.avatars}/${slugify(model.modelName || model.name || model.title, '_')}.jpg`),
|
||||
avatar: model.img,
|
||||
gender: model.gender,
|
||||
url: `${channel.url}/models/${model.modelId || model.id}`,
|
||||
}));
|
||||
|
||||
@@ -90,38 +92,12 @@ function scrapeAll(scenes, channel, parameters) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1, { parameters }) {
|
||||
const res = await http.get(`https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-videoscontent`}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`);
|
||||
// url: 'https://www.pervz.com/series/pervz-features',
|
||||
const seriesId = parameters.id || new URL(channel.url).pathname.match(/\/series\/([a-z-]+)/)?.[1];
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.body.hits.hits.map(({ _source: scene }) => scene), channel, parameters);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
// Fetch a batch of up to 100 items from the key-ordered newestMovies store, paginated by cursor
// (the last key of the previous batch) rather than by page number.
// NOTE(review): the orderBy/startAt/limitToFirst parameters look like a Firebase-style REST query,
// which would answer `null` for an empty range - confirm. A null or missing body is treated as empty.
// Resolves to { scenes, context: { cursor } }, or the response status on failure.
async function fetchLatestOrganic(channel, page, context) {
	const res = await http.get(`https://store.psmcdn.net/${context.parameters.endpoint}/newestMovies/items.json?orderBy="$key"&startAt="${context.cursor || 'aaaaaaaa'}"&limitToFirst=100`);

	if (res.ok) {
		// guard against an empty result, Object.values/keys throw on null
		const items = res.body || {};
		const scenes = scrapeAll(Object.values(items), channel, context.parameters);

		return {
			// cursor implies page > 1 and first scene is last scene on previous page,
			// it probably won't trip up the pagination logic, but avoid the duplicate anyway
			scenes: context.cursor ? scenes.slice(1) : scenes,
			context: {
				// official page seems to derive cursor from last scene, too;
				// keep the previous cursor when the batch is empty instead of restarting from the beginning
				cursor: Object.keys(items).at(-1) ?? context.cursor,
			},
		};
	}

	return res.status;
}
|
||||
|
||||
async function fetchLatestSearch(channel, page = 1, { parameters }) {
|
||||
const url = parameters.id
|
||||
? `https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?q=(site.seo.seoSlug:"${parameters.id}" AND type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`
|
||||
: `https://tours-store.psmcdn.net/${parameters.fullEndpoint || parameters.endpoint}/_search?sort=publishedDate:desc&q=(type:video AND isXSeries:false)&size=30&from=${(page - 1) * 30}`;
|
||||
const url = seriesId
|
||||
? `https://tours-store.psmcdn.net/${parameters.endpoint}/_search?q=(site.seo.seoSlug:"${seriesId}" AND type:video)&sort=publishedDate:desc&size=30&from=${(page - 1) * 30}`
|
||||
: `https://tours-store.psmcdn.net/${parameters.endpoint}/_search?sort=publishedDate:desc&q=(type:video AND isXSeries:false)&size=30&from=${(page - 1) * 30}`;
|
||||
|
||||
const res = await http.get(url);
|
||||
|
||||
@@ -133,7 +109,7 @@ async function fetchLatestSearch(channel, page = 1, { parameters }) {
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel, baseScene, { parameters, includeTrailers }) {
|
||||
if (parameters.layout !== 'organic' && baseScene?.entryId && !includeTrailers) {
|
||||
if (baseScene?.entryId && !includeTrailers) {
|
||||
// overview and deep data is the same in elastic API, don't hit server unnecessarily
|
||||
return baseScene;
|
||||
}
|
||||
@@ -173,6 +149,7 @@ async function scrapeProfile(actor, entity, parameters) {
|
||||
profile.url = `${entity.url}/models/${actor.id}`;
|
||||
|
||||
profile.description = actor.modelBio;
|
||||
profile.gender = actor.gender;
|
||||
|
||||
if (actor.bio.about && !/\band\b/.test(actor.bio.about)) {
|
||||
const bio = actor.bio.about.split(/\n/).filter(Boolean).reduce((acc, item) => {
|
||||
@@ -233,15 +210,11 @@ async function scrapeProfile(actor, entity, parameters) {
|
||||
}
|
||||
|
||||
async function fetchProfile(baseActor, { entity, parameters }) {
|
||||
// const url = format(parameters.profiles, { slug: baseActor.slug });
|
||||
const url = parameters.layout === 'organic'
|
||||
? `https://store.psmcdn.net/${parameters.endpoint}/modelsContent/${baseActor.slug}.json`
|
||||
: `https://tours-store.psmcdn.net/${parameters.fullEndpoint || `${parameters.endpoint}-modelscontent`}/_doc/${parameters.modelPrefix || ''}${baseActor.slug}`;
|
||||
|
||||
const url = `https://tours-store.psmcdn.net/${parameters.endpoint}/_doc/model_${baseActor.slug}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok && res.data) {
|
||||
return scrapeProfile(parameters.layout === 'organic' ? res.data : res.data._source || res.body, entity, parameters);
|
||||
return scrapeProfile(res.data._source || res.body, entity, parameters);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -251,12 +224,4 @@ module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
organic: {
|
||||
fetchLatest: fetchLatestOrganic,
|
||||
fetchScene,
|
||||
},
|
||||
search: {
|
||||
fetchLatest: fetchLatestSearch,
|
||||
fetchScene,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -12,6 +12,9 @@ function scrapeAll(scenes, channel) {
|
||||
release.url = query.url('.thumb__title-link');
|
||||
release.entryId = new URL(release.url).pathname.match(/\/trailers\/(.+).html/)?.[1].toLowerCase();
|
||||
|
||||
// channel origin doesn't always match the site we're fetching the assets from
|
||||
const { origin } = new URL(release.url);
|
||||
|
||||
release.title = query.content('.thumb__title-link');
|
||||
release.date = query.date('time', unprint.dateConstants.DATETIME_LOCAL_SECONDS, { attribute: 'datetime' }) || query.date('time', 'MMMM D, YYYY');
|
||||
|
||||
@@ -20,14 +23,14 @@ function scrapeAll(scenes, channel) {
|
||||
url: unprint.query.url(el, null),
|
||||
}));
|
||||
|
||||
release.poster = query.img('.thumb__picture img');
|
||||
release.poster = query.sourceSet('.thumb__picture img', 'srcset', { origin });
|
||||
|
||||
[release.poster, ...release.photos] = Object
|
||||
release.photos = Object
|
||||
.entries(query.el('.thumb__image').dataset)
|
||||
.filter(([key]) => /^src/.test(key))
|
||||
.map(([, value]) => [value.replace('-1x', '-2x'), value].map((path) => unprint.prefixUrl(path, network.url)));
|
||||
.filter(([key]) => key.includes('srcset-'))
|
||||
.map(([_key, value]) => unprint.extractSourceSet(value, { origin }));
|
||||
|
||||
release.teaser = query.video('.thumb__video', { origin: network.url });
|
||||
release.teaser = query.video('.thumb__video', { origin });
|
||||
|
||||
release.likes = query.number('.thumb__rating-value');
|
||||
|
||||
@@ -41,8 +44,21 @@ function scrapeAll(scenes, channel) {
|
||||
});
|
||||
}
|
||||
|
||||
// Fetch one page of search results filtered to this channel's site ID.
// The search page is served from the channel's own URL for networks and for channels flagged
// `native`, otherwise from the parent's URL.
// Resolves to the scraped releases, or the response status on failure.
async function fetchLatest(channel, page = 1, { parameters }) {
	const useOwnUrl = channel.type === 'network' || parameters.native;
	const baseUrl = useOwnUrl ? channel.url : channel.parent.url;

	const res = await unprint.get(
		`${baseUrl}/search.php?site[]=${channel.parameters.siteId}&page=${page}`,
		{ selectAll: '.video-list .thumb' },
	);

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
function scrapeScene({ query }, context) {
|
||||
const release = {};
|
||||
const { origin } = new URL(context.url);
|
||||
// channel origin doesn't always match the site we're fetching the assets from
|
||||
|
||||
release.entryId = new URL(context.url).pathname.match(/\/trailers\/(.+).html/)[1].toLowerCase();
|
||||
|
||||
@@ -59,9 +75,9 @@ function scrapeScene({ query }, context) {
|
||||
|
||||
release.tags = query.contents('.video-tag-link');
|
||||
|
||||
const poster = query.img('.video-cover__image');
|
||||
const poster = query.sourceSet('.video-cover__image', 'srcset', { origin });
|
||||
|
||||
if (!poster.includes('error')) {
|
||||
if (!poster?.includes('error')) {
|
||||
release.poster = poster;
|
||||
}
|
||||
|
||||
@@ -87,23 +103,14 @@ function scrapeProfile({ query }, url, entity) {
|
||||
profile.hairColor = bio.hair;
|
||||
profile.eyes = bio.eyes;
|
||||
|
||||
profile.avatar = query.img('.model-profile-image-picture source', { origin: entity.url, attribute: 'srcset' }) || query.img('.model-profile-image-picture img', { origin: entity.url });
|
||||
profile.avatar = query.img('.model-profile-image-picture source', { origin: entity.origin, attribute: 'srcset' })
|
||||
|| query.img('.model-profile-image-picture img', { origin: entity.origin });
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.video-list .thumb')), entity);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
// Fetch one page of search results filtered to this channel's site ID.
// Networks are searched on their own URL, channels on their parent's URL.
// Resolves to the scraped releases, or the response status on failure.
async function fetchLatest(channel, page = 1) {
	const baseUrl = channel.type === 'network' ? channel.url : channel.parent.url;

	const res = await unprint.get(
		`${baseUrl}/search.php?site[]=${channel.parameters.siteId}&page=${page}`,
		{ selectAll: '.video-list .thumb' },
	);

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
async function fetchProfile(actor, entity, include) {
|
||||
const url = actor.url || `${entity.url}/models/${slugify(actor.name)}.html`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable no-unused-vars */
|
||||
const { get, ed } = require('../utils/q');
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
// Map scene objects from the native videos API to releases.
// `runtime` is multiplied by 60 to get the duration; cast members become actor objects,
// with an empty list when the scene has no cast.
function scrapeLatestNative(scenes, site) {
	return scenes.map((scene) => {
		const actors = scene.cast?.map((actor) => ({
			name: actor.stagename,
			gender: actor.gender.toLowerCase(),
			avatar: actor.placard,
		}));

		return {
			entryId: scene.id,
			url: `${site.url}${scene.url}`,
			title: scene.name,
			date: ed(scene.release_date, 'YYYY-MM-DD'),
			duration: parseInt(scene.runtime, 10) * 60,
			actors: actors || [],
			stars: Number(scene.rating),
			// prefer the larger placard when available
			poster: scene.placard_800 || scene.placard,
		};
	});
}
|
||||
|
||||
// Build a release from a native scene page.
// Date and runtime are read from the <h5> headings labelled "Released" and "Runtime"; poster and
// trailer are taken, in that order, from the first two content.vivid.com .jpg/.mp4 URLs in the raw HTML.
// Missing headings or asset URLs leave the corresponding fields unset instead of throwing.
function scrapeSceneNative({ html, q, qa }, url, _site) {
	const release = { url };

	release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

	release.title = q('.scene-h2-heading', true);
	release.description = q('.indie-model-p', true);

	const headings = qa('h5');
	const findHeadingText = (pattern) => headings.find((el) => pattern.test(el.textContent))?.textContent;

	const dateString = findHeadingText(/Released/);

	if (dateString) {
		release.date = ed(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
	}

	const [hours, minutes] = findHeadingText(/Runtime/)?.match(/\d+/g) || [];

	if (minutes) {
		release.duration = (hours * 3600) + (minutes * 60);
	} else if (hours) {
		release.duration = hours * 60; // scene shorter than 1hr, hour match are minutes
	}

	release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
	release.tags = qa('h5 a[href*="/categories"]', true);

	// dots are escaped so only literal ".vivid.com", ".jpg" and ".mp4" match
	const [poster, trailer] = html.match(/https:\/\/content\.vivid\.com(.*)(\.jpg|\.mp4)/g) || [];

	release.poster = poster;

	if (trailer) {
		release.trailer = {
			src: trailer,
		};
	}

	const channel = q('h5 a[href*="/sites"]', true);

	// strip the first dot-suffix (e.g. a TLD) from the linked site name
	if (channel) release.channel = channel.replace(/\.\w+/, '');

	return release;
}
|
||||
|
||||
// Fetch a page of 50 scenes, newest first, from the site's own videos API.
// Sites flagged `useGamma` are delegated to the Gamma API fetcher instead.
// Resolves to the scraped releases, or null unless both the HTTP status and the body's code are 200.
async function fetchLatestNative(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const offset = (page - 1) * 50;
	const res = await http.get(`${site.url}/videos/api/?limit=50&offset=${offset}&sort=datedesc`, {
		decodeJSON: true,
	});

	const succeeded = res.statusCode === 200 && res.body.code === 200;

	return succeeded
		? scrapeLatestNative(res.body.responseData, site)
		: null;
}
|
||||
|
||||
// Upcoming scenes are only fetched for sites flagged `useGamma`, through the Gamma API;
// every other site resolves to null.
async function fetchUpcomingNative(site) {
	return site.parameters?.useGamma
		? fetchApiUpcoming(site)
		: null;
}
|
||||
|
||||
// Fetch and scrape a native scene page; sites flagged `useGamma` are delegated to the Gamma scene fetcher.
// Resolves to the scraped release, or the response status on failure.
async function fetchSceneNative(url, site, release) {
	if (site.parameters?.useGamma) {
		return fetchScene(url, site, release);
	}

	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeSceneNative(res.item, url, site);
}
|
||||
|
||||
// Fetch a scene through the Gamma API and, when it is dated on or before the site's `lastNative`
// parameter, look the same title up in the native search API to recover the native URL and date.
// Falls back to the unmodified Gamma scene when there is no match or the search fails.
async function fetchSceneWrapper(url, site, release) {
	const scene = await fetchScene(url, site, release);

	if (!scene) {
		// nothing to enrich; pass the empty result through instead of crashing on scene.date
		return scene;
	}

	if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
		// scene is probably still available on Vivid site, use search API to get URL and original date
		// encodeURIComponent, because encodeURI leaves '&', '#' and '+' in the title unescaped, corrupting the query string
		const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURIComponent(scene.title)}`;
		const searchRes = await http.get(searchUrl, {
			decodeJSON: true,
		});

		if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
			const sceneMatch = searchRes.body.responseData.find((item) => slugify(item.name) === slugify(scene.title));

			if (sceneMatch) {
				return {
					...scene,
					url: `${site.url}${sceneMatch.url}`,
					date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
				};
			}
		}
	}

	return scene;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
@@ -1,12 +1,9 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const moment = require('moment');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
|
||||
const genderMap = {
|
||||
F: 'female',
|
||||
@@ -28,6 +25,7 @@ function getAvatarFallbacks(avatar) {
|
||||
src: source,
|
||||
expectType: {
|
||||
'binary/octet-stream': 'image/jpeg',
|
||||
'application/octet-stream': 'image/jpeg',
|
||||
},
|
||||
}));
|
||||
}
|
||||
@@ -45,6 +43,7 @@ function curateSources(sources, type = 'image/jpeg') {
|
||||
type: source.type || type,
|
||||
expectType: {
|
||||
'binary/octet-stream': type,
|
||||
'application/octet-stream': type,
|
||||
},
|
||||
}))
|
||||
.sort((resA, resB) => (resB.width * resB.height) - (resA.width * resA.height)) // number of pixels
|
||||
@@ -60,7 +59,7 @@ function scrapeAll(scenes, channel) {
|
||||
release.url = `${channel.url}/videos/${data.slug}`;
|
||||
release.title = data.title;
|
||||
|
||||
release.date = qu.extractDate(data.releaseDate);
|
||||
release.date = unprint.extractDate(data.releaseDate);
|
||||
release.actors = (data.modelsSlugged || data.models)?.map((model) => ({
|
||||
name: model.name,
|
||||
url: model.slugged && `${channel.url}/models/${model.slugged}`,
|
||||
@@ -69,12 +68,27 @@ function scrapeAll(scenes, channel) {
|
||||
release.poster = curateSources(data.images.listing);
|
||||
release.teaser = curateSources(data.previews.listing, 'video/mp4');
|
||||
|
||||
release.stars = data.rating;
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
// Fetch a videos listing page and scrape the scenes embedded in its #__NEXT_DATA__ JSON.
// Resolves to the scraped releases, an empty list when the page carries no edges,
// or the response status on failure.
async function fetchLatest(site, page = 1) {
	const res = await unprint.get(`${site.url}/videos?page=${page}`);

	if (!res.ok) {
		return res.status;
	}

	const edges = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.edges;

	if (!edges) {
		return [];
	}

	return scrapeAll(edges.map((edge) => edge.node), site);
}
|
||||
|
||||
function scrapeUpcoming(scenes, site) {
|
||||
return scenes.map((scene) => {
|
||||
if (!scene || scene.isPreReleasePeriod) {
|
||||
@@ -91,7 +105,7 @@ function scrapeUpcoming(scenes, site) {
|
||||
.map((component) => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.join(' ');
|
||||
|
||||
release.date = moment.utc(scene.releaseDate).toDate();
|
||||
release.date = unprint.extractDate(scene.releaseDate);
|
||||
release.datePrecision = 'minute';
|
||||
|
||||
release.actors = scene.models.map((model) => model.name);
|
||||
@@ -103,8 +117,97 @@ function scrapeUpcoming(scenes, site) {
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
// Ask the channel's GraphQL endpoint for its next scheduled release.
// The site variable is the upper-cased channel slug.
// Resolves to the scraped upcoming release(s), an empty list when there is no next scene
// (including a payload whose `data` is null), or the response status on failure.
async function fetchUpcoming(channel) {
	const query = `
		query getNextScene($site: Site!) {
			nextScene: findNextReleaseVideo(input: { site: $site }) {
				videoId
				slug
				isPreReleasePeriod
				releaseDate
				models {
					name
					__typename
				}
				images {
					countdown {
						...ImageInfo
						__typename
					}
					poster {
						...ImageInfo
						__typename
					}
					__typename
				}
				previews {
					countdown {
						...PreviewInfo
						__typename
					}
					poster {
						...PreviewInfo
						__typename
					}
					__typename
				}
				__typename
			}
		}

		fragment ImageInfo on Image {
			src
			placeholder
			width
			height
			highdpi {
				double
				triple
				__typename
			}
			webp {
				src
				placeholder
				highdpi {
					double
					triple
					__typename
				}
				__typename
			}
		}

		fragment PreviewInfo on Preview {
			src
			width
			height
			type
		}
	`;

	const res = await unprint.post(`${channel.url}/graphql`, {
		operationName: 'getNextScene',
		query,
		variables: {
			site: channel.slug.toUpperCase(),
		},
	}, {
		interface: 'request',
	});

	if (res.ok) {
		// a GraphQL error response can carry `data: null`; treat it like "no upcoming scene" instead of throwing
		const nextScene = res.data?.data?.nextScene;

		if (nextScene) {
			return scrapeUpcoming(nextScene, channel);
		}

		return [];
	}

	return res.status;
}
|
||||
|
||||
async function getTrailer(videoId, channel, url) {
|
||||
const res = await http.post(`${channel.url}/graphql`, {
|
||||
const res = await unprint.post(`${channel.url}/graphql`, {
|
||||
operationName: 'getToken',
|
||||
variables: {
|
||||
videoId,
|
||||
@@ -158,36 +261,37 @@ async function getTrailer(videoId, channel, url) {
|
||||
}
|
||||
`,
|
||||
}, {
|
||||
interface: 'request',
|
||||
headers: {
|
||||
referer: url,
|
||||
origin: channel.url,
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok && res.body.data?.generateVideoToken) {
|
||||
if (res.ok && res.data.data?.generateVideoToken) {
|
||||
return [
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p2160?.token,
|
||||
src: res.data.data.generateVideoToken.p2160?.token,
|
||||
quality: 2160,
|
||||
},
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p1080?.token,
|
||||
src: res.data.data.generateVideoToken.p1080?.token,
|
||||
quality: 1080,
|
||||
},
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p720?.token,
|
||||
src: res.data.data.generateVideoToken.p720?.token,
|
||||
quality: 720,
|
||||
},
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p480?.token,
|
||||
src: res.data.data.generateVideoToken.p480?.token,
|
||||
quality: 480,
|
||||
},
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p360?.token,
|
||||
src: res.data.data.generateVideoToken.p360?.token,
|
||||
quality: 360,
|
||||
},
|
||||
{
|
||||
src: res.body.data.generateVideoToken.p270?.token,
|
||||
src: res.data.data.generateVideoToken.p270?.token,
|
||||
quality: 270,
|
||||
},
|
||||
];
|
||||
@@ -204,12 +308,11 @@ async function scrapeScene(data, url, channel, options) {
|
||||
description: data.video.description,
|
||||
actors: data.video.models,
|
||||
director: data.video.directorNames,
|
||||
duration: qu.durationToSeconds(data.video.runLength),
|
||||
stars: data.video.rating,
|
||||
duration: unprint.extractDuration(data.video.runLength),
|
||||
};
|
||||
|
||||
release.entryId = data.video.newId;
|
||||
release.date = qu.extractDate(data.video.releaseDate);
|
||||
release.date = unprint.extractDate(data.video.releaseDate);
|
||||
|
||||
release.actors = data.video.modelsSlugged.map((model) => ({
|
||||
name: model.name,
|
||||
@@ -266,7 +369,6 @@ async function scrapeSceneData(data, channel, options) {
|
||||
}));
|
||||
|
||||
release.channel = data.site;
|
||||
release.stars = data.rating;
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -359,7 +461,7 @@ async function fetchGraphqlScene(release, channel) {
|
||||
}
|
||||
`;
|
||||
|
||||
const res = await http.post(`${channel.url}/graphql`, {
|
||||
const res = await unprint.post(`${channel.url}/graphql`, {
|
||||
operationName: 'searchVideos',
|
||||
variables: {
|
||||
videoId: entryId,
|
||||
@@ -378,6 +480,7 @@ async function fetchGraphqlScene(release, channel) {
|
||||
}
|
||||
`,
|
||||
}, {
|
||||
interface: 'request',
|
||||
headers: {
|
||||
referer: release.url,
|
||||
origin: channel.url,
|
||||
@@ -385,7 +488,7 @@ async function fetchGraphqlScene(release, channel) {
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return res.body.data.video;
|
||||
return res.data.data.video;
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -398,14 +501,15 @@ async function fetchScene(url, channel, baseRelease, options) {
|
||||
return scrapeSceneData(graphqlData, channel, options);
|
||||
}
|
||||
|
||||
const session = qu.session();
|
||||
const res = await qu.get(url, null, null, { session });
|
||||
const res = await unprint.get(url, {
|
||||
interface: 'request',
|
||||
useBrowser: !!options.parameters?.useBrowser,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
const dataString = res.item.query.html('#__NEXT_DATA__');
|
||||
const data = dataString && JSON.parse(dataString);
|
||||
const data = res.context.query.json('#__NEXT_DATA__');
|
||||
|
||||
return scrapeScene(data.props.pageProps, url, channel, options, session);
|
||||
return scrapeScene(data.props.pageProps, url, channel, options);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -436,113 +540,8 @@ async function scrapeProfile(data, channel) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
// Fetch a videos listing page and scrape the scenes embedded in its #__NEXT_DATA__ JSON.
// Resolves to the scraped releases, an empty list when the page carries no edges,
// or the response status on failure.
async function fetchLatest(site, page = 1) {
	const res = await qu.get(`${site.url}/videos?page=${page}`);

	if (!res.ok) {
		return res.status;
	}

	const dataString = res.item.query.html('#__NEXT_DATA__');
	const edges = (dataString && JSON.parse(dataString))?.props.pageProps.edges;

	if (!edges) {
		return [];
	}

	return scrapeAll(edges.map((edge) => edge.node), site);
}
|
||||
|
||||
// Ask the channel's GraphQL endpoint for its next scheduled release.
// The site variable is the upper-cased channel slug.
// Resolves to the scraped upcoming release(s), an empty list when there is no next scene
// (including a payload whose `data` is null), or the response status on failure.
async function fetchUpcoming(channel) {
	const query = `
		query getNextScene($site: Site!) {
			nextScene: findNextReleaseVideo(input: { site: $site }) {
				videoId
				slug
				isPreReleasePeriod
				releaseDate
				models {
					name
					__typename
				}
				images {
					countdown {
						...ImageInfo
						__typename
					}
					poster {
						...ImageInfo
						__typename
					}
					__typename
				}
				previews {
					countdown {
						...PreviewInfo
						__typename
					}
					poster {
						...PreviewInfo
						__typename
					}
					__typename
				}
				__typename
			}
		}

		fragment ImageInfo on Image {
			src
			placeholder
			width
			height
			highdpi {
				double
				triple
				__typename
			}
			webp {
				src
				placeholder
				highdpi {
					double
					triple
					__typename
				}
				__typename
			}
		}

		fragment PreviewInfo on Preview {
			src
			width
			height
			type
		}
	`;

	const res = await http.post(`${channel.url}/graphql`, {
		operationName: 'getNextScene',
		query,
		variables: {
			site: channel.slug.toUpperCase(),
		},
	});

	if (res.ok) {
		// a GraphQL error response can carry `data: null`; treat it like "no upcoming scene" instead of throwing
		const nextScene = res.body?.data?.nextScene;

		if (nextScene) {
			return scrapeUpcoming(nextScene, channel);
		}

		return [];
	}

	return res.status;
}
|
||||
|
||||
async function fetchProfile(actor, { channel }) {
|
||||
const res = await http.post(`${channel.url}/graphql`, {
|
||||
const res = await unprint.post(`${channel.url}/graphql`, {
|
||||
operationName: 'searchModels',
|
||||
variables: {
|
||||
slug: actor.slug,
|
||||
@@ -605,14 +604,15 @@ async function fetchProfile(actor, { channel }) {
|
||||
${imageFragment}
|
||||
`,
|
||||
}, {
|
||||
interface: 'request',
|
||||
headers: {
|
||||
referer: channel.url,
|
||||
origin: channel.url,
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok && res.body.data?.model) {
|
||||
return scrapeProfile(res.body.data, channel);
|
||||
if (res.ok && res.data.data?.model) {
|
||||
return scrapeProfile(res.data.data, channel);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -1,203 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
// const slugify = require('../utils/slugify');
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
const http = require('../utils/http');
|
||||
|
||||
// Derive the numeric key used to unscramble trailer URLs from the page's `license_code: '...'` value.
// Steps, in order:
//  1. drop the first character of the code and turn the rest into a digit string,
//     substituting 1 for every zero or non-digit;
//  2. split that string into two overlapping halves, and take four times their absolute difference as a seed;
//  3. for each position up to the midpoint, add the seed digit to the next four code characters,
//     wrapping at the modulus, and append the results.
function getLicenseCode(html) {
	const licensePrefix = 'license_code: \'';
	const codeStart = html.indexOf(licensePrefix) + licensePrefix.length;
	const licenseCode = html.slice(codeStart, html.indexOf('\'', codeStart));

	let digits = '';

	for (let index = 1; index < licenseCode.length; index += 1) {
		// string += number concatenates, so this stays a digit string
		digits += parseInt(licenseCode[index], 10) || 1;
	}

	const half = Math.floor(digits.length / 2);
	const head = parseInt(digits.substring(0, half + 1), 10);
	const tail = parseInt(digits.substring(half), 10);

	// |tail - head| + |head - tail|, doubled
	const seed = String(Math.abs(tail - head) * 4);

	// (16 / 2) + 2 = 10, the value at which summed digits wrap around
	const modulus = (parseInt('16px', 10) / 2) + 2;

	let code = '';

	for (let group = 0; group < half + 1; group += 1) {
		for (let offset = 1; offset <= 4; offset += 1) {
			let value = parseInt(licenseCode[group + offset], 10) + parseInt(seed[group], 10);

			if (value >= modulus) {
				value -= modulus;
			}

			code += value;
		}
	}

	return code;
}
|
||||
|
||||
// Unscramble an encoded trailer URL using the key derived from the page's license code.
// The first two '/'-separated segments of the encoded URL are dropped; of the remainder, the first
// 32 characters of segment 5 are the scrambled part. Walking from the last character to the first,
// each position is swapped with a target position computed from the key digits at and after it.
function decodeTrailerUrl(html, encodedTrailerUrl) {
	const licenseCode = getLicenseCode(html);
	const segments = encodedTrailerUrl.split('/').slice(2);

	// 2 * 16 = 32 characters
	const scrambled = segments[5].substring(0, 2 * parseInt('16px', 10));

	let hash = scrambled;

	for (let position = hash.length - 1; position >= 0; position -= 1) {
		let target = position;

		for (let index = position; index < licenseCode.length; index += 1) {
			target += parseInt(licenseCode[index], 10);
		}

		// wrap the target back into the hash
		while (target >= hash.length) {
			target -= hash.length;
		}

		// rebuild the hash with the characters at `position` and `target` exchanged
		let swapped = '';

		for (let index = 0; index < hash.length; index += 1) {
			if (index === position) {
				swapped += hash[target];
			} else if (index === target) {
				swapped += hash[position];
			} else {
				swapped += hash[index];
			}
		}

		hash = swapped;
	}

	segments[5] = segments[5].replace(scrambled, hash);

	return segments.join('/');
}
|
||||
|
||||
// Scrape every .video-post tile of a listing page into a release.
// The entry ID comes from the favourite button's data attribute; poster and preview clip
// come from the thumbnail's data attributes.
function scrapeLatest(html) {
	const { document } = ex(html);

	return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
		// release.entryId = slugify(release.title);
		const entryId = q('.ico-fav-0').dataset.favVideoId;
		const titleEl = q('.video-title-title');
		const date = qd('.video-data em', 'MMM DD, YYYY');
		const actors = qa('.video-model-list a', true);
		const { src, preview } = q('img.thumb').dataset;

		return {
			entryId,
			title: titleEl.title,
			url: titleEl.href,
			date,
			actors,
			poster: src,
			trailer: preview,
		};
	});
}
|
||||
|
||||
// Build a release from a scene page.
// Metadata comes from the DOM; trailer sources come from the inline `flashvars = {...}` player
// config, whose scrambled URLs are decoded with the page's license code.
function scrapeScene(html, url) {
	const { qu } = ex(html);
	const release = { url };

	// release.entryId = slugify(release.title);
	[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);

	release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
	release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);

	release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
	release.duration = qu.dur('.info-video-details li:nth-child(2) span');

	release.actors = qu.all('.info-video-models a', true);
	release.tags = qu.all('.info-video-category a', true);

	release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map((source) => source.replace('.jpg/', '.jpg'));
	release.poster = qu.meta('meta[property="og:image"]');

	if (!release.poster) {
		// fall back to the first .jpg URL following `preview_url` in the raw page
		const previewStart = html.indexOf('preview_url');
		release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
	}

	const varsPrefix = 'flashvars = {';
	const varsStart = html.indexOf(varsPrefix);
	const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));

	const vars = varsString.split(',').reduce((acc, item) => {
		const [prop, value] = item.split(': ');

		// a value containing a comma leaves fragments without a `prop: value` shape; skip them instead of throwing
		if (value === undefined) {
			return acc;
		}

		acc[prop.trim()] = value.trim().replace(/'/g, '');

		return acc;
	}, {});

	// only decode the sources the player config actually lists; decoding a missing URL throws
	release.trailer = ['video_url', 'video_alt_url', 'video_alt_url2', 'video_alt_url3', 'video_alt_url4']
		.filter((key) => vars[key])
		.map((key) => ({
			src: decodeTrailerUrl(html, vars[key]),
			quality: parseInt(vars[`${key}_text`], 10),
		}));

	return release;
}
|
||||
|
||||
/**
 * Fetch one page of the latest-videos listing and hand the HTML to scrapeLatest.
 *
 * @param {*} site - Passed through unchanged to scrapeLatest.
 * @param {number} [page=1] - Value interpolated into the `from` query parameter.
 * @returns {Promise<*>} Whatever scrapeLatest returns on HTTP 200, otherwise null.
 */
async function fetchLatest(site, page = 1) {
	const response = await http.get(`https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`);

	// anything other than a plain 200 is treated as "nothing to scrape"
	if (response.statusCode !== 200) {
		return null;
	}

	return scrapeLatest(response.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and hand the HTML to scrapeScene.
 *
 * @param {string} url - Address of the scene page; also forwarded to scrapeScene.
 * @returns {Promise<*>} Whatever scrapeScene returns on HTTP 200, otherwise null.
 */
async function fetchScene(url) {
	const response = await http.get(url);
	const succeeded = response.statusCode === 200;

	return succeeded
		? scrapeScene(response.body.toString(), url)
		: null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
@@ -1,68 +1,43 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Request the player configuration for a release and derive trailer sources and a poster from it.
 *
 * @param {object} release - Release being scraped; `entryId` is sent as `item_id`.
 * @param {object} channel - Channel entity; `url` is the base of the player-config endpoint.
 * @param {object} request - Must expose `cookie` (raw cookie string containing `csrfst=...;`) and `session`.
 * @returns {Promise<{trailers: object[], poster: *}|null>} Null when no CSRF token is found or the request fails.
 */
async function getTrailerUrl(release, channel, request) {
	// the endpoint requires the CSRF token the site stored in the csrfst cookie
	const csrfToken = request.cookie.match('csrfst=(.*?);')?.[1];

	if (!csrfToken) {
		return null;
	}

	const res = await http.post(`${channel.url}/ajax/player-config.json`, {
		item_id: release.entryId,
	}, {
		headers: {
			'X-CSRF-Token': csrfToken,
		},
		session: request.session,
		encodeJSON: false,
	});

	if (res.ok) {
		const trailers = res.body.streams.map((trailer) => ({
			src: trailer.url,
			// quality is the first number found in the stream id, falling back to the stream name;
			// either field may be absent, so both lookups are guarded (NaN when neither has a number)
			quality: Number(trailer.id?.match(/\d+/)?.[0] || trailer.name?.match(/\d+/)?.[0]),
			vr: true,
		}));

		return {
			trailers,
			poster: qu.prefixUrl(res.body.poster, res.body.thumbCDN),
		};
	}

	return null;
}
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
function scrapeAll(scenes, channel, url) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('a', 'href', { origin: channel.url });
|
||||
release.url = query.url('a', { origin: channel.origin });
|
||||
release.entryId = new URL(release.url).pathname.match(/(\d+)\/?$/)?.[1];
|
||||
|
||||
release.title = query.cnt('.card__h');
|
||||
release.date = query.date('.card__date', 'D MMMM, YYYY');
|
||||
release.title = query.content('.card__h');
|
||||
release.date = query.date('.card__date', 'D MMMM, YYYY', { match: null });
|
||||
|
||||
release.actors = query.all('.card__links a').map((el) => ({
|
||||
name: qu.query.cnt(el),
|
||||
url: qu.query.url(el, null, 'href', { origin: channel.url }),
|
||||
release.actors = query.all('.card__links a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null, { origin: channel.url }),
|
||||
}));
|
||||
|
||||
const poster = query.srcset('picture source[type="image/jpeg"]', 'data-srcset')
|
||||
|| query.srcset('picture source[type="image/jpeg"]', 'srcset')
|
||||
|| query.srcset('.video__cover', 'srcset');
|
||||
const poster = query.sourceSet('picture source[type="image/jpeg"]', 'data-srcset')
|
||||
|| query.sourceSet('picture source[type="image/jpeg"]', 'srcset')
|
||||
|| query.sourceSet('.video__cover', 'srcset');
|
||||
|
||||
if (poster?.[0]) {
|
||||
release.poster = [
|
||||
poster[0].replace(/small|tiny/, 'large'),
|
||||
...poster,
|
||||
];
|
||||
].map((src) => ({
|
||||
src,
|
||||
referer: url,
|
||||
}));
|
||||
|
||||
release.teaser = poster[0].replace(/\b(cover|hero|\d+)\/[a-z0-9_]+\.[a-z]+$/i, 'roll.webm'); // actually how site generates teaser URL
|
||||
const teaser = poster[0].replace(/\b(cover|hero|\d+)\/[a-z0-9_]+\.[a-z]+$/i, 'roll.webm'); // actually how site generates teaser URL
|
||||
|
||||
release.teaser = {
|
||||
src: teaser,
|
||||
referer: url,
|
||||
};
|
||||
}
|
||||
|
||||
release.channel = channel.slug; // avoid being assigned to WankzVR network
|
||||
@@ -71,31 +46,90 @@ function scrapeAll(scenes, channel) {
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene({ query }, url, channel, baseRelease, options, request) {
|
||||
/**
 * Fetch a page of the channel's video listing and scrape every card on it.
 *
 * @param {object} channel - Channel entity; `url` is the base of the listing URL.
 * @param {number} page - Value interpolated into the `p` query parameter.
 * @returns {Promise<*>} Result of scrapeAll when the request succeeds, otherwise the response status.
 */
async function fetchLatest(channel, page) {
	const latestUrl = `${channel.url}/videos?o=d&p=${page}`;

	// .cards-list is also used for hidden upcoming scenes, hence the .layout__content parent in the selector
	const res = await unprint.get(latestUrl, {
		selectAll: '.layout__content > .cards-list .card',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel, latestUrl);
}
|
||||
|
||||
/**
 * Request the player configuration for a release and derive trailer sources and a poster from it.
 *
 * @param {object} release - Release being scraped; `entryId` is sent as `item_id`.
 * @param {object} channel - Channel entity; `url` is the base of the player-config endpoint.
 * @param {object} cookies - Cookie map; `csrfst` supplies the CSRF token and the whole map is forwarded with the request.
 * @param {string} referer - Attached to every returned media source.
 * @returns {Promise<{trailers: object[], poster: ({src: string, referer: string}|*)}|null>}
 *   Null when no CSRF token is available or the request fails.
 */
async function getTrailerUrl(release, channel, cookies, referer) {
	const csrfToken = cookies.csrfst;

	if (!csrfToken) {
		return null;
	}

	const res = await unprint.post(`${channel.url}/ajax/player-config.json`, {
		item_id: release.entryId,
	}, {
		form: true,
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
			'X-CSRF-Token': csrfToken,
		},
		cookies,
	});

	if (res.ok) {
		const trailers = res.data.streams.map((trailer) => ({
			src: trailer.url,
			// quality is the first number found in the stream id, falling back to the stream name;
			// either field may be absent, so both lookups are guarded (NaN when neither has a number)
			quality: Number(trailer.id?.match(/\d+/)?.[0] || trailer.name?.match(/\d+/)?.[0]),
			vr: true,
			referer,
		}));

		const poster = unprint.prefixUrl(res.data.poster, res.data.thumbCDN);

		return {
			trailers,
			// only wrap the poster when prefixUrl produced something
			poster: poster && {
				src: poster,
				referer,
			},
		};
	}

	return null;
}
|
||||
|
||||
async function scrapeScene({ query }, { url, entity, include, cookies }) {
|
||||
const release = {};
|
||||
const data = query.json('script[type="application/ld+json"]');
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];
|
||||
|
||||
release.title = query.cnt('.detail__title');
|
||||
release.description = query.cnt('.detail__txt');
|
||||
release.title = query.content('.detail__title');
|
||||
release.description = query.content('.detail__txt');
|
||||
|
||||
release.date = query.date('.detail__date', 'D MMMM, YYYY');
|
||||
release.date = query.date('.detail__date', 'D MMMM, YYYY', { match: null });
|
||||
release.duration = query.number('.time') * 60;
|
||||
|
||||
release.actors = (query.all('.detail__header-lg .detail__models a') || query.all('.detail__header-sm .detail__models a')).map((el) => ({
|
||||
name: qu.query.cnt(el),
|
||||
url: qu.query.url(el, null, 'href', { origin: channel.url }),
|
||||
name: unprint.query.content(el),
|
||||
url: unprint.query.url(el, null, { origin: entity.origin }),
|
||||
}));
|
||||
|
||||
release.tags = query.cnts('.tag-list .tag').concat(query.cnts('.detail__specs-list .detail__specs-item'));
|
||||
release.tags = query.contents('.tag-list .tag').concat(query.contents('.detail__specs-list .detail__specs-item'));
|
||||
|
||||
release.photos = query.all('.photo-strip__slide').map((el) => ([
|
||||
qu.query.img(el, null, 'data-src'),
|
||||
qu.query.img(el, 'img', 'src'),
|
||||
]));
|
||||
unprint.query.img(el, null, 'data-src'),
|
||||
unprint.query.img(el, 'img'),
|
||||
].map((src) => ({ src, referer: url }))));
|
||||
|
||||
if (options.includePosters || options.includeTrailers) {
|
||||
const { trailers, poster } = await getTrailerUrl(release, channel, request);
|
||||
if (data?.thumbnailUrl) {
|
||||
release.poster = [
|
||||
data.thumbnailUrl.replace(/small|tiny/, 'large'),
|
||||
data.thumbnailUrl,
|
||||
].map((src) => ({ src, referer: url }));
|
||||
}
|
||||
|
||||
if (include.trailers || (!release.poster && include.poster)) {
|
||||
const { trailers, poster } = await getTrailerUrl(release, entity, cookies, url) || {};
|
||||
|
||||
release.trailer = trailers;
|
||||
release.poster = poster;
|
||||
@@ -105,17 +139,17 @@ async function scrapeScene({ query }, url, channel, baseRelease, options, reques
|
||||
}
|
||||
|
||||
async function fetchActorScenes({ query }, url, entity, page = 1, accScenes = []) {
|
||||
const scenes = scrapeAll(qu.initAll(query.all('.cards-list .card')), entity);
|
||||
const scenes = scrapeAll(unprint.initAll(query.all('.cards-list .card')), entity);
|
||||
const hasNextPage = !query.exists('.pagenav__link.inactive');
|
||||
|
||||
if (hasNextPage) {
|
||||
const { origin, pathname, searchParams } = new URL(url);
|
||||
searchParams.set('p', page + 1);
|
||||
|
||||
const res = await qu.get(`${origin}${pathname}?${searchParams}`);
|
||||
const res = await unprint.get(`${origin}${pathname}?${searchParams}`);
|
||||
|
||||
if (res.ok) {
|
||||
return fetchActorScenes(res.item, url, entity, page + 1, accScenes.concat(scenes));
|
||||
return fetchActorScenes(res.context, url, entity, page + 1, accScenes.concat(scenes));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,20 +161,24 @@ async function scrapeProfile({ query }, url, entity, options) {
|
||||
|
||||
const bio = query.all('.person__meta__item').reduce((acc, el) => ({
|
||||
...acc,
|
||||
[slugify(qu.query.cnt(el, '.person__meta__label'))]: qu.query.text(el),
|
||||
[slugify(unprint.query.content(el, '.person__meta__label'))]: unprint.query.text(el),
|
||||
}), {});
|
||||
|
||||
profile.description = query.cnt('.person__content');
|
||||
profile.description = query.content('.person__content');
|
||||
|
||||
profile.gender = entity.slug === 'tranzvr' ? 'transsexual' : 'female';
|
||||
profile.age = Number(bio.age) || null;
|
||||
|
||||
profile.birthPlace = bio.birthplace;
|
||||
|
||||
profile.height = parseInt(bio.height, 10);
|
||||
// height shown in imperial with cm between brackets when requested from North American IP, but only in cm for European IPs
|
||||
profile.height = unprint.extractNumber(bio.height, { match: /(\d+)cm/, matchIndex: 1 });
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
profile.avatar = query.srcset('.person__avatar img');
|
||||
profile.avatar = query.sourceSet('.person__avatar img').map((src) => ({
|
||||
src,
|
||||
referer: url,
|
||||
}));
|
||||
|
||||
if (options.includeActorScenes) {
|
||||
profile.scenes = await fetchActorScenes({ query }, url, entity);
|
||||
@@ -149,22 +187,12 @@ async function scrapeProfile({ query }, url, entity, options) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Fetch a page of the channel's video listing and scrape every card on it.
 *
 * @param {object} channel - Channel entity; `url` is the base of the listing URL.
 * @param {number} page - Value interpolated into the `p` query parameter.
 * @returns {Promise<*>} Result of scrapeAll when the request succeeds, otherwise the response status.
 */
async function fetchLatest(channel, page) {
	const latestUrl = `${channel.url}/videos?o=d&p=${page}`;
	const res = await qu.getAll(latestUrl, '.cards-list .card');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, channel);
}
|
||||
|
||||
async function fetchProfile(baseActor, { entity }, options) {
|
||||
const url = `${entity.url}/${baseActor.slug}`;
|
||||
const res = await qu.get(url);
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item, url, entity, options);
|
||||
return scrapeProfile(res.context, url, entity, options);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
@@ -174,5 +202,4 @@ module.exports = {
|
||||
fetchLatest,
|
||||
scrapeScene,
|
||||
fetchProfile,
|
||||
deprecated: true,
|
||||
};
|
||||
|
||||
@@ -53,7 +53,9 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
||||
date_precision: release.datePrecision,
|
||||
slug,
|
||||
description: decode(release.description),
|
||||
alt_descriptions: release.altDescriptions?.map((description) => decode(description)),
|
||||
comment: release.comment,
|
||||
attributes: release.attributes,
|
||||
photo_count: Number(release.photoCount) || null,
|
||||
deep: typeof release.deep === 'boolean' ? release.deep : false,
|
||||
deep_url: release.deepUrl,
|
||||
@@ -251,6 +253,7 @@ async function storeChapters(releases) {
|
||||
releaseId: release.id,
|
||||
index: index + 1,
|
||||
time: chapter.time,
|
||||
date: chapter.date,
|
||||
duration: chapter.duration,
|
||||
title: chapter.title,
|
||||
description: chapter.description,
|
||||
@@ -266,6 +269,7 @@ async function storeChapters(releases) {
|
||||
index: chapter.index,
|
||||
time: chapter.time,
|
||||
duration: chapter.duration,
|
||||
date: chapter.date,
|
||||
title: chapter.title,
|
||||
description: chapter.description,
|
||||
release_id: chapter.releaseId,
|
||||
@@ -429,7 +433,7 @@ async function storeScenes(releases, useBatchId) {
|
||||
const curatedDuplicateReleases = await Promise.all(duplicateReleasesWithId.map((release) => curateReleaseEntry(release, batchId)));
|
||||
const releasesWithId = uniqueReleasesWithId.concat(duplicateReleasesWithId);
|
||||
|
||||
const updated = await knex.raw(`
|
||||
const updatedChunks = await Promise.all(chunk(curatedDuplicateReleases, 500).map(async (chunkedReleases) => knex.raw(`
|
||||
UPDATE releases
|
||||
SET url = COALESCE(new.url, releases.url),
|
||||
date = COALESCE(new.date, releases.date),
|
||||
@@ -439,24 +443,30 @@ async function storeScenes(releases, useBatchId) {
|
||||
shoot_id = COALESCE(new.shoot_id, releases.shoot_id),
|
||||
duration = COALESCE(new.duration, releases.duration),
|
||||
comment = COALESCE(new.comment, releases.comment),
|
||||
attributes = COALESCE(new.attributes::jsonb || releases.attributes::jsonb, new.attributes::jsonb, releases.attributes::jsonb),
|
||||
deep = new.url IS NOT NULL,
|
||||
updated_at = NOW()
|
||||
FROM json_to_recordset(:scenes)
|
||||
AS new(id int, url text, date timestamptz, entity json, title text, description text, shoot_id text, duration integer, comment text, deep boolean)
|
||||
AS new(id int, url text, date timestamptz, entity json, title text, description text, shoot_id text, duration integer, comment text, attributes json, deep boolean)
|
||||
WHERE releases.id = new.id
|
||||
`, {
|
||||
scenes: JSON.stringify(curatedDuplicateReleases),
|
||||
});
|
||||
scenes: JSON.stringify(chunkedReleases),
|
||||
})));
|
||||
|
||||
const [actors, storedSeries] = await Promise.all([
|
||||
associateActors(releasesWithId, batchId),
|
||||
storeSeries(releasesWithId.map((release) => release.serie && { ...release.serie, entity: release.entity }).filter(Boolean), batchId),
|
||||
associateReleaseTags(releasesWithId),
|
||||
storeChapters(releasesWithId),
|
||||
argv.associateActors && associateActors(releasesWithId, batchId),
|
||||
argv.associateSeries && storeSeries(releasesWithId.map((release) => release.serie && { ...release.serie, entity: release.entity }).filter(Boolean), batchId),
|
||||
argv.associateTags && associateReleaseTags(releasesWithId),
|
||||
argv.associateTags && storeChapters(releasesWithId),
|
||||
]);
|
||||
|
||||
await associateSerieScenes(storedSeries, releasesWithId);
|
||||
await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time
|
||||
if (argv.associateSeries && storedSeries) {
|
||||
await associateSerieScenes(storedSeries, releasesWithId);
|
||||
}
|
||||
|
||||
if (argv.associateActors && actors) {
|
||||
await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time
|
||||
}
|
||||
|
||||
await updateSceneSearch(releasesWithId.map((release) => release.id));
|
||||
|
||||
@@ -467,7 +477,9 @@ async function storeScenes(releases, useBatchId) {
|
||||
await scrapeActors(actors.map((actor) => actor.name));
|
||||
}
|
||||
|
||||
logger.info(`Stored ${storedReleaseEntries.length}, updated ${updated.rowCount} releases`);
|
||||
const updated = updatedChunks.reduce((acc, updatedChunk) => acc + updatedChunk.rowCount, 0);
|
||||
|
||||
logger.info(`Stored ${storedReleaseEntries.length}, updated ${updated} releases`);
|
||||
|
||||
await notify(releasesWithId);
|
||||
|
||||
|
||||
67
src/tags.js
67
src/tags.js
@@ -1,8 +1,12 @@
|
||||
'use strict';
|
||||
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
|
||||
const { fetchEntityReleaseIds } = require('./entities');
|
||||
|
||||
const slugify = require('./utils/slugify');
|
||||
const bulkInsert = require('./utils/bulk-insert');
|
||||
const batchInsert = require('./utils/batch-insert');
|
||||
|
||||
function curateTagMedia(media) {
|
||||
if (!media) {
|
||||
@@ -73,10 +77,9 @@ function withRelations(queryBuilder, withMedia) {
|
||||
}
|
||||
}
|
||||
|
||||
async function matchReleaseTags(releases) {
|
||||
const tags = releases
|
||||
.map((release) => release.tags).flat()
|
||||
.map((tag) => tag?.trim().match(/[a-z0-9]+/ig)?.join(' ').toLowerCase())
|
||||
async function matchTags(rawTags) {
|
||||
const tags = rawTags
|
||||
.map((tag) => tag?.trim().match(/[a-z0-9()]+/ig)?.join(' ').toLowerCase())
|
||||
.filter(Boolean);
|
||||
|
||||
const tagEntries = await knex('tags')
|
||||
@@ -153,12 +156,12 @@ async function associateReleaseTags(releases, type = 'release') {
|
||||
return;
|
||||
}
|
||||
|
||||
const tagIdsBySlug = await matchReleaseTags(releases);
|
||||
const tagIdsBySlug = await matchTags(releases.flatMap((release) => release.tags));
|
||||
const entityTagIdsByEntityId = await getEntityTags(releases);
|
||||
|
||||
const tagAssociations = buildReleaseTagAssociations(releases, tagIdsBySlug, entityTagIdsByEntityId, type);
|
||||
|
||||
await bulkInsert(`${type}s_tags`, tagAssociations, false);
|
||||
await batchInsert(`${type}s_tags`, tagAssociations, { conflict: false });
|
||||
}
|
||||
|
||||
async function fetchTag(tagId) {
|
||||
@@ -187,8 +190,58 @@ async function fetchTags(limit = 100) {
|
||||
return tags.map((tag) => curateTag(tag));
|
||||
}
|
||||
|
||||
/**
 * Re-run tag matching for releases_tags rows based on their stored original tag text,
 * replacing each row that now resolves to a tag ID.
 *
 * @param {object[]} tagEntries - Rows from releases_tags (uses id, tag_id, original_tag, release_id).
 * @param {boolean} rematch - When truthy, rows that already have a tag_id are matched again too.
 * @returns {Promise<void>}
 */
async function reassociateTagEntries(tagEntries, rematch) {
	// a row is only eligible when it kept its original tag text, and is either unmatched or a rematch was requested
	const candidates = tagEntries.filter((entry) => entry.original_tag && (rematch || !entry.tag_id));

	const distinctOriginalTags = [...new Set(candidates.map((entry) => entry.original_tag))];
	const matchedTags = await matchTags(distinctOriginalTags);

	// rows whose original tag still resolves to nothing are left untouched
	const rematchedEntries = candidates
		.map((entry) => ({
			...entry,
			tag_id: matchedTags[slugify(entry.original_tag)],
		}))
		.filter((entry) => entry.tag_id);

	if (rematchedEntries.length > 0) {
		// NOTE(review): the transaction is neither committed nor rolled back in this function;
		// presumably batchInsert commits the transaction it is handed - confirm
		const trx = await knex.transaction();
		const replacedIds = rematchedEntries.map((entry) => entry.id);

		await trx('releases_tags')
			.whereIn('id', replacedIds)
			.delete();

		// drop the old primary key so the re-inserted rows receive fresh IDs
		const freshEntries = rematchedEntries.map((entry) => ({
			...entry,
			id: undefined,
		}));

		await batchInsert('releases_tags', freshEntries, {
			conflict: false,
			transaction: trx,
		});
	}

	const affectedSceneCount = new Set(rematchedEntries.map((entry) => entry.release_id)).size;

	logger.info(`Updated ${rematchedEntries.length} tags in ${affectedSceneCount} scenes`);
}
|
||||
|
||||
/**
 * Re-match the tag rows belonging to the given scenes.
 *
 * @param {Array<string|number>} rawSceneIds - Scene IDs; values that do not convert to a truthy number are dropped.
 * @param {boolean} rematch - Forwarded to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateReleaseTags(rawSceneIds, rematch) {
	const numericSceneIds = rawSceneIds
		.map((rawSceneId) => Number(rawSceneId))
		.filter((sceneId) => Boolean(sceneId));

	const sceneTagEntries = await knex('releases_tags').whereIn('release_id', numericSceneIds);

	await reassociateTagEntries(sceneTagEntries, rematch);
}
|
||||
|
||||
/**
 * Re-match the tag rows of every scene returned by fetchEntityReleaseIds for the given entities.
 *
 * @param {string[]} [networkSlugs=[]] - Forwarded to fetchEntityReleaseIds.
 * @param {string[]} [channelSlugs=[]] - Forwarded to fetchEntityReleaseIds.
 * @param {boolean} [rematch=false] - Forwarded to reassociateReleaseTags.
 * @returns {Promise<void>}
 */
async function reassociateEntityReleaseTags(networkSlugs = [], channelSlugs = [], rematch = false) {
	const entityReleaseIds = await fetchEntityReleaseIds(networkSlugs, channelSlugs);

	await reassociateReleaseTags(entityReleaseIds.sceneIds, rematch);
}
|
||||
|
||||
/**
 * Re-match every releases_tags row whose original tag text equals one of the given tags, ignoring case.
 *
 * @param {string[]} originalTags - Original tag texts to look up.
 * @param {boolean} rematch - Forwarded to reassociateTagEntries.
 * @returns {Promise<void>}
 */
async function reassociateOriginalTags(originalTags, rematch) {
	// both sides of the comparison are lowercased
	const matchingEntries = await knex('releases_tags').whereIn(
		knex.raw('lower(releases_tags.original_tag)'),
		originalTags.map((originalTag) => originalTag.toLowerCase()),
	);

	await reassociateTagEntries(matchingEntries, rematch);
}
|
||||
|
||||
module.exports = {
|
||||
associateReleaseTags,
|
||||
fetchTag,
|
||||
fetchTags,
|
||||
reassociateEntityReleaseTags,
|
||||
reassociateReleaseTags,
|
||||
reassociateOriginalTags,
|
||||
};
|
||||
|
||||
64
src/tools/batch-test.js
Normal file
64
src/tools/batch-test.js
Normal file
@@ -0,0 +1,64 @@
|
||||
'use strict';
|
||||
|
||||
const knex = require('../knex');
|
||||
const batchInsert = require('../utils/batch-insert');
|
||||
|
||||
/**
 * Create the batch_test table unless it already exists.
 *
 * @returns {Promise<void>}
 */
async function createTestTable() {
	if (await knex.schema.hasTable('batch_test')) {
		// existing rows are kept as they are
		// await knex('batch_test').delete();
		return;
	}

	await knex.schema.createTable('batch_test', (table) => {
		table.increments('id');
		table.string('name').unique();
		table.integer('age');
		table.text('location');
		table.datetime('created_at').notNullable().defaultTo(knex.fn.now());
	});
}
|
||||
|
||||
/**
 * Insert three sample rows into batch_test through batchInsert inside a transaction,
 * commit it, print the returned entries and close the database connection.
 *
 * @returns {Promise<void>}
 */
async function init() {
	await createTestTable();

	const transaction = await knex.transaction();

	const people = [
		{ name: 'John', age: 18, location: 'Home' },
		{ name: 'Jack', age: 38, location: 'Work' },
		{ name: 'James', age: 35, location: 'Club' },
	];

	// presumably commit: false leaves committing to the caller, which happens right below - confirm against batchInsert
	const batchOptions = {
		conflict: 'name',
		update: true,
		transaction,
		commit: false,
	};

	const entries = await batchInsert('batch_test', people, batchOptions);

	await transaction.commit();

	console.log('ENTRIES', entries);

	// await knex.schema.dropTable('batch_test');
	await knex.destroy();
}
|
||||
|
||||
init();
|
||||
136
src/tools/gamma_banners.js
Normal file
136
src/tools/gamma_banners.js
Normal file
@@ -0,0 +1,136 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
const fs = require('fs');
|
||||
const { Readable } = require('stream');
|
||||
const { pipeline } = require('stream/promises');
|
||||
|
||||
const knex = require('../knex');
|
||||
const argv = require('../argv');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const apiUrl = 'https://vjoc5ygk89-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(3.33.0)%3B%20Browser%20(lite)%3B%20react%20(16.8.6)%3B%20react-instantsearch%20(5.7.0)%3B%20JS%20Helper%20(2.28.1)&x-algolia-application-id=VJOC5YGK89&x-algolia-api-key=c5546bdfb4d3f31daf49ed3bb1463561';
|
||||
|
||||
/**
 * Query the creatives index for image banners of the site given by argv.site.
 *
 * @returns {Promise<object[]|null>} The hits of the first result set, or null (after logging) when the request fails.
 */
async function fetchBanners() {
	const searchParams = new URLSearchParams({
		hitsPerPage: 1000,
		maxValuesPerFacet: 100,
		page: 0,
		filters: '(ProgramType:Legacy OR ProgramType:Internal) AND NOT OverlayActive:false',
		facets: '["SceneActors","SceneCategories","ProgramName","Size","Niche","MediaExt","SiteTag","OverlayName"]',
		// limit to the requested site and to still/animated image formats
		facetFilters: `[["SiteTag:${argv.site}"],["MediaExt:jpg", "MediaExt:png", "MediaExt:gif"]]`,
	});

	const payload = {
		requests: [
			{
				indexName: 'creatives',
				params: searchParams.toString(),
			},
		],
	};

	const requestOptions = {
		headers: {
			'content-type': 'application/x-www-form-urlencoded',
			referer: 'https://creatives.gammae.com/',
		},
	};

	const res = await unprint.post(apiUrl, payload, requestOptions);

	if (!res.ok || !res.data.results[0]) {
		console.error(`Failed API request (${res.status}): ${res.body}`);

		return null;
	}

	return res.data.results[0].hits;
}
|
||||
|
||||
/**
 * Resolve raw tag names to tag slugs, following aliases to the slug of the tag they point at.
 *
 * @param {Array<string|null|undefined>} [rawTags] - Tag names as delivered by the API.
 * @returns {Promise<string[]>} Slugs ordered by descending priority; empty when rawTags is falsy.
 */
async function matchTags(rawTags) {
	if (!rawTags) {
		return [];
	}

	// reduce every name to its lowercase alphanumeric (and parenthesis) chunks joined by spaces, dropping empty results
	const normalizedNames = [];

	rawTags.forEach((rawTag) => {
		const chunks = rawTag?.trim().match(/[a-z0-9()]+/ig);
		const normalizedName = chunks?.join(' ').toLowerCase();

		if (normalizedName) {
			normalizedNames.push(normalizedName);
		}
	});

	const rows = await knex('tags')
		.select('tags.slug', 'aliases.slug as alias_slug')
		.whereIn(knex.raw('lower(tags.name)'), normalizedNames)
		.leftJoin('tags as aliases', 'aliases.id', 'tags.alias_for')
		.orderByRaw('CASE WHEN tags.alias_for IS NOT NULL THEN aliases.priority ELSE tags.priority END DESC');

	// prefer the slug of the aliased tag when the matched row is an alias
	return rows.map(({ slug, alias_slug: aliasSlug }) => aliasSlug || slug);
}
|
||||
|
||||
/**
 * Download every banner returned by fetchBanners to /tmp/gamma/<channel>/, one at a time.
 *
 * Reads argv.actors, argv.tags, argv.inspect and argv.preview; with argv.preview set nothing is
 * written and only the target path is printed. The database pool is always closed before the
 * function settles, including when no banners are found or a step throws.
 *
 * @returns {Promise<void>}
 */
async function init() {
	try {
		const banners = await fetchBanners();

		if (!banners) {
			return;
		}

		// chain the banners so they are processed strictly one after another
		await banners.reduce(async (chain, banner) => {
			await chain;

			const channel = slugify(banner.SiteTag, '');
			const url = unprint.prefixUrl(banner.MediaLocation || banner.CreativeURL, 'https://cdn.banhq.com');

			if (!url) {
				console.log('No URL found');
				console.log(banner);
				return;
			}

			const tags = await matchTags([
				...banner.Tags?.map((tag) => tag.Value) || [],
				...banner.SceneCategories || [],
				banner.Niche,
			].filter(Boolean));

			const fileTags = tags.slice(0, 4).join('_');
			const fileActors = banner.SceneActors?.slice(0, 2).map((actor) => slugify(actor, '_')).join('_');

			// tags are unreliable and describe entire scene, not banner, don't include by default
			const segments = [channel, banner.Width, banner.Height, banner.MediaID, argv.actors?.[0] !== false && fileActors].filter(Boolean);
			const filename = `${segments.join('_')}${argv.tags ? `-${fileTags}` : ''}.${banner.MediaExt || 'jpg'}`;

			const filepath = `/tmp/gamma/${channel}/${filename}`;

			if (argv.inspect) {
				console.log(banner);
			}

			if (argv.preview) {
				console.log(`Preview ${url}: ${filepath}`);
				return;
			}

			await fs.promises.mkdir(`/tmp/gamma/${channel}`, { recursive: true });

			// a failed download is reported and skipped so the remaining banners are still processed
			try {
				const res = await fetch(url);

				if (res.ok && res.body) {
					const writer = fs.createWriteStream(filepath);

					await pipeline(Readable.fromWeb(res.body), writer);

					if (argv.actors) {
						console.log(`Saved ${url} to ${filepath}`);
					} else {
						console.log(`Saved ${url} to ${filepath}, actors ${banner.SceneActors?.join(', ') || ''}`);
					}
				} else {
					console.log(`Failed to fetch ${url} (${res.status})`);
				}
			} catch (error) {
				console.log(`Failed to fetch ${url}: ${error.message}`);
			}
		}, Promise.resolve());
	} finally {
		// release the connection pool on every exit path
		await knex.destroy();
	}
}
|
||||
|
||||
init();
|
||||
15
src/tools/huge-query.js
Normal file
15
src/tools/huge-query.js
Normal file
@@ -0,0 +1,15 @@
|
||||
'use strict';
|
||||
|
||||
const knex = require('../knex');
|
||||
|
||||
/**
 * Insert 100,000 generated rows into the affiliates table in a single query,
 * then close the database connection so the script can exit.
 *
 * @returns {Promise<void>} Rejects with the insert error if the query fails.
 */
async function init() {
	try {
		const data = Array.from({ length: 100_000 }, (value, index) => ({
			id: `test_affiliate_${index}`,
		}));

		await knex('affiliates').insert(data);

		console.log('Done!');
	} finally {
		// close the pool whether or not the insert succeeded, as the other tool scripts do
		await knex.destroy();
	}
}
|
||||
|
||||
init();
|
||||
@@ -41,7 +41,7 @@ async function fetchScenes() {
|
||||
studios.name as studio_name,
|
||||
grandparents.id as parent_network_id,
|
||||
COALESCE(JSON_AGG(DISTINCT (actors.id, actors.name)) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
|
||||
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
|
||||
COALESCE(JSON_AGG(DISTINCT (tags.id, tags.name, tags.priority, tags_aliases.name, local_tags.actor_id)) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags,
|
||||
COALESCE(JSON_AGG(DISTINCT (movies.id, movies.title)) FILTER (WHERE movies.id IS NOT NULL), '[]') as movies,
|
||||
COALESCE(JSON_AGG(DISTINCT (series.id, series.title)) FILTER (WHERE series.id IS NOT NULL), '[]') as series,
|
||||
COALESCE(JSON_AGG(DISTINCT (releases_fingerprints.hash)) FILTER (WHERE releases_fingerprints.hash IS NOT NULL), '[]') as fingerprints,
|
||||
@@ -136,6 +136,14 @@ async function init() {
|
||||
dupe_index int
|
||||
)`);
|
||||
|
||||
await utilsApi.sql('drop table if exists scenes_tags');
|
||||
await utilsApi.sql(`create table scenes_tags (
|
||||
id int,
|
||||
scene_id int,
|
||||
tag_id int,
|
||||
actor_id int
|
||||
)`);
|
||||
|
||||
console.log('Recreated scenes table');
|
||||
console.log('Fetching scenes from primary database');
|
||||
|
||||
@@ -143,49 +151,62 @@ async function init() {
|
||||
|
||||
console.log('Fetched scenes from primary database');
|
||||
|
||||
const docs = scenes.map((scene) => {
|
||||
const docs = scenes.flatMap((scene) => {
|
||||
const flatActors = scene.actors.flatMap((actor) => actor.f2.match(/[\w']+/g)); // match word characters to filter out brackets etc.
|
||||
const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => (tag.f4 ? `${tag.f2} ${tag.f4}` : tag.f2).match(/[\w']+/g)); // only make top tags searchable to minimize cluttered results
|
||||
const filteredTitle = filterTitle(scene.title, [...flatActors, ...flatTags]);
|
||||
|
||||
return {
|
||||
replace: {
|
||||
index: 'scenes',
|
||||
id: scene.id,
|
||||
doc: {
|
||||
title: scene.title || undefined,
|
||||
title_filtered: filteredTitle || undefined,
|
||||
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
|
||||
created_at: Math.round(scene.created_at.getTime() / 1000),
|
||||
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
|
||||
is_showcased: scene.showcased,
|
||||
entry_id: scene.entry_id || undefined,
|
||||
shoot_id: scene.shoot_id || undefined,
|
||||
channel_id: scene.channel_id,
|
||||
channel_slug: scene.channel_slug,
|
||||
channel_name: [].concat(scene.channel_name, scene.channel_aliases).join(' '),
|
||||
network_id: scene.network_id || undefined,
|
||||
network_slug: scene.network_slug || undefined,
|
||||
network_name: [].concat(scene.network_name, scene.network_aliases).join(' ') || undefined,
|
||||
studio_id: scene.studio_id || undefined,
|
||||
studio_slug: scene.studio_slug || undefined,
|
||||
studio_name: scene.studio_name || undefined,
|
||||
entity_ids: [scene.channel_id, scene.network_id, scene.parent_network_id, scene.studio_id].filter(Boolean), // manticore does not support OR, this allows IN
|
||||
actor_ids: scene.actors.map((actor) => actor.f1),
|
||||
actors: scene.actors.map((actor) => actor.f2).join(),
|
||||
tag_ids: scene.tags.map((tag) => tag.f1),
|
||||
tags: flatTags.join(' '),
|
||||
movie_ids: scene.movies.map((movie) => movie.f1),
|
||||
movies: scene.movies.map((movie) => movie.f2).join(' '),
|
||||
serie_ids: scene.series.map((serie) => serie.f1),
|
||||
series: scene.series.map((serie) => serie.f2).join(' '),
|
||||
fingerprints: scene.fingerprints.join(' '),
|
||||
meta: scene.date ? format(scene.date, 'y yy M MM MMM MMMM d dd') : undefined,
|
||||
stashed: scene.stashed || 0,
|
||||
dupe_index: scene.dupe_index || 0,
|
||||
return [
|
||||
{
|
||||
replace: {
|
||||
index: 'scenes',
|
||||
id: scene.id,
|
||||
doc: {
|
||||
title: scene.title || undefined,
|
||||
title_filtered: filteredTitle || undefined,
|
||||
date: scene.date ? Math.round(scene.date.getTime() / 1000) : undefined,
|
||||
created_at: Math.round(scene.created_at.getTime() / 1000),
|
||||
effective_date: Math.round((scene.date || scene.created_at).getTime() / 1000),
|
||||
is_showcased: scene.showcased,
|
||||
entry_id: scene.entry_id || undefined,
|
||||
shoot_id: scene.shoot_id || undefined,
|
||||
channel_id: scene.channel_id,
|
||||
channel_slug: scene.channel_slug,
|
||||
channel_name: [].concat(scene.channel_name, scene.channel_aliases).join(' '),
|
||||
network_id: scene.network_id || undefined,
|
||||
network_slug: scene.network_slug || undefined,
|
||||
network_name: [].concat(scene.network_name, scene.network_aliases).join(' ') || undefined,
|
||||
studio_id: scene.studio_id || undefined,
|
||||
studio_slug: scene.studio_slug || undefined,
|
||||
studio_name: scene.studio_name || undefined,
|
||||
entity_ids: [scene.channel_id, scene.network_id, scene.parent_network_id, scene.studio_id].filter(Boolean), // manticore does not support OR, this allows IN
|
||||
actor_ids: scene.actors.map((actor) => actor.f1),
|
||||
actors: scene.actors.map((actor) => actor.f2).join(),
|
||||
tag_ids: scene.tags.map((tag) => tag.f1),
|
||||
tags: flatTags.join(' '),
|
||||
movie_ids: scene.movies.map((movie) => movie.f1),
|
||||
movies: scene.movies.map((movie) => movie.f2).join(' '),
|
||||
serie_ids: scene.series.map((serie) => serie.f1),
|
||||
series: scene.series.map((serie) => serie.f2).join(' '),
|
||||
fingerprints: scene.fingerprints.join(' '),
|
||||
meta: scene.date ? format(scene.date, 'y yy M MM MMM MMMM d dd') : undefined,
|
||||
stashed: scene.stashed || 0,
|
||||
dupe_index: scene.dupe_index || 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
...scene.tags.map((tag) => ({
|
||||
replace: {
|
||||
index: 'scenes_tags',
|
||||
// id: scene.id,
|
||||
doc: {
|
||||
scene_id: scene.id,
|
||||
tag_id: tag.f1,
|
||||
actor_id: tag.f5,
|
||||
},
|
||||
},
|
||||
})),
|
||||
];
|
||||
});
|
||||
|
||||
// const accData = chunk(docs, 10000).reduce(async (chain, docsChunk, index, array) => {
|
||||
|
||||
88
src/tools/manticore-stashes.js
Normal file
88
src/tools/manticore-stashes.js
Normal file
@@ -0,0 +1,88 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const manticore = require('manticoresearch');
|
||||
|
||||
const knex = require('../knex');
|
||||
const chunk = require('../utils/chunk');
|
||||
|
||||
const mantiClient = new manticore.ApiClient();
|
||||
|
||||
mantiClient.basePath = `http://${config.database.manticore.host}:${config.database.manticore.httpPort}`;
|
||||
|
||||
const utilsApi = new manticore.UtilsApi(mantiClient);
|
||||
const indexApi = new manticore.IndexApi(mantiClient);
|
||||
|
||||
/**
 * Rebuild the Manticore `<domain>s_stashed` table from the `stashes_<domain>s` table.
 *
 * The Manticore table is truncated first, then every stashed row (left-joined to
 * `stashes` for the stash and user IDs) is re-inserted through bulk `replace`
 * requests that are sent one batch at a time.
 *
 * @param {string} [domain='scene'] - Singular domain name; table and column names are derived from it.
 * @param {number} [chunkSize=1000] - Number of documents per bulk request; also drives the progress log.
 * @returns {Promise<void>}
 */
async function syncStashes(domain = 'scene', chunkSize = 1000) {
	const manticoreTable = `${domain}s_stashed`;
	const sourceTable = `stashes_${domain}s`;
	const domainKey = `${domain}_id`;

	await utilsApi.sql(`truncate table ${manticoreTable}`);

	const stashes = await knex(sourceTable)
		.select(
			`${sourceTable}.id as stashed_id`,
			`${sourceTable}.${domainKey}`,
			'stashes.id as stash_id',
			'stashes.user_id as user_id',
			`${sourceTable}.created_at as created_at`,
		)
		.leftJoin('stashes', 'stashes.id', `${sourceTable}.stash_id`);

	// promise chain keeps the bulk requests strictly sequential
	await chunk(stashes, chunkSize).reduce(async (chain, stashChunk, index) => {
		await chain;

		const stashDocs = stashChunk.map((stash) => ({
			replace: {
				index: manticoreTable,
				id: stash.stashed_id,
				doc: {
					[domainKey]: stash[domainKey],
					stash_id: stash.stash_id,
					user_id: stash.user_id,
					created_at: Math.round(stash.created_at.getTime() / 1000), // milliseconds to whole seconds
				},
			},
		}));

		// bulk endpoint takes newline-delimited JSON, one operation per line
		await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n'));

		// progress is derived from the same chunkSize used for batching, so the two cannot drift apart
		console.log(`Synced ${index * chunkSize + stashChunk.length}/${stashes.length} ${domain} stashes`);
	}, Promise.resolve());
}
|
||||
|
||||
/**
 * Recreate the Manticore `scenes_stashed`, `actors_stashed` and `movies_stashed`
 * tables from scratch and repopulate each of them through `syncStashes`.
 *
 * The knex instance is destroyed once the run ends, whether it succeeded or
 * failed; a failure is still propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function init() {
	const domains = ['scene', 'actor', 'movie'];

	try {
		// tables are independent of each other, so the creation order is immaterial
		await domains.reduce(async (chain, domain) => {
			await chain;

			await utilsApi.sql(`drop table if exists ${domain}s_stashed`);

			await utilsApi.sql(`create table if not exists ${domain}s_stashed (
				${domain}_id int,
				stash_id int,
				user_id int,
				created_at timestamp
			)`);
		}, Promise.resolve());

		await domains.reduce(async (chain, domain) => {
			await chain;
			await syncStashes(domain);
		}, Promise.resolve());

		console.log('Done!');
	} finally {
		// always destroy the knex instance, including after a failed run
		await knex.destroy();
	}
}
|
||||
|
||||
init();
|
||||
@@ -16,14 +16,14 @@ async function updateManticoreStashedScenes(docs) {
|
||||
await chunk(docs, 1000).reduce(async (chain, docsChunk) => {
|
||||
await chain;
|
||||
|
||||
const sceneIds = docsChunk.map((doc) => doc.replace.id);
|
||||
const sceneIds = docsChunk.filter((doc) => !!doc.replace).map((doc) => doc.replace.id);
|
||||
|
||||
const stashes = await knex('stashes_scenes')
|
||||
.select('stashes_scenes.id as stashed_id', 'stashes_scenes.scene_id', 'stashes_scenes.created_at', 'stashes.id as stash_id', 'stashes.user_id as user_id')
|
||||
.leftJoin('stashes', 'stashes.id', 'stashes_scenes.stash_id')
|
||||
.whereIn('scene_id', sceneIds);
|
||||
|
||||
const stashDocs = docsChunk.flatMap((doc) => {
|
||||
const stashDocs = docsChunk.filter((doc) => doc.replace).flatMap((doc) => {
|
||||
const sceneStashes = stashes.filter((stash) => stash.scene_id === doc.replace.id);
|
||||
|
||||
if (sceneStashes.length === 0) {
|
||||
@@ -50,6 +50,25 @@ async function updateManticoreStashedScenes(docs) {
|
||||
if (stashDocs.length > 0) {
|
||||
await indexApi.bulk(stashDocs.map((doc) => JSON.stringify(doc)).join('\n'));
|
||||
}
|
||||
|
||||
const deleteSceneIds = docs.filter((doc) => doc.delete).map((doc) => doc.delete.id);
|
||||
|
||||
if (deleteSceneIds.length > 0) {
|
||||
await indexApi.callDelete({
|
||||
index: 'scenes_stashed',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
in: {
|
||||
scene_id: deleteSceneIds,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
}, Promise.resolve());
|
||||
}
|
||||
|
||||
@@ -128,9 +147,20 @@ async function updateManticoreSceneSearch(releaseIds) {
|
||||
studios.showcased
|
||||
`, releaseIds && [releaseIds]);
|
||||
|
||||
// console.log(scenes.rows);
|
||||
const scenesById = Object.fromEntries(scenes.rows.map((scene) => [scene.id, scene]));
|
||||
|
||||
const docs = releaseIds.map((sceneId) => {
|
||||
const scene = scenesById[sceneId];
|
||||
|
||||
if (!scene) {
|
||||
return {
|
||||
delete: {
|
||||
index: 'scenes',
|
||||
id: sceneId,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const docs = scenes.rows.map((scene) => {
|
||||
const flatActors = scene.actors.flatMap((actor) => actor.f2.split(' '));
|
||||
const flatTags = scene.tags.filter((tag) => tag.f3 > 6).flatMap((tag) => [tag.f2].concat(tag.f4)).filter(Boolean); // only make top tags searchable to minimize cluttered results
|
||||
const filteredTitle = filterTitle(scene.title, [...flatActors, ...flatTags]);
|
||||
@@ -291,7 +321,20 @@ async function updateManticoreMovieSearch(movieIds) {
|
||||
movies_covers.*
|
||||
`, movieIds && [movieIds]);
|
||||
|
||||
const docs = movies.rows.map((movie) => {
|
||||
const moviesById = Object.fromEntries(movies.rows.map((movie) => [movie.id, movie]));
|
||||
|
||||
const docs = movieIds.map((movieId) => {
|
||||
const movie = moviesById[movieId];
|
||||
|
||||
if (!movie) {
|
||||
return {
|
||||
delete: {
|
||||
index: 'movies',
|
||||
id: movieId,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const combinedTags = Object.values(Object.fromEntries(movie.tags.concat(movie.movie_tags).map((tag) => [tag.f1, {
|
||||
id: tag.f1,
|
||||
name: tag.f2,
|
||||
|
||||
@@ -38,7 +38,7 @@ function filterLocalUniqueReleases(releases, accReleases) {
|
||||
};
|
||||
}
|
||||
|
||||
async function filterUniqueReleases(releases) {
|
||||
async function filterUniqueReleases(releases, options) {
|
||||
const releaseIdentifierChunks = chunk(releases.map((release) => [release.entity.id, release.entryId.toString()]));
|
||||
|
||||
const duplicateReleaseEntryChunks = await Promise.map(releaseIdentifierChunks, async (releaseIdentifiers) => {
|
||||
@@ -47,9 +47,13 @@ async function filterUniqueReleases(releases) {
|
||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||
.whereIn(['entity_id', 'entry_id'], releaseIdentifiers)
|
||||
.where((builder) => {
|
||||
if (!options?.forceDeepUpdate) {
|
||||
// we do not want to force deep rescrapes on release day, so if the scene is already deep-scraped, we add it to the duplicate list
|
||||
builder.where('deep', true); // scene is already deep scraped
|
||||
}
|
||||
|
||||
// check if previously upcoming scenes can be excluded from duplicates to be rescraped for release day updates
|
||||
builder
|
||||
.where('deep', true) // scene is already deep scraped
|
||||
.orWhereNull('date')
|
||||
.orWhereNotIn('date_precision', ['day', 'minute']) // don't worry about scenes without (accurate) dates for now
|
||||
.orWhere(knex.raw('date > NOW() - INTERVAL \'12 hours\'')) // scene is still upcoming, with a rough offset to wait for the end of the day west of UTC
|
||||
@@ -110,18 +114,23 @@ function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = []) {
|
||||
}
|
||||
|
||||
async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||
const options = {
|
||||
...config.options[scraper.slug],
|
||||
...include,
|
||||
...preData,
|
||||
parameters: getRecursiveParameters(entity),
|
||||
options: getRecursiveParameters(entity, 'options'),
|
||||
};
|
||||
|
||||
async function scrapeReleasesPage(page, accReleases, pageContext) {
|
||||
const options = {
|
||||
...config.options[scraper.slug],
|
||||
...include,
|
||||
...preData,
|
||||
const pageOptions = {
|
||||
...options,
|
||||
...pageContext,
|
||||
parameters: getRecursiveParameters(entity),
|
||||
};
|
||||
|
||||
const rawPageReleases = isUpcoming
|
||||
? await scraper.fetchUpcoming(entity, page, options, preData)
|
||||
: await scraper.fetchLatest(entity, page, options, preData);
|
||||
? await scraper.fetchUpcoming(entity, page, pageOptions, preData)
|
||||
: await scraper.fetchLatest(entity, page, pageOptions, preData);
|
||||
|
||||
const pageReleases = rawPageReleases?.scenes || rawPageReleases;
|
||||
|
||||
@@ -157,7 +166,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
|
||||
|
||||
const { uniqueReleases, duplicateReleases } = argv.force
|
||||
? { uniqueReleases: limitedReleases, duplicateReleases: [] }
|
||||
: await filterUniqueReleases(limitedReleases);
|
||||
: await filterUniqueReleases(limitedReleases, options);
|
||||
|
||||
return { uniqueReleases, duplicateReleases };
|
||||
}
|
||||
@@ -305,8 +314,6 @@ async function scrapeNetworkParallel(networkEntity) {
|
||||
async function fetchUpdates() {
|
||||
const includedNetworks = await fetchIncludedEntities();
|
||||
|
||||
// console.log(includedNetworks[0]);
|
||||
|
||||
const scrapedNetworks = await Promise.map(
|
||||
includedNetworks,
|
||||
async (networkEntity) => (networkEntity.parameters?.sequential
|
||||
|
||||
96
src/utils/batch-insert.js
Executable file
96
src/utils/batch-insert.js
Executable file
@@ -0,0 +1,96 @@
|
||||
'use strict';
|
||||
|
||||
const knex = require('../knex');
|
||||
const chunk = require('./chunk');
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
// improved version of bulkInsert
|
||||
/**
 * Insert rows into a table in chunks, inside a single transaction.
 *
 * Conflict handling is selected by the `conflict` / `update` combination:
 * - `conflict: true` (default), `update: false`: plain insert, any conflict raises.
 * - `conflict: false`: conflicting rows are ignored, existing entries are kept as-is.
 * - `conflict: 'column'` or `['a', 'b']` with `update: true`: upsert, merging all columns.
 * - `conflict: 'column'` or `['a', 'b']` with `update: ['x']`: upsert, merging only the listed columns.
 * Conflict columns without `update`, or `conflict: true` with `update`, are rejected.
 *
 * @param {string} table - Target table name.
 * @param {Array<Object>} items - Rows to insert.
 * @param {Object} [options]
 * @param {boolean|string|string[]} [options.conflict=true] - Conflict mode or conflict column(s).
 * @param {boolean|string[]} [options.update=false] - Merge everything (`true`) or only the listed columns.
 * @param {number} [options.chunkSize=1000] - Rows per insert query.
 * @param {boolean} [options.concurrent=false] - Run the chunk queries through `Promise.all` instead of one by one.
 * @param {Object} [options.transaction] - Existing transaction to use; a new one is opened when omitted.
 * @param {boolean} [options.commit=false] - Commit (or roll back) a passed-in transaction here as well.
 * @returns {Promise<Array<Object>>} Flat array of the rows returned by all chunk queries, in both modes.
 * @throws {Error} On a missing table, non-array items, an invalid conflict/update combination, or a failed query.
 */
async function batchInsert(table, items, {
	conflict = true,
	update = false,
	chunkSize = 1000,
	concurrent = false,
	transaction,
	commit = false,
} = {}) {
	if (!table) {
		throw new Error('No table specified for batch insert');
	}

	if (!Array.isArray(items)) {
		throw new Error('Batch insert items are not an array');
	}

	if (items.length === 0) {
		return [];
	}

	const chunks = chunk(items, chunkSize);
	const conflicts = [].concat(conflict).filter((column) => typeof column === 'string'); // conflict might be 'true'
	const trx = transaction || await knex.transaction();

	// a transaction opened here is always finalized here; a passed-in one only when commit is set
	const finalizeTransaction = !transaction || commit;

	try {
		const queries = chunks.map((chunkItems) => {
			const query = trx(table)
				.insert(chunkItems)
				.returning('*');

			if (conflicts.length > 0) {
				if (Array.isArray(update)) {
					// update specified
					return query
						.onConflict(conflicts)
						.merge(update);
				}

				if (update) {
					// update all
					return query
						.onConflict(conflicts)
						.merge();
				}

				throw new Error('Batch insert conflict columns must be specified together with update');
			}

			if (conflict && update) {
				throw new Error('Batch insert conflict must specify columns, or update must be disabled');
			}

			// error on any conflict
			if (conflict) {
				return query;
			}

			// ignore duplicates, keep old entries as-is
			return query
				.onConflict()
				.ignore();
		});

		// Promise.all yields one row array per chunk; flatten it so both modes return the same flat shape
		const results = concurrent
			? (await Promise.all(queries)).flat()
			: await queries.reduce(async (chain, query) => {
				const acc = await chain;
				const result = await query;

				return acc.concat(result);
			}, Promise.resolve([]));

		if (finalizeTransaction) {
			await trx.commit();
		}

		return results;
	} catch (error) {
		if (finalizeTransaction) {
			await trx.rollback();
		}

		logger.error(`Failed batch insert: ${error.message} (${error.detail})`);

		throw error;
	}
}
|
||||
|
||||
module.exports = batchInsert;
|
||||
@@ -1,11 +1,11 @@
|
||||
'use strict';
|
||||
|
||||
function getRecursiveParameters(entity, parameters) {
|
||||
function getRecursiveParameters(entity, property = 'parameters', parameters) {
|
||||
if (entity.parent) {
|
||||
return getRecursiveParameters(entity.parent, { ...entity.parameters, ...parameters });
|
||||
return getRecursiveParameters(entity.parent, property, { ...entity[property], ...parameters });
|
||||
}
|
||||
|
||||
return { ...entity.parameters, ...parameters };
|
||||
return { ...entity[property], ...parameters };
|
||||
}
|
||||
|
||||
module.exports = getRecursiveParameters;
|
||||
|
||||
@@ -4,8 +4,10 @@ const config = require('config');
|
||||
const test = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const unprint = require('unprint');
|
||||
const omit = require('object.omit');
|
||||
|
||||
const argv = require('../src/argv');
|
||||
const logger = require('../src/logger')(__filename);
|
||||
const include = require('../src/utils/argv-include')(argv);
|
||||
const slugify = require('../src/utils/slugify');
|
||||
const scrapers = require('../src/scrapers/scrapers');
|
||||
@@ -31,6 +33,12 @@ unprint.options({
|
||||
proxy: config.proxy,
|
||||
});
|
||||
|
||||
unprint.on('requestInit', (event) => logger.debug(`Unprint ${event.method} (${event.interval}ms/${event.concurrency}p${event.isProxied ? ' proxied' : ''}${event.isBrowser ? ' browser' : ''}) ${event.url}`));
|
||||
unprint.on('requestError', (event) => logger.error(`Unprint failed ${event.isProxied ? 'proxied ' : ''}${event.isBrowser ? 'browser ' : ''}${event.method} ${event.url} (${event.status}): ${event.statusText}`));
|
||||
|
||||
unprint.on('browserOpen', (event) => logger.debug(`Unprint opened browsers ${event.keys} (${event.active}/${config.bypass.browser.clientRetirement} active, ${event.clients} clients)`));
|
||||
unprint.on('browserClose', (event) => logger.debug(`Unprint closed${event.retired ? ' retired' : ''} browsers ${event.keys} (${event.active}/${config.bypass.browser.clientRetirement} active, ${event.clients} clients)`));
|
||||
|
||||
const actors = [
|
||||
// vixen
|
||||
{ entity: 'vixen', name: 'Abella Danger', fields: ['gender', 'avatar', 'description'] },
|
||||
@@ -43,9 +51,13 @@ const actors = [
|
||||
{ entity: 'milfy', name: 'Clea Gaultier', fields: ['gender', 'avatar', 'description'] },
|
||||
{ entity: 'wifey', name: 'Danielle Renae', fields: ['gender', 'avatar', 'description'] },
|
||||
// teamskeet
|
||||
{ entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] },
|
||||
{ entity: 'mylf', name: 'Eliza Ibarra', fields: ['avatar', 'measurements', 'nationality', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||
{ entity: 'sayuncle', name: 'Greg McKeon', fields: ['avatar', 'description'] },
|
||||
{ entity: 'teamskeet', name: 'Abella Danger', fields: ['description', 'gender', 'avatar', 'measurements', 'birthPlace', 'nationality', 'ethnicity', 'height', 'weight', 'hairColor', 'hasPiercings'] },
|
||||
{ entity: 'mylf', name: 'Eliza Ibarra', fields: ['avatar', 'gender', 'measurements', 'nationality', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||
{ entity: 'familystrokes', name: 'Eliza Ibarra', fields: ['avatar', 'gender', 'measurements', 'nationality', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||
{ entity: 'freeuse', name: 'Gabbie Carter', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'swappz', name: 'Riley Reid', fields: ['avatar', 'gender', 'measurements', 'nationality', 'ethnicity', 'hairColor', 'hasPiercings', 'height', 'weight'] },
|
||||
{ entity: 'pervz', name: 'Kenzie Reeves', fields: ['avatar', 'gender', 'measurements', 'nationality', 'ethnicity', 'hairColor'] },
|
||||
{ entity: 'sayuncle', name: 'Greg McKeon', fields: ['avatar', 'gender', 'description'] },
|
||||
// mike adriano
|
||||
{ entity: 'trueanal', name: 'Brenna McKenna', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor', 'hasTattoos'] },
|
||||
{ entity: 'analonly', name: 'Lilith Grace', fields: ['avatar', 'gender', 'description', 'dateOfBirth', 'birthPlace', 'measurements', 'eyes', 'weight', 'height', 'hairColor'] },
|
||||
@@ -61,9 +73,11 @@ const actors = [
|
||||
{ entity: 'eyeontheguy', name: 'Tommy Gunn', fields: ['avatar'] },
|
||||
{ entity: 'interracialpovs', name: 'Nia Nacci', fields: ['avatar', 'aliases', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
|
||||
{ entity: 'povpornstars', name: 'Anna Bell Peaks', fields: ['avatar', 'aliases', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'naturalBoobs', 'socials'] },
|
||||
{ entity: 'seehimfuck', name: 'Sheem The Dream', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'penisLength', 'circumcised', 'socials'] },
|
||||
{ entity: 'seehimfuck', name: 'Sheem The Dream', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'hasTattoos', 'hasPiercings', 'penisLength', 'isCircumcised', 'socials'] },
|
||||
{ entity: 'hushpass', name: 'Dylan Ryder', fields: ['avatar'] },
|
||||
{ entity: 'interracialpass', name: 'Aidra Fox', fields: ['avatar', 'height', 'measurements'] },
|
||||
{ entity: 'ravebunnys', name: 'Lacey Jayne', fields: ['avatar', 'height', 'measurements', 'dateOfBirth', 'birthPlace', 'description', 'ethnicity', 'weight', 'naturalBoobs'] },
|
||||
{ entity: 'hotandtatted', name: 'Valerica Steele', url: 'https://hotandtatted.com/models/tattooed-pornstar-val-steele.html', fields: ['avatar', 'measurements', 'dateOfBirth', 'birthPlace', 'description', 'ethnicity', 'weight', 'foot', 'naturalBoobs', 'hasPiercings'] },
|
||||
// kelly madison / 8K
|
||||
{ entity: 'kellymadison', name: 'Ava Addams', fields: ['avatar', 'description', 'age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
|
||||
{ entity: '8kmembers', name: 'Angie Lynx', fields: ['age', 'height', 'measurements', 'birthPlace', 'dateOfBirth', 'ethnicity'] },
|
||||
@@ -76,7 +90,7 @@ const actors = [
|
||||
{ entity: 'letsdoeit', name: 'Nicole Doshi', fields: ['avatar', 'description', 'gender', 'height', 'measurements', 'birthPlace', 'dateOfBirth'] },
|
||||
{ entity: 'killergram', name: 'Clea Gaultier', fields: ['avatar', 'gender', 'hairColor', 'ethnicity'] },
|
||||
{ entity: 'men', name: 'Cade Maddox', fields: ['avatar', 'description', 'gender', 'height', 'ethnicity', 'penisLength', 'dateOfBirth', 'weight', 'hairColor', 'hasTattoos'] },
|
||||
{ entity: 'metrohd', name: 'April Olsen', fields: ['avatar', 'description', 'gender', 'birthPlace', 'height', 'measurements', 'dateOfBirth', 'weight'] },
|
||||
{ entity: 'metrohd', name: 'Vanna Bardot', fields: ['avatar', 'description', 'gender', 'birthPlace', 'height', 'measurements', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity', 'hasTattoos'] },
|
||||
{ entity: 'mofos', name: 'Ariana Starr', fields: ['avatar', 'description', 'gender', 'birthPlace', 'height', 'measurements', 'dateOfBirth'] },
|
||||
{ entity: 'propertysex', name: 'Desiree Dulce', fields: ['avatar', 'description', 'gender', 'birthPlace', 'height', 'measurements', 'dateOfBirth', 'weight', 'hairColor', 'ethnicity', 'hasPiercings'] },
|
||||
{ entity: 'sexyhub', name: 'Angie Lynx', fields: ['avatar', 'description', 'gender', 'birthPlace', 'height', 'measurements', 'dateOfBirth'] },
|
||||
@@ -102,9 +116,10 @@ const actors = [
|
||||
{ entity: 'burningangel', name: 'Joanna Angel', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'chaosmen', name: 'Kenzo Alvarez', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'dogfartnetwork', name: 'Liz Jordan', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'devilsfilm', name: 'Katrina Colt', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'diabolic', name: 'Kira Noir', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'evilangel', name: 'Francesca Le', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'fantasymassage', name: 'Cherry Kiss', fields: ['avatar', 'gender', 'description', 'eyes', 'hairColor'] },
|
||||
{ entity: 'fantasymassage', name: 'Cherry Kiss', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'filthykings', name: 'Armani Black', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'gangbangcreampie', name: 'Luna Lovely', fields: ['avatar', 'gender', 'description'] },
|
||||
{ entity: 'girlsway', name: 'Adriana Chechik', fields: ['avatar', 'gender', 'description', 'eyes', 'hairColor'] },
|
||||
@@ -130,12 +145,12 @@ const actors = [
|
||||
// perv city
|
||||
{ entity: 'pervcity', name: 'Brooklyn Gray', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'dpdiva', name: 'Liz Jordan', fields: ['avatar', 'description', 'dateOfBirth', 'birthPlace', 'ethnicity', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'bamvisions', name: 'Abella Danger', fields: ['avatar', 'height', 'measurements'] },
|
||||
{ entity: 'bamvisions', name: 'Abella Danger', fields: ['avatar', 'height', 'measurements'] }, // site offline as of 2026-02-25
|
||||
// radical
|
||||
{ entity: 'bjraw', name: 'Nikki Knightly', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'gotfilled', name: 'Alexa Chains', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'inserted', name: 'Anissa Kate', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'topwebmodels', name: 'Lexi Belle', fields: ['avatar', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'topwebmodels', name: 'Lexi Belle', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'purgatoryx', name: 'Kenzie Reeves', fields: ['avatar', 'description', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'lucidflix', name: 'Ava Amira', fields: ['avatar', 'description', 'gender'] },
|
||||
// wankz
|
||||
@@ -155,11 +170,12 @@ const actors = [
|
||||
{ entity: 'nfbusty', name: 'Ella Reese', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||
{ entity: 'nubilefilms', name: 'Jade Kimiko', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||
{ entity: 'thatsitcomshow', name: 'Casey Calvert', fields: ['avatar', 'age', 'residencePlace', 'height', 'measurements', 'photos'] },
|
||||
{ entity: 'brattysis', name: 'Scarlett Alexis', fields: ['avatar', 'age', 'height', 'measurements', 'description', 'residencePlace', 'photos'] },
|
||||
// porndoe
|
||||
{ entity: 'vipsexvault', name: 'Amirah Adara', fields: ['avatar', 'nationality', 'placeOfBirth', 'age', 'naturalBoobs', 'hairColor', 'description'] },
|
||||
{ entity: 'amateureuro', name: 'Luna Oara', fields: ['avatar', 'nationality', 'placeOfBirth', 'age', 'naturalBoobs', 'description'] },
|
||||
{ entity: 'mamacitaz', name: 'Julia De Lucia', fields: ['avatar', 'nationality', 'placeOfBirth', 'age', 'naturalBoobs', 'description', 'hairColor'] },
|
||||
{ entity: 'transbella', name: 'Kalena Rios', fields: ['avatar', 'nationality', 'placeOfBirth', 'age', 'naturalBoobs', 'description', 'hairColor'] },
|
||||
{ entity: 'vipsexvault', name: 'Amirah Adara', fields: ['avatar', 'nationality', 'birthPlace', 'age', 'naturalBoobs', 'hairColor', 'description'] },
|
||||
{ entity: 'amateureuro', name: 'Luna Oara', fields: ['avatar', 'nationality', 'birthPlace', 'age', 'naturalBoobs', 'description'] },
|
||||
{ entity: 'mamacitaz', name: 'Julia De Lucia', fields: ['avatar', 'nationality', 'birthPlace', 'age', 'naturalBoobs', 'description', 'hairColor'] },
|
||||
{ entity: 'transbella', name: 'Kalena Rios', fields: ['avatar', 'nationality', 'birthPlace', 'age', 'naturalBoobs', 'description', 'hairColor'] },
|
||||
// snow valley group
|
||||
{ entity: 'spermmania', name: 'Lya Cutie', fields: ['avatar', 'age', 'height', 'cup', 'bust', 'waist', 'hip'] },
|
||||
{ entity: 'cospuri', name: 'Ria Kurumi', fields: ['avatar', 'birthPlace', 'description', 'height', 'cup', 'bust', 'waist', 'hip'] },
|
||||
@@ -200,46 +216,57 @@ const actors = [
|
||||
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
|
||||
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
|
||||
// exploitedx
|
||||
{ entity: 'exploitedx', name: 'Amber Moore', url: 'https://exploitedcollegegirls.com/models/AmberMoore.html', fields: ['avatar', 'description', 'age', 'height', 'measurements'] },
|
||||
{ entity: 'exploitedx', name: 'Amber Moore', url: 'https://exploitedcollegegirls.com/models/Amber-Moore.html', fields: ['avatar', 'description', 'age', 'height', 'measurements'] },
|
||||
{ entity: 'exploitedx', name: 'Amber Moore', url: 'https://bbcsurprise.com/models/AmberMoore.html', fields: ['avatar', 'age'] },
|
||||
{ entity: 'exploitedx', name: 'Megan Marx', url: 'https://excogigirls.com/models/megan-marx.html', fields: ['avatar', 'description', 'age', 'height', 'measurements'] },
|
||||
{ entity: 'exploitedx', name: 'Sophie Hunt', url: 'https://www.backroomcastingcouch.com/models/Sophie-Hunt.html', fields: ['avatar', 'age'] },
|
||||
{ entity: 'exploitedx', name: 'Lao Latina', url: 'https://hotmilfsfuck.com/models/Lao-Latina.html', fields: ['avatar', 'description', 'age', 'height', 'measurements'] },
|
||||
// model media
|
||||
{ entity: 'jerkaoke', name: 'Harley Haze', fields: ['avatar', 'description', 'height', 'weight', 'banner', 'photos'] },
|
||||
{ entity: 'modelmediaasia', name: 'Li WeiWei', fields: ['avatar', 'entryId', 'gender', 'alias', 'height', 'weight', 'bust', 'waist', 'hip', 'socials'] },
|
||||
{ entity: 'delphine', name: 'Bridgette B', fields: ['avatar', 'measurements', 'birthPlace'] },
|
||||
// etc.
|
||||
{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
|
||||
{ entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] },
|
||||
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
|
||||
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
|
||||
{ entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] },
|
||||
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
|
||||
{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
|
||||
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
|
||||
{ entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] },
|
||||
{ entity: 'cumlouder', name: 'Valentina Nappi', fields: ['avatar', 'nationality', 'dateOfBirth', 'height', 'weight', 'eyes', 'hairColor', 'description', 'socials'] },
|
||||
{ entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] },
|
||||
{ entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
|
||||
{ entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
|
||||
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
|
||||
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||
{ entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] },
|
||||
{ entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||
{ entity: 'inthecrack', name: 'Vicki Chase', fields: ['dateOfBirth', 'height', 'weight', 'ethnicity', 'birthPlace'] },
|
||||
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
|
||||
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
|
||||
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
|
||||
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
|
||||
{ entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] },
|
||||
{ entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] },
|
||||
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
|
||||
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
||||
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||
{ entity: 'pornworld', name: 'Veronica Leal', fields: ['avatar', 'nationality', 'age'] },
|
||||
{ entity: 'private', name: 'Cherry Kiss', fields: ['avatar', 'description', 'nationality', 'measurements', 'height', 'weight', 'hairColor', 'eye', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings'] },
|
||||
{ entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
|
||||
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
||||
{ entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] },
|
||||
{ entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] },
|
||||
{ entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] },
|
||||
{ entity: 'tokyohot', name: 'Mai Kawana', url: 'https://my.tokyo-hot.com/cast/2099/', fields: ['avatar', 'birthPlace', 'height', 'cup', 'bust', 'waist', 'hip', 'hairStyle', 'shoeSize', 'bloodType'] },
|
||||
{ entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
|
||||
{ entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] },
|
||||
{ entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] },
|
||||
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
||||
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
||||
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
|
||||
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
|
||||
{ entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] },
|
||||
{ entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] },
|
||||
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
|
||||
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
|
||||
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
|
||||
{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
|
||||
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
|
||||
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
|
||||
{ entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] }, // sic
|
||||
{ entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] }, // sic
|
||||
{ entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, // sic
|
||||
{ entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] }, // sic
|
||||
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] }, // sic
|
||||
{ entity: 'wakeupnfuck', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
|
||||
];
|
||||
|
||||
const actorScrapers = scrapers.actors;
|
||||
const source = argv.source?.[0] || null;
|
||||
const sources = argv.sources || null;
|
||||
|
||||
async function validateUrl(url, mime = 'image/') {
|
||||
if (!url) {
|
||||
@@ -254,7 +281,7 @@ async function validateUrl(url, mime = 'image/') {
|
||||
return false;
|
||||
}
|
||||
|
||||
const res = await fetch(href, {
|
||||
const res = await unprint.get(href, {
|
||||
headers: {
|
||||
Referer: url.referer || new URL(href).origin,
|
||||
},
|
||||
@@ -272,20 +299,21 @@ const validators = {
|
||||
description: (value) => typeof value === 'string' && value.length > 3,
|
||||
birthPlace: (value) => typeof value === 'string' && value.length > 1, // may return US or USA
|
||||
birthCountry: (value) => typeof value === 'string' && value.length > 1,
|
||||
nationality: (value) => typeof value === 'string' && value.length > 3,
|
||||
nationality: (value) => typeof value === 'string' && value.length > 2,
|
||||
// height: (value) => !!Number(value) || /\d'\d{1,2}"/.test(value), // ft in needs to be converted
|
||||
height: (value) => !!Number(value) && value > 130,
|
||||
weight: (value) => !!Number(value) && value > 40,
|
||||
eyes: (value) => typeof value === 'string' && value.length > 3,
|
||||
hairColor: (value) => typeof value === 'string' && value.length > 3,
|
||||
hairColor: (value) => typeof value === 'string' && value.length > 2,
|
||||
measurements: (value) => /(\d+)([a-z]+)?(?:\s*[-x]\s*(\d+)\s*[-x]\s*(\d+))?/i.test(value), // from actors module
|
||||
dateOfBirth: (value) => value instanceof Date && !Number.isNaN(value.getFullYear()),
|
||||
hasTattoos: (value) => typeof value === 'boolean',
|
||||
hasPiercings: (value) => typeof value === 'boolean',
|
||||
avatar: async (value) => [].concat(value).reduce(async (chain, url) => {
|
||||
// testing all avatar fallbacks is too time-consuming, just ensure one is valid
|
||||
const acc = await chain;
|
||||
|
||||
if (!acc) {
|
||||
if (acc) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
@@ -304,9 +332,9 @@ const validators = {
|
||||
|
||||
// profiler in this context is shorthand for profile scraper
|
||||
async function init() {
|
||||
const entitiesBySlug = await fetchEntitiesBySlug(Object.keys(actorScrapers), { types: ['channel', 'network', 'info'], prefer: 'channel' });
|
||||
const entitiesBySlug = await fetchEntitiesBySlug(Object.keys(actorScrapers), { types: ['channel', 'network', 'info'], prefer: 'options' });
|
||||
|
||||
Object.entries(actorScrapers).reduce(async (chain, [entitySlug, scraper]) => {
|
||||
await Object.entries(actorScrapers).reduce(async (chain, [entitySlug, scraper]) => {
|
||||
await chain;
|
||||
|
||||
const entity = entitiesBySlug[entitySlug] || null;
|
||||
@@ -314,13 +342,12 @@ async function init() {
|
||||
|
||||
const tests = actors.filter((actor) => actor.entity === entitySlug);
|
||||
|
||||
// TODO: remove when all tests are written
|
||||
if (tests.length === 0) {
|
||||
console.log('TODO', entitySlug);
|
||||
return;
|
||||
}
|
||||
|
||||
if (source && source !== entitySlug) {
|
||||
if (sources && !sources.includes(entitySlug)) {
|
||||
// console.log('____', entitySlug);
|
||||
return;
|
||||
}
|
||||
@@ -347,22 +374,27 @@ async function init() {
|
||||
assert.fail('profile not found');
|
||||
}
|
||||
|
||||
console.log(profile);
|
||||
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
|
||||
if (argv.inspect) {
|
||||
console.log(omit(profile, ['scenes']));
|
||||
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
|
||||
}
|
||||
|
||||
await Promise.all(actor.fields.map(async (field) => {
|
||||
assert.ok(
|
||||
validators[field]
|
||||
? await validators[field](profile[field])
|
||||
: typeof profile[field] !== 'undefined',
|
||||
`broken field ${field}, got ${profile[field]}`,
|
||||
`broken field ${field}, got ${profile[field]} for ${actor.name}`,
|
||||
);
|
||||
}));
|
||||
})));
|
||||
});
|
||||
}, Promise.resolve());
|
||||
|
||||
console.log(actors.length);
|
||||
|
||||
await knex.destroy();
|
||||
await unprint.closeAllBrowsers();
|
||||
}
|
||||
|
||||
init();
|
||||
|
||||
Reference in New Issue
Block a user