Compare commits
10 Commits
experiment
...
d59a57f311
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d59a57f311 | ||
|
|
5e499c3685 | ||
|
|
17e5ce71b2 | ||
|
|
5352186319 | ||
|
|
e9ba02d65d | ||
|
|
39813d4461 | ||
|
|
829a285a2d | ||
|
|
a19a77e165 | ||
|
|
122dd3eaee | ||
|
|
18b219850e |
@@ -203,6 +203,19 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div
|
||||||
|
v-if="release.qualities"
|
||||||
|
class="row"
|
||||||
|
>
|
||||||
|
<span class="row-label">Available qualities</span>
|
||||||
|
|
||||||
|
<span
|
||||||
|
v-for="quality in release.qualities"
|
||||||
|
:key="quality"
|
||||||
|
class="quality"
|
||||||
|
>{{ quality }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div
|
<div
|
||||||
v-if="release.comment"
|
v-if="release.comment"
|
||||||
class="row"
|
class="row"
|
||||||
@@ -470,6 +483,16 @@ export default {
|
|||||||
text-overflow: ellipsis;
|
text-overflow: ellipsis;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.quality {
|
||||||
|
&::after {
|
||||||
|
content: 'p, ';
|
||||||
|
}
|
||||||
|
|
||||||
|
&:last-child::after {
|
||||||
|
content: 'p',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
.releases {
|
.releases {
|
||||||
margin: 0 0 .5rem 0;
|
margin: 0 0 .5rem 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ export default {
|
|||||||
selectableTags: [
|
selectableTags: [
|
||||||
'airtight',
|
'airtight',
|
||||||
'anal',
|
'anal',
|
||||||
|
'bdsm',
|
||||||
'blowbang',
|
'blowbang',
|
||||||
'blowjob',
|
'blowjob',
|
||||||
'creampie',
|
'creampie',
|
||||||
|
|||||||
@@ -367,6 +367,7 @@ const releaseFields = `
|
|||||||
date
|
date
|
||||||
datePrecision
|
datePrecision
|
||||||
slug
|
slug
|
||||||
|
qualities
|
||||||
shootId
|
shootId
|
||||||
productionDate
|
productionDate
|
||||||
comment
|
comment
|
||||||
@@ -475,6 +476,7 @@ const releaseFragment = `
|
|||||||
duration
|
duration
|
||||||
createdAt
|
createdAt
|
||||||
shootId
|
shootId
|
||||||
|
qualities
|
||||||
productionDate
|
productionDate
|
||||||
createdBatchId
|
createdBatchId
|
||||||
productionLocation
|
productionLocation
|
||||||
|
|||||||
@@ -89,6 +89,10 @@ module.exports = {
|
|||||||
'uksinners',
|
'uksinners',
|
||||||
// mindgeek
|
// mindgeek
|
||||||
'pornhub',
|
'pornhub',
|
||||||
|
// insex
|
||||||
|
'paintoy',
|
||||||
|
'aganmedon',
|
||||||
|
'sensualpain',
|
||||||
],
|
],
|
||||||
networks: [
|
networks: [
|
||||||
// dummy network for testing
|
// dummy network for testing
|
||||||
|
|||||||
25
migrations/20220330230122_stats.js
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
exports.up = async (knex) => knex.raw(`
|
||||||
|
CREATE MATERIALIZED VIEW entities_stats
|
||||||
|
AS
|
||||||
|
WITH RECURSIVE relations AS (
|
||||||
|
SELECT entities.id, entities.parent_id, count(releases.id) AS releases_count, count(releases.id) AS total_count
|
||||||
|
FROM entities
|
||||||
|
LEFT JOIN releases ON releases.entity_id = entities.id
|
||||||
|
GROUP BY entities.id
|
||||||
|
|
||||||
|
UNION ALL
|
||||||
|
|
||||||
|
SELECT entities.id AS entity_id, count(releases.id) AS releases_count, count(releases.id) + relations.total_count AS total_count
|
||||||
|
FROM entities
|
||||||
|
INNER JOIN relations ON relations.id = entities.parent_id
|
||||||
|
LEFT JOIN releases ON releases.entity_id = entities.id
|
||||||
|
GROUP BY entities.id
|
||||||
|
)
|
||||||
|
|
||||||
|
SELECT relations.id AS entity_id, relations.releases_count
|
||||||
|
FROM relations;
|
||||||
|
`);
|
||||||
|
|
||||||
|
exports.down = async (knex) => knex.raw(`
|
||||||
|
DROP MATERIALIZED VIEW entities_stats;
|
||||||
|
`);
|
||||||
7
migrations/20220331135618_qualities.js
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
exports.up = async (knex) => knex.schema.alterTable('releases', (table) => {
|
||||||
|
table.specificType('qualities', 'text[]');
|
||||||
|
});
|
||||||
|
|
||||||
|
exports.down = async (knex) => knex.schema.alterTable('releases', (table) => {
|
||||||
|
table.dropColumn('qualities');
|
||||||
|
});
|
||||||
12
migrations/_20220330230122_stats.js
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
exports.up = async (knex) => knex.raw(`
|
||||||
|
CREATE MATERIALIZED VIEW entities_stats
|
||||||
|
AS
|
||||||
|
SELECT entities.id AS entity_id, count(releases.id) AS releases_count
|
||||||
|
FROM entities
|
||||||
|
LEFT JOIN releases ON releases.entity_id = entities.id
|
||||||
|
GROUP BY entities.id;
|
||||||
|
`);
|
||||||
|
|
||||||
|
exports.down = async (knex) => knex.raw(`
|
||||||
|
DROP MATERIALIZED VIEW entities_stats;
|
||||||
|
`);
|
||||||
23
package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.212.9",
|
"version": "1.213.4",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.212.9",
|
"version": "1.213.4",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@casl/ability": "^5.2.2",
|
"@casl/ability": "^5.2.2",
|
||||||
@@ -11650,25 +11650,6 @@
|
|||||||
"webidl-conversions": "^3.0.0"
|
"webidl-conversions": "^3.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/node-fetch/node_modules/tr46": {
|
|
||||||
"version": "0.0.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
|
||||||
"integrity": "sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o="
|
|
||||||
},
|
|
||||||
"node_modules/node-fetch/node_modules/webidl-conversions": {
|
|
||||||
"version": "3.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
|
||||||
"integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
|
|
||||||
},
|
|
||||||
"node_modules/node-fetch/node_modules/whatwg-url": {
|
|
||||||
"version": "5.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
|
||||||
"integrity": "sha1-lmRU6HZUYuN2RNNib2dCzotwll0=",
|
|
||||||
"dependencies": {
|
|
||||||
"tr46": "~0.0.3",
|
|
||||||
"webidl-conversions": "^3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/node-gyp": {
|
"node_modules/node-gyp": {
|
||||||
"version": "7.1.2",
|
"version": "7.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-7.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-7.1.2.tgz",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "traxxx",
|
"name": "traxxx",
|
||||||
"version": "1.212.9",
|
"version": "1.213.4",
|
||||||
"description": "All the latest porn releases in one place",
|
"description": "All the latest porn releases in one place",
|
||||||
"main": "src/app.js",
|
"main": "src/app.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
BIN
public/img/logos/pervcity/dpdiva.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB |
|
Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB |
|
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
BIN
public/img/logos/pervcity/lazy/dpdiva.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.7 KiB |
BIN
public/img/logos/pervcity/lazy/favicon_dark.png
Normal file
|
After Width: | Height: | Size: 3.7 KiB |
BIN
public/img/logos/pervcity/lazy/favicon_light.png
Normal file
|
After Width: | Height: | Size: 3.7 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB |
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 3.8 KiB |
|
Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB |
|
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
BIN
public/img/logos/pervcity/thumbs/dpdiva.png
Normal file
|
After Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.7 KiB |
BIN
public/img/logos/pervcity/thumbs/favicon_dark.png
Normal file
|
After Width: | Height: | Size: 3.7 KiB |
BIN
public/img/logos/pervcity/thumbs/favicon_light.png
Normal file
|
After Width: | Height: | Size: 3.7 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
@@ -4219,7 +4219,6 @@ const sites = [
|
|||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
parameters: {
|
parameters: {
|
||||||
scraper: 'alt',
|
|
||||||
latest: 'https://www.sexuallybroken.com/sb',
|
latest: 'https://www.sexuallybroken.com/sb',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -4230,13 +4229,20 @@ const sites = [
|
|||||||
url: 'https://www.infernalrestraints.com',
|
url: 'https://www.infernalrestraints.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.infernalrestraints.com/ir',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'hardtied',
|
slug: 'hardtied',
|
||||||
name: 'Hardtied',
|
name: 'Hardtied',
|
||||||
|
alias: ['ht'],
|
||||||
url: 'https://www.hardtied.com',
|
url: 'https://www.hardtied.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.hardtied.com/ht',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'realtimebondage',
|
slug: 'realtimebondage',
|
||||||
@@ -4245,6 +4251,9 @@ const sites = [
|
|||||||
url: 'https://www.realtimebondage.com',
|
url: 'https://www.realtimebondage.com',
|
||||||
tags: ['bdsm', 'live'],
|
tags: ['bdsm', 'live'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.realtimebondage.com/rtb',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'topgrl',
|
slug: 'topgrl',
|
||||||
@@ -4254,7 +4263,6 @@ const sites = [
|
|||||||
tags: ['bdsm', 'femdom'],
|
tags: ['bdsm', 'femdom'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
parameters: {
|
parameters: {
|
||||||
scraper: 'alt',
|
|
||||||
latest: 'https://www.topgrl.com/tg',
|
latest: 'https://www.topgrl.com/tg',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -6909,6 +6917,13 @@ const sites = [
|
|||||||
tourId: 9,
|
tourId: 9,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'dpdiva',
|
||||||
|
name: 'DP Diva',
|
||||||
|
url: 'http://dpdiva.com',
|
||||||
|
parent: 'pervcity',
|
||||||
|
tags: ['dp', 'anal'],
|
||||||
|
},
|
||||||
// PIERRE WOODMAN
|
// PIERRE WOODMAN
|
||||||
{
|
{
|
||||||
slug: 'woodmancastingx',
|
slug: 'woodmancastingx',
|
||||||
|
|||||||
17
src/app.js
@@ -85,23 +85,6 @@ async function startMemorySample(snapshotTriggers = []) {
|
|||||||
}, config.memorySampling.sampleDuration);
|
}, config.memorySampling.sampleDuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function startMemorySample() {
|
|
||||||
await inspector.heap.enable();
|
|
||||||
await inspector.heap.startSampling();
|
|
||||||
|
|
||||||
// monitorMemory();
|
|
||||||
|
|
||||||
logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
|
|
||||||
|
|
||||||
setTimeout(async () => {
|
|
||||||
await stopMemorySample();
|
|
||||||
|
|
||||||
if (!done) {
|
|
||||||
await startMemorySample();
|
|
||||||
}
|
|
||||||
}, 30000);
|
|
||||||
}
|
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
try {
|
try {
|
||||||
if (argv.server) {
|
if (argv.server) {
|
||||||
|
|||||||
@@ -194,6 +194,7 @@ const { argv } = yargs
|
|||||||
alias: 'pics',
|
alias: 'pics',
|
||||||
})
|
})
|
||||||
.option('videos', {
|
.option('videos', {
|
||||||
|
alias: 'video',
|
||||||
describe: 'Include any trailers or teasers',
|
describe: 'Include any trailers or teasers',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
default: true,
|
default: true,
|
||||||
|
|||||||
44
src/media.js
@@ -21,6 +21,7 @@ const argv = require('./argv');
|
|||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const http = require('./utils/http');
|
const http = require('./utils/http');
|
||||||
const bulkInsert = require('./utils/bulk-insert');
|
const bulkInsert = require('./utils/bulk-insert');
|
||||||
|
const chunk = require('./utils/chunk');
|
||||||
const { get } = require('./utils/qu');
|
const { get } = require('./utils/qu');
|
||||||
|
|
||||||
const pipeline = util.promisify(stream.pipeline);
|
const pipeline = util.promisify(stream.pipeline);
|
||||||
@@ -63,10 +64,10 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
|
|||||||
? chunks.slice(0, -1).concat(chunks.slice(-1).reverse())
|
? chunks.slice(0, -1).concat(chunks.slice(-1).reverse())
|
||||||
: chunks;
|
: chunks;
|
||||||
|
|
||||||
const groupedMedias = lastPreferredChunks.map((chunk) => {
|
const groupedMedias = lastPreferredChunks.map((mediaChunk) => {
|
||||||
// merge chunked medias into single media with grouped fallback priorities,
|
// merge chunked medias into single media with grouped fallback priorities,
|
||||||
// so the first sources of each media is preferred over all second sources, etc.
|
// so the first sources of each media is preferred over all second sources, etc.
|
||||||
const sources = chunk
|
const sources = mediaChunk
|
||||||
.reduce((accSources, media) => {
|
.reduce((accSources, media) => {
|
||||||
media.sources.forEach((source, index) => {
|
media.sources.forEach((source, index) => {
|
||||||
if (!accSources[index]) {
|
if (!accSources[index]) {
|
||||||
@@ -82,8 +83,8 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
|
|||||||
.flat();
|
.flat();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: chunk[0].id,
|
id: mediaChunk[0].id,
|
||||||
role: chunk[0].role,
|
role: mediaChunk[0].role,
|
||||||
sources,
|
sources,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
@@ -235,22 +236,41 @@ async function findSourceDuplicates(baseMedias) {
|
|||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
|
|
||||||
const [existingSourceMedia, existingExtractMedia] = await Promise.all([
|
const [existingSourceMedia, existingExtractMedia] = await Promise.all([
|
||||||
knex('media').whereIn('source', sourceUrls),
|
// may try to check thousands of URLs at once, don't pass all of them to a single query
|
||||||
knex('media').whereIn('source_page', extractUrls),
|
chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
|
||||||
|
const accUrls = await chain;
|
||||||
|
const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);
|
||||||
|
|
||||||
|
return [...accUrls, ...existingUrls];
|
||||||
|
}, []),
|
||||||
|
chunk(extractUrls).reduce(async (chain, extractUrlsChunk) => {
|
||||||
|
const accUrls = await chain;
|
||||||
|
const existingUrls = await knex('media').whereIn('source_page', extractUrlsChunk);
|
||||||
|
|
||||||
|
return [...accUrls, ...existingUrls];
|
||||||
|
}, []),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
|
const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
|
||||||
const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');
|
const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');
|
||||||
|
|
||||||
return { existingSourceMediaByUrl, existingExtractMediaByUrl };
|
return {
|
||||||
|
existingSourceMediaByUrl,
|
||||||
|
existingExtractMediaByUrl,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function findHashDuplicates(medias) {
|
async function findHashDuplicates(medias) {
|
||||||
const hashes = medias.map((media) => media.meta?.hash || media.entry?.hash).filter(Boolean);
|
const hashes = medias.map((media) => media.meta?.hash || media.entry?.hash).filter(Boolean);
|
||||||
|
|
||||||
const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
|
const existingHashMediaEntries = await chunk(hashes, 2).reduce(async (chain, hashesChunk) => {
|
||||||
const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
|
const accHashes = await chain;
|
||||||
|
const existingHashes = await knex('media').whereIn('hash', hashesChunk);
|
||||||
|
|
||||||
|
return [...accHashes, ...existingHashes];
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
|
||||||
const uniqueHashMedias = medias.filter((media) => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
|
const uniqueHashMedias = medias.filter((media) => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
|
||||||
|
|
||||||
const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
|
const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
|
||||||
@@ -600,11 +620,11 @@ async function fetchSource(source, baseMedia) {
|
|||||||
const hashStream = new stream.PassThrough();
|
const hashStream = new stream.PassThrough();
|
||||||
let size = 0;
|
let size = 0;
|
||||||
|
|
||||||
hashStream.on('data', (chunk) => {
|
hashStream.on('data', (streamChunk) => {
|
||||||
size += chunk.length;
|
size += streamChunk.length;
|
||||||
|
|
||||||
if (hasherReady) {
|
if (hasherReady) {
|
||||||
hasher.write(chunk);
|
hasher.write(streamChunk);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,27 @@ const http = require('../utils/http');
|
|||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeLatest(scenes, site) {
|
function scrapeLatest(scenes, site) {
|
||||||
|
return scenes.map(({ query }) => {
|
||||||
|
const release = {};
|
||||||
|
|
||||||
|
release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
|
||||||
|
|
||||||
|
release.title = query.cnt('.has-text-weight-bold, .is-size-6');
|
||||||
|
release.date = query.date('span.tag', 'YYYY-MM-DD');
|
||||||
|
release.actors = query.cnts('a.tag');
|
||||||
|
|
||||||
|
const cover = query.img('.image img');
|
||||||
|
|
||||||
|
release.poster = cover.replace('poster_noplay', 'trailer_noplay');
|
||||||
|
release.covers = [cover];
|
||||||
|
|
||||||
|
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`;
|
||||||
|
|
||||||
|
return release;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeLatestLegacy(scenes, site) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||||
const release = {};
|
const release = {};
|
||||||
@@ -47,28 +68,35 @@ function scrapeLatest(scenes, site) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeLatestAlt(scenes, site) {
|
async function scrapeScene({ query }, url, channel, parameters, session) {
|
||||||
return scenes.map(({ query }) => {
|
const release = {};
|
||||||
const release = {};
|
|
||||||
|
|
||||||
release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
|
release.title = query.cnt('.columns div.is-size-5.has-text-weight-bold');
|
||||||
|
release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
|
||||||
|
release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
|
||||||
|
|
||||||
release.title = query.cnt('.has-text-weight-bold');
|
release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
|
||||||
release.date = query.date('span.tag', 'YYYY-MM-DD');
|
release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
|
||||||
release.actors = query.cnts('a.tag');
|
|
||||||
|
|
||||||
const cover = query.img('.image img');
|
release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
|
||||||
|
release.photos = Array.from(query.html('body > div:nth-child(6)').matchAll(/src="(http.*jpg)"/g), (match) => match[1]);
|
||||||
|
|
||||||
release.poster = cover.replace('poster_noplay', 'trailer_noplay');
|
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||||
release.covers = [cover];
|
|
||||||
|
|
||||||
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
release.trailer = query.video();
|
||||||
|
|
||||||
return release;
|
if (!release.trailer && parameters.includeTrailers) {
|
||||||
});
|
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
||||||
|
|
||||||
|
if (trailerRes.ok) {
|
||||||
|
release.trailer = trailerRes.body;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query }, site) {
|
function scrapeSceneLegacy({ query }, site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const titleEl = query.q('.articleTitleText');
|
const titleEl = query.q('.articleTitleText');
|
||||||
@@ -97,70 +125,34 @@ function scrapeScene({ query }, site) {
|
|||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeSceneAlt({ query }, url, channel, session) {
|
|
||||||
const release = {};
|
|
||||||
|
|
||||||
release.title = query.cnt('.columns div.is-size-5');
|
|
||||||
release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
|
|
||||||
release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
|
|
||||||
|
|
||||||
release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
|
|
||||||
release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
|
|
||||||
|
|
||||||
release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
|
|
||||||
release.photos = query.imgs('body > div:nth-child(6) img');
|
|
||||||
|
|
||||||
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
|
||||||
|
|
||||||
release.trailer = query.video();
|
|
||||||
|
|
||||||
if (!release.trailer) {
|
|
||||||
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
|
||||||
|
|
||||||
if (trailerRes.ok) {
|
|
||||||
release.trailer = trailerRes.body;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return release;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`)
|
const url = `${site.parameters.latest}/home.php?o=latest&p=${page}`;
|
||||||
// || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page
|
const res = await qu.getAll(url, 'body > .columns .column', { cookie: 'consent=yes' });
|
||||||
|| `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
|
||||||
|
|
||||||
const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column'))
|
|
||||||
// || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]'))
|
|
||||||
|| qu.get(url)); // JSON containing html as a property
|
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (site.parameters?.scraper === 'alt') {
|
return scrapeLatest(res.items, site);
|
||||||
return scrapeLatestAlt(res.items, site);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
if (site.slug === 'paintoy') {
|
|
||||||
return scrapeLatest(res.items, site);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchLatestLegacy(site, page = 1) {
|
||||||
const session = http.session();
|
const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||||
const res = await qu.get(url, null, null, { session });
|
const res = await qu.get(url); // JSON containing html as a property
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (site.parameters?.scraper === 'alt') {
|
return scrapeLatestLegacy(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
|
||||||
return scrapeSceneAlt(res.item, url, site, session);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return scrapeScene(res.item, site);
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchScene(url, site, baseRelease, parameters) {
|
||||||
|
const session = http.session();
|
||||||
|
const res = await qu.get(url, null, { cookie: 'consent=yes' }, { session });
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeScene(res.item, url, site, parameters, session);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
@@ -169,4 +161,8 @@ async function fetchScene(url, site) {
|
|||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
legacy: {
|
||||||
|
fetchLatest: fetchLatestLegacy,
|
||||||
|
scrapeScene: scrapeSceneLegacy,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -12,6 +12,13 @@ const channelCodes = {
|
|||||||
uha: 'upherasshole',
|
uha: 'upherasshole',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const qualities = {
|
||||||
|
v4k: 2160,
|
||||||
|
vFullHD: 1080,
|
||||||
|
vHD: 720,
|
||||||
|
vSD: 480,
|
||||||
|
};
|
||||||
|
|
||||||
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
|
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
|
||||||
|
|
||||||
function scrapeAll(scenes, entity) {
|
function scrapeAll(scenes, entity) {
|
||||||
@@ -42,9 +49,12 @@ function scrapeScene({ query }) {
|
|||||||
release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
|
release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
|
||||||
|
|
||||||
release.title = query.cnt('.infoHeader h1');
|
release.title = query.cnt('.infoHeader h1');
|
||||||
release.description = query.cnt('.infoBox p');
|
release.description = query.cnt('.description');
|
||||||
|
release.duration = query.duration('.tRuntime');
|
||||||
|
|
||||||
release.actors = query.cnts('.infoBox .tour_update_models a');
|
release.actors = query.cnts('.infoBox .tour_update_models a');
|
||||||
|
release.tags = query.cnts('.tagcats a');
|
||||||
|
release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean);
|
||||||
|
|
||||||
release.poster = query.img('.posterimg');
|
release.poster = query.img('.posterimg');
|
||||||
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality
|
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality
|
||||||
|
|||||||
@@ -38,11 +38,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
|
|||||||
date_precision: release.datePrecision,
|
date_precision: release.datePrecision,
|
||||||
slug,
|
slug,
|
||||||
description: release.description,
|
description: release.description,
|
||||||
|
qualities: release.qualities?.map(Number).filter(Boolean),
|
||||||
comment: release.comment,
|
comment: release.comment,
|
||||||
// director: release.director,
|
|
||||||
// likes: release.rating && release.rating.likes,
|
|
||||||
// dislikes: release.rating && release.rating.dislikes,
|
|
||||||
// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
|
|
||||||
deep: typeof release.deep === 'boolean' ? release.deep : false,
|
deep: typeof release.deep === 'boolean' ? release.deep : false,
|
||||||
deep_url: release.deepUrl,
|
deep_url: release.deepUrl,
|
||||||
updated_batch_id: batchId,
|
updated_batch_id: batchId,
|
||||||
|
|||||||