Added chunking to media duplicate queries to avoid exceeding the query parameter limit. Added DP Diva to Perv City (coming soon).

This commit is contained in:
DebaucheryLibrarian 2022-04-02 00:32:23 +02:00
parent 17e5ce71b2
commit 5e499c3685
28 changed files with 65 additions and 29 deletions
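
Note: the media changes below rely on a small chunk helper (./utils/chunk) that is not part of this diff. Chunking keeps each whereIn query under the driver's bind-parameter ceiling (PostgreSQL's wire protocol allows at most 65535 bound parameters per statement). A minimal sketch of what such a helper could look like, assuming the (items, chunkSize) signature used in the diff; the default size here is illustrative and not taken from the repository:

// Hypothetical sketch of utils/chunk — the actual helper in the repository may differ.
function chunk(items, chunkSize = 1000) {
	// split `items` into consecutive slices of at most `chunkSize` elements
	const chunks = [];

	for (let i = 0; i < items.length; i += chunkSize) {
		chunks.push(items.slice(i, i + chunkSize));
	}

	return chunks;
}

module.exports = chunk;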

View File

@@ -12,6 +12,7 @@ export default {
 	selectableTags: [
 		'airtight',
 		'anal',
+		'bdsm',
 		'blowbang',
 		'blowjob',
 		'creampie',

View File

@@ -0,0 +1,26 @@
+exports.up = async (knex) => knex.raw(`
+	CREATE MATERIALIZED VIEW entities_stats
+	AS
+	WITH RECURSIVE relations AS (
+		-- pair every entity with itself and with each of its descendants
+		SELECT entities.id AS entity_id, entities.id AS descendant_id
+		FROM entities
+		UNION ALL
+		SELECT relations.entity_id, entities.id AS descendant_id
+		FROM entities
+		INNER JOIN relations ON relations.descendant_id = entities.parent_id
+	)
+	SELECT
+		relations.entity_id,
+		-- releases attached to the entity itself
+		count(releases.id) FILTER (WHERE releases.entity_id = relations.entity_id) AS releases_count,
+		-- releases attached to the entity or any of its descendants
+		count(releases.id) AS total_count
+	FROM relations
+	LEFT JOIN releases ON releases.entity_id = relations.descendant_id
+	GROUP BY relations.entity_id;
+`);
+
+exports.down = async (knex) => knex.raw(`
+	DROP MATERIALIZED VIEW entities_stats;
+`);
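
Since entities_stats is a materialized view, its counts only change when the view is refreshed. A sketch of how a refresh could be triggered from application code after a scrape run, assuming the same knex instance used elsewhere in this commit; this call is not part of the commit itself:

async function refreshEntitiesStats(knex) {
	// plain REFRESH locks the view against reads while it rebuilds;
	// REFRESH MATERIALIZED VIEW CONCURRENTLY would avoid that, but needs a unique index on the view
	await knex.raw('REFRESH MATERIALIZED VIEW entities_stats');
}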

Binary files not shown (23 image files changed: 7 added, 16 modified; sizes range from 2.4 KiB to 36 KiB).

View File

@@ -6917,6 +6917,13 @@ const sites = [
 			tourId: 9,
 		},
 	},
+	{
+		slug: 'dpdiva',
+		name: 'DP Diva',
+		url: 'http://dpdiva.com',
+		parent: 'pervcity',
+		tags: ['dp', 'anal'],
+	},
 	// PIERRE WOODMAN
 	{
 		slug: 'woodmancastingx',

View File

@@ -85,23 +85,6 @@ async function startMemorySample(snapshotTriggers = []) {
 	}, config.memorySampling.sampleDuration);
 }

-async function startMemorySample() {
-	await inspector.heap.enable();
-	await inspector.heap.startSampling();
-
-	// monitorMemory();
-
-	logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
-
-	setTimeout(async () => {
-		await stopMemorySample();
-
-		if (!done) {
-			await startMemorySample();
-		}
-	}, 30000);
-}
-
 async function init() {
 	try {
 		if (argv.server) {

View File

@@ -21,6 +21,7 @@ const argv = require('./argv');
 const knex = require('./knex');
 const http = require('./utils/http');
 const bulkInsert = require('./utils/bulk-insert');
+const chunk = require('./utils/chunk');
 const { get } = require('./utils/qu');

 const pipeline = util.promisify(stream.pipeline);
@@ -63,10 +64,10 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
 		? chunks.slice(0, -1).concat(chunks.slice(-1).reverse())
 		: chunks;

-	const groupedMedias = lastPreferredChunks.map((chunk) => {
+	const groupedMedias = lastPreferredChunks.map((mediaChunk) => {
 		// merge chunked medias into single media with grouped fallback priorities,
 		// so the first sources of each media is preferred over all second sources, etc.
-		const sources = chunk
+		const sources = mediaChunk
 			.reduce((accSources, media) => {
 				media.sources.forEach((source, index) => {
 					if (!accSources[index]) {
@@ -82,8 +83,8 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
 			.flat();

 		return {
-			id: chunk[0].id,
-			role: chunk[0].role,
+			id: mediaChunk[0].id,
+			role: mediaChunk[0].role,
 			sources,
 		};
 	});
@@ -235,22 +236,41 @@ async function findSourceDuplicates(baseMedias) {
 		.filter(Boolean);

 	const [existingSourceMedia, existingExtractMedia] = await Promise.all([
-		knex('media').whereIn('source', sourceUrls),
-		knex('media').whereIn('source_page', extractUrls),
+		// may try to check thousands of URLs at once, don't pass all of them to a single query
+		chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
+			const accUrls = await chain;
+			const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);
+
+			return [...accUrls, ...existingUrls];
+		}, []),
+		chunk(extractUrls).reduce(async (chain, extractUrlsChunk) => {
+			const accUrls = await chain;
+			const existingUrls = await knex('media').whereIn('source_page', extractUrlsChunk);
+
+			return [...accUrls, ...existingUrls];
+		}, []),
 	]);

 	const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
 	const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');

-	return { existingSourceMediaByUrl, existingExtractMediaByUrl };
+	return {
+		existingSourceMediaByUrl,
+		existingExtractMediaByUrl,
+	};
 }

 async function findHashDuplicates(medias) {
 	const hashes = medias.map((media) => media.meta?.hash || media.entry?.hash).filter(Boolean);

-	const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
-	const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
+	const existingHashMediaEntries = await chunk(hashes, 2).reduce(async (chain, hashesChunk) => {
+		const accHashes = await chain;
+		const existingHashes = await knex('media').whereIn('hash', hashesChunk);
+
+		return [...accHashes, ...existingHashes];
+	}, []);
+	const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');

 	const uniqueHashMedias = medias.filter((media) => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);

 	const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
@@ -600,11 +620,11 @@ async function fetchSource(source, baseMedia) {
 	const hashStream = new stream.PassThrough();
 	let size = 0;

-	hashStream.on('data', (chunk) => {
-		size += chunk.length;
+	hashStream.on('data', (streamChunk) => {
+		size += streamChunk.length;

 		if (hasherReady) {
-			hasher.write(chunk);
+			hasher.write(streamChunk);
 		}
 	});
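
For reference, the reduce chains above await each accumulator before issuing the next whereIn, so the chunked queries run one after another rather than in parallel. The same lookup could be factored into a helper reusing the chunk helper from ./utils/chunk; the name and default chunk size below are illustrative only and not part of this commit:

// Illustrative alternative to the reduce chains: fetch matching rows chunk by chunk.
async function chunkedWhereIn(knexInstance, table, column, values, chunkSize = 1000) {
	const rows = [];

	for (const valuesChunk of chunk(values, chunkSize)) {
		// awaiting inside the loop keeps the queries sequential, like the reduce chains above
		rows.push(...await knexInstance(table).whereIn(column, valuesChunk));
	}

	return rows;
}

// e.g. const existingHashMediaEntries = await chunkedWhereIn(knex, 'media', 'hash', hashes);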