Added chunking to media duplicate queries to avoid exceeding query parameter limits. Added DP Diva to Perv City (coming soon).

DebaucheryLibrarian 2022-04-02 00:32:23 +02:00
parent 17e5ce71b2
commit 5e499c3685
28 changed files with 65 additions and 29 deletions

View File

@@ -12,6 +12,7 @@ export default {
   selectableTags: [
     'airtight',
     'anal',
+    'bdsm',
     'blowbang',
     'blowjob',
     'creampie',

View File

@@ -0,0 +1,25 @@
+exports.up = async (knex) => knex.raw(`
+  CREATE MATERIALIZED VIEW entities_stats
+  AS
+  -- releases_count per entity, plus a running total_count that adds each parent's total onto its children
+  WITH RECURSIVE entity_releases AS (
+    SELECT entities.id, entities.parent_id, count(releases.id) AS releases_count
+    FROM entities
+    LEFT JOIN releases ON releases.entity_id = entities.id
+    GROUP BY entities.id
+  ), relations AS (
+    SELECT entity_releases.id, entity_releases.parent_id, entity_releases.releases_count, entity_releases.releases_count AS total_count
+    FROM entity_releases
+    UNION ALL
+    SELECT entity_releases.id, entity_releases.parent_id, entity_releases.releases_count, entity_releases.releases_count + relations.total_count AS total_count
+    FROM entity_releases
+    INNER JOIN relations ON relations.id = entity_releases.parent_id
+  )
+  SELECT relations.id AS entity_id, relations.releases_count
+  FROM relations;
+`);
+
+exports.down = async (knex) => knex.raw(`
+  DROP MATERIALIZED VIEW entities_stats;
+`);
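The materialized view only reflects the data as of its last refresh, so something will have to refresh it after new releases are imported; that call is not part of this commit. A minimal sketch of what it could look like through the same knex instance (helper name hypothetical):

// Hypothetical helper, not included in this commit: rebuild the stats view after an import run.
async function refreshEntitiesStats(knex) {
  // A plain refresh briefly blocks readers; REFRESH ... CONCURRENTLY would need a unique index on the view.
  await knex.raw('REFRESH MATERIALIZED VIEW entities_stats;');
}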

23 binary image files changed (between 2.4 KiB and 36 KiB); previews not shown.

View File

@@ -6917,6 +6917,13 @@ const sites = [
       tourId: 9,
     },
   },
+  {
+    slug: 'dpdiva',
+    name: 'DP Diva',
+    url: 'http://dpdiva.com',
+    parent: 'pervcity',
+    tags: ['dp', 'anal'],
+  },
   // PIERRE WOODMAN
   {
     slug: 'woodmancastingx',
View File

@@ -85,23 +85,6 @@ async function startMemorySample(snapshotTriggers = []) {
   }, config.memorySampling.sampleDuration);
 }

-async function startMemorySample() {
-  await inspector.heap.enable();
-  await inspector.heap.startSampling();
-
-  // monitorMemory();
-
-  logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
-
-  setTimeout(async () => {
-    await stopMemorySample();
-
-    if (!done) {
-      await startMemorySample();
-    }
-  }, 30000);
-}
-
 async function init() {
   try {
     if (argv.server) {

View File

@@ -21,6 +21,7 @@ const argv = require('./argv');
 const knex = require('./knex');
 const http = require('./utils/http');
 const bulkInsert = require('./utils/bulk-insert');
+const chunk = require('./utils/chunk');
 const { get } = require('./utils/qu');

 const pipeline = util.promisify(stream.pipeline);
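The chunk helper required here is not included in this diff. Judging from its call sites further down, chunk(sourceUrls) with no size argument and chunk(hashes, 2) with an explicit one, it presumably splits an array into fixed-size slices with some default size. A minimal sketch under that assumption (the default of 1000 is a guess, not the project's actual value):

// Hypothetical sketch of utils/chunk.js; the real implementation is not part of this diff.
function chunk(items, chunkSize = 1000) {
  const chunks = [];

  for (let i = 0; i < items.length; i += chunkSize) {
    chunks.push(items.slice(i, i + chunkSize));
  }

  return chunks;
}

module.exports = chunk;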
@@ -63,10 +64,10 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
     ? chunks.slice(0, -1).concat(chunks.slice(-1).reverse())
     : chunks;

-  const groupedMedias = lastPreferredChunks.map((chunk) => {
+  const groupedMedias = lastPreferredChunks.map((mediaChunk) => {
     // merge chunked medias into single media with grouped fallback priorities,
     // so the first sources of each media is preferred over all second sources, etc.
-    const sources = chunk
+    const sources = mediaChunk
       .reduce((accSources, media) => {
         media.sources.forEach((source, index) => {
           if (!accSources[index]) {
@@ -82,8 +83,8 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
       .flat();

     return {
-      id: chunk[0].id,
-      role: chunk[0].role,
+      id: mediaChunk[0].id,
+      role: mediaChunk[0].role,
       sources,
     };
   });
@@ -235,22 +236,41 @@ async function findSourceDuplicates(baseMedias) {
     .filter(Boolean);

   const [existingSourceMedia, existingExtractMedia] = await Promise.all([
-    knex('media').whereIn('source', sourceUrls),
-    knex('media').whereIn('source_page', extractUrls),
+    // we may try to check thousands of URLs at once, don't pass all of them to a single query
+    chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
+      const accUrls = await chain;
+      const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);
+
+      return [...accUrls, ...existingUrls];
+    }, []),
+    chunk(extractUrls).reduce(async (chain, extractUrlsChunk) => {
+      const accUrls = await chain;
+      const existingUrls = await knex('media').whereIn('source_page', extractUrlsChunk);
+
+      return [...accUrls, ...existingUrls];
+    }, []),
   ]);

   const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
   const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');

-  return { existingSourceMediaByUrl, existingExtractMediaByUrl };
+  return {
+    existingSourceMediaByUrl,
+    existingExtractMediaByUrl,
+  };
 }

 async function findHashDuplicates(medias) {
   const hashes = medias.map((media) => media.meta?.hash || media.entry?.hash).filter(Boolean);

-  const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
-  const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
+  const existingHashMediaEntries = await chunk(hashes, 2).reduce(async (chain, hashesChunk) => {
+    const accHashes = await chain;
+    const existingHashes = await knex('media').whereIn('hash', hashesChunk);
+
+    return [...accHashes, ...existingHashes];
+  }, []);
+
+  const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');

   const uniqueHashMedias = medias.filter((media) => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);

   const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
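Reducing over a promise chain like this runs the chunked queries one after another, which keeps the number of concurrent connections and bound parameters small at the cost of some latency. If throughput mattered more than connection pressure, the same lookup could run the chunks in parallel; a sketch of that alternative (not what this commit does, chunk size picked arbitrarily):

// Alternative sketch, not used by this commit: query each chunk in parallel and flatten the rows.
const existingHashMediaEntries = (await Promise.all(
  chunk(hashes, 100).map((hashesChunk) => knex('media').whereIn('hash', hashesChunk)),
)).flat();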
@@ -600,11 +620,11 @@ async function fetchSource(source, baseMedia) {
   const hashStream = new stream.PassThrough();
   let size = 0;

-  hashStream.on('data', (chunk) => {
-    size += chunk.length;
+  hashStream.on('data', (streamChunk) => {
+    size += streamChunk.length;

     if (hasherReady) {
-      hasher.write(chunk);
+      hasher.write(streamChunk);
     }
   });
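Renaming the callback parameters chunk to mediaChunk and streamChunk above presumably keeps them from shadowing the newly required chunk helper: with the old name, any call to the helper inside those callbacks would resolve to the parameter instead. A minimal illustration, with hypothetical names:

const chunk = require('./utils/chunk'); // the helper added in this commit

someStream.on('data', (chunk) => { // parameter shadows the required helper
  chunk(['a', 'b']); // TypeError: chunk is not a function, because `chunk` is the stream buffer here
});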