Added chunking to media duplicate queries to avoid overloading query parameters. Added DP Diva to Perv City (coming soon).
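The chunked queries in src/media.js below pull in a ./utils/chunk helper that is not included in this diff. As a point of reference, here is a minimal sketch of such a helper, assuming it only splits an array into fixed-size batches; the default batch size of 1000 is illustrative, not taken from the repository:

// Hypothetical sketch of ./utils/chunk (not shown in this commit): split an
// array into consecutive slices of at most `chunkSize` items, so that large
// whereIn() lists can be queried in batches rather than in one huge query.
'use strict';

function chunk(items, chunkSize = 1000) {
  const chunks = [];

  for (let i = 0; i < items.length; i += chunkSize) {
    chunks.push(items.slice(i, i + chunkSize));
  }

  return chunks;
}

// e.g. chunk(['a', 'b', 'c'], 2) => [['a', 'b'], ['c']]
module.exports = chunk;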
@@ -12,6 +12,7 @@ export default {
   selectableTags: [
     'airtight',
     'anal',
+    'bdsm',
     'blowbang',
     'blowjob',
     'creampie',
@@ -0,0 +1,25 @@
+exports.up = async (knex) => knex.raw(`
+  CREATE MATERIALIZED VIEW entities_stats
+  AS
+  WITH RECURSIVE relations AS (
+    SELECT entities.id, entities.parent_id, count(releases.id) AS releases_count, count(releases.id) AS total_count
+    FROM entities
+    LEFT JOIN releases ON releases.entity_id = entities.id
+    GROUP BY entities.id
+
+    UNION ALL
+
+    SELECT entities.id AS entity_id, count(releases.id) AS releases_count, count(releases.id) + relations.total_count AS total_count
+    FROM entities
+    INNER JOIN relations ON relations.id = entities.parent_id
+    LEFT JOIN releases ON releases.entity_id = entities.id
+    GROUP BY entities.id
+  )
+
+  SELECT relations.id AS entity_id, relations.releases_count
+  FROM relations;
+`);
+
+exports.down = async (knex) => knex.raw(`
+  DROP MATERIALIZED VIEW entities_stats;
+`);
[Image assets added/updated; the web diff rendered only their dimensions and file sizes.]
@@ -6917,6 +6917,13 @@ const sites = [
       tourId: 9,
     },
   },
+  {
+    slug: 'dpdiva',
+    name: 'DP Diva',
+    url: 'http://dpdiva.com',
+    parent: 'pervcity',
+    tags: ['dp', 'anal'],
+  },
   // PIERRE WOODMAN
   {
     slug: 'woodmancastingx',
src/app.js
@@ -85,23 +85,6 @@ async function startMemorySample(snapshotTriggers = []) {
   }, config.memorySampling.sampleDuration);
 }

-async function startMemorySample() {
-  await inspector.heap.enable();
-  await inspector.heap.startSampling();
-
-  // monitorMemory();
-
-  logger.info(`Start heap sampling, memory usage: ${process.memoryUsage.rss() / 1000000} MB`);
-
-  setTimeout(async () => {
-    await stopMemorySample();
-
-    if (!done) {
-      await startMemorySample();
-    }
-  }, 30000);
-}
-
 async function init() {
   try {
     if (argv.server) {
src/media.js
@@ -21,6 +21,7 @@ const argv = require('./argv');
 const knex = require('./knex');
 const http = require('./utils/http');
 const bulkInsert = require('./utils/bulk-insert');
+const chunk = require('./utils/chunk');
 const { get } = require('./utils/qu');

 const pipeline = util.promisify(stream.pipeline);
@@ -63,10 +64,10 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
     ? chunks.slice(0, -1).concat(chunks.slice(-1).reverse())
     : chunks;

-  const groupedMedias = lastPreferredChunks.map((chunk) => {
+  const groupedMedias = lastPreferredChunks.map((mediaChunk) => {
     // merge chunked medias into single media with grouped fallback priorities,
     // so the first sources of each media is preferred over all second sources, etc.
-    const sources = chunk
+    const sources = mediaChunk
       .reduce((accSources, media) => {
         media.sources.forEach((source, index) => {
           if (!accSources[index]) {
@@ -82,8 +83,8 @@ function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
       .flat();

     return {
-      id: chunk[0].id,
-      role: chunk[0].role,
+      id: mediaChunk[0].id,
+      role: mediaChunk[0].role,
       sources,
     };
   });
@@ -235,22 +236,41 @@ async function findSourceDuplicates(baseMedias) {
     .filter(Boolean);

   const [existingSourceMedia, existingExtractMedia] = await Promise.all([
-    knex('media').whereIn('source', sourceUrls),
-    knex('media').whereIn('source_page', extractUrls),
+    // we may try to check thousands of URLs at once, don't pass all of them to a single query
+    chunk(sourceUrls).reduce(async (chain, sourceUrlsChunk) => {
+      const accUrls = await chain;
+      const existingUrls = await knex('media').whereIn('source', sourceUrlsChunk);
+
+      return [...accUrls, ...existingUrls];
+    }, []),
+    chunk(extractUrls).reduce(async (chain, extractUrlsChunk) => {
+      const accUrls = await chain;
+      const existingUrls = await knex('media').whereIn('source_page', extractUrlsChunk);
+
+      return [...accUrls, ...existingUrls];
+    }, []),
   ]);

   const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
   const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');

-  return { existingSourceMediaByUrl, existingExtractMediaByUrl };
+  return {
+    existingSourceMediaByUrl,
+    existingExtractMediaByUrl,
+  };
 }

 async function findHashDuplicates(medias) {
   const hashes = medias.map((media) => media.meta?.hash || media.entry?.hash).filter(Boolean);

-  const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
-  const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
+  const existingHashMediaEntries = await chunk(hashes, 2).reduce(async (chain, hashesChunk) => {
+    const accHashes = await chain;
+    const existingHashes = await knex('media').whereIn('hash', hashesChunk);
+
+    return [...accHashes, ...existingHashes];
+  }, []);
+
+  const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
   const uniqueHashMedias = medias.filter((media) => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);

   const { selfDuplicateMedias, selfUniqueMediasByHash } = uniqueHashMedias.reduce((acc, media) => {
@@ -600,11 +620,11 @@ async function fetchSource(source, baseMedia) {
   const hashStream = new stream.PassThrough();
   let size = 0;

-  hashStream.on('data', (chunk) => {
-    size += chunk.length;
+  hashStream.on('data', (streamChunk) => {
+    size += streamChunk.length;

     if (hasherReady) {
-      hasher.write(chunk);
+      hasher.write(streamChunk);
     }
   });
