Generalized Adult Empire subsite scraper, added West Coast Productions.

This commit is contained in:
DebaucheryLibrarian
2021-08-30 01:13:32 +02:00
parent b8657cb6e6
commit 7bd858f96b
97 changed files with 161 additions and 47 deletions

View File

@@ -242,7 +242,7 @@ async function findSourceDuplicates(baseMedias) {
const existingSourceMediaByUrl = itemsByKey(existingSourceMedia, 'source');
const existingExtractMediaByUrl = itemsByKey(existingExtractMedia, 'source_page');
- return [existingSourceMediaByUrl, existingExtractMediaByUrl];
+ return { existingSourceMediaByUrl, existingExtractMediaByUrl };
}
async function findHashDuplicates(medias) {
@@ -285,7 +285,7 @@ async function findHashDuplicates(medias) {
}))
.concat(selfDuplicateMedias);
- return [selfUniqueHashMedias, existingHashMedias];
+ return { uniqueHashMedias: selfUniqueHashMedias, existingHashMedias };
}
async function extractSource(baseSource, { existingExtractMediaByUrl }) {
@@ -741,7 +741,7 @@ function curateMediaEntry(media, index) {
async function storeMedias(baseMedias, options) {
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
- const [existingSourceMediaByUrl, existingExtractMediaByUrl] = await findSourceDuplicates(baseMedias);
+ const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
const fetchedMedias = await Promise.map(
baseMedias,
@@ -749,7 +749,7 @@ async function storeMedias(baseMedias, options) {
{ concurrency: 100 }, // don't overload disk (or network, although this has its own throttling)
);
- const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
+ const { uniqueHashMedias, existingHashMedias } = await findHashDuplicates(fetchedMedias);
const savedMedias = await Promise.map(
uniqueHashMedias,
@@ -770,7 +770,7 @@ async function storeMedias(baseMedias, options) {
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
try {
- await bulkInsert('media', newMediaEntries);
+ await bulkInsert('media', newMediaEntries, false);
return [...newMediaWithEntries, ...existingHashMedias];
} catch (error) {