Made unprint the default and marked scrapers that still need to be updated as deprecated. Merged movie tags and movie scene tags for the manticore movies table. Removed the poster being set to null in deep merge; if it does have a purpose, it should be restored with an annotation. Refactored the Brad Montana scraper.

This commit is contained in:
DebaucheryLibrarian
2024-08-20 02:33:42 +02:00
parent f81ec6f393
commit d3a978c501
29 changed files with 116 additions and 113 deletions

View File

@@ -199,11 +199,11 @@ function filterInternalDuplicateReleases(releases) {
.flat();
}
async function filterDuplicateReleases(releases) {
async function filterDuplicateReleases(releases, domain = 'releases') {
const internalUniqueReleases = filterInternalDuplicateReleases(releases);
const internalUniqueReleaseChunks = chunk(internalUniqueReleases);
const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex('releases')
const duplicateReleaseEntryChunks = await Promise.map(internalUniqueReleaseChunks, async (internalUniqueReleasesChunk) => knex(domain)
.whereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk.map((release) => [release.entryId, release.entity.id]))
.orWhereIn(['entry_id', 'entity_id'], internalUniqueReleasesChunk
// scene IDs shared across network, mark as duplicate so scene can be updated with channel if only available on release day (i.e. Perv City)
@@ -349,7 +349,7 @@ async function storeMovies(movies, useBatchId) {
return [];
}
const { uniqueReleases } = await filterDuplicateReleases(movies);
const { uniqueReleases } = await filterDuplicateReleases(movies, 'movies');
const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id');
const curatedMovieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'movie')));
@@ -357,9 +357,10 @@ async function storeMovies(movies, useBatchId) {
const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true);
const moviesWithId = attachReleaseIds(movies, storedMovies);
await updateMovieSearch(moviesWithId.map((movie) => movie.id));
await associateReleaseMedia(moviesWithId, 'movie');
await associateReleaseTags(moviesWithId, 'movie');
await updateMovieSearch(moviesWithId.map((movie) => movie.id));
await associateReleaseMedia(moviesWithId, 'movie');
return moviesWithId;
}
@@ -369,7 +370,7 @@ async function storeSeries(series, useBatchId) {
return [];
}
const { uniqueReleases } = await filterDuplicateReleases(series);
const { uniqueReleases } = await filterDuplicateReleases(series, 'series');
const [{ id: batchId }] = useBatchId ? [{ id: useBatchId }] : await knex('batches').insert({ showcased: argv.showcased, comment: null }).returning('id');
const curatedSerieEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId, null, 'serie')));
@@ -395,7 +396,7 @@ async function storeScenes(releases, useBatchId) {
const releasesWithStudios = await attachStudios(releasesWithBaseActors);
// uniqueness is entity ID + entry ID, filter uniques after adding entities
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios, 'releases');
const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map((release) => curateReleaseEntry(release, batchId)));
const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries);
@@ -433,6 +434,7 @@ async function storeScenes(releases, useBatchId) {
await associateSerieScenes(storedSeries, releasesWithId);
await associateDirectors(releasesWithId, batchId); // some directors may also be actors, don't associate at the same time
await updateSceneSearch(releasesWithId.map((release) => release.id));
// media is more error-prone, associate separately