Rescraping upcoming scenes. Fixed language and scene deep scraping for Dorcel scraper.

This commit is contained in:
DebaucheryLibrarian
2021-06-02 03:27:32 +02:00
parent 42791c528e
commit c979173422
15 changed files with 105 additions and 15 deletions

View File

@@ -315,11 +315,33 @@ async function storeScenes(releases) {
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
const curatedNewReleaseEntries = await Promise.all(uniqueReleases.map(release => curateReleaseEntry(release, batchId)));
const storedReleases = await bulkInsert('releases', curatedNewReleaseEntries);
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
const releasesWithId = attachReleaseIds([].concat(uniqueReleases, duplicateReleases), [].concat(storedReleaseEntries, duplicateReleaseEntries));
const uniqueReleasesWithId = attachReleaseIds(uniqueReleases, storedReleaseEntries);
const duplicateReleasesWithId = attachReleaseIds(duplicateReleases, duplicateReleaseEntries);
const releasesWithId = uniqueReleasesWithId.concat(duplicateReleasesWithId);
try {
await knex.raw(`
UPDATE releases
SET url = COALESCE(new.url, releases.url),
date = COALESCE(new.date, releases.date),
title = COALESCE(new.title, releases.title),
description = COALESCE(new.description, releases.description),
duration = COALESCE(new.duration, releases.duration),
deep = new.url IS NOT NULL,
updated_at = NOW()
FROM json_to_recordset(:scenes)
AS new(id int, url text, date timestamptz, title text, description text, duration integer, deep boolean)
WHERE releases.id = new.id;
`, {
scenes: JSON.stringify(duplicateReleasesWithId),
});
} catch (error) {
console.log(error);
}
const [actors] = await Promise.all([
associateActors(releasesWithId, batchId),