Using batch insert for various actor scraping inserts.

This commit is contained in:
DebaucheryLibrarian
2026-03-01 04:49:01 +01:00
parent 198f08cb3a
commit b89f25405a

View File

@@ -22,6 +22,7 @@ const actorScrapers = require('./scrapers/scrapers').actors;
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const bulkInsert = require('./utils/bulk-insert');
const batchInsert = require('./utils/batch-insert');
const chunk = require('./utils/chunk');
const logger = require('./logger')(__filename);
@@ -46,6 +47,7 @@ const commonContext = {
slugify,
omit,
unprint,
batchInsert,
};
const hairColors = {
@@ -544,7 +546,7 @@ async function curateProfile(profile, actor) {
async function insertProfiles(newProfiles) {
if (newProfiles.length > 0) {
const entries = await bulkInsert('actors_profiles', newProfiles);
const entries = await batchInsert('actors_profiles', newProfiles);
logger.info(`Saved ${newProfiles.length} actor profiles`);
@@ -606,10 +608,7 @@ async function upsertProfiles(profiles) {
}));
if (avatars.length > 0) {
await knex('actors_avatars')
.insert(avatars)
.onConflict()
.ignore();
await batchInsert('actors_avatars', avatars, { conflict: false });
}
}
}
@@ -759,7 +758,8 @@ function curateSocials(socials, platformsByHostname) {
async function associateSocials(profiles) {
const { platformsByHostname } = await actorsCommon;
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
const profileEntryChunks = await Promise.all(chunk(profiles).map((profilesChunk) => knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profilesChunk.map((profile) => [profile.actorId, profile.entity.id]))));
const profileEntries = profileEntryChunks.flat();
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
if (!acc[profileEntry.actor_id]) {
@@ -784,16 +784,14 @@ async function associateSocials(profiles) {
return;
}
await knex('actors_socials')
.insert(curateSocials(profile.social, platformsByHostname).map((social) => ({
platform: social.platform,
handle: social.handle,
url: social.url,
actor_id: profile.actorId,
// profile_id: profileId,
})))
.onConflict()
.ignore();
await batchInsert('actors_socials', curateSocials(profile.social, platformsByHostname).map((social) => ({
platform: social.platform,
handle: social.handle,
url: social.url,
actor_id: profile.actorId,
})), {
conflict: false,
});
}, Promise.resolve());
}