Use batch insert for various actor scraping inserts.

This commit is contained in:
DebaucheryLibrarian
2026-03-01 04:49:01 +01:00
parent 198f08cb3a
commit b89f25405a

View File

@@ -22,6 +22,7 @@ const actorScrapers = require('./scrapers/scrapers').actors;
const argv = require('./argv'); const argv = require('./argv');
const include = require('./utils/argv-include')(argv); const include = require('./utils/argv-include')(argv);
const bulkInsert = require('./utils/bulk-insert'); const bulkInsert = require('./utils/bulk-insert');
const batchInsert = require('./utils/batch-insert');
const chunk = require('./utils/chunk'); const chunk = require('./utils/chunk');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
@@ -46,6 +47,7 @@ const commonContext = {
slugify, slugify,
omit, omit,
unprint, unprint,
batchInsert,
}; };
const hairColors = { const hairColors = {
@@ -544,7 +546,7 @@ async function curateProfile(profile, actor) {
async function insertProfiles(newProfiles) { async function insertProfiles(newProfiles) {
if (newProfiles.length > 0) { if (newProfiles.length > 0) {
const entries = await bulkInsert('actors_profiles', newProfiles); const entries = await batchInsert('actors_profiles', newProfiles);
logger.info(`Saved ${newProfiles.length} actor profiles`); logger.info(`Saved ${newProfiles.length} actor profiles`);
@@ -606,10 +608,7 @@ async function upsertProfiles(profiles) {
})); }));
if (avatars.length > 0) { if (avatars.length > 0) {
await knex('actors_avatars') await batchInsert('actors_avatars', avatars, { conflict: false });
.insert(avatars)
.onConflict()
.ignore();
} }
} }
} }
@@ -759,7 +758,8 @@ function curateSocials(socials, platformsByHostname) {
async function associateSocials(profiles) { async function associateSocials(profiles) {
const { platformsByHostname } = await actorsCommon; const { platformsByHostname } = await actorsCommon;
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id])); const profileEntryChunks = await Promise.all(chunk(profiles).map((profilesChunk) => knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profilesChunk.map((profile) => [profile.actorId, profile.entity.id]))));
const profileEntries = profileEntryChunks.flat();
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => { const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
if (!acc[profileEntry.actor_id]) { if (!acc[profileEntry.actor_id]) {
@@ -784,16 +784,14 @@ async function associateSocials(profiles) {
return; return;
} }
await knex('actors_socials') await batchInsert('actors_socials', curateSocials(profile.social, platformsByHostname).map((social) => ({
.insert(curateSocials(profile.social, platformsByHostname).map((social) => ({ platform: social.platform,
platform: social.platform, handle: social.handle,
handle: social.handle, url: social.url,
url: social.url, actor_id: profile.actorId,
actor_id: profile.actorId, })), {
// profile_id: profileId, conflict: false,
}))) });
.onConflict()
.ignore();
}, Promise.resolve()); }, Promise.resolve());
} }