Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.

This commit is contained in:
2020-05-14 04:26:05 +02:00
parent f1eb29c713
commit 11eb66f834
178 changed files with 16594 additions and 16929 deletions

View File

@@ -4,54 +4,54 @@ const knex = require('../knex');
const logger = require('../logger')(__filename);
/**
 * Insert the items that are not yet present in `table` and update the ones that are.
 *
 * Existing rows are looked up with a single `whereIn` over the identifier column(s);
 * an item counts as a duplicate when its identifier values, joined into a string,
 * equal those of a row returned by that lookup.
 *
 * @param {string} table - Name of the table to write to.
 * @param {Object[]} items - Rows to insert or update, keyed by column name.
 * @param {string|string[]} [identifier=['id']] - Column name(s) that identify a row.
 * @param {*} [_knex] - Unused; kept so existing callers passing a fourth argument still work.
 * @returns {Promise<{inserted: Array, updated: Array}>} `inserted` is the value resolved by
 *   the insert query when that is an array (otherwise `[]`); `updated` is the results of
 *   the individual update queries concatenated into one array.
 */
async function upsert(table, items, identifier = ['id'], _knex) {
	const identifiers = Array.isArray(identifier) ? identifier : [identifier];

	// Identifier values joined with commas; used to compare stored rows with incoming items.
	const toKey = row => identifiers.map(identifierX => row[identifierX]).toString();

	const duplicates = await knex(table).whereIn(identifiers, items.map(item => identifiers.map(identifierX => item[identifierX])));

	// A Set instead of a plain-object lookup: with an object, an identifier such as
	// 'constructor' or 'toString' resolves to an inherited Object.prototype member and the
	// item would wrongly be treated as an existing row (updated, never inserted).
	const duplicateKeys = new Set(duplicates.map(toKey));

	const insert = [];
	const update = [];

	items.forEach((item) => {
		if (duplicateKeys.has(toKey(item))) {
			update.push(item);
			return;
		}

		insert.push(item);
	});

	if (knex) {
		logger.debug(`${table}: Inserting ${insert.length}`);
		logger.debug(`${table}: Updating ${update.length}`);

		// The insert and the update transaction are started together and awaited as a pair.
		const [inserted, updated] = await Promise.all([
			knex(table).returning('*').insert(insert),
			knex.transaction(async trx => Promise.all(update.map((item) => {
				// Match the existing row on exactly the identifier columns of this item.
				const clause = identifiers.reduce((acc, identifierX) => ({ ...acc, [identifierX]: item[identifierX] }), {});

				return trx
					.where(clause)
					.update(item)
					.into(table)
					.returning('*');
			}))),
		]);

		return {
			inserted: Array.isArray(inserted) ? inserted : [],
			// Each update resolves separately; merge the per-item results into one list.
			updated: updated.reduce((acc, updatedItems) => acc.concat(updatedItems), []),
		};
	}

	// NOTE(review): `knex` has already been called above, so this fallback looks
	// unreachable in practice; kept so the function's return shapes stay unchanged.
	return { insert, update };
}
module.exports = upsert;