Filtering invalid actors from releases before storing.
This commit is contained in:
parent
2801732f57
commit
c37d4ad01f
|
@ -114,29 +114,37 @@ function getAverage(items) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Normalize scraped actors into base actor records, dropping invalid entries.
 *
 * Each entry is either a name string, optionally suffixed with an entry ID as
 * `name:entryId`, or an object with a string `name` property plus any further
 * profile fields. Entries that are nullish, nameless, or whose name is not a
 * non-empty string are filtered out instead of throwing.
 *
 * @param {Array<string|Object>} actorsOrNames - scraped actor names or actor objects
 * @param {Object} [release] - release the actors belong to; its `entity.parent`,
 *   or else its `entity`, is attached to every base actor
 * @returns {Object[]} base actors with `name`, `slug`, `entryId`, `entity` and
 *   `hasProfile`; object entries keep their other properties as well
 */
function toBaseActors(actorsOrNames, release) {
  // a missing or malformed actor list yields no actors rather than a TypeError on .filter
  if (!Array.isArray(actorsOrNames)) {
    return [];
  }

  // identical for every actor of the release, so resolve it once
  const entity = release?.entity?.parent || release?.entity || null;

  const baseActors = actorsOrNames
    .filter((actorOrName) => {
      const rawName = typeof actorOrName === 'string' ? actorOrName : actorOrName?.name;

      // only non-empty strings can be split and capitalized below
      return typeof rawName === 'string' && rawName !== '';
    })
    .map((actorOrName) => {
      const isProfile = typeof actorOrName !== 'string'; // actor contains profile information
      const rawName = isProfile ? actorOrName.name : actorOrName;

      // names may carry an entry ID suffix, i.e. 'Jane Doe:1234'
      const [baseName, entryId] = rawName.split(':');

      const name = capitalize(baseName);
      const slug = slugify(name);

      const baseActor = {
        name,
        slug,
        entryId: entryId || actorOrName.entryId || null,
        entity,
        hasProfile: isProfile,
      };

      if (isProfile) {
        // keep the scraped profile fields, but let the normalized base fields win
        return {
          ...actorOrName,
          ...baseActor,
        };
      }

      return baseActor;
    });

  return baseActors;
}
|
||||||
|
|
||||||
function curateActor(actor, withDetails = false, isProfile = false) {
|
function curateActor(actor, withDetails = false, isProfile = false) {
|
||||||
|
@ -832,4 +840,5 @@ module.exports = {
|
||||||
fetchActor,
|
fetchActor,
|
||||||
scrapeActors,
|
scrapeActors,
|
||||||
searchActors,
|
searchActors,
|
||||||
|
toBaseActors,
|
||||||
};
|
};
|
||||||
|
|
|
@ -30,20 +30,20 @@ const { argv } = yargs
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'web',
|
alias: 'web',
|
||||||
})
|
})
|
||||||
.option('networks', {
|
.option('include-networks', {
|
||||||
describe: 'Network to scrape all channels from (overrides configuration)',
|
describe: 'Network to scrape all channels from (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'network',
|
alias: ['include-network', 'networks', 'network'],
|
||||||
})
|
})
|
||||||
.option('exclude-networks', {
|
.option('exclude-networks', {
|
||||||
describe: 'Network not to scrape any channels from (overrides configuration)',
|
describe: 'Network not to scrape any channels from (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'exclude-network',
|
alias: 'exclude-network',
|
||||||
})
|
})
|
||||||
.option('channels', {
|
.option('include-channels', {
|
||||||
describe: 'Channel to scrape (overrides configuration)',
|
describe: 'Channel to scrape (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'channel',
|
alias: ['include-channel', 'channels', 'channel'],
|
||||||
})
|
})
|
||||||
.option('exclude-channels', {
|
.option('exclude-channels', {
|
||||||
describe: 'Channel not to scrape (overrides configuration)',
|
describe: 'Channel not to scrape (overrides configuration)',
|
||||||
|
|
|
@ -64,8 +64,8 @@ async function fetchIncludedEntities() {
|
||||||
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
|
includeAll: !argv.networks && !argv.channels && !config.include?.networks && !config.include?.channels,
|
||||||
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
|
includedNetworks: argv.networks || (!argv.channels && config.include?.networks) || [],
|
||||||
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
|
includedChannels: argv.channels || (!argv.networks && config.include?.channels) || [],
|
||||||
excludedNetworks: argv.excludeNetworks || config.exclude?.networks || [],
|
excludedNetworks: argv.excludeNetworks || config.exclude?.networks.filter(network => !argv.networks?.includes(network)) || [], // ignore explicitly included networks
|
||||||
excludedChannels: argv.excludeChannels || config.exclude?.channels || [],
|
excludedChannels: argv.excludeChannels || config.exclude?.channels.filter(channel => !argv.channels?.includes(channel)) || [], // ignore explicitly included channels
|
||||||
};
|
};
|
||||||
|
|
||||||
const rawNetworks = await knex.raw(`
|
const rawNetworks = await knex.raw(`
|
||||||
|
|
|
@ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) {
|
||||||
.limit(faker.random.number({ min: 2, max: 15 }))
|
.limit(faker.random.number({ min: 2, max: 15 }))
|
||||||
.pluck('name');
|
.pluck('name');
|
||||||
|
|
||||||
release.actors = actors(release);
|
// release.actors = actors(release);
|
||||||
|
release.actors = [null, 'Charles Darwin'];
|
||||||
release.title = title(release);
|
release.title = title(release);
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
|
|
@ -9,7 +9,7 @@ const slugify = require('./utils/slugify');
|
||||||
const bulkInsert = require('./utils/bulk-insert');
|
const bulkInsert = require('./utils/bulk-insert');
|
||||||
const resolvePlace = require('./utils/resolve-place');
|
const resolvePlace = require('./utils/resolve-place');
|
||||||
const { formatDate } = require('./utils/qu');
|
const { formatDate } = require('./utils/qu');
|
||||||
const { associateActors, scrapeActors } = require('./actors');
|
const { associateActors, scrapeActors, toBaseActors } = require('./actors');
|
||||||
const { associateReleaseTags } = require('./tags');
|
const { associateReleaseTags } = require('./tags');
|
||||||
const { curateEntity } = require('./entities');
|
const { curateEntity } = require('./entities');
|
||||||
const { associateReleaseMedia } = require('./media');
|
const { associateReleaseMedia } = require('./media');
|
||||||
|
@ -291,7 +291,8 @@ async function storeScenes(releases) {
|
||||||
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
|
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
|
||||||
|
|
||||||
const releasesWithChannels = await attachChannelEntities(releases);
|
const releasesWithChannels = await attachChannelEntities(releases);
|
||||||
const releasesWithStudios = await attachStudios(releasesWithChannels);
|
const releasesWithBaseActors = releasesWithChannels.map(release => ({ ...release, actors: toBaseActors(release.actors) }));
|
||||||
|
const releasesWithStudios = await attachStudios(releasesWithBaseActors);
|
||||||
|
|
||||||
// uniqueness is entity ID + entry ID, filter uniques after adding entities
|
// uniqueness is entity ID + entry ID, filter uniques after adding entities
|
||||||
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
|
const { uniqueReleases, duplicateReleases, duplicateReleaseEntries } = await filterDuplicateReleases(releasesWithStudios);
|
||||||
|
|
Loading…
Reference in New Issue