Added movie support to MindGeek scraper.

This commit is contained in:
DebaucheryLibrarian
2022-03-04 23:31:59 +01:00
parent 50b7f521b5
commit c6e977f842
11 changed files with 122 additions and 50 deletions

View File

@@ -20,6 +20,7 @@ const scrapers = require('./scrapers/scrapers').actors;
const argv = require('./argv');
const include = require('./utils/argv-include')(argv);
const bulkInsert = require('./utils/bulk-insert');
const chunk = require('./utils/chunk');
const logger = require('./logger')(__filename);
const { toBaseReleases } = require('./deep');
@@ -1048,33 +1049,42 @@ async function flushProfiles(actorIdsOrNames) {
logger.info(`Removed ${deleteCount} profiles`);
}
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped, so the
// removed and the added lines of deleteActors are interleaved and many statements appear twice.
// Presumably the first signature and the single-pass query directly below it are the removed
// version, and the chunked Promise.map version is the added one — confirm against the repository.
async function deleteActors(actorIdsOrNames) {
const actors = await knex('actors')
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
.orWhere((builder) => {
builder
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
.whereNull('entity_id');
});
/**
 * Deletes the actors identified by the given IDs or names, together with the
 * releases linked to them through the releases_actors table.
 *
 * @param {Array<number|string>} allActorIdsOrNames - numbers are matched against
 *   actors.id, strings against actors.name (name matches only apply to rows whose
 *   entity_id is NULL).
 * @returns {Promise<number>} the summed result of the actor delete queries
 *   (presumably the number of deleted rows — confirm against the knex version in use).
 */
async function deleteActors(allActorIdsOrNames) {
// The input is split with the chunk() helper and the chunks are processed with at most
// 10 running concurrently. Promise.map is not a native Promise method; presumably Promise
// is Bluebird here — TODO confirm against the file's requires.
const deleteCounts = await Promise.map(chunk(allActorIdsOrNames), async (actorIdsOrNames) => {
const actors = await knex('actors')
.whereIn('id', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'number'))
.orWhere((builder) => {
// Name lookups are restricted to actors without an entity_id.
builder
.whereIn('name', actorIdsOrNames.filter((idOrName) => typeof idOrName === 'string'))
.whereNull('entity_id');
});
const actorIds = actors.map((actor) => actor.id);
const actorIds = actors.map((actor) => actor.id);
const sceneIds = await knex('releases_actors')
.select('releases.id')
.whereIn('actor_id', actorIds)
.leftJoin('releases', 'releases.id', 'releases_actors.release_id')
.pluck('id');
// Collect the IDs of the releases associated with the matched actors.
const sceneIds = await knex('releases_actors')
.select('releases.id')
.whereIn('actor_id', actorIds)
.leftJoin('releases', 'releases.id', 'releases_actors.release_id')
.pluck('id');
const [deletedScenesCount, deletedActorsCount] = await Promise.all([
deleteScenes(sceneIds),
knex('actors')
.whereIn('id', actorIds)
.delete(),
]);
// The scene deletion and the actor row deletion are started together and awaited jointly.
const [deletedScenesCount, deletedActorsCount] = await Promise.all([
deleteScenes(sceneIds),
knex('actors')
.whereIn('id', actorIds)
.delete(),
]);
return { deletedScenesCount, deletedActorsCount };
}, { concurrency: 10 });
// Sum the per-chunk counts into overall totals.
const deletedActorsCount = deleteCounts.reduce((acc, count) => acc + count.deletedActorsCount, 0);
const deletedScenesCount = deleteCounts.reduce((acc, count) => acc + count.deletedScenesCount, 0);
// Called once, after every chunk has finished.
await flushOrphanedMedia();
logger.info(`Removed ${deletedActorsCount} actors with ${deletedScenesCount} scenes`);
return deletedActorsCount;
}
async function flushActors() {