Improved movie scraping.

This commit is contained in:
DebaucheryLibrarian 2021-01-29 02:38:05 +01:00
parent 4d89256a4c
commit b7aaeada45
2 changed files with 25 additions and 36 deletions

View File

@@ -8,7 +8,7 @@ const initServer = require('./web/server');
const knex = require('./knex');
const fetchUpdates = require('./updates');
const { fetchScenes, fetchMovies } = require('./deep');
const { storeScenes, storeMovies, updateReleasesSearch } = require('./store-releases');
const { storeScenes, storeMovies, updateReleasesSearch, associateMovieScenes } = require('./store-releases');
const { scrapeActors, flushActors, flushProfiles, interpolateProfiles } = require('./actors');
const { flushEntities } = require('./entities');
const { deleteScenes, deleteMovies, flushScenes, flushBatches } = require('./releases');
@@ -79,7 +79,7 @@ async function init() {
const sceneMovies = deepScenes ? deepScenes.map(scene => ({ ...scene.movie, entity: scene.entity })).filter(Boolean) : [];
const deepMovies = argv.sceneMovies || argv.movie ? await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]) : sceneMovies;
const movieScenes = argv.movieScenes ? deepMovies.map(movie => movie.scenes?.map(scene => ({ ...scene, entity: movie.entity }))).flat().filter(Boolean) : [];
const movieScenes = argv.movieScenes ? deepMovies.map(movie => movie.scenes?.map(scene => ({ ...scene, movie, entity: movie.entity }))).flat().filter(Boolean) : [];
const deepMovieScenes = argv.deep ? await fetchScenes(movieScenes) : movieScenes;
if (argv.report) {
@@ -88,15 +88,10 @@ async function init() {
}
if (argv.save) {
const storedScenes = deepScenes.length > 0 || deepMovieScenes.length > 0
? await storeScenes(deepScenes)
: [];
const storedMovies = await storeMovies(deepMovies);
const storedScenes = await storeScenes([...(deepScenes || []), ...(deepMovieScenes || [])]);
if (deepMovies.length > 0) {
const storedMovieScenes = await storeScenes(deepMovieScenes);
await storeMovies(deepMovies, [...(storedMovieScenes || []), ...storedScenes]);
}
await associateMovieScenes(storedMovies, storedScenes);
}
knex.destroy();

View File

@@ -290,7 +290,7 @@ async function storeClips(releases) {
}
async function storeScenes(releases) {
if (releases.length === 0) {
if (!releases || releases.length === 0) {
return [];
}
@@ -331,42 +331,36 @@ async function storeScenes(releases) {
}
async function associateMovieScenes(movies, movieScenes) {
const movieScenesByEntityIdAndEntryId = movieScenes.reduce((acc, scene) => ({
const moviesByEntityIdAndEntryId = movies.reduce((acc, movie) => ({
...acc,
[scene.entity.id]: {
...acc[scene.entity.id],
[scene.entryId]: scene,
[movie.entity.id]: {
...acc[movie.entity.id],
[movie.entryId]: movie,
},
}), {});
console.log('movies', movies, movieScenes);
const associations = movies.map((movie) => {
if (!movie.scenes || !movie.id) {
const associations = movieScenes.map((scene) => {
if (!scene.movie) {
return null;
}
console.log(movie, movie.scenes);
const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId];
return movie.scenes.map((scene) => {
const movieScene = movieScenesByEntityIdAndEntryId[movie.entity.id]?.[scene.entryId];
if (sceneMovie?.id) {
return {
movie_id: sceneMovie.id,
scene_id: scene.id,
};
}
if (movieScene?.id) {
return {
movie_id: movie.id,
scene_id: movieScene.id,
};
}
return null;
});
}).flat().filter(Boolean);
return null;
}).filter(Boolean);
await bulkInsert('movies_scenes', associations, false);
}
async function storeMovies(movies, movieScenes) {
if (movies.length === 0) {
async function storeMovies(movies) {
if (!movies || movies.length === 0) {
return [];
}
@@ -378,13 +372,13 @@ async function storeMovies(movies, movieScenes) {
const storedMovies = await bulkInsert('movies', curatedMovieEntries, ['entity_id', 'entry_id'], true);
const moviesWithId = attachReleaseIds(movies, storedMovies);
await associateMovieScenes(moviesWithId, movieScenes);
await associateReleaseMedia(moviesWithId, 'movie');
return storedMovies;
return moviesWithId;
}
module.exports = {
associateMovieScenes,
storeScenes,
storeMovies,
updateReleasesSearch,