Filtering undefined scenes property from movies. Added movie page scraper to Elegant Angel.

This commit is contained in:
DebaucheryLibrarian 2020-08-08 18:10:59 +02:00
parent 7bfa5a6cc4
commit a7d5bef93f
7 changed files with 177 additions and 128 deletions

View File

@ -43,6 +43,7 @@ export default {
.tiles {
display: grid;
grid-template-columns: repeat(auto-fill, 15rem);
grid-template-columns: repeat(auto-fill, minmax(30rem, 1fr));
grid-gap: 1rem;
}
</style>

View File

@ -1,21 +1,27 @@
<template>
<div class="tile">
<div class="cover">
<img
v-if="movie.covers[0]"
:src="`/media/${movie.covers[0].thumbnail}`"
class="front"
<div class="movie">
<router-link
:to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
class="cover"
>
<img
v-if="movie.covers[0]"
:src="`/media/${movie.covers[0].thumbnail}`"
>
</router-link>
<img
v-if="movie.covers[1]"
:src="`/media/${movie.covers[1].thumbnail}`"
class="back"
>
<div class="info">
<router-link
:to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
class="title-link"
>
<h3 class="title">{{ movie.title }}</h3>
</router-link>
</div>
</div>
<div class="details">{{ movie.entity.name }}</div>
<h3 class="title">{{ movie.title }}</h3>
</div>
</template>
@ -39,6 +45,15 @@ export default {
font-size: 0;
}
/* Root element of the movie tile; a flex container for its direct children. */
.movie {
display: flex;
}
/* Link wrapping the movie title: coloured via the --text custom property, without an underline. */
.title-link {
color: var(--text);
text-decoration: none;
}
.details {
color: var(--text-light);
background: var(--profile);
@ -48,23 +63,11 @@ export default {
}
.cover {
width: 12rem;
img {
width: 100%;
}
.back {
display: none;
}
&:hover {
.back {
display: block;
}
.front {
display: none;
}
}
}
.title {

View File

@ -48,7 +48,7 @@ const routes = [
name: 'scene',
},
{
path: '/movie/:releaseId/:releaseSlug?',
path: '/movie/:movieId/:movieSlug?',
component: Release,
name: 'movie',
},

View File

@ -600,75 +600,6 @@ exports.up = knex => Promise.resolve()
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies', (table) => {
table.increments('id', 16);
table.integer('entity_id', 12)
.references('id')
.inTable('entities')
.notNullable();
table.integer('studio_id', 12)
.references('id')
.inTable('entities');
table.text('entry_id');
table.unique(['entity_id', 'entry_id']);
table.text('url', 1000);
table.text('title');
table.text('slug');
table.timestamp('date');
table.index('date');
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
.defaultTo('day');
table.text('description');
table.boolean('deep');
table.text('deep_url', 1000);
table.text('comment');
table.integer('created_batch_id', 12)
.references('id')
.inTable('batches');
table.integer('updated_batch_id', 12)
.references('id')
.inTable('batches');
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_covers', (table) => {
table.integer('release_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique(['release_id', 'media_id']);
}))
.then(() => knex.schema.createTable('movies_trailers', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique('movie_id');
}))
.then(() => knex.schema.createTable('releases', (table) => {
table.increments('id', 16);
@ -734,22 +665,6 @@ exports.up = knex => Promise.resolve()
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('releases_movies', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.integer('scene_id', 16)
.notNullable()
.references('id')
.inTable('releases');
table.unique(['movie_id', 'scene_id']);
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('releases_directors', (table) => {
table.integer('release_id', 16)
.notNullable()
@ -846,6 +761,90 @@ exports.up = knex => Promise.resolve()
.references('id')
.inTable('releases');
}))
.then(() => knex.schema.createTable('movies', (table) => {
table.increments('id', 16);
table.integer('entity_id', 12)
.references('id')
.inTable('entities')
.notNullable();
table.integer('studio_id', 12)
.references('id')
.inTable('entities');
table.text('entry_id');
table.unique(['entity_id', 'entry_id']);
table.text('url', 1000);
table.text('title');
table.text('slug');
table.timestamp('date');
table.index('date');
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
.defaultTo('day');
table.text('description');
table.boolean('deep');
table.text('deep_url', 1000);
table.text('comment');
table.integer('created_batch_id', 12)
.references('id')
.inTable('batches');
table.integer('updated_batch_id', 12)
.references('id')
.inTable('batches');
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_scenes', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.integer('scene_id', 16)
.notNullable()
.references('id')
.inTable('releases');
table.unique(['movie_id', 'scene_id']);
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_covers', (table) => {
table.integer('release_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique(['release_id', 'media_id']);
}))
.then(() => knex.schema.createTable('movies_trailers', (table) => {
table.integer('release_id', 16)
.unique()
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
}))
// SEARCH
.then(() => { // eslint-disable-line arrow-body-style
// allow vim fold
@ -992,6 +991,7 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style
DROP TABLE IF EXISTS releases_search CASCADE;
DROP TABLE IF EXISTS movies_covers CASCADE;
DROP TABLE IF EXISTS movies_scenes CASCADE;
DROP TABLE IF EXISTS movies_trailers CASCADE;
DROP TABLE IF EXISTS batches CASCADE;

View File

@ -8,7 +8,7 @@ const initServer = require('./web/server');
const knex = require('./knex');
const fetchUpdates = require('./updates');
const { fetchScenes, fetchMovies } = require('./deep');
const { storeReleases, storeMovies, updateReleasesSearch } = require('./store-releases');
const { storeScenes, storeMovies, updateReleasesSearch } = require('./store-releases');
const { scrapeActors } = require('./actors');
const getFileEntries = require('./utils/file-entries');
@ -37,21 +37,22 @@ async function init() {
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
const sceneMovies = deepScenes && argv.movie && deepScenes.map(scene => scene.movie).filter(Boolean);
const sceneMovies = deepScenes && deepScenes.map(scene => scene.movie).filter(Boolean);
const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]);
const movieScenes = deepMovies.map(movie => movie.scenes).flat().filter(Boolean);
const deepMovieScenes = await fetchScenes(movieScenes);
if (argv.inspect) {
console.log(util.inspect(deepScenes));
console.log(util.inspect(deepMovies));
}
if (argv.save) {
if (deepScenes.length > 0) {
await storeReleases(deepScenes);
if (deepScenes.length + deepMovieScenes.length > 0) {
await storeScenes(deepScenes.concat(deepMovieScenes));
}
console.log(deepMovies);
if (deepMovies.length > 0) {
await storeMovies(deepMovies);
}

View File

@ -1,6 +1,7 @@
'use strict';
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
@ -27,14 +28,30 @@ function scrapeAll(scenes, channel) {
});
}
async function scrapeScene({ query, html }, url) {
/**
 * Build base scene releases from the scene tiles listed on a movie page.
 *
 * @param {Array<{ query: Object }>} scenes - one initialized query context per scene tile
 * @returns {Object[]} one release per tile with title, url, entryId, duration, actors and poster;
 *   entryId is null when the tile has no link or the link path carries no numeric ID
 */
function scrapeMovieScenes(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.scene-title a');
		release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' });

		// A tile without a link, or with a path lacking a numeric ID, must not throw here:
		// an exception inside this map would discard every scene of the movie.
		const idMatch = release.url
			? new URL(release.url).pathname.match(/\/(\d+)/)
			: null;

		release.entryId = idMatch ? idMatch[1] : null;

		// NOTE(review): the * 60 suggests .scene-length is listed in minutes — confirm against the site
		release.duration = query.number('.scene-length') * 60;

		release.actors = query.cnts('.scene-cast-list a');
		release.poster = query.img('a img');

		return release;
	});
}
async function scrapeRelease({ query, html }, url, channel, type = 'scene') {
const release = {};
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
release.title = query.cnt('.scene-page .description');
release.title = query.cnt('.scene-page .description, .video-page .description');
release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);
release.duration = query.number('.release-date:last-child') * 60;
release.actors = query.all('.video-performer').map((el) => {
const avatar = qu.query.img(el, 'img', 'data-bgsrc');
@ -48,8 +65,21 @@ async function scrapeScene({ query, html }, url) {
};
});
release.tags = query.cnts('.tags a');
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
release.tags = query.cnts('.tags a, .categories a');
release.studio = slugify(query.cnt('.studio span:last-child'), '');
if (type === 'scene') {
release.director = query.text('.director');
release.duration = query.number('.release-date:last-child') * 60;
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
}
if (type === 'movie') {
release.director = query.cnt('.director a');
release.covers = query.imgs('.carousel-item > img');
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
}
release.photos = query.imgs('#dv_frames a > img').map(photo => [
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
@ -70,7 +100,6 @@ async function scrapeScene({ query, html }, url) {
}
}
// console.log(release);
return release;
}
@ -116,7 +145,20 @@ async function fetchScene(url, channel) {
});
if (res.ok) {
return scrapeScene(res.item, url, channel);
return scrapeRelease(res.item, url, channel);
}
return res.status;
}
async function fetchMovie(url, channel) {
const res = await qu.get(url, null, null, {
// invalid certificate
rejectUnauthorized: false,
});
if (res.ok) {
return scrapeRelease(res.item, url, channel, 'movie');
}
return res.status;
@ -139,4 +181,5 @@ module.exports = {
fetchLatest,
fetchScene,
fetchMovies,
fetchMovie,
};

View File

@ -215,7 +215,7 @@ async function updateReleasesSearch(releaseIds) {
}
}
async function storeReleases(releases) {
async function storeScenes(releases) {
if (releases.length === 0) {
return [];
}
@ -256,9 +256,10 @@ async function storeReleases(releases) {
}
async function storeMovies(movies) {
const { uniqueReleases } = await filterDuplicateReleases(movies);
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
const curatedMovieEntries = movies.map(release => curateReleaseEntry(release, batchId, null, 'movie'));
const curatedMovieEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId, null, 'movie'));
const storedMovies = await knex.batchInsert('movies', curatedMovieEntries).returning('*');
const moviesWithId = attachReleaseIds(movies, storedMovies);
@ -269,7 +270,7 @@ async function storeMovies(movies) {
}
module.exports = {
storeReleases,
storeScenes,
storeMovies,
updateReleasesSearch,
};