Filtering undefined scenes property from movies. Added movie page scraper to Elegant Angel.

This commit is contained in:
DebaucheryLibrarian 2020-08-08 18:10:59 +02:00
parent 7bfa5a6cc4
commit a7d5bef93f
7 changed files with 177 additions and 128 deletions

View File

@ -43,6 +43,7 @@ export default {
.tiles { .tiles {
display: grid; display: grid;
grid-template-columns: repeat(auto-fill, 15rem); grid-template-columns: repeat(auto-fill, minmax(30rem, 1fr));
grid-gap: 1rem;
} }
</style> </style>

View File

@ -1,21 +1,27 @@
<template> <template>
<div class="tile"> <div class="tile">
<div class="cover"> <div class="movie">
<router-link
:to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
class="cover"
>
<img <img
v-if="movie.covers[0]" v-if="movie.covers[0]"
:src="`/media/${movie.covers[0].thumbnail}`" :src="`/media/${movie.covers[0].thumbnail}`"
class="front"
> >
</router-link>
<img <div class="info">
v-if="movie.covers[1]" <router-link
:src="`/media/${movie.covers[1].thumbnail}`" :to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
class="back" class="title-link"
> >
<h3 class="title">{{ movie.title }}</h3>
</router-link>
</div>
</div> </div>
<div class="details">{{ movie.entity.name }}</div> <div class="details">{{ movie.entity.name }}</div>
<h3 class="title">{{ movie.title }}</h3>
</div> </div>
</template> </template>
@ -39,6 +45,15 @@ export default {
font-size: 0; font-size: 0;
} }
.movie {
display: flex;
}
.title-link {
color: var(--text);
text-decoration: none;
}
.details { .details {
color: var(--text-light); color: var(--text-light);
background: var(--profile); background: var(--profile);
@ -48,23 +63,11 @@ export default {
} }
.cover { .cover {
width: 12rem;
img { img {
width: 100%; width: 100%;
} }
.back {
display: none;
}
&:hover {
.back {
display: block;
}
.front {
display: none;
}
}
} }
.title { .title {

View File

@ -48,7 +48,7 @@ const routes = [
name: 'scene', name: 'scene',
}, },
{ {
path: '/movie/:releaseId/:releaseSlug?', path: '/movie/:movieId/:movieSlug?',
component: Release, component: Release,
name: 'movie', name: 'movie',
}, },

View File

@ -600,75 +600,6 @@ exports.up = knex => Promise.resolve()
table.datetime('created_at') table.datetime('created_at')
.defaultTo(knex.fn.now()); .defaultTo(knex.fn.now());
})) }))
.then(() => knex.schema.createTable('movies', (table) => {
table.increments('id', 16);
table.integer('entity_id', 12)
.references('id')
.inTable('entities')
.notNullable();
table.integer('studio_id', 12)
.references('id')
.inTable('entities');
table.text('entry_id');
table.unique(['entity_id', 'entry_id']);
table.text('url', 1000);
table.text('title');
table.text('slug');
table.timestamp('date');
table.index('date');
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
.defaultTo('day');
table.text('description');
table.boolean('deep');
table.text('deep_url', 1000);
table.text('comment');
table.integer('created_batch_id', 12)
.references('id')
.inTable('batches');
table.integer('updated_batch_id', 12)
.references('id')
.inTable('batches');
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_covers', (table) => {
table.integer('release_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique(['release_id', 'media_id']);
}))
.then(() => knex.schema.createTable('movies_trailers', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique('movie_id');
}))
.then(() => knex.schema.createTable('releases', (table) => { .then(() => knex.schema.createTable('releases', (table) => {
table.increments('id', 16); table.increments('id', 16);
@ -734,22 +665,6 @@ exports.up = knex => Promise.resolve()
table.datetime('created_at') table.datetime('created_at')
.defaultTo(knex.fn.now()); .defaultTo(knex.fn.now());
})) }))
.then(() => knex.schema.createTable('releases_movies', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.integer('scene_id', 16)
.notNullable()
.references('id')
.inTable('releases');
table.unique(['movie_id', 'scene_id']);
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('releases_directors', (table) => { .then(() => knex.schema.createTable('releases_directors', (table) => {
table.integer('release_id', 16) table.integer('release_id', 16)
.notNullable() .notNullable()
@ -846,6 +761,90 @@ exports.up = knex => Promise.resolve()
.references('id') .references('id')
.inTable('releases'); .inTable('releases');
})) }))
.then(() => knex.schema.createTable('movies', (table) => {
table.increments('id', 16);
table.integer('entity_id', 12)
.references('id')
.inTable('entities')
.notNullable();
table.integer('studio_id', 12)
.references('id')
.inTable('entities');
table.text('entry_id');
table.unique(['entity_id', 'entry_id']);
table.text('url', 1000);
table.text('title');
table.text('slug');
table.timestamp('date');
table.index('date');
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
.defaultTo('day');
table.text('description');
table.boolean('deep');
table.text('deep_url', 1000);
table.text('comment');
table.integer('created_batch_id', 12)
.references('id')
.inTable('batches');
table.integer('updated_batch_id', 12)
.references('id')
.inTable('batches');
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_scenes', (table) => {
table.integer('movie_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.integer('scene_id', 16)
.notNullable()
.references('id')
.inTable('releases');
table.unique(['movie_id', 'scene_id']);
table.datetime('created_at')
.defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('movies_covers', (table) => {
table.integer('release_id', 16)
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
table.unique(['release_id', 'media_id']);
}))
.then(() => knex.schema.createTable('movies_trailers', (table) => {
table.integer('release_id', 16)
.unique()
.notNullable()
.references('id')
.inTable('movies');
table.text('media_id', 21)
.notNullable()
.references('id')
.inTable('media');
}))
// SEARCH // SEARCH
.then(() => { // eslint-disable-line arrow-body-style .then(() => { // eslint-disable-line arrow-body-style
// allow vim fold // allow vim fold
@ -992,6 +991,7 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style
DROP TABLE IF EXISTS releases_search CASCADE; DROP TABLE IF EXISTS releases_search CASCADE;
DROP TABLE IF EXISTS movies_covers CASCADE; DROP TABLE IF EXISTS movies_covers CASCADE;
DROP TABLE IF EXISTS movies_scenes CASCADE;
DROP TABLE IF EXISTS movies_trailers CASCADE; DROP TABLE IF EXISTS movies_trailers CASCADE;
DROP TABLE IF EXISTS batches CASCADE; DROP TABLE IF EXISTS batches CASCADE;

View File

@ -8,7 +8,7 @@ const initServer = require('./web/server');
const knex = require('./knex'); const knex = require('./knex');
const fetchUpdates = require('./updates'); const fetchUpdates = require('./updates');
const { fetchScenes, fetchMovies } = require('./deep'); const { fetchScenes, fetchMovies } = require('./deep');
const { storeReleases, storeMovies, updateReleasesSearch } = require('./store-releases'); const { storeScenes, storeMovies, updateReleasesSearch } = require('./store-releases');
const { scrapeActors } = require('./actors'); const { scrapeActors } = require('./actors');
const getFileEntries = require('./utils/file-entries'); const getFileEntries = require('./utils/file-entries');
@ -37,21 +37,22 @@ async function init() {
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])]) ? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])]; : [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
const sceneMovies = deepScenes && argv.movie && deepScenes.map(scene => scene.movie).filter(Boolean); const sceneMovies = deepScenes && deepScenes.map(scene => scene.movie).filter(Boolean);
const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]); const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]);
const movieScenes = deepMovies.map(movie => movie.scenes).flat().filter(Boolean);
const deepMovieScenes = await fetchScenes(movieScenes);
if (argv.inspect) { if (argv.inspect) {
console.log(util.inspect(deepScenes)); console.log(util.inspect(deepScenes));
console.log(util.inspect(deepMovies)); console.log(util.inspect(deepMovies));
} }
if (argv.save) { if (argv.save) {
if (deepScenes.length > 0) { if (deepScenes.length + deepMovieScenes.length > 0) {
await storeReleases(deepScenes); await storeScenes(deepScenes.concat(deepMovieScenes));
} }
console.log(deepMovies);
if (deepMovies.length > 0) { if (deepMovies.length > 0) {
await storeMovies(deepMovies); await storeMovies(deepMovies);
} }

View File

@ -1,6 +1,7 @@
'use strict'; 'use strict';
const qu = require('../utils/q'); const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, channel) { function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
@ -27,14 +28,30 @@ function scrapeAll(scenes, channel) {
}); });
} }
async function scrapeScene({ query, html }, url) { function scrapeMovieScenes(scenes) {
return scenes.map(({ query }) => {
const release = {};
release.title = query.cnt('.scene-title a');
release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' });
release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
release.duration = query.number('.scene-length') * 60;
release.actors = query.cnts('.scene-cast-list a');
release.poster = query.img('a img');
return release;
});
}
async function scrapeRelease({ query, html }, url, channel, type = 'scene') {
const release = {}; const release = {};
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
release.title = query.cnt('.scene-page .description'); release.title = query.cnt('.scene-page .description, .video-page .description');
release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/); release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);
release.duration = query.number('.release-date:last-child') * 60;
release.actors = query.all('.video-performer').map((el) => { release.actors = query.all('.video-performer').map((el) => {
const avatar = qu.query.img(el, 'img', 'data-bgsrc'); const avatar = qu.query.img(el, 'img', 'data-bgsrc');
@ -48,8 +65,21 @@ async function scrapeScene({ query, html }, url) {
}; };
}); });
release.tags = query.cnts('.tags a'); release.tags = query.cnts('.tags a, .categories a');
release.studio = slugify(query.cnt('.studio span:last-child'), '');
if (type === 'scene') {
release.director = query.text('.director');
release.duration = query.number('.release-date:last-child') * 60;
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"'); release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
}
if (type === 'movie') {
release.director = query.cnt('.director a');
release.covers = query.imgs('.carousel-item > img');
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
}
release.photos = query.imgs('#dv_frames a > img').map(photo => [ release.photos = query.imgs('#dv_frames a > img').map(photo => [
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`), photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
@ -70,7 +100,6 @@ async function scrapeScene({ query, html }, url) {
} }
} }
// console.log(release);
return release; return release;
} }
@ -116,7 +145,20 @@ async function fetchScene(url, channel) {
}); });
if (res.ok) { if (res.ok) {
return scrapeScene(res.item, url, channel); return scrapeRelease(res.item, url, channel);
}
return res.status;
}
async function fetchMovie(url, channel) {
const res = await qu.get(url, null, null, {
// invalid certificate
rejectUnauthorized: false,
});
if (res.ok) {
return scrapeRelease(res.item, url, channel, 'movie');
} }
return res.status; return res.status;
@ -139,4 +181,5 @@ module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchMovies, fetchMovies,
fetchMovie,
}; };

View File

@ -215,7 +215,7 @@ async function updateReleasesSearch(releaseIds) {
} }
} }
async function storeReleases(releases) { async function storeScenes(releases) {
if (releases.length === 0) { if (releases.length === 0) {
return []; return [];
} }
@ -256,9 +256,10 @@ async function storeReleases(releases) {
} }
async function storeMovies(movies) { async function storeMovies(movies) {
const { uniqueReleases } = await filterDuplicateReleases(movies);
const [batchId] = await knex('batches').insert({ comment: null }).returning('id'); const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
const curatedMovieEntries = movies.map(release => curateReleaseEntry(release, batchId, null, 'movie')); const curatedMovieEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId, null, 'movie'));
const storedMovies = await knex.batchInsert('movies', curatedMovieEntries).returning('*'); const storedMovies = await knex.batchInsert('movies', curatedMovieEntries).returning('*');
const moviesWithId = attachReleaseIds(movies, storedMovies); const moviesWithId = attachReleaseIds(movies, storedMovies);
@ -269,7 +270,7 @@ async function storeMovies(movies) {
} }
module.exports = { module.exports = {
storeReleases, storeScenes,
storeMovies, storeMovies,
updateReleasesSearch, updateReleasesSearch,
}; };