forked from DebaucheryLibrarian/traxxx
Filtering undefined scenes property from movies. Added movie page scraper to Elegant Angel.
This commit is contained in:
parent
7bfa5a6cc4
commit
a7d5bef93f
|
@ -43,6 +43,7 @@ export default {
|
|||
|
||||
.tiles {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, 15rem);
|
||||
grid-template-columns: repeat(auto-fill, minmax(30rem, 1fr));
|
||||
grid-gap: 1rem;
|
||||
}
|
||||
</style>
|
||||
|
|
|
@ -1,21 +1,27 @@
|
|||
<template>
|
||||
<div class="tile">
|
||||
<div class="cover">
|
||||
<div class="movie">
|
||||
<router-link
|
||||
:to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
|
||||
class="cover"
|
||||
>
|
||||
<img
|
||||
v-if="movie.covers[0]"
|
||||
:src="`/media/${movie.covers[0].thumbnail}`"
|
||||
class="front"
|
||||
>
|
||||
</router-link>
|
||||
|
||||
<img
|
||||
v-if="movie.covers[1]"
|
||||
:src="`/media/${movie.covers[1].thumbnail}`"
|
||||
class="back"
|
||||
<div class="info">
|
||||
<router-link
|
||||
:to="{ name: 'movie', params: { movieId: movie.id, movieSlug: movie.slug } }"
|
||||
class="title-link"
|
||||
>
|
||||
<h3 class="title">{{ movie.title }}</h3>
|
||||
</router-link>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="details">{{ movie.entity.name }}</div>
|
||||
<h3 class="title">{{ movie.title }}</h3>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
|
@ -39,6 +45,15 @@ export default {
|
|||
font-size: 0;
|
||||
}
|
||||
|
||||
.movie {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.title-link {
|
||||
color: var(--text);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.details {
|
||||
color: var(--text-light);
|
||||
background: var(--profile);
|
||||
|
@ -48,23 +63,11 @@ export default {
|
|||
}
|
||||
|
||||
.cover {
|
||||
width: 12rem;
|
||||
|
||||
img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.back {
|
||||
display: none;
|
||||
}
|
||||
|
||||
&:hover {
|
||||
.back {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.front {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.title {
|
||||
|
|
|
@ -48,7 +48,7 @@ const routes = [
|
|||
name: 'scene',
|
||||
},
|
||||
{
|
||||
path: '/movie/:releaseId/:releaseSlug?',
|
||||
path: '/movie/:movieId/:movieSlug?',
|
||||
component: Release,
|
||||
name: 'movie',
|
||||
},
|
||||
|
|
|
@ -600,75 +600,6 @@ exports.up = knex => Promise.resolve()
|
|||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies', (table) => {
|
||||
table.increments('id', 16);
|
||||
|
||||
table.integer('entity_id', 12)
|
||||
.references('id')
|
||||
.inTable('entities')
|
||||
.notNullable();
|
||||
|
||||
table.integer('studio_id', 12)
|
||||
.references('id')
|
||||
.inTable('entities');
|
||||
|
||||
table.text('entry_id');
|
||||
table.unique(['entity_id', 'entry_id']);
|
||||
|
||||
table.text('url', 1000);
|
||||
table.text('title');
|
||||
table.text('slug');
|
||||
|
||||
table.timestamp('date');
|
||||
table.index('date');
|
||||
|
||||
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
|
||||
.defaultTo('day');
|
||||
|
||||
table.text('description');
|
||||
|
||||
table.boolean('deep');
|
||||
table.text('deep_url', 1000);
|
||||
|
||||
table.text('comment');
|
||||
|
||||
table.integer('created_batch_id', 12)
|
||||
.references('id')
|
||||
.inTable('batches');
|
||||
|
||||
table.integer('updated_batch_id', 12)
|
||||
.references('id')
|
||||
.inTable('batches');
|
||||
|
||||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies_covers', (table) => {
|
||||
table.integer('release_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.text('media_id', 21)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media');
|
||||
|
||||
table.unique(['release_id', 'media_id']);
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies_trailers', (table) => {
|
||||
table.integer('movie_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.text('media_id', 21)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media');
|
||||
|
||||
table.unique('movie_id');
|
||||
}))
|
||||
.then(() => knex.schema.createTable('releases', (table) => {
|
||||
table.increments('id', 16);
|
||||
|
||||
|
@ -734,22 +665,6 @@ exports.up = knex => Promise.resolve()
|
|||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('releases_movies', (table) => {
|
||||
table.integer('movie_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.integer('scene_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('releases');
|
||||
|
||||
table.unique(['movie_id', 'scene_id']);
|
||||
|
||||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('releases_directors', (table) => {
|
||||
table.integer('release_id', 16)
|
||||
.notNullable()
|
||||
|
@ -846,6 +761,90 @@ exports.up = knex => Promise.resolve()
|
|||
.references('id')
|
||||
.inTable('releases');
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies', (table) => {
|
||||
table.increments('id', 16);
|
||||
|
||||
table.integer('entity_id', 12)
|
||||
.references('id')
|
||||
.inTable('entities')
|
||||
.notNullable();
|
||||
|
||||
table.integer('studio_id', 12)
|
||||
.references('id')
|
||||
.inTable('entities');
|
||||
|
||||
table.text('entry_id');
|
||||
table.unique(['entity_id', 'entry_id']);
|
||||
|
||||
table.text('url', 1000);
|
||||
table.text('title');
|
||||
table.text('slug');
|
||||
|
||||
table.timestamp('date');
|
||||
table.index('date');
|
||||
|
||||
table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second'])
|
||||
.defaultTo('day');
|
||||
|
||||
table.text('description');
|
||||
|
||||
table.boolean('deep');
|
||||
table.text('deep_url', 1000);
|
||||
|
||||
table.text('comment');
|
||||
|
||||
table.integer('created_batch_id', 12)
|
||||
.references('id')
|
||||
.inTable('batches');
|
||||
|
||||
table.integer('updated_batch_id', 12)
|
||||
.references('id')
|
||||
.inTable('batches');
|
||||
|
||||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies_scenes', (table) => {
|
||||
table.integer('movie_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.integer('scene_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('releases');
|
||||
|
||||
table.unique(['movie_id', 'scene_id']);
|
||||
|
||||
table.datetime('created_at')
|
||||
.defaultTo(knex.fn.now());
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies_covers', (table) => {
|
||||
table.integer('release_id', 16)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.text('media_id', 21)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media');
|
||||
|
||||
table.unique(['release_id', 'media_id']);
|
||||
}))
|
||||
.then(() => knex.schema.createTable('movies_trailers', (table) => {
|
||||
table.integer('release_id', 16)
|
||||
.unique()
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies');
|
||||
|
||||
table.text('media_id', 21)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('media');
|
||||
}))
|
||||
// SEARCH
|
||||
.then(() => { // eslint-disable-line arrow-body-style
|
||||
// allow vim fold
|
||||
|
@ -992,6 +991,7 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style
|
|||
DROP TABLE IF EXISTS releases_search CASCADE;
|
||||
|
||||
DROP TABLE IF EXISTS movies_covers CASCADE;
|
||||
DROP TABLE IF EXISTS movies_scenes CASCADE;
|
||||
DROP TABLE IF EXISTS movies_trailers CASCADE;
|
||||
|
||||
DROP TABLE IF EXISTS batches CASCADE;
|
||||
|
|
13
src/app.js
13
src/app.js
|
@ -8,7 +8,7 @@ const initServer = require('./web/server');
|
|||
const knex = require('./knex');
|
||||
const fetchUpdates = require('./updates');
|
||||
const { fetchScenes, fetchMovies } = require('./deep');
|
||||
const { storeReleases, storeMovies, updateReleasesSearch } = require('./store-releases');
|
||||
const { storeScenes, storeMovies, updateReleasesSearch } = require('./store-releases');
|
||||
const { scrapeActors } = require('./actors');
|
||||
const getFileEntries = require('./utils/file-entries');
|
||||
|
||||
|
@ -37,21 +37,22 @@ async function init() {
|
|||
? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])])
|
||||
: [...(updateBaseScenes || []), ...(actorBaseScenes || [])];
|
||||
|
||||
const sceneMovies = deepScenes && argv.movie && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||
const sceneMovies = deepScenes && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||
const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]);
|
||||
|
||||
const movieScenes = deepMovies.map(movie => movie.scenes).flat().filter(Boolean);
|
||||
const deepMovieScenes = await fetchScenes(movieScenes);
|
||||
|
||||
if (argv.inspect) {
|
||||
console.log(util.inspect(deepScenes));
|
||||
console.log(util.inspect(deepMovies));
|
||||
}
|
||||
|
||||
if (argv.save) {
|
||||
if (deepScenes.length > 0) {
|
||||
await storeReleases(deepScenes);
|
||||
if (deepScenes.length + deepMovieScenes.length > 0) {
|
||||
await storeScenes(deepScenes.concat(deepMovieScenes));
|
||||
}
|
||||
|
||||
console.log(deepMovies);
|
||||
|
||||
if (deepMovies.length > 0) {
|
||||
await storeMovies(deepMovies);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
'use strict';
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
|
@ -27,14 +28,30 @@ function scrapeAll(scenes, channel) {
|
|||
});
|
||||
}
|
||||
|
||||
async function scrapeScene({ query, html }, url) {
|
||||
/**
 * Extract the first numeric path segment of a scene URL to use as its entry ID.
 *
 * @param {?string} url - Absolute scene URL, or a nullish value when the tile had no link.
 * @returns {?string} The captured digits, or null when the URL is missing,
 *   cannot be parsed, or contains no `/<digits>` path segment.
 */
function getSceneEntryId(url) {
	if (!url) {
		return null;
	}

	try {
		const match = new URL(url).pathname.match(/\/(\d+)/);

		return match ? match[1] : null;
	} catch (error) {
		// new URL() throws a TypeError for strings it cannot parse; treat that as "no ID"
		// so a single malformed tile does not abort scraping the rest of the movie
		return null;
	}
}

/**
 * Build base scene releases from the scene tiles listed on a movie page.
 *
 * @param {Array<{query: object}>} scenes - Query contexts, one per scene tile; each `query`
 *   must expose the `cnt`, `url`, `number`, `cnts` and `img` helpers used below.
 * @returns {Array<object>} One release per tile with `title`, `url`, `entryId`, `duration`,
 *   `actors` and `poster`. `entryId` is null when it cannot be derived from the tile's URL.
 */
function scrapeMovieScenes(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.scene-title a');
		release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' });
		// previously this dereferenced the URL and the regex match unconditionally,
		// which threw for tiles without a link or without a numeric path segment
		release.entryId = getSceneEntryId(release.url);

		// the page value is multiplied by 60 — presumably minutes to seconds; confirm against the site markup
		release.duration = query.number('.scene-length') * 60;
		release.actors = query.cnts('.scene-cast-list a');

		release.poster = query.img('a img');

		return release;
	});
}
|
||||
|
||||
async function scrapeRelease({ query, html }, url, channel, type = 'scene') {
|
||||
const release = {};
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
|
||||
|
||||
release.title = query.cnt('.scene-page .description');
|
||||
release.title = query.cnt('.scene-page .description, .video-page .description');
|
||||
release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);
|
||||
release.duration = query.number('.release-date:last-child') * 60;
|
||||
|
||||
release.actors = query.all('.video-performer').map((el) => {
|
||||
const avatar = qu.query.img(el, 'img', 'data-bgsrc');
|
||||
|
@ -48,8 +65,21 @@ async function scrapeScene({ query, html }, url) {
|
|||
};
|
||||
});
|
||||
|
||||
release.tags = query.cnts('.tags a');
|
||||
release.tags = query.cnts('.tags a, .categories a');
|
||||
release.studio = slugify(query.cnt('.studio span:last-child'), '');
|
||||
|
||||
if (type === 'scene') {
|
||||
release.director = query.text('.director');
|
||||
release.duration = query.number('.release-date:last-child') * 60;
|
||||
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
|
||||
}
|
||||
|
||||
if (type === 'movie') {
|
||||
release.director = query.cnt('.director a');
|
||||
release.covers = query.imgs('.carousel-item > img');
|
||||
|
||||
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
|
||||
}
|
||||
|
||||
release.photos = query.imgs('#dv_frames a > img').map(photo => [
|
||||
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
|
||||
|
@ -70,7 +100,6 @@ async function scrapeScene({ query, html }, url) {
|
|||
}
|
||||
}
|
||||
|
||||
// console.log(release);
|
||||
return release;
|
||||
}
|
||||
|
||||
|
@ -116,7 +145,20 @@ async function fetchScene(url, channel) {
|
|||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, channel);
|
||||
return scrapeRelease(res.item, url, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchMovie(url, channel) {
|
||||
const res = await qu.get(url, null, null, {
|
||||
// invalid certificate
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeRelease(res.item, url, channel, 'movie');
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
@ -139,4 +181,5 @@ module.exports = {
|
|||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchMovies,
|
||||
fetchMovie,
|
||||
};
|
||||
|
|
|
@ -215,7 +215,7 @@ async function updateReleasesSearch(releaseIds) {
|
|||
}
|
||||
}
|
||||
|
||||
async function storeReleases(releases) {
|
||||
async function storeScenes(releases) {
|
||||
if (releases.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
@ -256,9 +256,10 @@ async function storeReleases(releases) {
|
|||
}
|
||||
|
||||
async function storeMovies(movies) {
|
||||
const { uniqueReleases } = await filterDuplicateReleases(movies);
|
||||
const [batchId] = await knex('batches').insert({ comment: null }).returning('id');
|
||||
|
||||
const curatedMovieEntries = movies.map(release => curateReleaseEntry(release, batchId, null, 'movie'));
|
||||
const curatedMovieEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId, null, 'movie'));
|
||||
const storedMovies = await knex.batchInsert('movies', curatedMovieEntries).returning('*');
|
||||
|
||||
const moviesWithId = attachReleaseIds(movies, storedMovies);
|
||||
|
@ -269,7 +270,7 @@ async function storeMovies(movies) {
|
|||
}
|
||||
|
||||
module.exports = {
|
||||
storeReleases,
|
||||
storeScenes,
|
||||
storeMovies,
|
||||
updateReleasesSearch,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue