From a7d5bef93fc254acfda1f68b44328a96882f2f49 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sat, 8 Aug 2020 18:10:59 +0200 Subject: [PATCH] Filtering undefined scenes property from movies. Added movie page scraper to Elegant Angel. --- assets/components/movies/movies.vue | 3 +- assets/components/movies/tile.vue | 53 ++++---- assets/js/router.js | 2 +- migrations/20190325001339_releases.js | 170 +++++++++++++------------- src/app.js | 13 +- src/scrapers/elegantangel.js | 57 +++++++-- src/store-releases.js | 7 +- 7 files changed, 177 insertions(+), 128 deletions(-) diff --git a/assets/components/movies/movies.vue b/assets/components/movies/movies.vue index a5e3fad9..678079a8 100644 --- a/assets/components/movies/movies.vue +++ b/assets/components/movies/movies.vue @@ -43,6 +43,7 @@ export default { .tiles { display: grid; - grid-template-columns: repeat(auto-fill, 15rem); + grid-template-columns: repeat(auto-fill, minmax(30rem, 1fr)); + grid-gap: 1rem; } diff --git a/assets/components/movies/tile.vue b/assets/components/movies/tile.vue index a004d443..45881d4e 100644 --- a/assets/components/movies/tile.vue +++ b/assets/components/movies/tile.vue @@ -1,21 +1,27 @@ @@ -39,6 +45,15 @@ export default { font-size: 0; } +.movie { + display: flex; +} + +.title-link { + color: var(--text); + text-decoration: none; +} + .details { color: var(--text-light); background: var(--profile); @@ -48,23 +63,11 @@ export default { } .cover { + width: 12rem; + img { width: 100%; } - - .back { - display: none; - } - - &:hover { - .back { - display: block; - } - - .front { - display: none; - } - } } .title { diff --git a/assets/js/router.js b/assets/js/router.js index d6d6d478..6303a195 100644 --- a/assets/js/router.js +++ b/assets/js/router.js @@ -48,7 +48,7 @@ const routes = [ name: 'scene', }, { - path: '/movie/:releaseId/:releaseSlug?', + path: '/movie/:movieId/:movieSlug?', component: Release, name: 'movie', }, diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 8960fbc0..de54d1a6 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -600,75 +600,6 @@ exports.up = knex => Promise.resolve() table.datetime('created_at') .defaultTo(knex.fn.now()); })) - .then(() => knex.schema.createTable('movies', (table) => { - table.increments('id', 16); - - table.integer('entity_id', 12) - .references('id') - .inTable('entities') - .notNullable(); - - table.integer('studio_id', 12) - .references('id') - .inTable('entities'); - - table.text('entry_id'); - table.unique(['entity_id', 'entry_id']); - - table.text('url', 1000); - table.text('title'); - table.text('slug'); - - table.timestamp('date'); - table.index('date'); - - table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second']) - .defaultTo('day'); - - table.text('description'); - - table.boolean('deep'); - table.text('deep_url', 1000); - - table.text('comment'); - - table.integer('created_batch_id', 12) - .references('id') - .inTable('batches'); - - table.integer('updated_batch_id', 12) - .references('id') - .inTable('batches'); - - table.datetime('created_at') - .defaultTo(knex.fn.now()); - })) - .then(() => knex.schema.createTable('movies_covers', (table) => { - table.integer('release_id', 16) - .notNullable() - .references('id') - .inTable('movies'); - - table.text('media_id', 21) - .notNullable() - .references('id') - .inTable('media'); - - table.unique(['release_id', 'media_id']); - })) - .then(() => knex.schema.createTable('movies_trailers', (table) => { - table.integer('movie_id', 16) - .notNullable() - .references('id') - .inTable('movies'); - - table.text('media_id', 21) - .notNullable() - .references('id') - .inTable('media'); - - table.unique('movie_id'); - })) .then(() => knex.schema.createTable('releases', (table) => { table.increments('id', 16); @@ -734,22 +665,6 @@ exports.up = knex => Promise.resolve() table.datetime('created_at') .defaultTo(knex.fn.now()); })) - .then(() => knex.schema.createTable('releases_movies', (table) => { - table.integer('movie_id', 16) - .notNullable() - .references('id') - .inTable('movies'); - - table.integer('scene_id', 16) - .notNullable() - .references('id') - .inTable('releases'); - - table.unique(['movie_id', 'scene_id']); - - table.datetime('created_at') - .defaultTo(knex.fn.now()); - })) .then(() => knex.schema.createTable('releases_directors', (table) => { table.integer('release_id', 16) .notNullable() @@ -846,6 +761,90 @@ exports.up = knex => Promise.resolve() .references('id') .inTable('releases'); })) + .then(() => knex.schema.createTable('movies', (table) => { + table.increments('id', 16); + + table.integer('entity_id', 12) + .references('id') + .inTable('entities') + .notNullable(); + + table.integer('studio_id', 12) + .references('id') + .inTable('entities'); + + table.text('entry_id'); + table.unique(['entity_id', 'entry_id']); + + table.text('url', 1000); + table.text('title'); + table.text('slug'); + + table.timestamp('date'); + table.index('date'); + + table.enum('date_precision', ['year', 'month', 'day', 'hour', 'minute', 'second']) + .defaultTo('day'); + + table.text('description'); + + table.boolean('deep'); + table.text('deep_url', 1000); + + table.text('comment'); + + table.integer('created_batch_id', 12) + .references('id') + .inTable('batches'); + + table.integer('updated_batch_id', 12) + .references('id') + .inTable('batches'); + + table.datetime('created_at') + .defaultTo(knex.fn.now()); + })) + .then(() => knex.schema.createTable('movies_scenes', (table) => { + table.integer('movie_id', 16) + .notNullable() + .references('id') + .inTable('movies'); + + table.integer('scene_id', 16) + .notNullable() + .references('id') + .inTable('releases'); + + table.unique(['movie_id', 'scene_id']); + + table.datetime('created_at') + .defaultTo(knex.fn.now()); + })) + .then(() => knex.schema.createTable('movies_covers', (table) => { + table.integer('release_id', 16) + .notNullable() + .references('id') + .inTable('movies'); + + table.text('media_id', 21) + .notNullable() + .references('id') + .inTable('media'); + + table.unique(['release_id', 'media_id']); + })) + .then(() => knex.schema.createTable('movies_trailers', (table) => { + table.integer('release_id', 16) + .unique() + .notNullable() + .references('id') + .inTable('movies'); + + table.text('media_id', 21) + .notNullable() + .references('id') + .inTable('media'); + })) // SEARCH .then(() => { // eslint-disable-line arrow-body-style // allow vim fold @@ -992,6 +991,7 @@ exports.down = (knex) => { // eslint-disable-line arrow-body-style DROP TABLE IF EXISTS releases_search CASCADE; DROP TABLE IF EXISTS movies_covers CASCADE; + DROP TABLE IF EXISTS movies_scenes CASCADE; DROP TABLE IF EXISTS movies_trailers CASCADE; DROP TABLE IF EXISTS batches CASCADE; diff --git a/src/app.js b/src/app.js index 0f7fdbd3..38c615ac 100644 --- a/src/app.js +++ b/src/app.js @@ -8,7 +8,7 @@ const initServer = require('./web/server'); const knex = require('./knex'); const fetchUpdates = require('./updates'); const { fetchScenes, fetchMovies } = require('./deep'); -const { storeReleases, storeMovies, updateReleasesSearch } = require('./store-releases'); +const { storeScenes, storeMovies, updateReleasesSearch } = require('./store-releases'); const { scrapeActors } = require('./actors'); const getFileEntries = require('./utils/file-entries'); @@ -37,21 +37,22 @@ async function init() { ? await fetchScenes([...(sceneUrls), ...(updateBaseScenes || []), ...(actorBaseScenes || [])]) : [...(updateBaseScenes || []), ...(actorBaseScenes || [])]; - const sceneMovies = deepScenes && argv.movie && deepScenes.map(scene => scene.movie).filter(Boolean); + const sceneMovies = deepScenes && deepScenes.map(scene => scene.movie).filter(Boolean); const deepMovies = await fetchMovies([...(argv.movie || []), ...(sceneMovies || [])]); + const movieScenes = deepMovies.map(movie => movie.scenes).flat().filter(Boolean); + const deepMovieScenes = await fetchScenes(movieScenes); + if (argv.inspect) { console.log(util.inspect(deepScenes)); console.log(util.inspect(deepMovies)); } if (argv.save) { - if (deepScenes.length > 0) { - await storeReleases(deepScenes); + if (deepScenes.length + deepMovieScenes.length > 0) { + await storeScenes(deepScenes.concat(deepMovieScenes)); } - console.log(deepMovies); - if (deepMovies.length > 0) { await storeMovies(deepMovies); } diff --git a/src/scrapers/elegantangel.js b/src/scrapers/elegantangel.js index e95df5ea..bfb6eddf 100644 --- a/src/scrapers/elegantangel.js +++ b/src/scrapers/elegantangel.js @@ -1,6 +1,7 @@ 'use strict'; const qu = require('../utils/q'); +const slugify = require('../utils/slugify'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { @@ -27,14 +28,30 @@ function scrapeAll(scenes, channel) { }); } -async function scrapeScene({ query, html }, url) { +function scrapeMovieScenes(scenes) { + return scenes.map(({ query }) => { + const release = {}; + + release.title = query.cnt('.scene-title a'); + release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' }); + release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; + + release.duration = query.number('.scene-length') * 60; + release.actors = query.cnts('.scene-cast-list a'); + + release.poster = query.img('a img'); + + return release; + }); +} + +async function scrapeRelease({ query, html }, url, channel, type = 'scene') { const release = {}; release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; - release.title = query.cnt('.scene-page .description'); + release.title = query.cnt('.scene-page .description, .video-page .description'); release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/); - release.duration = query.number('.release-date:last-child') * 60; release.actors = query.all('.video-performer').map((el) => { const avatar = qu.query.img(el, 'img', 'data-bgsrc'); @@ -48,8 +65,21 @@ async function scrapeScene({ query, html }, url) { }; }); - release.tags = query.cnts('.tags a'); - release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"'); + release.tags = query.cnts('.tags a, .categories a'); + release.studio = slugify(query.cnt('.studio span:last-child'), ''); + + if (type === 'scene') { + release.director = query.text('.director'); + release.duration = query.number('.release-date:last-child') * 60; + release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"'); + } + + if (type === 'movie') { + release.director = query.cnt('.director a'); + release.covers = query.imgs('.carousel-item > img'); + + release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel); + } release.photos = query.imgs('#dv_frames a > img').map(photo => [ photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`), @@ -70,7 +100,6 @@ async function scrapeScene({ query, html }, url) { } } - // console.log(release); return release; } @@ -116,7 +145,20 @@ async function fetchScene(url, channel) { }); if (res.ok) { - return scrapeScene(res.item, url, channel); + return scrapeRelease(res.item, url, channel); + } + + return res.status; +} + +async function fetchMovie(url, channel) { + const res = await qu.get(url, null, null, { + // invalid certificate + rejectUnauthorized: false, + }); + + if (res.ok) { + return scrapeRelease(res.item, url, channel, 'movie'); } return res.status; @@ -139,4 +181,5 @@ module.exports = { fetchLatest, fetchScene, fetchMovies, + fetchMovie, }; diff --git a/src/store-releases.js b/src/store-releases.js index 854dd945..a9c1671b 100644 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -215,7 +215,7 @@ async function updateReleasesSearch(releaseIds) { } } -async function storeReleases(releases) { +async function storeScenes(releases) { if (releases.length === 0) { return []; } @@ -256,9 +256,10 @@ async function storeReleases(releases) { } async function storeMovies(movies) { + const { uniqueReleases } = await filterDuplicateReleases(movies); const [batchId] = await knex('batches').insert({ comment: null }).returning('id'); - const curatedMovieEntries = movies.map(release => curateReleaseEntry(release, batchId, null, 'movie')); + const curatedMovieEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId, null, 'movie')); const storedMovies = await knex.batchInsert('movies', curatedMovieEntries).returning('*'); const moviesWithId = attachReleaseIds(movies, storedMovies); @@ -269,7 +270,7 @@ async function storeMovies(movies) { } module.exports = { - storeReleases, + storeScenes, storeMovies, updateReleasesSearch, };