diff --git a/assets/components/releases/release.vue b/assets/components/releases/release.vue index f5ad458e..a04dd9ed 100644 --- a/assets/components/releases/release.vue +++ b/assets/components/releases/release.vue @@ -98,7 +98,7 @@

{{ release.title }}

-
+
+ +
+ +
-
+

Scenes

-
-
-

Movie

- - -
-
curateRelease(scene)); + if (release.movies) curatedRelease.movies = release.movies.map(({ movie }) => curateRelease(movie)); if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media); if (release.covers) curatedRelease.covers = release.covers.map(({ media }) => media); if (release.trailer) curatedRelease.trailer = release.trailer.media; if (release.teaser) curatedRelease.teaser = release.teaser.media; if (release.actors) curatedRelease.actors = release.actors.map(({ actor }) => curateActor(actor, curatedRelease)); + if (release.movieActors && release.movieActors.length > 0) curatedRelease.actors = release.movieActors.map(({ actor }) => curateActor(actor, curatedRelease)); return curatedRelease; } diff --git a/assets/js/fragments.js b/assets/js/fragments.js index 555194c7..0862281e 100644 --- a/assets/js/fragments.js +++ b/assets/js/fragments.js @@ -30,9 +30,7 @@ const sitesFragment = ` } `; -const releaseActorsFragment = ` - actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) { - actor { +const actorFields = ` id name slug @@ -49,6 +47,12 @@ const releaseActorsFragment = ` thumbnail } } +`; + +const releaseActorsFragment = ` + actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) { + actor { + ${actorFields} } } `; @@ -186,6 +190,35 @@ const releaseFragment = ` ${releaseTrailerFragment} ${releaseTeaserFragment} ${siteFragment} + movieActors: movieActorsByMovieId(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) { + actor { + ${actorFields} + } + } + movies: releasesMoviesBySceneId { + movie { + id + title + date + slug + createdAt + url + ${releaseCoversFragment} + ${siteFragment} + actors: movieActorsByMovieId { + actor { + id + name + slug + } + } + } + } + scenes: releasesMoviesByMovieId { + scene { + ${releaseFields} + } + } studio { id name diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index f33e1f9d..ddafae7b 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -362,10 +362,6 @@ exports.up = knex => Promise.resolve() table.integer('duration') .unsigned(); - table.integer('parent_id', 16) - .references('id') - .inTable('releases'); - table.boolean('deep'); table.string('deep_url', 1000); @@ -392,6 +388,25 @@ exports.up = knex => Promise.resolve() .inTable('actors'); table.unique(['release_id', 'actor_id']); + + table.datetime('created_at') + .defaultTo(knex.fn.now()); + })) + .then(() => knex.schema.createTable('releases_movies', (table) => { + table.integer('movie_id', 16) + .notNullable() + .references('id') + .inTable('releases'); + + table.integer('scene_id', 16) + .notNullable() + .references('id') + .inTable('releases'); + + table.unique(['movie_id', 'scene_id']); + + table.datetime('created_at') + .defaultTo(knex.fn.now()); })) .then(() => knex.schema.createTable('releases_directors', (table) => { table.integer('release_id', 16) @@ -526,6 +541,14 @@ exports.up = knex => Promise.resolve() SELECT NOT EXISTS(SELECT true FROM batches WHERE batches.id = release.created_batch_id + 1 LIMIT 1); $$ LANGUAGE sql STABLE; + CREATE VIEW movie_actors AS + SELECT releases_movies.movie_id, releases_actors.actor_id FROM releases_movies + LEFT JOIN releases ON releases.id = releases_movies.scene_id + LEFT JOIN releases_actors ON releases_actors.release_id = releases.id + GROUP BY movie_id, actor_id; + + COMMENT ON VIEW movie_actors IS E'@foreignKey (movie_id) references releases (id)\n@foreignKey (actor_id) references actors (id)'; + COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many'; COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many'; `)); @@ -534,9 +557,10 @@ exports.down = knex => knex.raw(` DROP FUNCTION IF EXISTS releases_by_tag_slugs; DROP FUNCTION IF EXISTS search_sites; - DROP VIEW IF EXISTS releases_actors_view; + DROP VIEW IF EXISTS movie_actors; DROP TABLE IF EXISTS releases_actors CASCADE; + DROP TABLE IF EXISTS releases_movies CASCADE; DROP TABLE IF EXISTS releases_directors CASCADE; DROP TABLE IF EXISTS releases_posters CASCADE; DROP TABLE IF EXISTS releases_photos CASCADE; diff --git a/src/app.js b/src/app.js index 82b5bf9c..31b754eb 100644 --- a/src/app.js +++ b/src/app.js @@ -5,7 +5,7 @@ const knex = require('./knex'); const initServer = require('./web/server'); const scrapeSites = require('./scrape-sites'); -const { scrapeReleases, deepFetchReleases } = require('./scrape-releases'); +const { scrapeScenes, scrapeMovies, deepFetchReleases } = require('./scrape-releases'); const { storeReleases } = require('./releases'); const { scrapeActors, scrapeBasicActors } = require('./actors'); @@ -15,11 +15,11 @@ if (process.env.NODE_ENV === 'development') { async function init() { if (argv.scene) { - await scrapeReleases(argv.scene, null, 'scene'); + await scrapeScenes(argv.scene); } if (argv.movie) { - await scrapeReleases(argv.movie, null, 'movie'); + await scrapeMovies(argv.movie); } if (argv.scrape || argv.networks || argv.sites) { diff --git a/src/argv.js b/src/argv.js index 294450ac..94578fb1 100644 --- a/src/argv.js +++ b/src/argv.js @@ -29,12 +29,17 @@ const { argv } = yargs type: 'array', alias: 'actor', }) - .option('with-releases', { - describe: 'Fetch all releases for an actor', + .option('with-scenes', { + describe: 'Fetch all scenes for an actor or movie', type: 'boolean', - alias: 'with-scenes', + alias: 'with-releases', default: false, }) + .option('with-movies', { + describe: 'Fetch movies for scenes', + type: 'boolean', + default: true, + }) .option('with-profiles', { describe: 'Scrape profiles for new actors after fetching scenes', type: 'boolean', @@ -44,12 +49,12 @@ const { argv } = yargs .option('scene', { describe: 'Scrape scene info from URL', type: 'array', - alias: 'release', + alias: 'scenes', }) .option('movie', { describe: 'Scrape movie info from URL', type: 'array', - alias: 'dvd', + alias: 'movies', }) .option('sources', { describe: 'Use these scrapers for actor data', @@ -121,11 +126,13 @@ const { argv } = yargs describe: 'Include release posters', type: 'boolean', default: true, + alias: 'poster', }) .option('covers', { describe: 'Include release covers', type: 'boolean', default: true, + alias: 'cover', }) .option('photos', { describe: 'Include release photos', @@ -136,11 +143,13 @@ const { argv } = yargs describe: 'Include release trailers', type: 'boolean', default: true, + alias: 'trailer', }) .option('teasers', { describe: 'Include release teasers', type: 'boolean', default: true, + alias: 'teaser', }) .option('avatars', { describe: 'Include actor avatars', diff --git a/src/releases.js b/src/releases.js index 93234be3..27636035 100644 --- a/src/releases.js +++ b/src/releases.js @@ -214,7 +214,6 @@ async function curateReleaseEntry(release, batchId, existingRelease) { studio_id: release.studio ? release.studio.id : null, shoot_id: release.shootId || null, entry_id: release.entryId || null, - parent_id: release.parentId, type: release.type, url: release.url, title: release.title, @@ -327,21 +326,6 @@ function accumulateActors(releases) { }, {}); } -function accumulateMovies(releases) { - return releases.reduce((acc, release) => { - if (release.movie) { - if (acc[release.movie]) { - acc[release.movie] = acc[release.movie].concat(release.id); - return acc; - } - - acc[release.movie] = [release.id]; - } - - return acc; - }, {}); -} - async function storeReleaseAssets(releases) { if (!argv.media) { return; @@ -501,7 +485,6 @@ async function storeReleases(releases) { logger.info(`Stored ${storedReleases.length} new releases`); const actors = accumulateActors(storedReleases); - const movies = accumulateMovies(storedReleases); await associateActors(actors, storedReleases); @@ -518,7 +501,6 @@ async function storeReleases(releases) { return { releases: storedReleases, actors, - movies, }; } diff --git a/src/scrape-releases.js b/src/scrape-releases.js index 2c377e18..82de59bb 100644 --- a/src/scrape-releases.js +++ b/src/scrape-releases.js @@ -5,6 +5,8 @@ const Promise = require('bluebird'); const logger = require('./logger')(__filename); const argv = require('./argv'); +const include = require('./utils/argv-include')(argv); +const knex = require('./knex'); const scrapers = require('./scrapers/scrapers'); const { findSiteByUrl } = require('./sites'); const { findNetworkByUrl } = require('./networks'); @@ -33,7 +35,7 @@ async function findSite(url, release) { return null; } -async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) { +async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) { // profile scraper may return either URLs or pre-scraped scenes const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined; const url = sourceIsUrlOrEmpty ? source : source?.url; @@ -72,8 +74,8 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli } const scrapedRelease = type === 'scene' - ? await scraper.fetchScene(url, site, release, preflight) - : await scraper.fetchMovie(url, site, release, preflight); + ? await scraper.fetchScene(url, site, release, beforeFetchLatest, include) + : await scraper.fetchMovie(url, site, release, beforeFetchLatest, include); return { ...release, @@ -85,8 +87,42 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli }; } -async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) { - const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), { +async function accumulateMovies(releases) { + if (!argv.withMovies) return []; + + const moviesByUrl = releases.reduce((acc, release) => { + if (!release.movie) return acc; + const movie = release.movie.url ? release.movie : { url: release.movie }; + + if (!acc[movie.url]) { + acc[movie.url] = { + ...movie, + type: 'movie', + sceneIds: [], + }; + } + + acc[movie.url].sceneIds = acc[movie.url].sceneIds.concat(release.id); + + return acc; + }, {}); + + const movies = await Promise.map(Object.values(moviesByUrl), async movie => scrapeRelease(movie, null, 'movie')); + const { releases: storedMovies } = await storeReleases(movies); + + const movieAssociations = storedMovies.reduce((acc, movie) => acc.concat(movie.sceneIds.map(sceneId => ({ + movie_id: movie.id, + scene_id: sceneId, + }))), []); + + await knex('releases_movies').insert(movieAssociations); + + // console.log(moviesByUrl); + return movies; +} + +async function scrapeReleases(sources, type = 'scene') { + const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, null, type), { concurrency: 5, }).filter(Boolean); @@ -97,26 +133,26 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight } if (argv.save) { - /* - const movie = scrapedRelease.movie - ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie') - : null; - - if (movie) { - const { releases: [storedMovie] } = await storeReleases([movie]); - curatedRelease.parentId = storedMovie.id; - } - */ - const { releases: storedReleases } = await storeReleases(curatedReleases); - const movieScenes = storedReleases.map(movie => movie.scenes).flat(); - // console.log(movieScenes); + await accumulateMovies(storedReleases); if (storedReleases) { logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join('')); } + + return storedReleases; } + + return curatedReleases; +} + +async function scrapeScenes(sources) { + return scrapeReleases(sources, 'scene'); +} + +async function scrapeMovies(sources) { + return scrapeReleases(sources, 'movie'); } async function deepFetchReleases(baseReleases, beforeFetchLatest) { @@ -151,13 +187,13 @@ async function deepFetchReleases(baseReleases, beforeFetchLatest) { concurrency: 2, }); - // console.log(deepReleases); - return deepReleases; } module.exports = { deepFetchReleases, + scrapeMovies, scrapeRelease, scrapeReleases, + scrapeScenes, }; diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 3a55e1b8..e980219e 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -4,6 +4,7 @@ const Promise = require('bluebird'); const moment = require('moment'); const argv = require('./argv'); +const include = require('./utils/argv-include')(argv); const logger = require('./logger')(__filename); const knex = require('./knex'); const { fetchIncludedSites } = require('./sites'); @@ -42,7 +43,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel return []; } - const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases); + const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases, include); if (!Array.isArray(latestReleases)) { logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`); @@ -89,7 +90,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) { if (argv.upcoming && scraper.fetchUpcoming) { - const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest); + const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest, include); return upcomingReleases ? upcomingReleases.map(release => ({ ...release, site, upcoming: true })) diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 2799123f..1e092dc3 100644 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -135,10 +135,10 @@ function getEntryId(html) { } function scrapeAll(scenes, site) { - return scenes.map(({ qu }) => { + return scenes.map(({ el, qu }) => { const release = {}; - release.entryId = qu.el.dataset.setid || qu.q('.rating_box')?.dataset.id; + release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id; release.url = qu.url('.update_title, .dvd_info > a, a ~ a'); release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true); @@ -160,7 +160,7 @@ function scrapeAll(scenes, site) { } : null; }).filter(Boolean); - const teaserScript = qu.content('script'); + const teaserScript = qu.html('script'); if (teaserScript) { const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); if (src) release.teaser = { src }; @@ -220,17 +220,19 @@ function scrapeUpcoming(html, site) { }); } -async function scrapeScene({ qu }, url, site) { +async function scrapeScene({ html, qu }, url, site, include) { const release = { url, site }; - release.entryId = getEntryId(qu.html); + release.entryId = getEntryId(html); release.title = qu.q('.title_bar_hilite', true); release.description = qu.q('.update_description', true); release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML'); - release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true); - const posterPath = qu.html.match(/useimage = "(.*)"/)?.[1]; + release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true); + release.tags = qu.all('.update_tags a', true); + + const posterPath = html.match(/useimage = "(.*)"/)?.[1]; if (posterPath) { const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`; @@ -243,8 +245,8 @@ async function scrapeScene({ qu }, url, site) { } } - if (site.slug !== 'manuelferrara') { - const trailerLines = qu.html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line)); + if (include.trailer && site.slug !== 'manuelferrara') { + const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line)); if (trailerLines.length) { release.trailer = trailerLines.map((trailerLine) => { @@ -259,8 +261,7 @@ async function scrapeScene({ qu }, url, site) { } } - release.photos = await getPhotos(release.entryId, site); - release.tags = qu.all('.update_tags a', true); + if (include.photos) release.photos = await getPhotos(release.entryId, site); if (qu.exists('.update_dvds a')) { release.movie = { @@ -275,27 +276,27 @@ async function scrapeScene({ qu }, url, site) { return release; } -function scrapeMovie({ el, q, qus }, url, site) { +function scrapeMovie({ el, qu }, url, site) { const movie = { url, site }; - movie.entryId = q('.dvd_details_overview .rating_box').dataset.id; - movie.title = q('.title_bar span', true); - movie.covers = qus('#dvd-cover-flip > a'); - movie.channel = q('.update_date a', true); + movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id; + movie.title = qu.q('.title_bar span', true); + movie.covers = qu.urls('#dvd-cover-flip > a'); + movie.channel = qu.q('.update_date a', true); // movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href); - const sceneQs = ctxa(el, '.dvd_details'); - const scenes = scrapeAll(sceneQs, site); + const sceneQus = ctxa(el, '.dvd_details'); + const scenes = scrapeAll(sceneQus, site); const curatedScenes = scenes - .map(scene => ({ ...scene, movie })) + ?.map(scene => ({ ...scene, movie })) .sort((sceneA, sceneB) => sceneA.date - sceneB.date); - movie.date = curatedScenes[0].date; + movie.date = curatedScenes?.[0].date; return { ...movie, - scenes: curatedScenes, + ...(curatedScenes && { scenes: curatedScenes }), }; } @@ -358,10 +359,10 @@ async function fetchUpcoming(site) { return res.statusCode; } -async function fetchScene(url, site) { +async function fetchScene(url, site, baseRelease, preflight, include) { const res = await get(url); - return res.ok ? scrapeScene(res.item, url, site) : res.status; + return res.ok ? scrapeScene(res.item, url, site, include) : res.status; } async function fetchMovie(url, site) { diff --git a/src/utils/qu.js b/src/utils/qu.js index d9563092..afb75f09 100644 --- a/src/utils/qu.js +++ b/src/utils/qu.js @@ -70,7 +70,7 @@ function exists(context, selector) { return !!q(context, selector); } -function content(context, selector) { +function html(context, selector) { const el = q(context, selector, null, true); return el && el.innerHTML; @@ -176,8 +176,8 @@ const legacyFuncs = { qall: all, qd: date, qdate: date, - qh: content, - qhtml: content, + qh: html, + qhtml: html, qi: image, qimage: image, qimages: images, @@ -207,8 +207,7 @@ const legacyFuncs = { const quFuncs = { all, - body: content, - content, + html, date, dur: duration, duration, @@ -217,7 +216,6 @@ const quFuncs = { images, img: image, imgs: images, - inner: content, length: duration, meta, poster,