+
Scenes
-
-
-
Movie
-
-
-
-
curateRelease(scene));
+ if (release.movies) curatedRelease.movies = release.movies.map(({ movie }) => curateRelease(movie));
if (release.photos) curatedRelease.photos = release.photos.map(({ media }) => media);
if (release.covers) curatedRelease.covers = release.covers.map(({ media }) => media);
if (release.trailer) curatedRelease.trailer = release.trailer.media;
if (release.teaser) curatedRelease.teaser = release.teaser.media;
if (release.actors) curatedRelease.actors = release.actors.map(({ actor }) => curateActor(actor, curatedRelease));
+ if (release.movieActors && release.movieActors.length > 0) curatedRelease.actors = release.movieActors.map(({ actor }) => curateActor(actor, curatedRelease));
return curatedRelease;
}
diff --git a/assets/js/fragments.js b/assets/js/fragments.js
index 555194c7..0862281e 100644
--- a/assets/js/fragments.js
+++ b/assets/js/fragments.js
@@ -30,9 +30,7 @@ const sitesFragment = `
}
`;
-const releaseActorsFragment = `
- actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
- actor {
+const actorFields = `
id
name
slug
@@ -49,6 +47,12 @@ const releaseActorsFragment = `
thumbnail
}
}
+`;
+
+const releaseActorsFragment = `
+ actors: releasesActors(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
+ actor {
+ ${actorFields}
}
}
`;
@@ -186,6 +190,35 @@ const releaseFragment = `
${releaseTrailerFragment}
${releaseTeaserFragment}
${siteFragment}
+ movieActors: movieActorsByMovieId(orderBy: ACTOR_BY_ACTOR_ID__GENDER_ASC) {
+ actor {
+ ${actorFields}
+ }
+ }
+ movies: releasesMoviesBySceneId {
+ movie {
+ id
+ title
+ date
+ slug
+ createdAt
+ url
+ ${releaseCoversFragment}
+ ${siteFragment}
+ actors: movieActorsByMovieId {
+ actor {
+ id
+ name
+ slug
+ }
+ }
+ }
+ }
+ scenes: releasesMoviesByMovieId {
+ scene {
+ ${releaseFields}
+ }
+ }
studio {
id
name
diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js
index f33e1f9d..ddafae7b 100644
--- a/migrations/20190325001339_releases.js
+++ b/migrations/20190325001339_releases.js
@@ -362,10 +362,6 @@ exports.up = knex => Promise.resolve()
table.integer('duration')
.unsigned();
- table.integer('parent_id', 16)
- .references('id')
- .inTable('releases');
-
table.boolean('deep');
table.string('deep_url', 1000);
@@ -392,6 +388,25 @@ exports.up = knex => Promise.resolve()
.inTable('actors');
table.unique(['release_id', 'actor_id']);
+
+ table.datetime('created_at')
+ .defaultTo(knex.fn.now());
+ }))
+ .then(() => knex.schema.createTable('releases_movies', (table) => {
+ table.integer('movie_id', 16)
+ .notNullable()
+ .references('id')
+ .inTable('releases');
+
+ table.integer('scene_id', 16)
+ .notNullable()
+ .references('id')
+ .inTable('releases');
+
+ table.unique(['movie_id', 'scene_id']);
+
+ table.datetime('created_at')
+ .defaultTo(knex.fn.now());
}))
.then(() => knex.schema.createTable('releases_directors', (table) => {
table.integer('release_id', 16)
@@ -526,6 +541,14 @@ exports.up = knex => Promise.resolve()
SELECT NOT EXISTS(SELECT true FROM batches WHERE batches.id = release.created_batch_id + 1 LIMIT 1);
$$ LANGUAGE sql STABLE;
+ -- Actors of a movie, derived from the actors of its scenes.
+ -- Inner join on purpose: a scene without actors must not produce a (movie_id, NULL) row,
+ -- and scene_id already is the release ID, so no detour through the releases table is needed.
+ CREATE VIEW movie_actors AS
+ SELECT releases_movies.movie_id, releases_actors.actor_id FROM releases_movies
+ INNER JOIN releases_actors ON releases_actors.release_id = releases_movies.scene_id
+ GROUP BY movie_id, actor_id;
+
+ COMMENT ON VIEW movie_actors IS E'@foreignKey (movie_id) references releases (id)\n@foreignKey (actor_id) references actors (id)';
+
COMMENT ON COLUMN actors.height IS E'@omit read,update,create,delete,all,many';
COMMENT ON COLUMN actors.weight IS E'@omit read,update,create,delete,all,many';
`));
@@ -534,9 +557,10 @@ exports.down = knex => knex.raw(`
DROP FUNCTION IF EXISTS releases_by_tag_slugs;
DROP FUNCTION IF EXISTS search_sites;
- DROP VIEW IF EXISTS releases_actors_view;
+ DROP VIEW IF EXISTS movie_actors;
DROP TABLE IF EXISTS releases_actors CASCADE;
+ DROP TABLE IF EXISTS releases_movies CASCADE;
DROP TABLE IF EXISTS releases_directors CASCADE;
DROP TABLE IF EXISTS releases_posters CASCADE;
DROP TABLE IF EXISTS releases_photos CASCADE;
diff --git a/src/app.js b/src/app.js
index 82b5bf9c..31b754eb 100644
--- a/src/app.js
+++ b/src/app.js
@@ -5,7 +5,7 @@ const knex = require('./knex');
const initServer = require('./web/server');
const scrapeSites = require('./scrape-sites');
-const { scrapeReleases, deepFetchReleases } = require('./scrape-releases');
+const { scrapeScenes, scrapeMovies, deepFetchReleases } = require('./scrape-releases');
const { storeReleases } = require('./releases');
const { scrapeActors, scrapeBasicActors } = require('./actors');
@@ -15,11 +15,11 @@ if (process.env.NODE_ENV === 'development') {
async function init() {
if (argv.scene) {
- await scrapeReleases(argv.scene, null, 'scene');
+ await scrapeScenes(argv.scene);
}
if (argv.movie) {
- await scrapeReleases(argv.movie, null, 'movie');
+ await scrapeMovies(argv.movie);
}
if (argv.scrape || argv.networks || argv.sites) {
diff --git a/src/argv.js b/src/argv.js
index 294450ac..94578fb1 100644
--- a/src/argv.js
+++ b/src/argv.js
@@ -29,12 +29,17 @@ const { argv } = yargs
type: 'array',
alias: 'actor',
})
- .option('with-releases', {
- describe: 'Fetch all releases for an actor',
+ .option('with-scenes', {
+ describe: 'Fetch all scenes for an actor or movie',
type: 'boolean',
- alias: 'with-scenes',
+ alias: 'with-releases',
default: false,
})
+ .option('with-movies', {
+ describe: 'Fetch movies for scenes',
+ type: 'boolean',
+ default: true,
+ })
.option('with-profiles', {
describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean',
@@ -44,12 +49,12 @@ const { argv } = yargs
.option('scene', {
describe: 'Scrape scene info from URL',
type: 'array',
- alias: 'release',
+ alias: 'scenes',
})
.option('movie', {
describe: 'Scrape movie info from URL',
type: 'array',
- alias: 'dvd',
+ alias: 'movies',
})
.option('sources', {
describe: 'Use these scrapers for actor data',
@@ -121,11 +126,13 @@ const { argv } = yargs
describe: 'Include release posters',
type: 'boolean',
default: true,
+ alias: 'poster',
})
.option('covers', {
describe: 'Include release covers',
type: 'boolean',
default: true,
+ alias: 'cover',
})
.option('photos', {
describe: 'Include release photos',
@@ -136,11 +143,13 @@ const { argv } = yargs
describe: 'Include release trailers',
type: 'boolean',
default: true,
+ alias: 'trailer',
})
.option('teasers', {
describe: 'Include release teasers',
type: 'boolean',
default: true,
+ alias: 'teaser',
})
.option('avatars', {
describe: 'Include actor avatars',
diff --git a/src/releases.js b/src/releases.js
index 93234be3..27636035 100644
--- a/src/releases.js
+++ b/src/releases.js
@@ -214,7 +214,6 @@ async function curateReleaseEntry(release, batchId, existingRelease) {
studio_id: release.studio ? release.studio.id : null,
shoot_id: release.shootId || null,
entry_id: release.entryId || null,
- parent_id: release.parentId,
type: release.type,
url: release.url,
title: release.title,
@@ -327,21 +326,6 @@ function accumulateActors(releases) {
}, {});
}
-function accumulateMovies(releases) {
- return releases.reduce((acc, release) => {
- if (release.movie) {
- if (acc[release.movie]) {
- acc[release.movie] = acc[release.movie].concat(release.id);
- return acc;
- }
-
- acc[release.movie] = [release.id];
- }
-
- return acc;
- }, {});
-}
-
async function storeReleaseAssets(releases) {
if (!argv.media) {
return;
@@ -501,7 +485,6 @@ async function storeReleases(releases) {
logger.info(`Stored ${storedReleases.length} new releases`);
const actors = accumulateActors(storedReleases);
- const movies = accumulateMovies(storedReleases);
await associateActors(actors, storedReleases);
@@ -518,7 +501,6 @@ async function storeReleases(releases) {
return {
releases: storedReleases,
actors,
- movies,
};
}
diff --git a/src/scrape-releases.js b/src/scrape-releases.js
index 2c377e18..82de59bb 100644
--- a/src/scrape-releases.js
+++ b/src/scrape-releases.js
@@ -5,6 +5,8 @@ const Promise = require('bluebird');
const logger = require('./logger')(__filename);
const argv = require('./argv');
+const include = require('./utils/argv-include')(argv);
+const knex = require('./knex');
const scrapers = require('./scrapers/scrapers');
const { findSiteByUrl } = require('./sites');
const { findNetworkByUrl } = require('./networks');
@@ -33,7 +35,7 @@ async function findSite(url, release) {
return null;
}
-async function scrapeRelease(source, basicRelease = null, type = 'scene', preflight) {
+async function scrapeRelease(source, basicRelease = null, type = 'scene', beforeFetchLatest) {
// profile scraper may return either URLs or pre-scraped scenes
const sourceIsUrlOrEmpty = typeof source === 'string' || source === undefined;
const url = sourceIsUrlOrEmpty ? source : source?.url;
@@ -72,8 +74,8 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
}
const scrapedRelease = type === 'scene'
- ? await scraper.fetchScene(url, site, release, preflight)
- : await scraper.fetchMovie(url, site, release, preflight);
+ ? await scraper.fetchScene(url, site, release, beforeFetchLatest, include)
+ : await scraper.fetchMovie(url, site, release, beforeFetchLatest, include);
return {
...release,
@@ -85,8 +87,42 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
};
}
-async function scrapeReleases(sources, release = null, type = 'scene', preflight = null) {
- const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, release, type, preflight), {
+/**
+ * Scrape and store the movies referenced by the given stored scenes, then link every
+ * stored movie to its scenes in the `releases_movies` table.
+ *
+ * @param {Object[]} releases - Stored scenes; `release.movie` is either a movie URL or an object carrying a `url`.
+ * @returns {Promise<Object[]>} The scraped movies; empty when `argv.withMovies` is off or there is nothing to scrape.
+ */
+async function accumulateMovies(releases) {
+	if (!argv.withMovies) return [];
+
+	// group scenes by movie URL, so each movie is scraped once no matter how many scenes reference it
+	const moviesByUrl = releases.reduce((acc, release) => {
+		if (!release.movie) return acc;
+
+		const movie = release.movie.url ? release.movie : { url: release.movie };
+
+		if (!acc[movie.url]) {
+			acc[movie.url] = {
+				...movie,
+				type: 'movie',
+				sceneIds: [],
+			};
+		}
+
+		acc[movie.url].sceneIds.push(release.id);
+
+		return acc;
+	}, {});
+
+	const baseMovies = Object.values(moviesByUrl);
+
+	// no scene references a movie; skip scraping, storing and inserting altogether
+	if (baseMovies.length === 0) return [];
+
+	// same pattern as scrapeReleases: bounded concurrency, falsy scrape results dropped before storing
+	const movies = await Promise.map(baseMovies, async movie => scrapeRelease(movie, null, 'movie'), {
+		concurrency: 5,
+	}).filter(Boolean);
+
+	if (movies.length === 0) return [];
+
+	const { releases: storedMovies } = await storeReleases(movies);
+
+	const movieAssociations = storedMovies.flatMap(movie => movie.sceneIds.map(sceneId => ({
+		movie_id: movie.id,
+		scene_id: sceneId,
+	})));
+
+	// only hit the database when there is at least one movie-scene pair to record
+	if (movieAssociations.length > 0) {
+		await knex('releases_movies').insert(movieAssociations);
+	}
+
+	return movies;
+}
+
+async function scrapeReleases(sources, type = 'scene') {
+ const scrapedReleases = await Promise.map(sources, async source => scrapeRelease(source, null, type), {
concurrency: 5,
}).filter(Boolean);
@@ -97,26 +133,26 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
}
if (argv.save) {
- /*
- const movie = scrapedRelease.movie
- ? await scrapeRelease(scrapedRelease.movie, null, false, 'movie')
- : null;
-
- if (movie) {
- const { releases: [storedMovie] } = await storeReleases([movie]);
- curatedRelease.parentId = storedMovie.id;
- }
- */
-
const { releases: storedReleases } = await storeReleases(curatedReleases);
- const movieScenes = storedReleases.map(movie => movie.scenes).flat();
- // console.log(movieScenes);
+ await accumulateMovies(storedReleases);
if (storedReleases) {
logger.info(storedReleases.map(storedRelease => `\nhttp://${config.web.host}:${config.web.port}/scene/${storedRelease.id}/${storedRelease.slug}`).join(''));
}
+
+ return storedReleases;
}
+
+ return curatedReleases;
+}
+
+/**
+ * Scrape the given sources as scenes; thin wrapper that calls scrapeReleases with type 'scene'.
+ *
+ * @param {Array} sources - Passed through to scrapeReleases unchanged.
+ * @returns {Promise} Whatever scrapeReleases resolves to.
+ */
+async function scrapeScenes(sources) {
+ return scrapeReleases(sources, 'scene');
+}
+
+/**
+ * Scrape the given sources as movies; thin wrapper that calls scrapeReleases with type 'movie'.
+ *
+ * @param {Array} sources - Passed through to scrapeReleases unchanged.
+ * @returns {Promise} Whatever scrapeReleases resolves to.
+ */
+async function scrapeMovies(sources) {
+ return scrapeReleases(sources, 'movie');
 }
async function deepFetchReleases(baseReleases, beforeFetchLatest) {
@@ -151,13 +187,13 @@ async function deepFetchReleases(baseReleases, beforeFetchLatest) {
concurrency: 2,
});
- // console.log(deepReleases);
-
return deepReleases;
}
module.exports = {
deepFetchReleases,
+ scrapeMovies,
scrapeRelease,
scrapeReleases,
+ scrapeScenes,
};
diff --git a/src/scrape-sites.js b/src/scrape-sites.js
index 3a55e1b8..e980219e 100644
--- a/src/scrape-sites.js
+++ b/src/scrape-sites.js
@@ -4,6 +4,7 @@ const Promise = require('bluebird');
const moment = require('moment');
const argv = require('./argv');
+const include = require('./utils/argv-include')(argv);
const logger = require('./logger')(__filename);
const knex = require('./knex');
const { fetchIncludedSites } = require('./sites');
@@ -42,7 +43,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel
return [];
}
- const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases);
+ const latestReleases = await scraper.fetchLatest(site, page, beforeFetchLatest, accSiteReleases, include);
if (!Array.isArray(latestReleases)) {
logger.warn(`Scraper returned ${latestReleases || 'null'} when fetching latest from '${site.name}' on '${site.network.name}'`);
@@ -89,7 +90,7 @@ async function scrapeUniqueReleases(scraper, site, beforeFetchLatest, accSiteRel
async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
if (argv.upcoming && scraper.fetchUpcoming) {
- const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest);
+ const upcomingReleases = await scraper.fetchUpcoming(site, 1, beforeFetchLatest, include);
return upcomingReleases
? upcomingReleases.map(release => ({ ...release, site, upcoming: true }))
diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js
index 2799123f..1e092dc3 100644
--- a/src/scrapers/julesjordan.js
+++ b/src/scrapers/julesjordan.js
@@ -135,10 +135,10 @@ function getEntryId(html) {
}
function scrapeAll(scenes, site) {
- return scenes.map(({ qu }) => {
+ return scenes.map(({ el, qu }) => {
const release = {};
- release.entryId = qu.el.dataset.setid || qu.q('.rating_box')?.dataset.id;
+ release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;
release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
@@ -160,7 +160,7 @@ function scrapeAll(scenes, site) {
} : null;
}).filter(Boolean);
- const teaserScript = qu.content('script');
+ const teaserScript = qu.html('script');
if (teaserScript) {
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
if (src) release.teaser = { src };
@@ -220,17 +220,19 @@ function scrapeUpcoming(html, site) {
});
}
-async function scrapeScene({ qu }, url, site) {
+async function scrapeScene({ html, qu }, url, site, include) {
const release = { url, site };
- release.entryId = getEntryId(qu.html);
+ release.entryId = getEntryId(html);
release.title = qu.q('.title_bar_hilite', true);
release.description = qu.q('.update_description', true);
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
- release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
- const posterPath = qu.html.match(/useimage = "(.*)"/)?.[1];
+ release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
+ release.tags = qu.all('.update_tags a', true);
+
+ const posterPath = html.match(/useimage = "(.*)"/)?.[1];
if (posterPath) {
const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
@@ -243,8 +245,8 @@ async function scrapeScene({ qu }, url, site) {
}
}
- if (site.slug !== 'manuelferrara') {
- const trailerLines = qu.html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));
+ if (include.trailer && site.slug !== 'manuelferrara') {
+ const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));
if (trailerLines.length) {
release.trailer = trailerLines.map((trailerLine) => {
@@ -259,8 +261,7 @@ async function scrapeScene({ qu }, url, site) {
}
}
- release.photos = await getPhotos(release.entryId, site);
- release.tags = qu.all('.update_tags a', true);
+ if (include.photos) release.photos = await getPhotos(release.entryId, site);
if (qu.exists('.update_dvds a')) {
release.movie = {
@@ -275,27 +276,27 @@ async function scrapeScene({ qu }, url, site) {
return release;
}
-function scrapeMovie({ el, q, qus }, url, site) {
+/**
+ * Build a movie release from a parsed movie page.
+ *
+ * The scenes listed on the page are scraped with scrapeAll, tagged with the movie and
+ * sorted by ascending date; the movie takes the date of the first (earliest) scene.
+ *
+ * @param {Object} item - Page context; `el` is handed to ctxa to locate the scene entries, `qu` provides the query helpers.
+ * @param {string} url - Movie page URL, copied onto the result.
+ * @param {Object} site - Site the movie belongs to; copied onto the result and passed to scrapeAll.
+ * @returns {Object} The movie; `scenes` is only included when scrapeAll returned a list.
+ */
+function scrapeMovie({ el, qu }, url, site) {
const movie = { url, site };
- movie.entryId = q('.dvd_details_overview .rating_box').dataset.id;
- movie.title = q('.title_bar span', true);
- movie.covers = qus('#dvd-cover-flip > a');
- movie.channel = q('.update_date a', true);
+ movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id;
+ movie.title = qu.q('.title_bar span', true);
+ movie.covers = qu.urls('#dvd-cover-flip > a');
+ movie.channel = qu.q('.update_date a', true);
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
- const sceneQs = ctxa(el, '.dvd_details');
- const scenes = scrapeAll(sceneQs, site);
+ const sceneQus = ctxa(el, '.dvd_details');
+ const scenes = scrapeAll(sceneQus, site);
const curatedScenes = scenes
- .map(scene => ({ ...scene, movie }))
+ ?.map(scene => ({ ...scene, movie }))
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
- movie.date = curatedScenes[0].date;
+ // guard the first element as well: an empty scene list must leave the date undefined instead of throwing
+ movie.date = curatedScenes?.[0]?.date;
return {
...movie,
- scenes: curatedScenes,
+ ...(curatedScenes && { scenes: curatedScenes }),
};
}
@@ -358,10 +359,10 @@ async function fetchUpcoming(site) {
return res.statusCode;
}
-async function fetchScene(url, site) {
+/**
+ * Fetch a scene page and scrape it.
+ *
+ * baseRelease and preflight are not used by this function; they keep `include` in the
+ * fifth position, matching the fetchScene(url, site, release, beforeFetchLatest, include)
+ * call made by scrapeRelease.
+ *
+ * @param {string} url - Scene page URL, requested through get.
+ * @param {Object} site - Passed on to scrapeScene.
+ * @param {Object} baseRelease - Unused here.
+ * @param {*} preflight - Unused here.
+ * @param {Object} include - Passed on to scrapeScene, which reads `include.trailer` and `include.photos`.
+ * @returns {Promise} The scraped scene when the response is ok, otherwise `res.status`.
+ */
+async function fetchScene(url, site, baseRelease, preflight, include) {
const res = await get(url);
- return res.ok ? scrapeScene(res.item, url, site) : res.status;
+ return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
async function fetchMovie(url, site) {
diff --git a/src/utils/qu.js b/src/utils/qu.js
index d9563092..afb75f09 100644
--- a/src/utils/qu.js
+++ b/src/utils/qu.js
@@ -70,7 +70,7 @@ function exists(context, selector) {
return !!q(context, selector);
}
-function content(context, selector) {
+function html(context, selector) {
const el = q(context, selector, null, true);
return el && el.innerHTML;
@@ -176,8 +176,8 @@ const legacyFuncs = {
qall: all,
qd: date,
qdate: date,
- qh: content,
- qhtml: content,
+ qh: html,
+ qhtml: html,
qi: image,
qimage: image,
qimages: images,
@@ -207,8 +207,7 @@ const legacyFuncs = {
const quFuncs = {
all,
- body: content,
- content,
+ html,
date,
dur: duration,
duration,
@@ -217,7 +216,6 @@ const quFuncs = {
images,
img: image,
imgs: images,
- inner: content,
length: duration,
meta,
poster,