Fixed Whale Member posters and photos.
This commit is contained in:
parent
b6691e1991
commit
b1b7cd6d50
|
@ -1,5 +1,7 @@
|
|||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
|
||||
const argv = require('./argv');
|
||||
const initServer = require('./web/server');
|
||||
|
||||
|
@ -39,6 +41,11 @@ async function init() {
|
|||
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
|
||||
const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]);
|
||||
|
||||
if (argv.inspect) {
|
||||
console.log(util.inspect(deepScenes));
|
||||
console.log(util.inspect(deepMovies));
|
||||
}
|
||||
|
||||
if (argv.save) {
|
||||
await storeReleases([
|
||||
...(deepScenes || []),
|
||||
|
|
14
src/deep.js
14
src/deep.js
|
@ -14,7 +14,8 @@ function urlToSiteSlug(url) {
|
|||
try {
|
||||
const slug = new URL(url)
|
||||
.hostname
|
||||
.match(/([\w-]+)\.\w+$/)?.[1];
|
||||
.match(/([\w-]+)\.\w+$/)?.[1]
|
||||
.replace(/[-_]+/g, '');
|
||||
|
||||
return slug;
|
||||
} catch (error) {
|
||||
|
@ -90,7 +91,6 @@ function toBaseReleases(baseReleasesOrUrls) {
|
|||
|
||||
async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
||||
const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];
|
||||
const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
|
||||
|
||||
if (!site) {
|
||||
logger.warn(`No site available for ${baseRelease.url}`);
|
||||
|
@ -104,6 +104,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
|||
};
|
||||
}
|
||||
|
||||
const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
|
||||
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
|
||||
|
||||
if (!scraper) {
|
||||
|
@ -131,7 +132,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
|||
};
|
||||
|
||||
if (!mergedRelease.entryId) {
|
||||
throw new Error('No entry ID supplied');
|
||||
throw Object.assign(new Error('No entry ID supplied'), { code: 'NO_ENTRY_ID' });
|
||||
}
|
||||
|
||||
if (scrapedRelease && baseRelease?.tags) {
|
||||
|
@ -142,6 +143,11 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
|
|||
return mergedRelease;
|
||||
} catch (error) {
|
||||
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
|
||||
|
||||
if (error.code === 'NO_ENTRY_ID') {
|
||||
return null;
|
||||
}
|
||||
|
||||
return baseRelease;
|
||||
}
|
||||
}
|
||||
|
@ -160,7 +166,7 @@ async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
|
|||
|
||||
const deepReleases = await scrapeReleases(baseReleases, sites, type);
|
||||
|
||||
return deepReleases;
|
||||
return deepReleases.filter(Boolean);
|
||||
}
|
||||
|
||||
async function fetchScenes(baseReleasesOrUrls) {
|
||||
|
|
|
@ -67,7 +67,7 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
|
|||
row_to_json(sites) as site,
|
||||
row_to_json(networks) as network,
|
||||
row_to_json(site_networks) as site_network,
|
||||
json_agg(DISTINCT actors) as actors
|
||||
COALESCE(json_agg(DISTINCT actors) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors
|
||||
`))
|
||||
.where('type', type)
|
||||
.leftJoin('sites', 'sites.id', 'releases.site_id')
|
||||
|
@ -84,7 +84,7 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
|
|||
queryBuilder
|
||||
.select(knex.raw(`
|
||||
row_to_json(posters) as poster,
|
||||
json_agg(DISTINCT photos) as photos
|
||||
COALESCE(json_agg(DISTINCT photos) FILTER (WHERE photos.id IS NOT NULL), '[]') as photos
|
||||
`))
|
||||
.leftJoin('releases_posters', 'releases_posters.release_id', 'releases.id')
|
||||
.leftJoin('media as posters', 'posters.id', 'releases_posters.media_id')
|
||||
|
|
|
@ -19,8 +19,10 @@ function scrapeLatest(html, site) {
|
|||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
|
||||
|
||||
release.poster = `https:${scene.querySelector('.single-image').src}`;
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
|
||||
const poster = scene.querySelector('.single-image').dataset.src;
|
||||
release.poster = /^http/.test(poster) ? poster : `https:${poster}`;
|
||||
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => (/^http/.test(el.dataset.src) ? el.dataset.src : `https:${el.dataset.src}`));
|
||||
|
||||
const trailerEl = scene.querySelector('source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
|
||||
|
@ -49,13 +51,13 @@ function scrapeScene(html, site, url) {
|
|||
release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
|
||||
}
|
||||
|
||||
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);
|
||||
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => (/^http/.test(el.src) ? el.src : `https:${el.src}`));
|
||||
|
||||
const posterEl = scene.querySelector('#no-player-image');
|
||||
const videoEl = scene.querySelector('video');
|
||||
|
||||
if (posterEl) release.poster = `https:${posterEl.src}`;
|
||||
else if (videoEl) release.poster = `https:${videoEl.poster}`;
|
||||
if (posterEl) release.poster = /^http/.test(posterEl.src) ? posterEl.src : `https:${posterEl.src}`;
|
||||
else if (videoEl) release.poster = /^http/.test(videoEl.poster) ? videoEl.poster : `https:${videoEl.poster}`;
|
||||
|
||||
const trailerEl = scene.querySelector('#t2019-video source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.src };
|
||||
|
|
Loading…
Reference in New Issue