Fixed Whale Member posters and photos.

This commit is contained in:
ThePendulum 2020-05-20 02:23:45 +02:00
parent b6691e1991
commit b1b7cd6d50
4 changed files with 26 additions and 11 deletions

View File

@ -1,5 +1,7 @@
'use strict';
const util = require('util');
const argv = require('./argv');
const initServer = require('./web/server');
@ -39,6 +41,11 @@ async function init() {
const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean);
const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]);
if (argv.inspect) {
console.log(util.inspect(deepScenes));
console.log(util.inspect(deepMovies));
}
if (argv.save) {
await storeReleases([
...(deepScenes || []),

View File

@ -14,7 +14,8 @@ function urlToSiteSlug(url) {
try {
const slug = new URL(url)
.hostname
.match(/([\w-]+)\.\w+$/)?.[1];
.match(/([\w-]+)\.\w+$/)?.[1]
.replace(/[-_]+/g, '');
return slug;
} catch (error) {
@ -90,7 +91,6 @@ function toBaseReleases(baseReleasesOrUrls) {
async function scrapeRelease(baseRelease, sites, type = 'scene') {
const site = baseRelease.site || sites[urlToSiteSlug(baseRelease.url)];
const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
if (!site) {
logger.warn(`No site available for ${baseRelease.url}`);
@ -104,6 +104,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
};
}
const siteWithFallbackNetwork = site.isNetwork ? { ...site, network: site } : site; // make site.network available, even when site is network fallback
const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
if (!scraper) {
@ -131,7 +132,7 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
};
if (!mergedRelease.entryId) {
throw new Error('No entry ID supplied');
throw Object.assign(new Error('No entry ID supplied'), { code: 'NO_ENTRY_ID' });
}
if (scrapedRelease && baseRelease?.tags) {
@ -142,6 +143,11 @@ async function scrapeRelease(baseRelease, sites, type = 'scene') {
return mergedRelease;
} catch (error) {
logger.error(`Deep scrape failed for ${baseRelease.url}: ${error.message}`);
if (error.code === 'NO_ENTRY_ID') {
return null;
}
return baseRelease;
}
}
@ -160,7 +166,7 @@ async function fetchReleases(baseReleasesOrUrls, type = 'scene') {
const deepReleases = await scrapeReleases(baseReleases, sites, type);
return deepReleases;
return deepReleases.filter(Boolean);
}
async function fetchScenes(baseReleasesOrUrls) {

View File

@ -67,7 +67,7 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
row_to_json(sites) as site,
row_to_json(networks) as network,
row_to_json(site_networks) as site_network,
json_agg(DISTINCT actors) as actors
COALESCE(json_agg(DISTINCT actors) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors
`))
.where('type', type)
.leftJoin('sites', 'sites.id', 'releases.site_id')
@ -84,7 +84,7 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
queryBuilder
.select(knex.raw(`
row_to_json(posters) as poster,
json_agg(DISTINCT photos) as photos
COALESCE(json_agg(DISTINCT photos) FILTER (WHERE photos.id IS NOT NULL), '[]') as photos
`))
.leftJoin('releases_posters', 'releases_posters.release_id', 'releases.id')
.leftJoin('media as posters', 'posters.id', 'releases_posters.media_id')

View File

@ -19,8 +19,10 @@ function scrapeLatest(html, site) {
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
release.poster = `https:${scene.querySelector('.single-image').src}`;
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
const poster = scene.querySelector('.single-image').dataset.src;
release.poster = /^http/.test(poster) ? poster : `https:${poster}`;
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => (/^http/.test(el.dataset.src) ? el.dataset.src : `https:${el.dataset.src}`));
const trailerEl = scene.querySelector('source');
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
@ -49,13 +51,13 @@ function scrapeScene(html, site, url) {
release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
}
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => (/^http/.test(el.src) ? el.src : `https:${el.src}`));
const posterEl = scene.querySelector('#no-player-image');
const videoEl = scene.querySelector('video');
if (posterEl) release.poster = `https:${posterEl.src}`;
else if (videoEl) release.poster = `https:${videoEl.poster}`;
if (posterEl) release.poster = /^http/.test(posterEl.src) ? posterEl.src : `https:${posterEl.src}`;
else if (videoEl) release.poster = /^http/.test(videoEl.poster) ? videoEl.poster : `https:${videoEl.poster}`;
const trailerEl = scene.querySelector('#t2019-video source');
if (trailerEl) release.trailer = { src: trailerEl.src };