When no poster is available during scraping, the first photo is stored as the poster; removed the client-side fallback. Added a screencap album fallback to the Jules Jordan (JJ) scraper. Simplified JJ page traversal.
This commit is contained in:
parent
5b5d383363
commit
fed2b0be8a
|
@ -22,7 +22,7 @@
|
||||||
<video
|
<video
|
||||||
v-if="release.trailer"
|
v-if="release.trailer"
|
||||||
:src="`/media/${release.trailer.path}`"
|
:src="`/media/${release.trailer.path}`"
|
||||||
:poster="`/media/${(release.poster && release.poster.thumbnail) || (release.photos.length && release.photos[Math.floor(Math.random() * release.photos.length)].path)}`"
|
:poster="`/media/${(release.poster && release.poster.thumbnail)}`"
|
||||||
:alt="release.title"
|
:alt="release.title"
|
||||||
class="item trailer-video"
|
class="item trailer-video"
|
||||||
controls
|
controls
|
||||||
|
@ -47,21 +47,16 @@
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
function photos() {
|
function photos() {
|
||||||
if (this.release.photos.length) {
|
if (this.release.trailer) {
|
||||||
const set = this.release.photos.sort(({ index: indexA }, { index: indexB }) => indexA - indexB);
|
// poster will be on trailer video
|
||||||
|
return this.release.photos;
|
||||||
if (this.release.trailer) {
|
|
||||||
return set;
|
|
||||||
}
|
|
||||||
|
|
||||||
return [this.release.poster].concat(set);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.release.poster && !this.release.trailer) {
|
if (this.release.poster) {
|
||||||
return [this.release.poster];
|
return [this.release.poster].concat(this.release.photos);
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return this.release.photos;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrollBanner(event) {
|
function scrollBanner(event) {
|
||||||
|
|
|
@ -56,13 +56,6 @@
|
||||||
class="thumbnail"
|
class="thumbnail"
|
||||||
>
|
>
|
||||||
|
|
||||||
<img
|
|
||||||
v-else-if="release.photos.length > 0"
|
|
||||||
:src="`/media/${release.photos[0].thumbnail}`"
|
|
||||||
:alt="release.title"
|
|
||||||
class="thumbnail"
|
|
||||||
>
|
|
||||||
|
|
||||||
<div
|
<div
|
||||||
v-else
|
v-else
|
||||||
:title="release.title"
|
:title="release.title"
|
||||||
|
|
|
@ -101,7 +101,6 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
|
||||||
|
|
||||||
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||||
if (Array.isArray(photoUrl)) {
|
if (Array.isArray(photoUrl)) {
|
||||||
// return fetchPhoto(photoUrl[0], index, identifier);
|
|
||||||
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
|
||||||
const photo = await fetchPhoto(url, index, identifier);
|
const photo = await fetchPhoto(url, index, identifier);
|
||||||
|
|
||||||
|
@ -156,7 +155,7 @@ async function savePhotos(files, {
|
||||||
const thumbnail = await createThumbnail(file.photo);
|
const thumbnail = await createThumbnail(file.photo);
|
||||||
|
|
||||||
const filename = naming === 'index'
|
const filename = naming === 'index'
|
||||||
? `${file.role || role}-${index}`
|
? `${file.role || role}-${index + 1}`
|
||||||
: `${timestamp + index}`;
|
: `${timestamp + index}`;
|
||||||
|
|
||||||
const filepath = path.join(domain, subpath, `${filename}.${file.extension}`);
|
const filepath = path.join(domain, subpath, `${filename}.${file.extension}`);
|
||||||
|
@ -214,6 +213,8 @@ async function storePhotos(photos, {
|
||||||
]);
|
]);
|
||||||
|
|
||||||
if (primaryRole && !primaryPhoto) {
|
if (primaryRole && !primaryPhoto) {
|
||||||
|
console.log(`Setting first photo as ${primaryRole} for ${identifier}`);
|
||||||
|
|
||||||
uniquePhotos[0].role = primaryRole;
|
uniquePhotos[0].role = primaryRole;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ async function curateRelease(release) {
|
||||||
target_id: release.id,
|
target_id: release.id,
|
||||||
domain: 'releases',
|
domain: 'releases',
|
||||||
})
|
})
|
||||||
.orderBy('role'),
|
.orderBy(['role', 'index']),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -255,6 +255,7 @@ async function storeReleaseAssets(release, releaseId) {
|
||||||
storePhotos(release.photos, {
|
storePhotos(release.photos, {
|
||||||
targetId: releaseId,
|
targetId: releaseId,
|
||||||
subpath,
|
subpath,
|
||||||
|
primaryRole: release.poster ? null : 'poster',
|
||||||
}, identifier),
|
}, identifier),
|
||||||
release.poster && storePhotos([release.poster], {
|
release.poster && storePhotos([release.poster], {
|
||||||
role: 'poster',
|
role: 'poster',
|
||||||
|
|
|
@ -32,7 +32,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
||||||
const latestReleases = await scraper.fetchLatest(site, page);
|
const latestReleases = await scraper.fetchLatest(site, page);
|
||||||
|
|
||||||
if (latestReleases.length === 0) {
|
if (latestReleases.length === 0) {
|
||||||
return [];
|
return accReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
|
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
|
||||||
|
@ -44,8 +44,6 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
||||||
|
|
||||||
console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);
|
console.log(`\x1b[90m${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases\x1b[0m`);
|
||||||
|
|
||||||
console.log(oldestReleaseOnPage, afterDate, moment(oldestReleaseOnPage).isAfter(afterDate));
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
uniqueReleases.length > 0
|
uniqueReleases.length > 0
|
||||||
&& (oldestReleaseOnPage || page < argv.pages)
|
&& (oldestReleaseOnPage || page < argv.pages)
|
||||||
|
|
|
@ -14,7 +14,7 @@ async function fetchPhotos(url) {
|
||||||
return res.body.toString();
|
return res.body.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapePhotos(html) {
|
function scrapePhotos(html, type) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||||
|
@ -23,6 +23,13 @@ function scrapePhotos(html) {
|
||||||
const src = $(photoElement).attr('src');
|
const src = $(photoElement).attr('src');
|
||||||
|
|
||||||
// high res often available in alternative directories, but not always, provide original as fallback
|
// high res often available in alternative directories, but not always, provide original as fallback
|
||||||
|
if (type === 'caps') {
|
||||||
|
return [
|
||||||
|
src.replace('capthumbs/', 'caps/'),
|
||||||
|
src,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
src.replace('thumbs/', 'photos/'),
|
src.replace('thumbs/', 'photos/'),
|
||||||
src.replace('thumbs/', '1024watermarked/'),
|
src.replace('thumbs/', '1024watermarked/'),
|
||||||
|
@ -33,28 +40,35 @@ function scrapePhotos(html) {
|
||||||
return photos;
|
return photos;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getPhotos(entryId, site, page = 1) {
|
async function getPhotos(entryId, site, page = 1, type = 'highres') {
|
||||||
const albumUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${page}`;
|
const albumUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||||
|
|
||||||
const html = await fetchPhotos(albumUrl);
|
const html = await fetchPhotos(albumUrl);
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
const photos = scrapePhotos(html);
|
// don't add first URL to pages to prevent unnecessary duplicate request
|
||||||
const pagesString = $('.page_totals').text().trim();
|
const photos = scrapePhotos(html, type);
|
||||||
const pages = pagesString.length > 0 ? Number($('.page_totals').text().trim().match(/\d+$/)[0]) : null;
|
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
|
||||||
|
|
||||||
const otherPhotos = pages
|
const otherPhotos = pages
|
||||||
? await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
|
? await Promise.map(pages, async (pageX) => {
|
||||||
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
|
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
|
||||||
const pageHtml = await fetchPhotos(pageUrl);
|
const pageHtml = await fetchPhotos(pageUrl);
|
||||||
|
|
||||||
return scrapePhotos(pageHtml);
|
return scrapePhotos(pageHtml, type);
|
||||||
}, {
|
}, {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
})
|
})
|
||||||
: [];
|
: [];
|
||||||
|
|
||||||
return photos.concat(otherPhotos.flat());
|
const allPhotos = photos.concat(otherPhotos.flat());
|
||||||
|
|
||||||
|
if (allPhotos.length === 0 && type === 'highres') {
|
||||||
|
// photos not available, try for screencaps instead
|
||||||
|
return getPhotos(entryId, site, 1, 'caps');
|
||||||
|
}
|
||||||
|
|
||||||
|
return allPhotos;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeLatest(html, site) {
|
function scrapeLatest(html, site) {
|
||||||
|
@ -153,55 +167,42 @@ function scrapeUpcoming(html, site) {
|
||||||
async function scrapeScene(html, url, site) {
|
async function scrapeScene(html, url, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
const title = $('.title_bar_hilite').text().trim();
|
const release = { url, site };
|
||||||
const entryId = $('.suggest_tags a').attr('href').match(/\d+/)[0];
|
|
||||||
const date = moment
|
release.title = $('.title_bar_hilite').text().trim();
|
||||||
|
[release.entryId] = $('.suggest_tags a').attr('href').match(/\d+/);
|
||||||
|
release.date = moment
|
||||||
.utc($('.update_date').text(), 'MM/DD/YYYY')
|
.utc($('.update_date').text(), 'MM/DD/YYYY')
|
||||||
.toDate();
|
.toDate();
|
||||||
|
|
||||||
const actors = $('.backgroundcolor_info > .update_models a')
|
release.description = $('.update_description').text().trim();
|
||||||
|
|
||||||
|
release.actors = $('.backgroundcolor_info > .update_models a')
|
||||||
.map((_actorIndex, actorElement) => $(actorElement).text())
|
.map((_actorIndex, actorElement) => $(actorElement).text())
|
||||||
.toArray();
|
.toArray();
|
||||||
|
|
||||||
const description = $('.update_description').text().trim();
|
|
||||||
|
|
||||||
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
|
||||||
|
|
||||||
const infoLines = $('script:contains("useimage")')
|
const infoLines = $('script:contains("useimage")')
|
||||||
.html()
|
.html()
|
||||||
.split('\n');
|
.split('\n');
|
||||||
|
|
||||||
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
|
||||||
const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
if (posterPath) release.poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
|
||||||
|
|
||||||
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
||||||
const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
|
|
||||||
|
|
||||||
const photos = await getPhotos(entryId, site);
|
release.trailer = {
|
||||||
|
src: trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie')),
|
||||||
const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
quality: 720,
|
||||||
const movie = $('.update_dvds a').attr('href');
|
|
||||||
|
|
||||||
return {
|
|
||||||
url,
|
|
||||||
entryId,
|
|
||||||
title,
|
|
||||||
date,
|
|
||||||
actors,
|
|
||||||
description,
|
|
||||||
poster,
|
|
||||||
photos,
|
|
||||||
movie,
|
|
||||||
trailer: {
|
|
||||||
src: trailer,
|
|
||||||
quality: 720,
|
|
||||||
},
|
|
||||||
rating: {
|
|
||||||
stars,
|
|
||||||
},
|
|
||||||
tags,
|
|
||||||
site,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
release.photos = await getPhotos(release.entryId, site);
|
||||||
|
|
||||||
|
release.tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
|
release.movie = $('.update_dvds a').attr('href');
|
||||||
|
|
||||||
|
release.stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
|
||||||
|
|
||||||
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeMovie(html, url, site) {
|
function scrapeMovie(html, url, site) {
|
||||||
|
|
Loading…
Reference in New Issue