Added Manuel Ferrara site to Jules Jordan. Refactored Jules Jordan photo scraper for better compatibility and quality.
This commit is contained in:
@@ -7,6 +7,10 @@ const slugify = require('../utils/slugify');
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
/**
 * Build the URL of a single screenshot image for a scene.
 *
 * @param {Object} item - Screenshot entry; its `screenId` becomes the file name.
 * @param {Object} scene - Scene the screenshot belongs to; `scene.dvd.id` and
 *   `scene.order` are used as path segments.
 * @returns {string} Absolute URL of the `.jpg` screenshot on i.bang.com.
 */
function getScreenUrl(item, scene) {
  // Assemble the path step by step: screenshots root for the DVD, then the
  // per-scene "movie" folder, then the individual screenshot file.
  const dvdScreensRoot = `https://i.bang.com/screenshots/${scene.dvd.id}`;
  const sceneFolder = `${dvdScreensRoot}/movie/${scene.order}`;

  return `${sceneFolder}/${item.screenId}.jpg`;
}
|
||||
|
||||
function encodeId(id) {
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
@@ -53,8 +57,8 @@ function scrapeScene(scene, site) {
|
||||
const photos = defaultPoster ? photoset : photoset.slice(1);
|
||||
const poster = defaultPoster || photoset[0];
|
||||
|
||||
release.poster = `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${poster.screenId}.jpg`;
|
||||
release.photos = photos.map(photo => `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${photo.screenId}.jpg`);
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = photos.map(photo => getScreenUrl(photo, scene));
|
||||
|
||||
release.trailer = {
|
||||
src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
// const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
@@ -32,6 +32,8 @@ function scrapePhotos(html, type) {
|
||||
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1600watermarked/'),
|
||||
src.replace('thumbs/', '1280watermarked/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
@@ -40,8 +42,10 @@ function scrapePhotos(html, type) {
|
||||
return photos;
|
||||
}
|
||||
|
||||
async function getPhotos(entryId, site, page = 1, type = 'highres') {
|
||||
const albumUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
|
||||
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
|
||||
console.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
|
||||
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
@@ -65,12 +69,48 @@ async function getPhotos(entryId, site, page = 1, type = 'highres') {
|
||||
|
||||
if (allPhotos.length === 0 && type === 'highres') {
|
||||
// photos not available, try for screencaps instead
|
||||
return getPhotos(entryId, site, 1, 'caps');
|
||||
return getPhotosLegacy(entryId, site, 'caps', 1);
|
||||
}
|
||||
|
||||
return allPhotos;
|
||||
}
|
||||
|
||||
/**
 * Group the photo URLs found in a gallery page by their quality key.
 *
 * Scans the page line by line for `ptx["<key>"]` assignments and extracts the
 * path running from `/trial` up to and including the first `.jpg` after it.
 *
 * @param {string} html - Raw gallery page markup.
 * @param {Object} site - Site descriptor; `site.url` is prefixed to every path.
 * @returns {Object<string, string[]>} Photo URLs keyed by quality key, in page order.
 */
function extractPhotoSources(html, site) {
  return html.split(/\n/).reduce((acc, line) => {
    // Keys may contain spaces (e.g. "Video Cap Thumbs"), so accept anything up
    // to the closing quote rather than only word characters.
    const keyMatch = line.match(/ptx\["([^"]+)"\]/);

    if (!keyMatch) return acc;

    // Guard against indexOf() returning -1: slicing with a negative start would
    // otherwise produce a junk fragment instead of a path.
    const start = line.indexOf('/trial');
    const end = start === -1 ? -1 : line.indexOf('.jpg', start);

    if (end === -1) return acc;

    const quality = keyMatch[1];

    if (!acc[quality]) acc[quality] = [];

    acc[quality].push(`${site.url}${line.slice(start, end + 4)}`);

    return acc;
  }, {});
}

/**
 * Return the photo list of the first quality key that has any entries.
 *
 * @param {Object<string, string[]>} sources - Photo URLs keyed by quality key.
 * @param {string[]} qualities - Quality keys in order of preference.
 * @returns {string[]|null} The first non-empty list, or null when none is available.
 */
function pickFirstAvailable(sources, qualities) {
  const quality = qualities.find(key => sources[key] && sources[key].length > 0);

  return quality === undefined ? null : sources[quality];
}

/**
 * Fetch the photo URLs for a scene from the site's gallery page.
 *
 * For `highres` galleries the best available resolution is returned
 * (1600, 1280, 1024, then thumbnails). If none is found the screencap gallery
 * is tried, and if that is empty too the legacy scraper is used as a last resort.
 *
 * @param {string|number} entryId - Gallery/scene ID passed as the `id` query parameter.
 * @param {Object} site - Site descriptor; `site.url` is the base URL.
 * @param {string} [type='highres'] - Gallery type, `highres` or `caps`.
 * @param {number} [page=1] - Gallery page passed as the `page` query parameter.
 * @returns {Promise<string[]>} Photo URLs, or whatever the legacy scraper resolves to.
 */
async function getPhotos(entryId, site, type = 'highres', page = 1) {
  const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;

  const res = await bhttp.get(albumUrl);
  const sources = extractPhotoSources(res.body.toString(), site);

  if (type === 'highres') {
    const photos = pickFirstAvailable(sources, ['1600', '1280', '1024', 'Thumbs']);

    if (photos) return photos;

    // no photos available, try for screencaps instead
    return getPhotos(entryId, site, 'caps', 1);
  }

  const caps = pickFirstAvailable(sources, ['jpg', 'Video Cap Thumbs']);

  if (caps) return caps;

  // no screencaps available either, try legacy scraper just in case
  return getPhotosLegacy(entryId, site, 'highres', 1);
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.update_details').toArray();
|
||||
@@ -171,9 +211,24 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
release.title = $('.title_bar_hilite').text().trim();
|
||||
[release.entryId] = $('.suggest_tags a').attr('href').match(/\d+/);
|
||||
release.date = moment
|
||||
.utc($('.update_date').text(), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
|
||||
const dateElement = $('.update_date').text().trim();
|
||||
const dateComment = $('*')
|
||||
.contents()
|
||||
.toArray()
|
||||
.find(({ type, data }) => type === 'comment' && data.match('Date OFF'));
|
||||
|
||||
if (dateElement) {
|
||||
release.date = moment
|
||||
.utc($('.update_date').text(), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
}
|
||||
|
||||
if (dateComment) {
|
||||
release.date = moment
|
||||
.utc(dateComment.nodeValue.match(/\d{2}\/\d{2}\/\d{4}/), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
}
|
||||
|
||||
release.description = $('.update_description').text().trim();
|
||||
|
||||
@@ -190,10 +245,12 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
|
||||
|
||||
release.trailer = {
|
||||
src: trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie')),
|
||||
quality: 720,
|
||||
};
|
||||
if (site.slug !== 'manuelferrara') {
|
||||
release.trailer = {
|
||||
src: trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie')),
|
||||
quality: 720,
|
||||
};
|
||||
}
|
||||
|
||||
release.photos = await getPhotos(release.entryId, site);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user