Changed the q get and geta APIs to include status, and refactored the scrapers. Fixed movie tiles and the release page to show both the front and back covers. Removed icons from the main navigation. The Jules Jordan movie scraper now returns scenes.

This commit is contained in:
2020-03-08 04:23:10 +01:00
parent b45bb0cfbc
commit acad99bdfe
15 changed files with 222 additions and 119 deletions

View File

@@ -8,6 +8,7 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
const logger = require('../logger')(__filename);
const { get, geta, ctxa } = require('../utils/q');
const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
@@ -117,41 +118,33 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
return getPhotosLegacy(entryId, site, 'highres', 1);
}
function scrapeLatest(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.update_details').toArray();
return scenesElements.map((element) => {
function scrapeAll(scenes, site) {
return scenes.map(({ el, q, qa, qh, qu, qd, qi, qis }) => {
const release = {};
const sceneLinkElement = $(element).find('a[title], .update_title a');
release.url = sceneLinkElement.attr('href');
release.title = sceneLinkElement.text()?.trim() || sceneLinkElement.attr('alt')?.trim();
release.entryId = el.dataset.setid || q('.rating_box')?.dataset.id;
release.entryId = $(element).attr('data-setid');
release.url = qu('.update_title, .dvd_info > a, a ~ a');
release.title = q('.update_title, .dvd_info > a, a ~ a', true);
release.date = qd('.update_date', 'MM/DD/YYYY');
release.date = moment
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
.toDate();
release.actors = qa('.update_models a', true);
release.actors = $(element).find('.update_models a')
.map((actorIndex, actorElement) => $(actorElement).text())
.toArray();
const dvdPhotos = qis('.dvd_preview_thumb');
const photoCount = Number(q('a img.thumbs', 'cnt')) || 1;
const photoElement = $(element).find('a img.thumbs');
const photoCount = Number(photoElement.attr('cnt')) || 1;
[release.poster, ...release.photos] = Array.from({ length: photoCount }, (value, index) => {
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`) || photoElement.attr('src');
[release.poster, ...release.photos] = dvdPhotos.length
? dvdPhotos
: Array.from({ length: photoCount }).map((value, index) => {
const src = qi('a img.thumbs', `src${index}_1x`) || qi('a img.thumbs', `src${index}`) || qi('a img.thumbs');
if (!src) return null;
return src ? {
src: /^http/.test(src) ? src : `${site.url}${src}`,
referer: site.url,
} : null;
}).filter(Boolean);
return {
src: /^http/.test(src) ? src : `${site.url}${src}`,
referer: site.url,
};
}).filter(photoUrl => photoUrl);
const teaserScript = $(element).find('script').html();
const teaserScript = qh('script');
if (teaserScript) {
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
if (src) release.teaser = { src };
@@ -294,18 +287,28 @@ async function scrapeScene(html, url, site) {
return release;
}
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost, so the
// pre-change and post-change versions of scrapeMovie are interleaved line by line below.
// Presumably pre-change: takes the raw HTML string and parses it with JSDOM.
function scrapeMovie(html, url, site) {
const { document } = new JSDOM(html).window;
// Presumably post-change: takes a query context exposing el, q and qus instead of raw HTML.
function scrapeMovie({ el, q, qus }, url, site) {
// The movie record starts out with only the url and site it was fetched for.
const movie = { url, site };
// Presumably pre-change: fields read directly from the JSDOM document.
movie.entryId = document.querySelector('.dvd_details_overview .rating_box').dataset.id;
movie.title = document.querySelector('.title_bar span').textContent;
movie.covers = Array.from(document.querySelectorAll('#dvd-cover-flip > a'), el => el.href);
movie.channel = document.querySelector('.update_date a').textContent;
// Uses the time of scraping as the movie date.
movie.date = new Date();
movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
// Presumably post-change: the same fields read through the q / qus helpers with the same selectors.
movie.entryId = q('.dvd_details_overview .rating_box').dataset.id;
movie.title = q('.title_bar span', true);
movie.covers = qus('#dvd-cover-flip > a');
movie.channel = q('.update_date a', true);
// Presumably the pre-change return: the movie without any scenes attached.
return movie;
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
// Builds one query context per '.dvd_details' element and scrapes each one as a scene.
const sceneQs = ctxa(el, '.dvd_details');
const scenes = scrapeAll(sceneQs, site);
// Attaches the movie to every scene and orders the scenes by ascending date.
const curatedScenes = scenes
.map(scene => ({ ...scene, movie }))
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
// The movie date is taken from the first scene after sorting, i.e. the earliest one.
// NOTE(review): throws a TypeError when no scenes were found (curatedScenes[0] is undefined).
movie.date = curatedScenes[0].date;
// Returns a copy of the movie fields with the sorted scenes added.
return {
...movie,
scenes: curatedScenes,
};
}
function scrapeProfile(html, url, actorName) {
@@ -344,14 +347,14 @@ function scrapeProfile(html, url, actorName) {
}
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost, so the
// pre-change and post-change versions of each statement are interleaved below.
// Fetches one page of the site's latest updates and scrapes it; page defaults to 1.
async function fetchLatest(site, page = 1) {
// Presumably pre-change: single-line form of the URL selection, followed by a direct bhttp request.
const url = site.parameters?.latest ? util.format(site.parameters.latest, page) : `${site.url}/trial/categories/movies_${page}_d.html`;
const res = await bhttp.get(url);
// Presumably post-change: the same URL selection split over several lines. A site-specific
// 'latest' template is formatted with the page number when present; otherwise the default
// trial listing path under site.url is used.
const url = site.parameters?.latest
? util.format(site.parameters.latest, page)
: `${site.url}/trial/categories/movies_${page}_d.html`;
// Presumably pre-change: scrape the body only on HTTP 200.
if (res.statusCode === 200) {
return scrapeLatest(res.body.toString(), site);
}
// const res = await bhttp.get(url);
// Presumably post-change: geta (from ../utils/q) is called with the URL and the '.update_details' selector.
const res = await geta(url, '.update_details');
// Presumably the pre-change fallback: return the HTTP status code when the request was not a 200.
return res.statusCode;
// Presumably post-change: scrape res.items when res.ok is truthy, otherwise return res.status.
return res.ok ? scrapeAll(res.items, site) : res.status;
}
async function fetchUpcoming(site) {
@@ -374,9 +377,9 @@ async function fetchScene(url, site) {
}
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost, so the
// pre-change and post-change versions of each statement are interleaved below.
// Fetches a movie page by URL and passes the result to scrapeMovie.
async function fetchMovie(url, site) {
// Presumably pre-change: the request is made directly through bhttp.
const res = await bhttp.get(url);
// Presumably post-change: the request is made through get from ../utils/q.
const res = await get(url);
// Presumably pre-change: hands the response body to scrapeMovie as a string, with no status check.
return scrapeMovie(res.body.toString(), url, site);
// Presumably post-change: scrapes res.item when res.ok is truthy, otherwise returns res.status.
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
async function fetchProfile(actorName) {