forked from DebaucheryLibrarian/traxxx
Changed the get and geta APIs of the q utility to include the response status, and refactored the scrapers. Now showing both the front and back cover on movie tiles and the release page (fix). Removed icons from the main navigation. The Jules Jordan movie scraper now returns scenes.
This commit is contained in:
@@ -8,6 +8,7 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const { get, geta, ctxa } = require('../utils/q');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
@@ -117,41 +118,33 @@ async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||
return getPhotosLegacy(entryId, site, 'highres', 1);
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.update_details').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ el, q, qa, qh, qu, qd, qi, qis }) => {
|
||||
const release = {};
|
||||
|
||||
const sceneLinkElement = $(element).find('a[title], .update_title a');
|
||||
release.url = sceneLinkElement.attr('href');
|
||||
release.title = sceneLinkElement.text()?.trim() || sceneLinkElement.attr('alt')?.trim();
|
||||
release.entryId = el.dataset.setid || q('.rating_box')?.dataset.id;
|
||||
|
||||
release.entryId = $(element).attr('data-setid');
|
||||
release.url = qu('.update_title, .dvd_info > a, a ~ a');
|
||||
release.title = q('.update_title, .dvd_info > a, a ~ a', true);
|
||||
release.date = qd('.update_date', 'MM/DD/YYYY');
|
||||
|
||||
release.date = moment
|
||||
.utc($(element).find('.update_date').text(), 'MM/DD/YYYY')
|
||||
.toDate();
|
||||
release.actors = qa('.update_models a', true);
|
||||
|
||||
release.actors = $(element).find('.update_models a')
|
||||
.map((actorIndex, actorElement) => $(actorElement).text())
|
||||
.toArray();
|
||||
const dvdPhotos = qis('.dvd_preview_thumb');
|
||||
const photoCount = Number(q('a img.thumbs', 'cnt')) || 1;
|
||||
|
||||
const photoElement = $(element).find('a img.thumbs');
|
||||
const photoCount = Number(photoElement.attr('cnt')) || 1;
|
||||
[release.poster, ...release.photos] = Array.from({ length: photoCount }, (value, index) => {
|
||||
const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`) || photoElement.attr('src');
|
||||
[release.poster, ...release.photos] = dvdPhotos.length
|
||||
? dvdPhotos
|
||||
: Array.from({ length: photoCount }).map((value, index) => {
|
||||
const src = qi('a img.thumbs', `src${index}_1x`) || qi('a img.thumbs', `src${index}`) || qi('a img.thumbs');
|
||||
|
||||
if (!src) return null;
|
||||
return src ? {
|
||||
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
||||
referer: site.url,
|
||||
} : null;
|
||||
}).filter(Boolean);
|
||||
|
||||
return {
|
||||
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
||||
referer: site.url,
|
||||
};
|
||||
}).filter(photoUrl => photoUrl);
|
||||
|
||||
const teaserScript = $(element).find('script').html();
|
||||
const teaserScript = qh('script');
|
||||
if (teaserScript) {
|
||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||
if (src) release.teaser = { src };
|
||||
@@ -294,18 +287,28 @@ async function scrapeScene(html, url, site) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeMovie(html, url, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
function scrapeMovie({ el, q, qus }, url, site) {
|
||||
const movie = { url, site };
|
||||
|
||||
movie.entryId = document.querySelector('.dvd_details_overview .rating_box').dataset.id;
|
||||
movie.title = document.querySelector('.title_bar span').textContent;
|
||||
movie.covers = Array.from(document.querySelectorAll('#dvd-cover-flip > a'), el => el.href);
|
||||
movie.channel = document.querySelector('.update_date a').textContent;
|
||||
movie.date = new Date();
|
||||
movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
||||
movie.entryId = q('.dvd_details_overview .rating_box').dataset.id;
|
||||
movie.title = q('.title_bar span', true);
|
||||
movie.covers = qus('#dvd-cover-flip > a');
|
||||
movie.channel = q('.update_date a', true);
|
||||
|
||||
return movie;
|
||||
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
||||
const sceneQs = ctxa(el, '.dvd_details');
|
||||
const scenes = scrapeAll(sceneQs, site);
|
||||
|
||||
const curatedScenes = scenes
|
||||
.map(scene => ({ ...scene, movie }))
|
||||
.sort((sceneA, sceneB) => sceneA.date - sceneB.date);
|
||||
|
||||
movie.date = curatedScenes[0].date;
|
||||
|
||||
return {
|
||||
...movie,
|
||||
scenes: curatedScenes,
|
||||
};
|
||||
}
|
||||
|
||||
function scrapeProfile(html, url, actorName) {
|
||||
@@ -344,14 +347,14 @@ function scrapeProfile(html, url, actorName) {
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = site.parameters?.latest ? util.format(site.parameters.latest, page) : `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = site.parameters?.latest
|
||||
? util.format(site.parameters.latest, page)
|
||||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
// const res = await bhttp.get(url);
|
||||
const res = await geta(url, '.update_details');
|
||||
|
||||
return res.statusCode;
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
@@ -374,9 +377,9 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchMovie(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return scrapeMovie(res.body.toString(), url, site);
|
||||
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
|
||||
Reference in New Issue
Block a user