diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index c6090e27..ba0d8bfb 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -1,44 +1,38 @@ 'use strict'; const util = require('util'); -const Promise = require('bluebird'); const unprint = require('unprint'); const argv = require('../argv'); const { heightToCm } = require('../utils/convert'); const slugify = require('../utils/slugify'); +const tryUrls = require('../utils/try-urls'); -function getEntryId(html) { - const entryId = html.match(/showtagform\((\d+)\)/); - - if (entryId) { - return entryId[1]; +function getEntryId(url) { + if (!url) { + return null; } - const setIdIndex = html.indexOf('setid:"'); + const entryId = new URL(url).pathname.split('/').at(-1).match('(.*?)_vids.html'); - if (setIdIndex) { - return html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)?.[0]; + if (entryId) { + return slugify(entryId[1]); } return null; } -function getEntryIdFromTitle(release) { - // return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page - // return slugify(release.title); - return slugify([release.title, ...(release.actors?.map((actor) => actor.name || actor).toSorted() || [])]); -} - -function scrapeAll(scenes, site, entryIdFromTitle) { - return scenes.map(({ element, query }) => { +function scrapeAll(scenes, site) { + return scenes.map(({ query }) => { const release = {}; - const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text') + const title = query.content('.jj-card-title, .content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text') || query.content('a[title*=" "]'); release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim(); - release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]'); - release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']); + release.url = query.url('.jj-card-thumb, .content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]'); + release.date = query.date('.jj-card-date, .update_date', ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']); + + release.entryId = getEntryId(release.url); release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), @@ -70,17 +64,16 @@ function scrapeAll(scenes, site, entryIdFromTitle) { return null; }).filter(Boolean); - const teaserScript = query.html('script'); + release.teaser = query.video('.jj-card-video', { attribute: 'data-src' }); - if (teaserScript) { - release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); + if (!release.teaser) { + const teaserScript = query.html('script'); + + if (teaserScript) { + release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); + } } - release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release)) - || element.dataset.setid - || query.element('.rating_box')?.dataset.id - || query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1]; - return release; }); } @@ -91,7 +84,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = : `${site.url}/trial/categories/movies_${page}_d.html`; // const res = await http.get(url); - const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' }); + const res = await unprint.get(url, { selectAll: '.scenes-listing-grid .jj-content-card' }); if (res.ok) { return scrapeAll(res.context, site, typeof site.parameters?.entryIdFromTitle === 'boolean' ? site.parameters.entryIdFromTitle : entryIdFromTitle); @@ -100,7 +93,8 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = return res.status; } -function scrapeUpcoming(scenes, channel) { +/* disable until we have entryId solution +function scrapeUpcoming(scenes, _channel) { return scenes.map(({ query, html }) => { const release = {}; @@ -135,6 +129,7 @@ async function fetchUpcoming(site) { return res.status; } +*/ function extractLegacyTrailer(html, context) { const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line)); @@ -194,17 +189,19 @@ function getPhotos(query, release, context) { async function scrapeScene({ html, query }, context) { const release = {}; - release.title = query.content('.title_bar_hilite, .movie_title'); - release.description = query.content('.update_description') || query.text('//div[./span[contains(text(), "Description")]]'); + release.entryId = getEntryId(context.url); - release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']); + release.title = query.content('.scene-title, .title_bar_hilite, .movie_title'); + release.description = query.content('.scene-desc, .update_description') || query.text('//div[./span[contains(text(), "Description")]]'); - release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({ + release.date = query.date(['.meta-item:nth-child(2) .val, .update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']); + + release.actors = query.all('.meta-item .update_models a, .backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); - release.tags = query.contents('.update_tags a, .player-scene-description a[href*="/categories"]'); + release.tags = query.contents('.scene-cats a, .update_tags a, .player-scene-description a[href*="/categories"]'); release.director = release.tags?.find((tag) => ['mike john', 'van styles'].includes(tag?.trim().toLowerCase())); const posterPath = query.poster('#video-player', { forceGetAttribute: true }) // without getAttribute, missing poster is returned as page URL @@ -245,7 +242,7 @@ async function scrapeScene({ html, query }, context) { // #images img selects a list of images that is present on every page; the JJ website removes the ones that failed to load with JS (lol) release.photos = [ ...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [], - ...query.imgs('#images img'), + ...query.imgs('.tp-photos-strip img, #images img'), ...query.imgs('img.update_thumb', { attribute: 'src0_1x' }), ].filter(Boolean).map((source) => Array.from(new Set([ source.replace(/.jpg$/, '-full.jpg'), @@ -270,9 +267,7 @@ async function scrapeScene({ html, query }, context) { release.stars = query.number('.avg_rating'); - release.entryId = context.entity.parameters?.entryIdFromTitle - ? getEntryIdFromTitle(release) - : getEntryId(html); + release.qualities = query.contents('.res-item .res-lbl').map((resolution) => Number(resolution.match(/\d+$/)?.[0])).filter(Boolean); return release; } @@ -296,7 +291,7 @@ function scrapeMovie({ query }, { url }) { scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY'); scene.actors = unprint.query.contents(sceneEl, '.update_models a'); - scene.entryId = getEntryIdFromTitle(scene); + scene.entryId = getEntryId(scene.url); return scene; }); @@ -339,48 +334,34 @@ function scrapeProfile({ query }, url, entity) { verifyType: 'image', })); - profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true); + profile.scenes = scrapeAll(unprint.initAll(query.all('.mbp-scenes-grid .jj-content-card, .grid-item')), entity, true); return profile; } -async function fetchProfile({ name: actorName, url }, entity) { +async function fetchProfile({ name: actorName, url: actorUrl }, entity) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName, '-'); const urls = [ - url, + actorUrl, `${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugA}.html`, `${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugB}.html`, ]; - return urls.reduce(async (chain, profileUrl) => { - const profile = await chain; + const { res, url } = await tryUrls(urls); - if (profile) { - return profile; - } + if (res.ok) { + return scrapeProfile(res.context, url, entity); + } - if (!profileUrl) { - return null; - } - - const res = await unprint.get(profileUrl, { - followRedirects: false, - }); - - if (res.ok) { - return scrapeProfile(res.context, profileUrl, entity); - } - - return null; - }, Promise.resolve()); + return res.status; } module.exports = { fetchLatest, fetchProfile, - fetchUpcoming, + // fetchUpcoming, scrapeScene, scrapeMovie, };