From b93a5715cb10c647768519329fbeb41512f992db Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 1 Dec 2021 00:00:24 +0100 Subject: [PATCH] Updated Jules Jordan scraper for new update page layout. --- src/scrapers/julesjordan.js | 43 ++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index a1132bd6..3790eccc 100644 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -136,14 +136,18 @@ function getEntryId(html) { function scrapeAll(scenes, site, entryIdFromTitle) { return scenes.map(({ el, query }) => { const release = {}; + const title = query.cnt('.content_img div, .dvd_info > a, a ~ a'); - release.url = query.url('.update_title a, .dvd_info > a, a ~ a'); - release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true); + release.title = title.slice(0, title.match(/starring:/i)?.index || Infinity).trim(); + release.url = query.url('.content_img a, .dvd_info > a, a ~ a'); release.date = query.date('.update_date', 'MM/DD/YYYY'); release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id; - release.actors = query.all('.update_models a', true); + release.actors = query.all('.content_img .update_models a').map((actorEl) => ({ + name: query.cnt(actorEl), + url: query.url(actorEl, null), + })); const dvdPhotos = query.imgs('.dvd_preview_thumb'); const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1; @@ -183,9 +187,9 @@ function scrapeAll(scenes, site, entryIdFromTitle) { }).filter(Boolean); const teaserScript = query.html('script'); + if (teaserScript) { - const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); - if (src) release.teaser = { src }; + release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); } return release; @@ -235,17 +239,21 @@ function scrapeUpcoming(html, site) { }); } -async function scrapeScene({ html, query }, url, site, include) { - const release = { url, site }; +async function scrapeScene({ html, query }, url, site, options) { + const release = {}; release.entryId = getEntryId(html); - release.title = query.q('.title_bar_hilite', true); - release.description = query.q('.update_description', true); + release.title = query.cnt('.title_bar_hilite'); + release.description = query.cnt('.update_description'); release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML'); - release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true); - release.tags = query.all('.update_tags a', true); + release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a').map((actorEl) => ({ + name: query.cnt(actorEl), + url: query.url(actorEl, null), + })); + + release.tags = query.cnts('.update_tags a'); const posterPath = html.match(/useimage = "(.*)"/)?.[1]; @@ -260,7 +268,7 @@ async function scrapeScene({ html, query }, url, site, include) { } } - if (include.trailer && site.slug !== 'manuelferrara') { + if (options.includeTrailers && site.slug !== 'manuelferrara') { const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line)); if (trailerLines.length) { @@ -277,19 +285,20 @@ async function scrapeScene({ html, query }, url, site, include) { } } - if (include.photos) release.photos = await getPhotos(release.entryId, site); + if (options.includePhotos) { + release.photos = await getPhotos(release.entryId, site); + } if (query.exists('.update_dvds a')) { release.movie = { url: query.url('.update_dvds a'), - title: query.q('.update_dvds a', true), + title: query.cnt('.update_dvds a'), }; release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', ''); } - const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, '')); - if (stars) release.stars = stars; + release.stars = query.number('.avg_rating'); return release; } @@ -298,7 +307,7 @@ function scrapeMovie({ el, query }, url, site) { const movie = { url, site }; movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', ''); - movie.title = query.q('.title_bar span', true); + movie.title = query.cnt('.title_bar span'); movie.covers = query.urls('#dvd-cover-flip > a'); movie.channel = slugify(query.q('.update_date a', true), '');