From 128f9950ec9dfcd191c87b1e2c5467eb173ef219 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 15 Jun 2023 19:53:42 +0200 Subject: [PATCH] Prefer HTML over data titles for capitalization in Bang scraper. --- src/scrapers/bang.js | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/scrapers/bang.js b/src/scrapers/bang.js index 2248e685..e8d43708 100755 --- a/src/scrapers/bang.js +++ b/src/scrapers/bang.js @@ -95,8 +95,9 @@ async function scrapeScene({ query }, { url, entity }) { release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); - release.title = data?.name || query.content('.video-container + div h1'); - release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p'); + // data title is not capitalized, prefer markup + release.title = query.attribute('meta[property="og:title"]', 'content') || query.content('.video-container + div h1') || data?.name; + release.description = data?.description || query.attribute('meta[property="og:description"]', 'content') || query.content('//div[contains(@class, "actions")]/preceding-sibling::p'); release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); @@ -117,14 +118,14 @@ async function scrapeScene({ query }, { url, entity }) { release.tags = query.contents('.actions .genres'); - const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' }); + const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' }); release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content'); - release.teaser = (videoData && [ - videoData.mp4_large, - videoData.webm_large, - videoData.mp4, - videoData.webm, + release.teaser = (sourcesData && [ + sourcesData.mp4_large, + sourcesData.webm_large, + sourcesData.mp4, + sourcesData.webm, ]) || data?.contentUrl || query.attribute('meta[property="og:video"]')