Prefer HTML over data titles for capitalization in Bang scraper.

This commit is contained in:
DebaucheryLibrarian 2023-06-15 19:53:42 +02:00
parent c2c329e00a
commit 128f9950ec
1 changed file with 9 additions and 8 deletions

View File

@@ -95,8 +95,9 @@ async function scrapeScene({ query }, { url, entity }) {
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
release.title = data?.name || query.content('.video-container + div h1');
release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
// data title is not capitalized, prefer markup
release.title = query.attribute('meta[property="og:title"]', 'content') || query.content('.video-container + div h1') || data?.name;
release.description = data?.description || query.attribute('meta[property="og:description"]', 'content') || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
@@ -117,14 +118,14 @@ async function scrapeScene({ query }, { url, entity }) {
release.tags = query.contents('.actions .genres');
const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
release.teaser = (videoData && [
videoData.mp4_large,
videoData.webm_large,
videoData.mp4,
videoData.webm,
release.teaser = (sourcesData && [
sourcesData.mp4_large,
sourcesData.webm_large,
sourcesData.mp4,
sourcesData.webm,
])
|| data?.contentUrl
|| query.attribute('meta[property="og:video"]')