forked from DebaucheryLibrarian/traxxx
Prefer titles from the HTML markup over structured-data titles in the Bang scraper, because the data titles are not capitalized.
This commit is contained in:
parent
c2c329e00a
commit
128f9950ec
|
@ -95,8 +95,9 @@ async function scrapeScene({ query }, { url, entity }) {
|
||||||
|
|
||||||
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
|
||||||
|
|
||||||
release.title = data?.name || query.content('.video-container + div h1');
|
// data title is not capitalized, prefer markup
|
||||||
release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
|
release.title = query.attribute('meta[property="og:title"]', 'content') || query.content('.video-container + div h1') || data?.name;
|
||||||
|
release.description = data?.description || query.attribute('meta[property="og:description"]', 'content') || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
|
||||||
|
|
||||||
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
|
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
|
||||||
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
|
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
|
||||||
|
@ -117,14 +118,14 @@ async function scrapeScene({ query }, { url, entity }) {
|
||||||
|
|
||||||
release.tags = query.contents('.actions .genres');
|
release.tags = query.contents('.actions .genres');
|
||||||
|
|
||||||
const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
|
const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
|
||||||
|
|
||||||
release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
|
release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
|
||||||
release.teaser = (videoData && [
|
release.teaser = (sourcesData && [
|
||||||
videoData.mp4_large,
|
sourcesData.mp4_large,
|
||||||
videoData.webm_large,
|
sourcesData.webm_large,
|
||||||
videoData.mp4,
|
sourcesData.mp4,
|
||||||
videoData.webm,
|
sourcesData.webm,
|
||||||
])
|
])
|
||||||
|| data?.contentUrl
|
|| data?.contentUrl
|
||||||
|| query.attribute('meta[property="og:video"]')
|
|| query.attribute('meta[property="og:video"]')
|
||||||
|
|
Loading…
Reference in New Issue