Compare commits

...

2 Commits

Author SHA1 Message Date
DebaucheryLibrarian 20f82c4006 1.227.12 2023-06-15 19:53:44 +02:00
DebaucheryLibrarian 128f9950ec Prefer HTML over data titles for capitalization in Bang scraper. 2023-06-15 19:53:42 +02:00
3 changed files with 12 additions and 11 deletions

4
package-lock.json generated
View File

@ -1,12 +1,12 @@
{ {
"name": "traxxx", "name": "traxxx",
"version": "1.227.11", "version": "1.227.12",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "traxxx", "name": "traxxx",
"version": "1.227.11", "version": "1.227.12",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@casl/ability": "^5.2.2", "@casl/ability": "^5.2.2",

View File

@ -1,6 +1,6 @@
{ {
"name": "traxxx", "name": "traxxx",
"version": "1.227.11", "version": "1.227.12",
"description": "All the latest porn releases in one place", "description": "All the latest porn releases in one place",
"main": "src/app.js", "main": "src/app.js",
"scripts": { "scripts": {

View File

@ -95,8 +95,9 @@ async function scrapeScene({ query }, { url, entity }) {
release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]); release.entryId = data?.['@id'] || decodeId(new URL(url).pathname.match(/\/video\/([\w-]+)\//)?.[1]);
release.title = data?.name || query.content('.video-container + div h1'); // data title is not capitalized, prefer markup
release.description = data?.description || query.content('//div[contains(@class, "actions")]/preceding-sibling::p'); release.title = query.attribute('meta[property="og:title"]', 'content') || query.content('.video-container + div h1') || data?.name;
release.description = data?.description || query.attribute('meta[property="og:description"]', 'content') || query.content('//div[contains(@class, "actions")]/preceding-sibling::p');
release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY'); release.date = unprint.extractDate(data?.datePublished, 'YYYY-MM-DD') || query.date('//p[contains(text(), "Date:")]', 'MMM DD, YYYY');
release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span'); release.duration = unprint.extractTimestamp(data?.duration) || query.duration('//p[contains(text(), "Playtime:")]//span');
@ -117,14 +118,14 @@ async function scrapeScene({ query }, { url, entity }) {
release.tags = query.contents('.actions .genres'); release.tags = query.contents('.actions .genres');
const videoData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' }); const sourcesData = query.json('.video-container [data-videopreview-sources-value]', { attribute: 'data-videopreview-sources-value' });
release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content'); release.poster = data?.thumbnailUrl || query.attribute('meta[property="og:image"]', 'content');
release.teaser = (videoData && [ release.teaser = (sourcesData && [
videoData.mp4_large, sourcesData.mp4_large,
videoData.webm_large, sourcesData.webm_large,
videoData.mp4, sourcesData.mp4,
videoData.webm, sourcesData.webm,
]) ])
|| data?.contentUrl || data?.contentUrl
|| query.attribute('meta[property="og:video"]') || query.attribute('meta[property="og:video"]')