From b8e7029cef2f8f1c2bfc073bfb9d03b92d3ea1b6 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 9 Jul 2024 02:19:23 +0200 Subject: [PATCH] Using base poster as fallback for deep poster in merge. Improved Naughty America scraper for live scenes. --- src/deep.js | 7 +++++++ src/scrapers/naughtyamerica.js | 14 ++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/deep.js b/src/deep.js index 6eb2f772..49466739 100755 --- a/src/deep.js +++ b/src/deep.js @@ -191,11 +191,18 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { }), }), {}); + curatedScrapedRelease.poster = null; + const mergedRelease = { ...merge(baseRelease, curatedScrapedRelease, { dedupeStringsInArrayValues: true, hardMergeKeys: ['actors', 'covers', 'poster', 'trailer', 'teaser'], + ignoreKeys: ['poster'], }), + poster: Array.from(new Set([ + ...[].concat(curatedScrapedRelease.poster), + ...[].concat(baseRelease.poster), + ])).filter(Boolean), photos: curatedScrapedRelease.photos?.length > 0 ? curatedScrapedRelease.photos : baseRelease.photos, diff --git a/src/scrapers/naughtyamerica.js b/src/scrapers/naughtyamerica.js index f5d2448e..a0c1e876 100755 --- a/src/scrapers/naughtyamerica.js +++ b/src/scrapers/naughtyamerica.js @@ -25,9 +25,9 @@ function scrapeLatest(scenes, channel) { release.poster = [ ...(query.sourceSet('source[data-srcset*="scenes/"][type="image/jpeg"]', 'data-srcset') || []), - query.img('.main-scene-img', { attribute: 'data-srcset' }), + query.img('.main-scene-img', { attribute: 'srcset' }), query.img('.scene-thumb'), - ]; + ].filter(Boolean); release.tags = query.contents('.flag-bg'); @@ -44,16 +44,17 @@ function scrapeLatest(scenes, channel) { function scrapeScene({ query }, { url }) { const release = {}; + release.entryId = new URL(url).pathname.match(/-(\d+)$/)?.[1]; - release.title = query.content('.scene-title, .grey-title'); + release.title = query.content('.breadcrumb-item.active') || query.content('.scene-title, .grey-title'); // main title has performer name instead of scene title in live scenes release.description = query.text('.synopsis, .scene-description'); release.date = query.date('.entry-date, .released-date', ['MMM D, YYYY', 'MM/DD/YY']); release.duration = query.duration('.duration'); - release.actors = query.exists('.performer-list') - ? query.all('.performer-list a, .grey-performers a').map((actorEl) => ({ + release.actors = query.exists('.performer-list') || query.exists('.scene-info a[href*="/pornstar"].scene-title') // title links to performer in live scenes + ? query.all('.performer-list a, .grey-performers a, .scene-info a[href*="/pornstar"].scene-title').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })) @@ -64,7 +65,8 @@ function scrapeScene({ query }, { url }) { ...(query.sourceSet('.scenepage-video source[srcset*="scenes/"][type="image/jpeg"]', 'srcset') || []), query.img('.play-trailer img[data-srcset*="scenes/"]', { attribute: 'data-srcset' }), query.img('.scenepage-video .playcard'), - ]; + query.img('.scene-page .start-card'), + ].filter(Boolean); release.photos = query.els('.contain-scene-images.desktop-only .scene-image').map((imgEl) => [ unprint.query.url(imgEl, null),