From 1163b010fb513023bda9c96f983638382d414698 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 17 Oct 2024 23:50:55 +0200 Subject: [PATCH] Added actor page deep scrape for the few Sperm Mania scenes not on the homepage. --- src/deep.js | 2 +- src/scrapers/snowvalley.js | 38 +++++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/deep.js b/src/deep.js index e1d75b17..6dd7ae02 100755 --- a/src/deep.js +++ b/src/deep.js @@ -185,7 +185,7 @@ async function scrapeRelease(baseRelease, entitiesByHostname, type = 'scene') { if (!scrapedRelease || typeof scrapedRelease !== 'object' || Array.isArray(scrapedRelease)) { // scraper is unable to fetch the releases and returned a HTTP code or null - throw new Error(`Scraper returned '${scrapedRelease}' when fetching latest from '${entity.name}' (${entity.parent?.name})`); + throw new Error(`Scraper returned '${scrapedRelease}' when deep fetching (${entity.name}, ${entity.parent?.name}) ${baseRelease.url || baseRelease.path}`); } // object-merge-advance will use null as explicit false on hard merged keys, even when null as explicit falls is disabled diff --git a/src/scrapers/snowvalley.js b/src/scrapers/snowvalley.js index 1158c6ea..a6743056 100755 --- a/src/scrapers/snowvalley.js +++ b/src/scrapers/snowvalley.js @@ -15,8 +15,18 @@ const tagsMap = { 'pussy bukkake': ['cum-on-pussy'], }; -function entryIdFromMedia(release) { - return [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1].toLowerCase(); +function entryIdFromMedia(release, toLowercase = true) { + const originalEntryId = [release.poster, release.trailer, ...(release.photos || [])].flat().filter(Boolean)[0]?.match(/(?:(?:preview)|(?:samples)|(?:tour))\/(.*)\//)?.[1]; + + if (!originalEntryId) { + return null; + } + + if (toLowercase) { + return originalEntryId.toLowerCase(); + } + + return originalEntryId; } function scrapeAll(scenes, tilesByEntryId, channel) { @@ -63,6 +73,9 @@ function scrapeAll(scenes, tilesByEntryId, channel) { release[key] = value; } }); + } else { + // most tiles are on the front page, but not all, deep scrape actor's page + release.path = release.actors[0]?.url; } return release; @@ -94,9 +107,6 @@ function scrapeAllTiles(tiles, channel) { const release = {}; const sceneString = query.content(); - const originalEntryId = query.attribute('.scene-hover', 'data-path'); - release.entryId = originalEntryId?.toLowerCase(); - release.title = query.content('.scene-title'); release.date = query.date('.scene-date, .sDate', 'YYYY-MM-DD'); @@ -106,7 +116,7 @@ function scrapeAllTiles(tiles, channel) { name: unprint.query.content(actorEl), url: channel.slug === 'fellatiojapan' ? `${channel.url}/en/girl/${unprint.query.url(actorEl, null)}` - : unprint.query.element(actorEl, null, { origin: channel.url }), + : unprint.query.url(actorEl, null, { origin: channel.url }), })); release.tags = [...query.contents('.data a[href*="/tag"]'), ...(tagsMap[query.content('.scene-type')?.toLowerCase()] || [])].filter(Boolean); @@ -124,6 +134,10 @@ function scrapeAllTiles(tiles, channel) { ]; } + const originalEntryId = query.attribute('.scene-hover', 'data-path') || entryIdFromMedia(release, false); + + release.entryId = originalEntryId?.toLowerCase(); + release.teaser = originalEntryId && `https://img.${channel.slug}.com/preview/${originalEntryId}/hover.mp4`; release.photoCount = Number(sceneString.match(/(\d+) photos/)?.[1]) || null; @@ -699,20 +713,21 @@ function scrapeSceneCospuri({ query }, { url, entity }) { return release; } -// Fellatio Japan -async function fetchSceneFellatio(url, channel, baseRelease) { +// Sperm Mania, Fellatio Japan +async function fetchScene(url, channel, baseRelease) { if (!baseRelease.entryId || !baseRelease.path) { return null; } // no dedicated scene page, but there are dates on actor page; use that as 'deep' scrape // can't use front page like on Sperm Mania because dates are missing - const res = await unprint.get(baseRelease.path, { selectAll: '.scene-obj' }); + const res = await unprint.get(baseRelease.path, { selectAll: '.scene, .scene-obj' }); if (res.ok) { const tiles = scrapeAllTiles(res.context, channel); + const sceneTile = tiles.find((tile) => tile.entryId === baseRelease.entryId) || null; - return tiles.find((tile) => tile.entryId === baseRelease.entryId) || null; + return sceneTile; } return res.status; @@ -830,6 +845,7 @@ async function fetchProfile({ slug, url: actorUrl }, { entity, parameters }) { module.exports = { fetchLatest, fetchProfile, + fetchScene, cospuri: { fetchLatest: fetchLatestCospuri, scrapeScene: scrapeSceneCospuri, @@ -837,7 +853,7 @@ module.exports = { }, fellatio: { fetchLatest: fetchLatestFellatio, - fetchScene: fetchSceneFellatio, + fetchScene, fetchProfile, }, handjob: {