From 0864154a0e446ce50413bd6fb8ab3e0613cdde5e Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Thu, 28 Oct 2021 01:59:53 +0200
Subject: [PATCH] Added unextracted property to keep paginating when extracting scenes.

---
 seeds/02_sites.js        | 10 ++++++++++
 src/scrapers/mindgeek.js |  8 ++++++--
 src/updates.js           | 12 +++++++-----
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/seeds/02_sites.js b/seeds/02_sites.js
index cecd6d2a1..128cc17ec 100644
--- a/seeds/02_sites.js
+++ b/seeds/02_sites.js
@@ -1531,6 +1531,16 @@ const sites = [
 		url: 'https://www.bradmontana.com',
 	},
 	// BRAZZERS
+	{
+		slug: 'brazzers',
+		name: 'Brazzers',
+		url: 'https://www.brazzers.com/videos',
+		parent: 'brazzers',
+		parameters: {
+			extract: true,
+			scene: 'https://www.brazzers.com/video',
+		},
+	},
 	{
 		slug: 'momsincontrol',
 		name: 'Moms in Control',
diff --git a/src/scrapers/mindgeek.js b/src/scrapers/mindgeek.js
index f9da5af63..dd0dc3b85 100644
--- a/src/scrapers/mindgeek.js
+++ b/src/scrapers/mindgeek.js
@@ -95,8 +95,12 @@ function scrapeLatestX(data, site, filterChannel) {
 
 async function scrapeLatest(items, site, filterChannel) {
 	const latestReleases = items.map(data => scrapeLatestX(data, site, filterChannel));
+	const extractedScenes = latestReleases.filter(Boolean);
 
-	return latestReleases.filter(Boolean);
+	return {
+		scenes: extractedScenes,
+		unextracted: latestReleases.length - extractedScenes.length,
+	};
 }
 
 function scrapeScene(data, url, _site, networkName) {
@@ -240,7 +244,7 @@ async function fetchLatest(site, page = 1, options) {
 	const { session, instanceToken } = options.beforeNetwork || await getSession(site, options.parameters);
 
 	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
-	const limit = 10;
+	const limit = 24;
 	const apiUrl = site.parameters?.native || site.parameters?.extract
 		? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
 		: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
diff --git a/src/updates.js b/src/updates.js
index 36db6fbff..04dcb3a34 100644
--- a/src/updates.js
+++ b/src/updates.js
@@ -66,10 +66,10 @@ async function filterUniqueReleases(releases) {
 	return { uniqueReleases, duplicateReleases };
 }
 
-function needNextPage(pageReleases, accReleases, isUpcoming) {
+function needNextPage(pageReleases, accReleases, isUpcoming, unextracted = 0) {
 	const { localUniqueReleases: uniquePageReleases } = filterLocalUniqueReleases(pageReleases, accReleases);
 
-	if (uniquePageReleases.length === 0) {
+	if (uniquePageReleases.length + unextracted === 0) {
 		// page is empty, or only contains scenes from previous page
 		return false;
 	}
@@ -78,7 +78,7 @@
 		return uniquePageReleases.length > 0 && argv.paginateUpcoming;
 	}
 
-	if (uniquePageReleases.length > 0) {
+	if (uniquePageReleases.length + unextracted > 0) {
 		if (argv.last) {
 			return accReleases.length + pageReleases.length < argv.last;
 		}
@@ -111,10 +111,12 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
 		parameters: getRecursiveParameters(entity),
 	};
 
-	const pageReleases = isUpcoming
+	const rawPageReleases = isUpcoming
 		? await scraper.fetchUpcoming(entity, page, options, preData)
 		: await scraper.fetchLatest(entity, page, options, preData);
 
+	const pageReleases = rawPageReleases?.scenes || rawPageReleases;
+
 	if (!Array.isArray(pageReleases)) {
 		// scraper is unable to fetch the releases and returned a HTTP code or null
 		logger.warn(`Scraper returned ${pageReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);
@@ -128,7 +130,7 @@ async function scrapeReleases(scraper, entity, preData, isUpcoming) {
 		logger.warn(`Found ${pageReleases.length - validPageReleases.length} empty or unidentified releases on page ${page} for '${entity.name}'`);
 	}
 
-	if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming)) {
+	if (needNextPage(pageReleasesWithEntity, accReleases, isUpcoming, rawPageReleases.unextracted)) {
 		return scrapeReleasesPage(page + 1, accReleases.concat(pageReleasesWithEntity), isUpcoming);
 	}
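
Note on the pagination contract this patch introduces: an extraction-mode scraper no longer returns a bare array of scenes but an object `{ scenes, unextracted }`, where `unextracted` counts the items that were present on the page yet filtered out (for example, scenes belonging to a channel other than the one being updated). Without that count, a page whose scenes were all filtered away is indistinguishable from the end of the listing, so pagination would stop too early. A minimal sketch of the contract, assuming hypothetical item data and a hypothetical `matchesChannel` predicate (both stand-ins, not part of the actual scrapers):

// Sketch only: shows the { scenes, unextracted } shape added by this patch.
// The items and the matchesChannel predicate are hypothetical stand-ins for
// the MindGeek API response and the channel filter inside scrapeLatestX.
function scrapeLatestSketch(items, matchesChannel) {
	const latestReleases = items.map((item) => (matchesChannel(item) ? { entryId: item.id } : null));
	const scenes = latestReleases.filter(Boolean);

	return {
		scenes,
		// number of page items dropped by the channel filter
		unextracted: latestReleases.length - scenes.length,
	};
}

const page = scrapeLatestSketch(
	[{ id: 1, channel: 'brazzers' }, { id: 2, channel: 'momsincontrol' }],
	(item) => item.channel === 'brazzers',
);
// page => { scenes: [{ entryId: 1 }], unextracted: 1 }

// The caller keeps paginating when a page produced unique scenes OR dropped
// items; a legacy scraper returning a plain array still works, because its
// .unextracted is undefined and needNextPage defaults the parameter to 0.
const keepGoing = page.scenes.length + (page.unextracted || 0) > 0; // true

Returning a count rather than the filtered-out releases themselves keeps the scraper's output lean while still letting updates.js tell an exhausted listing apart from a page that simply contained no scenes for the requested channel.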