diff --git a/src/media.js b/src/media.js
index e510a623..a8c82af0 100644
--- a/src/media.js
+++ b/src/media.js
@@ -93,7 +93,7 @@ async function extractItem(source) {
   return null;
 }

-async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) {
+async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
   if (!source) return null;

   try {
@@ -106,7 +106,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att

       // fallbacks provided
       return source.reduce(
-        (outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)),
+        (outcome, sourceX, sourceIndexX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource, sourceIndexX)),
         Promise.reject(new Error()),
       );
     }
@@ -164,7 +164,11 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
     if (attempt < 3) {
       await Promise.delay(5000);

-      return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1);
+      return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource);
+    }
+
+    if (originalSource && sourceIndex < originalSource.length) {
+      throw error;
     }

     return null;
diff --git a/src/releases.js b/src/releases.js
index 116b9685..a6865f8b 100644
--- a/src/releases.js
+++ b/src/releases.js
@@ -308,7 +308,7 @@ function accumulateActors(releases) {
       if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info

       if (actor.avatar) {
-        acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
+        acc[actorSlug].avatars = acc[actorSlug].avatars.concat([actor.avatar]); // don't flatten fallbacks
       }
     });

diff --git a/src/scrape-sites.js b/src/scrape-sites.js
index 47084573..18440ca1 100644
--- a/src/scrape-sites.js
+++ b/src/scrape-sites.js
@@ -53,38 +53,38 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft
     return accReleases;
   }

+  const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site }));
+
   const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
   const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);

-  const uniqueReleases = latestReleases
+  const uniqueReleases = latestReleasesWithSite
     .filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
       && (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit

   logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);

-  const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
-
   if (
     uniqueReleases.length > 0
     // && (oldestReleaseOnPage || page < argv.pages)
     && ((oldestReleaseOnPage
       ? moment(oldestReleaseOnPage).isAfter(afterDate)
-      : accReleases.length + uniqueReleases.length < argv.nullDateLimit)
+      : accReleases.length + uniqueReleases.length <= argv.nullDateLimit)
     || (argv.last && accReleases.length + uniqueReleases.length < argv.last))
   ) {
     // oldest release on page is newer that specified date range, or latest count has not yet been met, fetch next page
-    return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
+    return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleases), page + 1);
   }

-  if (argv.latest && uniqueReleases.length >= argv.latest) {
-    return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
+  if (argv.last && uniqueReleases.length >= argv.last) {
+    return accReleases.concat(uniqueReleases).slice(0, argv.last);
   }

   if (oldestReleaseOnPage) {
-    return accReleases.concat(uniqueReleasesWithSite);
+    return accReleases.concat(uniqueReleases);
   }

-  return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
+  return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit);
 }

 async function scrapeUpcomingReleases(scraper, site, preflight) {
diff --git a/src/scrapers/brazzers.js b/src/scrapers/brazzers.js
index 0eab0623..d4366697 100644
--- a/src/scrapers/brazzers.js
+++ b/src/scrapers/brazzers.js
@@ -81,9 +81,19 @@ async function scrapeScene(html, url, _site) {
     .trim();

   release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
-  release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
   release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;

+  const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
+    const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
+
+    return {
+      name: $(actorElement).attr('title'),
+      avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
+    };
+  }).toArray();
+
+  release.actors = actorsFromCards.length ? actorsFromCards : $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); // toArray() always returns an array, so check length before falling back to name-only actors
+
   release.likes = Number($('.label-rating .like').text());
   release.dislikes = Number($('.label-rating .dislike').text());

diff --git a/src/scrapers/score.js b/src/scrapers/score.js
index 2082fd3d..cd2f9da9 100644
--- a/src/scrapers/score.js
+++ b/src/scrapers/score.js
@@ -95,9 +95,9 @@ async function scrapeScene(html, url, site) {
   const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
   release.duration = ql(durationEl);

-  release.poster = qp('video') || qi('.flowplayer img'); // _800.jpg is larger than _xl.jpg in landscape
+  release.poster = qp('video') || qi('.flowplayer img') || qi('img'); // _800.jpg is larger than _xl.jpg in landscape

   const photosUrl = qu('.stat a[href*=photos]');

   if (photosUrl) {
     release.photos = await fetchPhotos(photosUrl);
   } else {