From a671190fff273b963c732f4072594af38282e04d Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Mon, 3 Feb 2020 02:04:47 +0100 Subject: [PATCH] Adapted Score scraper for Score Classics. --- seeds/01_sites.js | 17 ++++++++++------- src/releases.js | 2 +- src/scrape-sites.js | 2 +- src/scrapers/score.js | 31 +++++++++++++++++-------------- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/seeds/01_sites.js b/seeds/01_sites.js index e18aaa2d..c6aeecf8 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -3451,18 +3451,12 @@ const sites = [ url: 'https://www.bigboobbundle.com/sarennasworld', network: 'score', }, - { - name: 'Score Classics', - slug: 'scoreclassics', - url: 'https://www.scoreclassics.com', - network: 'score', - priority: 1, - }, { name: 'Scoreland', slug: 'scoreland', url: 'https://www.scoreland.com', network: 'score', + parameters: { path: '/big-boob-videos' }, priority: 3, }, { @@ -3470,6 +3464,15 @@ const sites = [ slug: 'scoreland2', url: 'https://www.scoreland2.com', network: 'score', + parameters: { path: '/big-boob-scenes' }, + priority: 1, + }, + { + name: 'Score Classics', + slug: 'scoreclassics', + url: 'https://www.scoreclassics.com', + network: 'score', + parameters: { path: '/classic-boob-videos' }, priority: 1, }, { diff --git a/src/releases.js b/src/releases.js index ad0bf0bf..bbe14033 100644 --- a/src/releases.js +++ b/src/releases.js @@ -428,7 +428,7 @@ async function storeReleases(releases) { storeReleaseAssets(storedReleases), ]); - if (argv.withProfiles) { + if (argv.withProfiles && Object.keys(actors).length > 0) { await scrapeBasicActors(); } diff --git a/src/scrape-sites.js b/src/scrape-sites.js index e9707d8d..434f3b20 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -45,7 +45,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a const uniqueReleases = latestReleases .filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database - && moment(release.date).isAfter(afterDate)); // release is older than specified date limit + && (!release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit logger.info(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`); diff --git a/src/scrapers/score.js b/src/scrapers/score.js index 4b00d581..cff53359 100644 --- a/src/scrapers/score.js +++ b/src/scrapers/score.js @@ -65,7 +65,7 @@ function scrapeAll(html) { } async function scrapeScene(html, url) { - const { q, qa, qtext, qd, ql, qu, qis, qp, qt } = ex(html, '#videos-page, #content'); + const { q, qa, qtext, qi, qd, ql, qu, qis, qp, qt } = ex(html, '#videos-page, #content'); const release = {}; [release.entryId] = new URL(url).pathname.split('/').slice(-2); @@ -82,8 +82,8 @@ async function scrapeScene(html, url) { const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent)); release.duration = ql(durationEl); - release.poster = qp('video'); // _800.jpg is larger than _xl.jpg in landscape - const photosUrl = qu('a[href*=photos]'); + release.poster = qp('video') || qi('.flowplayer img'); // _800.jpg is larger than _xl.jpg in landscape + const photosUrl = qu('.stat a[href*=photos]'); if (photosUrl) { release.photos = await fetchPhotos(photosUrl); @@ -92,17 +92,20 @@ async function scrapeScene(html, url) { } const trailer = qt(); - release.trailer = [ - { - // don't rely on trailer always being 720p by default - src: trailer.replace(/\d+p\.mp4/, '720p.mp4'), - quality: 720, - }, - { - src: trailer.replace(/\d+p\.mp4/, '360p.mp4'), - quality: 360, - }, - ]; + + if (trailer) { + release.trailer = [ + { + // don't rely on trailer always being 720p by default + src: trailer.replace(/\d+p\.mp4/, '720p.mp4'), + quality: 720, + }, + { + src: trailer.replace(/\d+p\.mp4/, '360p.mp4'), + quality: 360, + }, + ]; + } const stars = q('.rate-box').dataset.score; if (stars) release.rating = { stars };