From c998fcf9331970dae59fabb76eb7af0e80c6e615 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Fri, 13 Dec 2019 05:04:04 +0100 Subject: [PATCH] Added photo album support to Blowpass scraper. --- assets/components/releases/banner.vue | 117 ++++++++ assets/components/releases/release.vue | 398 +++++++++++++++++++++++++ assets/components/tile/actor.vue | 18 +- assets/components/tile/release.vue | 2 +- public/css/style.css | 3 + src/releases.js | 1 + src/scrape-sites.js | 2 + src/scrapers/blowpass.js | 71 ++++- 8 files changed, 606 insertions(+), 6 deletions(-) create mode 100644 assets/components/releases/banner.vue create mode 100644 assets/components/releases/release.vue diff --git a/assets/components/releases/banner.vue b/assets/components/releases/banner.vue new file mode 100644 index 00000000..ac2ecc4e --- /dev/null +++ b/assets/components/releases/banner.vue @@ -0,0 +1,117 @@ + + + + + diff --git a/assets/components/releases/release.vue b/assets/components/releases/release.vue new file mode 100644 index 00000000..8953ed02 --- /dev/null +++ b/assets/components/releases/release.vue @@ -0,0 +1,398 @@ + + + + + diff --git a/assets/components/tile/actor.vue b/assets/components/tile/actor.vue index 8b6dd815..2869daf5 100644 --- a/assets/components/tile/actor.vue +++ b/assets/components/tile/actor.vue @@ -27,9 +27,21 @@ v-if="actor.age || actor.origin" class="details" > - {{ actor.age }} + + {{ actor.age }} + + @ {{ actor.ageThen }} + {{ actor.origin.country.alpha2 }} @@ -110,4 +122,8 @@ export default { font-size: .8rem; font-weight: bold; } + +.age-then { + color: $highlight; +} diff --git a/assets/components/tile/release.vue b/assets/components/tile/release.vue index 70cc0f05..a1e3b68e 100644 --- a/assets/components/tile/release.vue +++ b/assets/components/tile/release.vue @@ -50,7 +50,7 @@ > 0 && (oldestReleaseOnPage || page < argv.pages) diff --git a/src/scrapers/blowpass.js b/src/scrapers/blowpass.js index 8a75fbc9..4e54593e 100644 --- a/src/scrapers/blowpass.js +++ b/src/scrapers/blowpass.js @@ -1,11 +1,71 @@ 'use strict'; /* eslint-disable newline-per-chained-call */ +const Promise = require('bluebird'); const bhttp = require('bhttp'); const cheerio = require('cheerio'); const moment = require('moment'); -const { matchTags } = require('../tags'); +async function fetchPhotos(url) { + const res = await bhttp.get(url); + + return res.body.toString(); +} + +function scrapePhotos(html) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + + return $('.preview .imgLink').toArray().map((linkEl) => { + const url = $(linkEl).attr('href'); + + if (url.match('/join')) { + // URL links to join page instead of full photo, extract thumbnail + const src = $(linkEl).find('img').attr('src'); + + if (src.match('previews/')) { + // resource often serves full photo at a modifier URL anyway, add as primary source + const highRes = src + .replace('previews/', '') + .replace('_tb.jpg', '.jpg'); + + // keep original thumbnail as fallback in case full photo is not available + return [highRes, src]; + } + + return src; + } + + // URL links to full photo + return url; + }); +} + +async function getPhotos(albumPath, siteDomain) { + const albumUrl = `https://www.blowpass.com${albumPath}`; + + try { + const html = await fetchPhotos(albumUrl); + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const photos = scrapePhotos(html); + + const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray(); + + const otherPhotos = await Promise.map(pages, async (page) => { + const pageUrl = `https://${siteDomain}${page}`; + const pageHtml = await fetchPhotos(pageUrl); + + return scrapePhotos(pageHtml); + }, { + concurrency: 2, + }); + + return photos.concat(otherPhotos.flat()); + } catch (error) { + console.error(`Failed to fetch Blowpass photos from ${albumPath}: ${error.message}`); + + return []; + } +} function scrape(html, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); @@ -68,13 +128,14 @@ async function scrapeScene(html, url, site) { const likes = Number(sceneElement.find('.rating .state_1 .value').text()); const dislikes = Number(sceneElement.find('.rating .state_2 .value').text()); + const channel = $('.siteNameSpan').text().trim().toLowerCase(); + const poster = playerData.picPreview; const trailer = `${playerData.playerOptions.host}${playerData.url}`; + const photos = await getPhotos($('.picturesItem a').attr('href'), channel, site); const duration = moment.duration(data.duration.slice(2)).asSeconds(); - - const rawTags = data.keywords.split(', '); - const tags = await matchTags(rawTags); + const tags = data.keywords.split(', '); return { url, @@ -86,6 +147,7 @@ async function scrapeScene(html, url, site) { date, duration, poster, + photos, trailer: { src: trailer, quality: playerData.sizeOnLoad.slice(0, -1), @@ -96,6 +158,7 @@ async function scrapeScene(html, url, site) { dislikes, }, site, + channel, }; }