From 30f4a418e387514c5af86e37b5e180fa93dd2b1a Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 7 Dec 2019 04:17:14 +0100 Subject: [PATCH] Allowing scrapers to return multiple trailer qualities, implemented for Brazzers. Removed tag and site matching from Brazzers scraper. --- src/media.js | 17 ++++++++++------- src/scrapers/brazzers.js | 31 ++++++++++--------------------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/src/media.js b/src/media.js index a5f98ba8..9f86b28d 100644 --- a/src/media.js +++ b/src/media.js @@ -195,29 +195,32 @@ async function storePhotos(release, releaseId) { } async function storeTrailer(release, releaseId) { - if (!release.trailer || !release.trailer.src) { + // support scrapers supplying multiple qualities + const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer; + + if (!trailer || !trailer.src) { console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`); return; } console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`); - const { pathname } = new URL(release.trailer.src); - const mimetype = release.trailer.type || mime.getType(pathname); + const { pathname } = new URL(trailer.src); + const mimetype = trailer.type || mime.getType(pathname); - const res = await bhttp.get(release.trailer.src); - const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`); + const res = await bhttp.get(trailer.src); + const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`); await Promise.all([ fs.writeFile(path.join(config.media.path, filepath), res.body), knex('media').insert({ path: filepath, mime: mimetype, - source: release.trailer.src, + source: trailer.src, domain: 'releases', target_id: releaseId, role: 'trailer', - quality: release.trailer.quality || null, + quality: trailer.quality || null, }), ]); } diff --git a/src/scrapers/brazzers.js b/src/scrapers/brazzers.js index b74a1fe9..8f406a89 100644 --- a/src/scrapers/brazzers.js +++ b/src/scrapers/brazzers.js @@ -7,8 +7,6 @@ const { JSDOM } = require('jsdom'); const moment = require('moment'); const { heightToCm, lbsToKg } = require('../utils/convert'); -const { fetchSites } = require('../sites'); -const { matchTags } = require('../tags'); const hairMap = { Blonde: 'blonde', @@ -82,26 +80,19 @@ async function scrapeScene(html, url, site) { const dislikes = Number($('.label-rating .dislike').text()); const siteElement = $('.niche-site-logo'); - const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`; + // const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`; const siteName = siteElement.attr('title'); - const siteSlug = siteName.replace(/\s+/g, '').toLowerCase(); + const channel = siteName.replace(/\s+/g, '').toLowerCase(); - const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); + const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); const poster = `https:${videoData.poster}`; - const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`; const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray(); - const [tags, [channelSite]] = await Promise.all([ - matchTags(rawTags), - site.isFallback - ? fetchSites({ - slug: siteSlug, - name: siteName, - url: siteUrl, - }) - : [site], - ]); + const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({ + src: `https:${path}`, + quality: Number(quality.match(/\d{3,}/)[0]), + })); return { url, @@ -112,17 +103,15 @@ async function scrapeScene(html, url, site) { date, poster, photos, - trailer: { - src: trailer, - quality: 480, - }, + trailer, duration, rating: { likes, dislikes, }, tags, - site: channelSite || site, + site, + channel, }; }