Allowing scrapers to return multiple trailer qualities, implemented for Brazzers. Removed tag and site matching from Brazzers scraper.

ThePendulum 2019-12-07 04:17:14 +01:00
parent 5455f5483b
commit 30f4a418e3
2 changed files with 20 additions and 28 deletions
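
For context, a minimal sketch of the two shapes release.trailer can take after this change; the URLs below are made up, but the array-of-{ src, quality } form matches what the updated Brazzers scraper in this commit returns:

// Single quality, as before
release.trailer = { src: 'https://cdn.example.com/trailer.mp4', quality: 480 };

// Multiple qualities, one entry per stream; storeTrailer currently stores the first entry
release.trailer = [
    { src: 'https://cdn.example.com/trailer_480.mp4', quality: 480 },
    { src: 'https://cdn.example.com/trailer_720.mp4', quality: 720 },
];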

View File

@@ -195,29 +195,32 @@ async function storePhotos(release, releaseId) {
 }
 
 async function storeTrailer(release, releaseId) {
-    if (!release.trailer || !release.trailer.src) {
+    // support scrapers supplying multiple qualities
+    const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer;
+
+    if (!trailer || !trailer.src) {
         console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
         return;
     }
 
     console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`);
 
-    const { pathname } = new URL(release.trailer.src);
-    const mimetype = release.trailer.type || mime.getType(pathname);
+    const { pathname } = new URL(trailer.src);
+    const mimetype = trailer.type || mime.getType(pathname);
 
-    const res = await bhttp.get(release.trailer.src);
-    const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
+    const res = await bhttp.get(trailer.src);
+    const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
 
     await Promise.all([
         fs.writeFile(path.join(config.media.path, filepath), res.body),
         knex('media').insert({
             path: filepath,
             mime: mimetype,
-            source: release.trailer.src,
+            source: trailer.src,
             domain: 'releases',
             target_id: releaseId,
             role: 'trailer',
-            quality: release.trailer.quality || null,
+            quality: trailer.quality || null,
         }),
     ]);
 }
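
As a usage note (ids and slugs below are assumed, not from the commit): with the hypothetical two-entry array above, the new Array.isArray branch resolves trailer to the first (480p) entry, and for release id 123 on a site with slug 'examplesite' under network 'examplenetwork' the file would be stored as:

const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer;
// -> { src: 'https://cdn.example.com/trailer_480.mp4', quality: 480 }

// filepath -> 'releases/examplenetwork/examplesite/123/trailer_480.mp4'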

View File

@@ -7,8 +7,6 @@ const { JSDOM } = require('jsdom');
 const moment = require('moment');
 
 const { heightToCm, lbsToKg } = require('../utils/convert');
-const { fetchSites } = require('../sites');
-const { matchTags } = require('../tags');
 
 const hairMap = {
     Blonde: 'blonde',
@@ -82,26 +80,19 @@ async function scrapeScene(html, url, site) {
     const dislikes = Number($('.label-rating .dislike').text());
 
     const siteElement = $('.niche-site-logo');
-    const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
+    // const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
     const siteName = siteElement.attr('title');
-    const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
+    const channel = siteName.replace(/\s+/g, '').toLowerCase();
 
-    const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
+    const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
 
     const poster = `https:${videoData.poster}`;
-    const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`;
     const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
 
-    const [tags, [channelSite]] = await Promise.all([
-        matchTags(rawTags),
-        site.isFallback
-            ? fetchSites({
-                slug: siteSlug,
-                name: siteName,
-                url: siteUrl,
-            })
-            : [site],
-    ]);
+    const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
+        src: `https:${path}`,
+        quality: Number(quality.match(/\d{3,}/)[0]),
+    }));
 
     return {
         url,
@@ -112,17 +103,15 @@ async function scrapeScene(html, url, site) {
         date,
         poster,
         photos,
-        trailer: {
-            src: trailer,
-            quality: 480,
-        },
+        trailer,
         duration,
         rating: {
             likes,
             dislikes,
         },
         tags,
-        site: channelSite || site,
+        site,
+        channel,
     };
 }
 
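
To illustrate the new trailer mapping: assuming stream_info.http.paths looks something like the object below (mp4_480_1500 is the key the old code read; the 720p key and both paths are assumptions), the Object.entries(...).map(...) above would produce:

// Illustrative input (not from the commit)
videoData.stream_info.http.paths = {
    mp4_480_1500: '//media.example.com/trailer_480.mp4',
    mp4_720_2500: '//media.example.com/trailer_720.mp4',
};

// Resulting trailer array
// [
//   { src: 'https://media.example.com/trailer_480.mp4', quality: 480 },
//   { src: 'https://media.example.com/trailer_720.mp4', quality: 720 },
// ]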