forked from DebaucheryLibrarian/traxxx
Allowing scrapers to return multiple trailer qualities, implemented for Brazzers. Removed tag and site matching from Brazzers scraper.
This commit is contained in:
parent
5455f5483b
commit
30f4a418e3
17
src/media.js
17
src/media.js
|
@ -195,29 +195,32 @@ async function storePhotos(release, releaseId) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function storeTrailer(release, releaseId) {
|
async function storeTrailer(release, releaseId) {
|
||||||
if (!release.trailer || !release.trailer.src) {
|
// support scrapers supplying multiple qualities
|
||||||
|
const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer;
|
||||||
|
|
||||||
|
if (!trailer || !trailer.src) {
|
||||||
console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
|
console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
console.log(`Storing trailer for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
|
|
||||||
const { pathname } = new URL(release.trailer.src);
|
const { pathname } = new URL(trailer.src);
|
||||||
const mimetype = release.trailer.type || mime.getType(pathname);
|
const mimetype = trailer.type || mime.getType(pathname);
|
||||||
|
|
||||||
const res = await bhttp.get(release.trailer.src);
|
const res = await bhttp.get(trailer.src);
|
||||||
const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
|
const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${trailer.quality ? `_${trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
|
||||||
|
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
fs.writeFile(path.join(config.media.path, filepath), res.body),
|
fs.writeFile(path.join(config.media.path, filepath), res.body),
|
||||||
knex('media').insert({
|
knex('media').insert({
|
||||||
path: filepath,
|
path: filepath,
|
||||||
mime: mimetype,
|
mime: mimetype,
|
||||||
source: release.trailer.src,
|
source: trailer.src,
|
||||||
domain: 'releases',
|
domain: 'releases',
|
||||||
target_id: releaseId,
|
target_id: releaseId,
|
||||||
role: 'trailer',
|
role: 'trailer',
|
||||||
quality: release.trailer.quality || null,
|
quality: trailer.quality || null,
|
||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,8 +7,6 @@ const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||||
const { fetchSites } = require('../sites');
|
|
||||||
const { matchTags } = require('../tags');
|
|
||||||
|
|
||||||
const hairMap = {
|
const hairMap = {
|
||||||
Blonde: 'blonde',
|
Blonde: 'blonde',
|
||||||
|
@ -82,26 +80,19 @@ async function scrapeScene(html, url, site) {
|
||||||
const dislikes = Number($('.label-rating .dislike').text());
|
const dislikes = Number($('.label-rating .dislike').text());
|
||||||
|
|
||||||
const siteElement = $('.niche-site-logo');
|
const siteElement = $('.niche-site-logo');
|
||||||
const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
||||||
const siteName = siteElement.attr('title');
|
const siteName = siteElement.attr('title');
|
||||||
const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();
|
const channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||||
|
|
||||||
const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
|
|
||||||
const poster = `https:${videoData.poster}`;
|
const poster = `https:${videoData.poster}`;
|
||||||
const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`;
|
|
||||||
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||||
|
|
||||||
const [tags, [channelSite]] = await Promise.all([
|
const trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
|
||||||
matchTags(rawTags),
|
src: `https:${path}`,
|
||||||
site.isFallback
|
quality: Number(quality.match(/\d{3,}/)[0]),
|
||||||
? fetchSites({
|
}));
|
||||||
slug: siteSlug,
|
|
||||||
name: siteName,
|
|
||||||
url: siteUrl,
|
|
||||||
})
|
|
||||||
: [site],
|
|
||||||
]);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
|
@ -112,17 +103,15 @@ async function scrapeScene(html, url, site) {
|
||||||
date,
|
date,
|
||||||
poster,
|
poster,
|
||||||
photos,
|
photos,
|
||||||
trailer: {
|
trailer,
|
||||||
src: trailer,
|
|
||||||
quality: 480,
|
|
||||||
},
|
|
||||||
duration,
|
duration,
|
||||||
rating: {
|
rating: {
|
||||||
likes,
|
likes,
|
||||||
dislikes,
|
dislikes,
|
||||||
},
|
},
|
||||||
tags,
|
tags,
|
||||||
site: channelSite || site,
|
site,
|
||||||
|
channel,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue