Added media support to Brazzers scraper.

This commit is contained in:
ThePendulum 2019-10-29 03:50:39 +01:00
parent 27d1f4170c
commit eb50af7b81
1 changed file with 17 additions and 0 deletions

View File

@@ -31,12 +31,17 @@ function scrape(html, site, upcoming) {
const likes = Number($(element).find('.label-rating .like-amount').text()); const likes = Number($(element).find('.label-rating .like-amount').text());
const dislikes = Number($(element).find('.label-rating .dislike-amount').text()); const dislikes = Number($(element).find('.label-rating .dislike-amount').text());
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
return acc.concat({ return acc.concat({
url, url,
shootId, shootId,
title, title,
actors, actors,
date, date,
poster,
photos,
rating: { rating: {
likes, likes,
dislikes, dislikes,
@@ -48,6 +53,8 @@ function scrape(html, site, upcoming) {
async function scrapeScene(html, url, site) { async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const videoJson = $('script:contains("window.videoUiOptions")').html();
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('"},') + 2));
const shootId = url.split('/').slice(-3, -2)[0]; const shootId = url.split('/').slice(-3, -2)[0];
const title = $('.scene-title[itemprop="name"]').text(); const title = $('.scene-title[itemprop="name"]').text();
@@ -71,6 +78,10 @@ async function scrapeScene(html, url, site) {
const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const poster = `https:${videoData.poster}`;
const trailer = `https:${videoData.stream_info.http.paths.mp4_480_1500}`;
const photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
const [tags, channelSite] = await Promise.all([ const [tags, channelSite] = await Promise.all([
matchTags(rawTags), matchTags(rawTags),
knex('sites') knex('sites')
@@ -86,6 +97,12 @@ async function scrapeScene(html, url, site) {
description, description,
actors, actors,
date, date,
poster,
photos,
trailer: {
src: trailer,
quality: 480,
},
duration, duration,
rating: { rating: {
likes, likes,