Improved module structure. Added individual scene scrapers for Jules Jordan and XEmpire.

This commit is contained in:
2019-03-23 22:48:39 +01:00
parent d70d5f85aa
commit e8d4b76403
14 changed files with 434 additions and 132 deletions

View File

@@ -67,17 +67,55 @@ function scrapeUpcoming(html, site) {
});
}
// Builds a release object for one scene from its page markup: title, UTC
// release date, performer names, description and average star rating. `url`
// and `site` are copied into the result unchanged.
// NOTE(review): in this listing, lines of a separate fetchReleases(site)
// function (the Promise.all request pair and the spread `return [...]`) are
// interleaved with this body — presumably the removed side of the diff;
// confirm against the repository before reading them as live code.
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/categories/movies_1_d.html`),
bhttp.get(`${site.url}/index.php`),
]);
const title = $('.title_bar_hilite').text();
// The date text is parsed as MM/DD/YYYY in UTC, then converted to a native Date.
const date = moment
.utc($('.update_date').text(), 'MM/DD/YYYY')
.toDate();
return [
...scrapeUpcoming(upcomingRes.body.toString(), site, true),
...scrapeLatest(latestRes.body.toString(), site),
];
// Link text of every anchor inside the .update_models element that directly
// follows .update_description.
const actors = $('.update_description + .update_models a')
.map((_actorIndex, actorElement) => $(actorElement).text())
.toArray();
const description = $('.update_description').text().trim();
// NOTE(review): the pattern is a character class, not the literal phrase
// "Avg Rating:". It deletes every whitespace character and every '|', ':',
// 'A', 'v', 'g', 'R', 'a', 't', 'i' and 'n'. Digits and '.' are not in the
// set, so the numeric part is what reaches Number().
const stars = Number($('.avg_rating').text().trim().replace(/[\s|Avg Rating:]/g, ''));
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
/**
 * Downloads the trial "movies" listing page and passes its markup to scrapeLatest.
 *
 * @param {object} site - site record; only `site.url` is read here, the whole
 *   object is forwarded to scrapeLatest
 * @returns {Promise<*>} whatever scrapeLatest returns for the page
 */
async function fetchLatest(site) {
  const listingUrl = `${site.url}/trial/categories/movies_1_d.html`;
  const { body } = await bhttp.get(listingUrl);
  const markup = body.toString();

  return scrapeLatest(markup, site);
}
/**
 * Downloads the trial index page and passes its markup to scrapeUpcoming.
 *
 * @param {object} site - site record; only `site.url` is read here, the whole
 *   object is forwarded to scrapeUpcoming
 * @returns {Promise<*>} whatever scrapeUpcoming returns for the page
 */
async function fetchUpcoming(site) {
  const indexUrl = `${site.url}/trial/index.php`;
  const { body } = await bhttp.get(indexUrl);
  const markup = body.toString();

  return scrapeUpcoming(markup, site);
}
/**
 * Downloads a single scene page and passes its markup to scrapeScene.
 *
 * @param {string} url - address of the scene page; also forwarded to scrapeScene
 * @param {object} site - site record, forwarded to scrapeScene untouched
 * @returns {Promise<*>} whatever scrapeScene returns for the page
 */
async function fetchScene(url, site) {
  const { body } = await bhttp.get(url);
  const markup = body.toString();

  return scrapeScene(markup, url, site);
}
// Module interface: an object of named entry points (latest listing,
// upcoming listing, single scene) rather than a single function.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};

View File

@@ -0,0 +1 @@
'use strict';

View File

@@ -17,7 +17,7 @@ function scrape(html, site) {
const stars = $('img[src*="/star.png"]')
.toArray()
.map(element => $(element).attr('src'))
.length || null;
.length || 0;
return {
url,
@@ -25,15 +25,13 @@ function scrape(html, site) {
actors,
date,
rating: {
likes: null,
dislikes: null,
stars,
},
site,
};
}
async function fetchReleases(site) {
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=0&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
const elements = JSON.parse(res.body.toString());
@@ -42,4 +40,6 @@ async function fetchReleases(site) {
return latest;
}
module.exports = fetchReleases;
// Module interface: an object of named entry points. Only fetchLatest is
// exported from this scraper.
module.exports = {
fetchLatest,
};

View File

@@ -24,7 +24,6 @@ function scrape(html, site) {
const [likes, dislikes] = $(element).find('.value')
.toArray()
.map(value => Number($(value).text()));
const stars = likes || dislikes ? Math.floor(((likes * 5 + dislikes) / (likes + dislikes)) * 100) / 100 : null;
return {
url,
@@ -34,23 +33,54 @@ function scrape(html, site) {
rating: {
likes,
dislikes,
stars,
},
site,
};
});
}
async function fetchReleases(site) {
const [latestRes, upcomingRes] = await Promise.all([
bhttp.get(`${site.url}/en/videos`),
bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`),
]);
// Builds a release object for one scene from its page markup: title, UTC
// release date, performer names, description and star rating. `url` and
// `site` are copied into the result unchanged.
// NOTE(review): the spread `return [...]` a few lines below reads
// upcomingRes/latestRes, which belong to a separate fetchReleases(site)
// function — presumably the removed side of the diff interleaved into this
// listing; confirm against the repository before reading it as live code.
function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
return [
...scrape(upcomingRes.body.toString(), site, true),
...scrape(latestRes.body.toString(), site),
];
const title = $('h1.title').text();
// The date text is parsed as MM-DD-YYYY in UTC, then converted to a native Date.
const date = moment.utc($('.updatedDate').text(), 'MM-DD-YYYY').toDate();
const actors = $('.sceneColActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
// Every "Video Description:" label is replaced by a space before trimming.
const description = $('.sceneDesc').text().replace(/Video Description:/g, ' ').trim();
// Takes the text before the first '/' and halves it; the division coerces
// the string to a number (NaN when that text is not numeric).
// NOTE(review): presumably this maps an "x/10" score onto a 5-star scale — confirm.
const stars = $('.currentRating').text().split('/')[0] / 2;
return {
url,
title,
date,
actors,
description,
rating: {
stars,
},
site,
};
}
module.exports = fetchReleases;
/**
 * Downloads the site's /en/videos listing and passes its markup to scrape.
 *
 * @param {object} site - site record; only `site.url` is read here, the whole
 *   object is forwarded to scrape
 * @returns {Promise<*>} whatever scrape returns for the page
 */
async function fetchLatest(site) {
  const listingUrl = `${site.url}/en/videos`;
  const response = await bhttp.get(listingUrl);
  const markup = response.body.toString();

  return scrape(markup, site);
}
/**
 * Downloads the site's "upcoming" video listing and passes its markup to scrape.
 *
 * @param {object} site - site record; only `site.url` is read here, the whole
 *   object is forwarded to scrape
 * @returns {Promise<*>} whatever scrape returns for the page
 */
async function fetchUpcoming(site) {
  const upcomingUrl = `${site.url}/en/videos/AllCategories/0/1/upcoming`;
  const response = await bhttp.get(upcomingUrl);
  const markup = response.body.toString();

  return scrape(markup, site);
}
/**
 * Downloads a single scene page and passes its markup to scrapeScene.
 *
 * @param {string} url - address of the scene page; also forwarded to scrapeScene
 * @param {object} site - site record, forwarded to scrapeScene untouched
 * @returns {Promise<*>} whatever scrapeScene returns for the page
 */
async function fetchScene(url, site) {
  const response = await bhttp.get(url);
  const markup = response.body.toString();

  return scrapeScene(markup, url, site);
}
// Module interface: an object of named entry points (latest listing,
// upcoming listing, single scene) rather than a single function.
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
};