Scrapers can now iterate through pages. Releases are now filtered for uniqueness before being saved to the database. Improved scrapers and rendering.

This commit is contained in:
2019-04-05 03:45:40 +02:00
parent cbb4fdc919
commit 2b818e379a
14 changed files with 99 additions and 49 deletions

View File

@@ -23,14 +23,15 @@ function scrapeLatest(html, site) {
const sceneLinkElement = $(element).find('.thumbnail-title a');
const url = sceneLinkElement.attr('href');
const originalTitle = sceneLinkElement.attr('title');
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
const { shootId, title } = extractTitle(originalTitle);
const internalId = new URL(url).pathname.split('/')[2];
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
return {
url,
shootId,
shootId: shootId || internalId,
title,
date,
site,
@@ -68,8 +69,8 @@ async function scrapeScene(html, url, site) {
};
}
async function fetchLatest(site) {
const res = await bhttp.get(`${site.url}/new-videos`);
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/new-videos/${page}`);
return scrapeLatest(res.body.toString(), site);
}