Scrapers can now iterate through pages. Releases are now filtered for uniqueness before being saved to the database. Improved scrapers and rendering.
This commit is contained in:
@@ -23,14 +23,15 @@ function scrapeLatest(html, site) {
|
||||
const sceneLinkElement = $(element).find('.thumbnail-title a');
|
||||
const url = sceneLinkElement.attr('href');
|
||||
|
||||
const originalTitle = sceneLinkElement.attr('title');
|
||||
const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
const internalId = new URL(url).pathname.split('/')[2];
|
||||
|
||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
shootId: shootId || internalId,
|
||||
title,
|
||||
date,
|
||||
site,
|
||||
@@ -68,8 +69,8 @@ async function scrapeScene(html, url, site) {
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchLatest(site) {
|
||||
const res = await bhttp.get(`${site.url}/new-videos`);
|
||||
/**
 * Request one page of the site's `/new-videos` listing and hand the
 * response body to `scrapeLatest`.
 *
 * @param {Object} site - Site record; `site.url` is used as the base of the request URL.
 * @param {number} [page=1] - Value appended as the last path segment of the listing URL.
 * @returns {Promise<*>} Resolves with whatever `scrapeLatest` returns for the fetched body.
 */
async function fetchLatest(site, page = 1) {
  const listingUrl = `${site.url}/new-videos/${page}`;
  const response = await bhttp.get(listingUrl);

  // The body is converted to a string before being passed to the scraper.
  const html = response.body.toString();

  return scrapeLatest(html, site);
}
|
||||
|
||||
Reference in New Issue
Block a user