Queueing and batching media HTTP requests for improved reliability.

This commit is contained in:
2020-02-22 03:22:30 +01:00
parent b2dfbac9e5
commit 349a5a506e
15 changed files with 251 additions and 78 deletions

View File

@@ -211,8 +211,13 @@ async function scrapeScene(html, url, site) {
release.title = $('.title_bar_hilite').text().trim();
const setIdIndex = html.indexOf('setid:"');
release.entryId = html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)[0];
// Resolve the release's entry ID: prefer the numeric argument of showtagform(...),
// otherwise fall back to the digits following the inline `setid:"` marker.
const entryId = html.match(/showtagform\((\d+)\)/);
if (entryId) release.entryId = entryId[1];
else {
  const setIdIndex = html.indexOf('setid:"');
  // indexOf() returns -1 (which is truthy) when the marker is absent and 0 (falsy)
  // when it is at the very start, so compare against -1 instead of testing truthiness.
  if (setIdIndex !== -1) {
    const setId = html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/);
    // Leave entryId unset rather than throwing when no digits follow the marker.
    if (setId) release.entryId = setId[0];
  }
}
const dateElement = $('.update_date').text().trim();
const dateComment = $('*')

View File

@@ -1,6 +1,6 @@
'use strict';
const { geta, edate } = require('../utils/q');
const { geta, ed } = require('../utils/q');
function scrapeBlockLatest(scenes) {
return scenes.map(({ html, q, qa, qu, qt }) => {
@@ -13,7 +13,7 @@ function scrapeBlockLatest(scenes) {
release.title = q('h4 a', true);
release.url = qu('h4 a');
release.date = edate(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
release.actors = qa('.tour_update_models a', true);
@@ -22,8 +22,6 @@ function scrapeBlockLatest(scenes) {
release.teaser = qt();
console.log(release);
return release;
});
}
@@ -52,8 +50,6 @@ function scrapeClassicLatest(scenes) {
const photoCount = q('.update_thumb', 'cnt');
[release.poster, ...release.photos] = Array.from({ length: photoCount }).map((value, index) => q('.update_thumb', `src${index}_3x`) || q('.update_thumb', `src${index}_2x`) || q('.update_thumb', `src${index}_1x`));
console.log(release);
return release;
});
}