Refactored 21sextury scraper.
This commit is contained in:
@@ -8,7 +8,6 @@ const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
const { matchTags } = require('../tags');
|
||||
const pluckPhotos = require('../utils/pluck-photos');
|
||||
|
||||
async function getPhoto(url) {
|
||||
const res = await bhttp.get(url);
|
||||
@@ -20,7 +19,7 @@ async function getPhoto(url) {
|
||||
return photoUrl;
|
||||
}
|
||||
|
||||
async function getPhotos(albumUrl, site, siteUrl) {
|
||||
async function getPhotos(albumUrl) {
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
const { document } = new JSDOM(html).window;
|
||||
@@ -28,15 +27,7 @@ async function getPhotos(albumUrl, site, siteUrl) {
|
||||
const lastPhotoPage = Array.from(document.querySelectorAll('.preview-image-container a')).slice(-1)[0].href;
|
||||
const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+.jpg/)[0], 10);
|
||||
|
||||
// dogfart has massive albums, pick 25 or specified number of photos: first, last and evenly in between
|
||||
const photoLimit = (site.network.parameters && site.network.parameters.photoLimit) || 25;
|
||||
const photoIndexes = pluckPhotos(lastPhotoIndex, photoLimit);
|
||||
|
||||
if (photoLimit > 25) {
|
||||
console.log(`${site.name}: Scraping ${photoLimit} album photos from ${siteUrl}, this may take some time...`);
|
||||
}
|
||||
|
||||
const photoUrls = await Promise.map(photoIndexes, async (index) => {
|
||||
const photoUrls = await Promise.map(Array.from({ length: lastPhotoIndex }), async (index) => {
|
||||
const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+.jpg/, `${index.toString().padStart(3, '0')}.jpg`)}`;
|
||||
|
||||
return getPhoto(pageUrl);
|
||||
|
||||
Reference in New Issue
Block a user