Added media limit sampling.

This commit is contained in:
2020-04-11 22:49:37 +02:00
parent cb68319ac0
commit fc58850e56
6 changed files with 81 additions and 10 deletions

View File

@@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) {
// /createaccount is used by e.g. Tricky Spa native site
const src = $(linkEl).find('img').attr('src');
if (src.match('previews/')) {
if (/previews\//.test(src)) {
// resource often serves the full photo at a modified URL anyway, add as primary source
const highRes = src
.replace('previews/', '')

View File

@@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
const slugify = require('../utils/slugify');
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
@@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) {
}
const studioName = $('.watchpage-studioname').first().text().trim();
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
release.studio = slugify(studioName, '');
return release;
}
@@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) {
async function fetchScene(url, site) {
const useGallery = true;
// TODO: fall back on screenshots when gallery is not available
const res = useGallery
? await bhttp.get(`${url}/gallery#gallery`)
: await bhttp.get(`${url}/screenshots#screenshots`);