Added media limit sampling.
This commit is contained in:
64
src/media.js
64
src/media.js
@@ -1,13 +1,12 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
// const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
const mime = require('mime');
|
||||
const fileType = require('file-type');
|
||||
// const fileType = require('file-type');
|
||||
const sharp = require('sharp');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
@@ -72,6 +71,60 @@ async function getThumbnail(buffer, height = config.media.thumbnailSize) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Reduce a list of medias to at most `limit` entries by splitting it into
 * `limit` contiguous chunks and merging every chunk into a single media.
 * The sources of the merged-away medias are kept as fallbacks.
 *
 * @param {Array<{id: *, role: *, sources: Array}>} medias - medias to sample
 * @param {number} [limit=config.media.limit] - maximum number of medias to return
 * @param {boolean} [preferLast=true] - reverse the medias inside the last chunk,
 *   so the very last media of the set is tried first
 * @returns {Array} `medias` itself when it already fits within `limit`,
 *   otherwise a new array of merged `{ id, role, sources }` objects
 */
function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
  // limit media sets, use extras as fallbacks
  if (medias.length <= limit) {
    return medias;
  }

  const chunkSize = Math.floor(medias.length / limit);
  const rest = medias.length - (limit * chunkSize);

  // split into `limit` contiguous chunks; the first `rest` chunks take one
  // extra media so the remainder is spread evenly
  const chunks = Array.from({ length: limit }, (value, index) => {
    const start = (chunkSize * index) + Math.min(index, rest);

    return medias.slice(start, start + chunkSize + (index < rest ? 1 : 0));
  });

  // flip the contents of the last chunk so the very last media is tried first;
  // reverse a copy of the chunk itself, reversing the one-element list of
  // chunks (as `chunks.slice(-1).reverse()` does) would be a no-op
  const lastIndex = chunks.length - 1;
  const lastPreferredChunks = preferLast
    ? chunks.map((chunk, index) => (index === lastIndex ? [...chunk].reverse() : chunk))
    : chunks;

  const groupedMedias = lastPreferredChunks.map((chunk) => {
    // merge chunked medias into single media with grouped fallback priorities,
    // so the first sources of each media are preferred over all second sources, etc.
    const sources = chunk
      .reduce((accSources, media) => {
        media.sources.forEach((source, index) => {
          if (!accSources[index]) {
            accSources.push([source]);
            return;
          }

          accSources[index].push(source);
        });

        return accSources;
      }, [])
      .flat();

    // the merged media takes its identity from the first (most preferred) media in the chunk
    return {
      id: chunk[0].id,
      role: chunk[0].role,
      sources,
    };
  });

  return groupedMedias;
}
|
||||
|
||||
/**
 * Index a list of items by the value of one of their properties.
 * When several items share the same key value, the last one wins.
 *
 * @param {Array<Object>} items - items to index
 * @param {string} key - name of the property whose value becomes the lookup key
 * @returns {Object} plain object mapping `item[key]` to the item
 */
function itemsByKey(items, key) {
  // build the lookup in a single pass; spreading the accumulator inside
  // reduce would copy it on every step and make this quadratic
  return Object.fromEntries(items.map((item) => [item[key], item]));
}
|
||||
@@ -143,7 +196,7 @@ function toBaseMedias(rawMedias, role) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return rawMedias.map((rawMedia) => {
|
||||
const baseMedias = rawMedias.map((rawMedia) => {
|
||||
if (!rawMedia) {
|
||||
return null;
|
||||
}
|
||||
@@ -157,6 +210,10 @@ function toBaseMedias(rawMedias, role) {
|
||||
|
||||
return baseSourceToBaseMedia(baseSource, role);
|
||||
}).filter(Boolean);
|
||||
|
||||
const sampledBaseMedias = sampleMedias(baseMedias);
|
||||
|
||||
return sampledBaseMedias;
|
||||
}
|
||||
|
||||
async function findSourceDuplicates(baseMedias) {
|
||||
@@ -465,7 +522,6 @@ async function associateReleaseMedia(releases) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: media count limits
|
||||
// TODO: catch errors
|
||||
// TODO: stage by role
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) {
|
||||
// /createaccount is used by e.g. Tricky Spa native site
|
||||
const src = $(linkEl).find('img').attr('src');
|
||||
|
||||
if (src.match('previews/')) {
|
||||
if (/previews\//.test(src)) {
|
||||
// resource often serves full photo at a modifier URL anyway, add as primary source
|
||||
const highRes = src
|
||||
.replace('previews/', '')
|
||||
|
||||
@@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||
@@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) {
|
||||
}
|
||||
|
||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
|
||||
release.studio = slugify(studioName, '');
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) {
|
||||
async function fetchScene(url, site) {
|
||||
const useGallery = true;
|
||||
|
||||
// TODO: fall back on screenshots when gallery is not available
|
||||
const res = useGallery
|
||||
? await bhttp.get(`${url}/gallery#gallery`)
|
||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||
|
||||
@@ -87,7 +87,7 @@ async function attachStudios(releases) {
|
||||
if (release.studio && studioBySlug[release.studio]) {
|
||||
return {
|
||||
...release,
|
||||
studio: release.studio,
|
||||
studio: studioBySlug[release.studio],
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user