Added media limit sampling.
This commit is contained in:
parent
cb68319ac0
commit
fc58850e56
18
README.md
18
README.md
|
@ -23,8 +23,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
|
|||
### Options
|
||||
`npm start -- --option value`
|
||||
|
||||
Running `npm start` without any arguments will run the web server.
|
||||
|
||||
* `--server`: Run the web server
|
||||
* `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file.
|
||||
* `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
|
||||
* `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID.
|
||||
|
@ -33,10 +32,23 @@ Running `npm start` without any arguments will run the web server.
|
|||
* `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages.
|
||||
* `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in.
|
||||
|
||||
#### Developer options
|
||||
## Developers
|
||||
|
||||
### Options
|
||||
* `--no-save`: Do not store retrieved information in local database, forcing re-fetch.
|
||||
* `--debug`: Show full error stack trace.
|
||||
|
||||
### Generating thumbnails
|
||||
Ensure each tag or sfw category directory has a `thumbs` and `lazy` directory: `for dir in \*; do mkdir "$dir/thumbs" "$dir/lazy"; done`
|
||||
|
||||
Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px lazy pre-loading images:
|
||||
|
||||
* Generate thumbnails within tag or sfw directory: `mogrify -path thumbs -resize x240 -quality 90% \*.jpeg`
|
||||
* Generate lazy loading images within tag or sfw directory: `mogrify -path lazy -resize x90 -quality 90% \*.jpeg`
|
||||
|
||||
* Generate thumbnails for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/thumbs" -resize x240 -quality 90% "$dir/\*.jpeg"; done`
|
||||
* Generate lazy loading images for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/lazy" -resize x90 -quality 90% "$dir/\*.jpeg"; done`
|
||||
|
||||
## Supported networks & sites
|
||||
768 sites on 62 networks, continuously expanding!
|
||||
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 882 B After Width: | Height: | Size: 1.6 KiB |
64
src/media.js
64
src/media.js
|
@ -1,13 +1,12 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
// const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
const mime = require('mime');
|
||||
const fileType = require('file-type');
|
||||
// const fileType = require('file-type');
|
||||
const sharp = require('sharp');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
|
@ -72,6 +71,60 @@ async function getThumbnail(buffer, height = config.media.thumbnailSize) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Reduce a list of medias to at most `limit` entries without discarding sources.
 *
 * The list is split into `limit` contiguous chunks of near-equal size, and each
 * chunk is merged into a single media whose `sources` contain the sources of
 * every media in that chunk, so the surplus medias serve as fallbacks.
 *
 * @param {Array<{id: *, role: *, sources: Array}>} medias - Medias to sample.
 * @param {number} [limit=config.media.limit] - Maximum number of medias to
 *   return; `null`/`undefined` is treated as "no limit".
 * @param {boolean} [preferLast=true] - When true, the medias of the last chunk
 *   are reversed so the very last media is tried first.
 * @returns {Array} `medias` itself when it is within the limit, otherwise one
 *   merged `{ id, role, sources }` object per chunk.
 */
function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
	// limit media sets, use extras as fallbacks
	// without a limit there is nothing to sample against; the chunking below
	// would otherwise produce an empty list and silently drop every media
	if (limit === null || limit === undefined || medias.length <= limit) {
		return medias;
	}

	const chunkSize = Math.floor(medias.length / limit);
	const rest = medias.length - (limit * chunkSize);

	// the first `rest` chunks take one extra media so every media lands in a chunk
	const chunks = Array.from(
		{ length: limit },
		(value, index) => {
			const start = (chunkSize * index) + Math.min(index, rest);

			return medias.slice(
				start,
				start + chunkSize + (index < rest ? 1 : 0),
			);
		},
	);

	// flip the contents of the last chunk so the very last image is tried first;
	// reversing a copy keeps the input medias and the other chunks untouched
	const lastPreferredChunks = preferLast
		? [
			...chunks.slice(0, -1),
			...chunks.slice(-1).map((chunk) => [...chunk].reverse()),
		]
		: chunks;

	const groupedMedias = lastPreferredChunks.map((chunk) => {
		// merge chunked medias into single media with grouped fallback priorities,
		// so the first sources of each media are preferred over all second sources, etc.
		const sources = chunk
			.reduce((accSources, media) => {
				media.sources.forEach((source, index) => {
					if (!accSources[index]) {
						accSources.push([source]);
						return;
					}

					accSources[index].push(source);
				});

				return accSources;
			}, [])
			.flat();

		return {
			id: chunk[0].id,
			role: chunk[0].role,
			sources,
		};
	});

	return groupedMedias;
}
|
||||
|
||||
/**
 * Index a list of objects by the value of one of their properties.
 *
 * When several items share the same key value, the last one wins.
 *
 * @param {Array<Object>} items - Objects to index.
 * @param {string} key - Name of the property whose value becomes the lookup key.
 * @returns {Object} Plain object mapping each `item[key]` to its item.
 */
function itemsByKey(items, key) {
	// build the lookup in a single pass; spreading the accumulator on every
	// iteration re-copied all previous entries for each item (quadratic)
	return Object.fromEntries(items.map((item) => [item[key], item]));
}
|
||||
|
@ -143,7 +196,7 @@ function toBaseMedias(rawMedias, role) {
|
|||
return [];
|
||||
}
|
||||
|
||||
return rawMedias.map((rawMedia) => {
|
||||
const baseMedias = rawMedias.map((rawMedia) => {
|
||||
if (!rawMedia) {
|
||||
return null;
|
||||
}
|
||||
|
@ -157,6 +210,10 @@ function toBaseMedias(rawMedias, role) {
|
|||
|
||||
return baseSourceToBaseMedia(baseSource, role);
|
||||
}).filter(Boolean);
|
||||
|
||||
const sampledBaseMedias = sampleMedias(baseMedias);
|
||||
|
||||
return sampledBaseMedias;
|
||||
}
|
||||
|
||||
async function findSourceDuplicates(baseMedias) {
|
||||
|
@ -465,7 +522,6 @@ async function associateReleaseMedia(releases) {
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: media count limits
|
||||
// TODO: catch errors
|
||||
// TODO: stage by role
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) {
|
|||
// /createaccount is used by e.g. Tricky Spa native site
|
||||
const src = $(linkEl).find('img').attr('src');
|
||||
|
||||
if (src.match('previews/')) {
|
||||
if (/previews\//.test(src)) {
|
||||
// resource often serves full photo at a modifier URL anyway, add as primary source
|
||||
const highRes = src
|
||||
.replace('previews/', '')
|
||||
|
|
|
@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom');
|
|||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||
|
@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) {
|
|||
}
|
||||
|
||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
|
||||
release.studio = slugify(studioName, '');
|
||||
|
||||
return release;
|
||||
}
|
||||
|
@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) {
|
|||
async function fetchScene(url, site) {
|
||||
const useGallery = true;
|
||||
|
||||
// TODO: fall back on screenshots when gallery is not available
|
||||
const res = useGallery
|
||||
? await bhttp.get(`${url}/gallery#gallery`)
|
||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||
|
|
|
@ -87,7 +87,7 @@ async function attachStudios(releases) {
|
|||
if (release.studio && studioBySlug[release.studio]) {
|
||||
return {
|
||||
...release,
|
||||
studio: release.studio,
|
||||
studio: studioBySlug[release.studio],
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue