Added media limit sampling.
This commit is contained in:
parent
cb68319ac0
commit
fc58850e56
18
README.md
18
README.md
|
@ -23,8 +23,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
|
||||||
### Options
|
### Options
|
||||||
`npm start -- --option value`
|
`npm start -- --option value`
|
||||||
|
|
||||||
Running `npm start` without any arguments will run the web server.
|
* `--server`: Run the web server
|
||||||
|
|
||||||
* `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file.
|
* `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file.
|
||||||
* `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
|
* `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig.
|
||||||
* `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID.
|
* `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID.
|
||||||
|
@ -33,10 +32,23 @@ Running `npm start` without any arguments will run the web server.
|
||||||
* `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages.
|
* `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages.
|
||||||
* `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in.
|
* `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in.
|
||||||
|
|
||||||
#### Developer options
|
## Developers
|
||||||
|
|
||||||
|
### Options
|
||||||
* `--no-save`: Do not store retrieved information in local database, forcing re-fetch.
|
* `--no-save`: Do not store retrieved information in local database, forcing re-fetch.
|
||||||
* `--debug`: Show full error stack trace.
|
* `--debug`: Show full error stack trace.
|
||||||
|
|
||||||
|
### Generating thumbnails
|
||||||
|
Ensure each tag or sfw category directory has a `thumbs` and `lazy` directory: `for dir in \*; do mkdir "$dir/thumbs" "$dir/lazy"; done`
|
||||||
|
|
||||||
|
Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px lazy pre-loading images:
|
||||||
|
|
||||||
|
* Generate thumbnails within tag or sfw directory: `mogrify -path thumbs -resize x240 -quality 90% \*.jpeg`
|
||||||
|
* Generate lazy loading images within tag or sfw directory: `mogrify -path lazy -resize x90 -quality 90% \*.jpeg`
|
||||||
|
|
||||||
|
* Generate thumbnails for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/thumbs" -resize x240 -quality 90% "$dir/\*.jpeg"; done`
|
||||||
|
* Generate lazy loading images for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/lazy" -resize x90 -quality 90% "$dir/\*.jpeg"; done`
|
||||||
|
|
||||||
## Supported networks & sites
|
## Supported networks & sites
|
||||||
768 sites on 62 networks, continuously expanding!
|
768 sites on 62 networks, continuously expanding!
|
||||||
|
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 882 B After Width: | Height: | Size: 1.6 KiB |
64
src/media.js
64
src/media.js
|
@ -1,13 +1,12 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
// const util = require('util');
|
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const fs = require('fs').promises;
|
const fs = require('fs').promises;
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const nanoid = require('nanoid/non-secure');
|
const nanoid = require('nanoid/non-secure');
|
||||||
const mime = require('mime');
|
const mime = require('mime');
|
||||||
const fileType = require('file-type');
|
// const fileType = require('file-type');
|
||||||
const sharp = require('sharp');
|
const sharp = require('sharp');
|
||||||
const blake2 = require('blake2');
|
const blake2 = require('blake2');
|
||||||
|
|
||||||
|
@ -72,6 +71,60 @@ async function getThumbnail(buffer, height = config.media.thumbnailSize) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reduce a list of medias to at most `limit` entries by splitting it into
 * `limit` consecutive chunks and merging every chunk into a single media
 * whose sources act as fallbacks for one another.
 *
 * @param {Array<{id: *, role: *, sources: Array}>} medias - Medias to sample; each must expose a `sources` array.
 * @param {number} [limit=config.media.limit] - Maximum number of medias to return.
 * @param {boolean} [preferLast=true] - When true, the items of the last chunk are tried in reverse order,
 *   so the very last media of the set is attempted first.
 * @returns {Array} The original `medias` array when it already fits the limit, otherwise a new array of
 *   merged `{ id, role, sources }` objects, one per chunk.
 */
function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
	// limit media sets, use extras as fallbacks
	if (medias.length <= limit) {
		return medias;
	}

	const chunkSize = Math.floor(medias.length / limit);
	const rest = medias.length - (limit * chunkSize);

	// distribute the remainder over the first `rest` chunks, which each get one extra item
	const chunks = Array.from(
		{ length: limit },
		(value, index) => {
			const start = (chunkSize * index) + Math.min(index, rest);

			return medias.slice(
				start,
				start + chunkSize + (index < rest ? 1 : 0),
			);
		},
	);

	// flip the items of the last chunk so the very last image is tried first;
	// reverse a copy of the chunk's items, reversing the one-element list of chunks would be a no-op
	const lastPreferredChunks = preferLast
		? chunks.map((chunk, index) => (index === chunks.length - 1 ? [...chunk].reverse() : chunk))
		: chunks;

	const groupedMedias = lastPreferredChunks.map((chunk) => {
		// merge chunked medias into single media with grouped fallback priorities,
		// so the first sources of each media are preferred over all second sources, etc.
		const sources = chunk
			.reduce((accSources, media) => {
				media.sources.forEach((source, index) => {
					if (!accSources[index]) {
						accSources.push([source]);
						return;
					}

					accSources[index].push(source);
				});

				return accSources;
			}, [])
			.flat();

		// the merged media takes its identity from the first (most preferred) item of the chunk
		return {
			id: chunk[0].id,
			role: chunk[0].role,
			sources,
		};
	});

	return groupedMedias;
}
|
||||||
|
|
||||||
/**
 * Index a list of items by the value of one of their properties.
 *
 * @param {Array<Object>} items - Items to index.
 * @param {string} key - Name of the property whose value becomes the lookup key.
 * @returns {Object} Object mapping each `item[key]` to its item; when several items
 *   share a key value, the last one wins.
 */
function itemsByKey(items, key) {
	// build the lookup in a single pass instead of re-spreading the accumulator for every item
	return Object.fromEntries(items.map((item) => [item[key], item]));
}
|
||||||
|
@ -143,7 +196,7 @@ function toBaseMedias(rawMedias, role) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
return rawMedias.map((rawMedia) => {
|
const baseMedias = rawMedias.map((rawMedia) => {
|
||||||
if (!rawMedia) {
|
if (!rawMedia) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -157,6 +210,10 @@ function toBaseMedias(rawMedias, role) {
|
||||||
|
|
||||||
return baseSourceToBaseMedia(baseSource, role);
|
return baseSourceToBaseMedia(baseSource, role);
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
|
const sampledBaseMedias = sampleMedias(baseMedias);
|
||||||
|
|
||||||
|
return sampledBaseMedias;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function findSourceDuplicates(baseMedias) {
|
async function findSourceDuplicates(baseMedias) {
|
||||||
|
@ -465,7 +522,6 @@ async function associateReleaseMedia(releases) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: media count limits
|
|
||||||
// TODO: catch errors
|
// TODO: catch errors
|
||||||
// TODO: stage by role
|
// TODO: stage by role
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) {
|
||||||
// /createaccount is used by e.g. Tricky Spa native site
|
// /createaccount is used by e.g. Tricky Spa native site
|
||||||
const src = $(linkEl).find('img').attr('src');
|
const src = $(linkEl).find('img').attr('src');
|
||||||
|
|
||||||
if (src.match('previews/')) {
|
if (/previews\//.test(src)) {
|
||||||
// resource often serves full photo at a modifier URL anyway, add as primary source
|
// resource often serves full photo at a modifier URL anyway, add as primary source
|
||||||
const highRes = src
|
const highRes = src
|
||||||
.replace('previews/', '')
|
.replace('previews/', '')
|
||||||
|
|
|
@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function extractTitle(originalTitle) {
|
function extractTitle(originalTitle) {
|
||||||
const titleComponents = originalTitle.split(' ');
|
const titleComponents = originalTitle.split(' ');
|
||||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
|
||||||
|
@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const studioName = $('.watchpage-studioname').first().text().trim();
|
const studioName = $('.watchpage-studioname').first().text().trim();
|
||||||
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
|
release.studio = slugify(studioName, '');
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const useGallery = true;
|
const useGallery = true;
|
||||||
|
|
||||||
|
// TODO: fall back on screenshots when gallery is not available
|
||||||
const res = useGallery
|
const res = useGallery
|
||||||
? await bhttp.get(`${url}/gallery#gallery`)
|
? await bhttp.get(`${url}/gallery#gallery`)
|
||||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||||
|
|
|
@ -87,7 +87,7 @@ async function attachStudios(releases) {
|
||||||
if (release.studio && studioBySlug[release.studio]) {
|
if (release.studio && studioBySlug[release.studio]) {
|
||||||
return {
|
return {
|
||||||
...release,
|
...release,
|
||||||
studio: release.studio,
|
studio: studioBySlug[release.studio],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue