diff --git a/README.md b/README.md index 87fc8235..3b208e11 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js ### Options `npm start -- --option value` -Running `npm start` without any arguments will run the web server. - +* `--server`: Run the web server * `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file. * `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. * `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID. @@ -33,10 +32,23 @@ Running `npm start` without any arguments will run the web server. * `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages. * `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in. -#### Developer options +## Developers + +### Options * `--no-save`: Do not store retrieved information in local database, forcing re-fetch. * `--debug`: Show full error stack trace. 
/**
 * Reduce a list of medias to at most `limit` entries without discarding any
 * sources: surplus medias are folded into their neighbours as fallbacks.
 *
 * When there are more medias than allowed, the list is split into `limit`
 * contiguous chunks of near-equal size (the first `rest` chunks receive one
 * extra item). Every chunk is merged into a single media whose `sources`
 * list holds all first sources of the chunk, then all second sources, etc.
 *
 * @param {Array<{id: *, role: *, sources: Array}>} medias - medias to sample; never mutated
 * @param {number} [limit=config.media.limit] - maximum number of medias to return
 * @param {boolean} [preferLast=true] - reverse the last chunk so the very last media is tried first
 * @returns {Array} `medias` itself when it already fits the limit, otherwise one merged media per chunk
 */
function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
  // limit media sets, use extras as fallbacks
  if (medias.length <= limit) {
    return medias;
  }

  const chunkSize = Math.floor(medias.length / limit);
  const rest = medias.length - (limit * chunkSize);

  const chunks = Array.from({ length: limit }, (value, index) => {
    // chunks before `rest` are one item longer, so later chunks shift by min(index, rest)
    const start = (chunkSize * index) + Math.min(index, rest);
    const end = start + chunkSize + (index < rest ? 1 : 0);

    return medias.slice(start, end);
  });

  // flip the CONTENTS of the last chunk so the very last image is tried first;
  // reversing `chunks.slice(-1)` would only reverse a one-element wrapper array (a no-op)
  const lastIndex = chunks.length - 1;
  const lastPreferredChunks = preferLast
    ? chunks.map((chunk, index) => (index === lastIndex ? [...chunk].reverse() : chunk))
    : chunks;

  return lastPreferredChunks.map((chunk) => {
    // merge chunked medias into a single media with grouped fallback priorities,
    // so the first source of each media is preferred over all second sources, etc.
    const sourcesByPriority = [];

    chunk.forEach((media) => {
      media.sources.forEach((source, priority) => {
        if (!sourcesByPriority[priority]) {
          sourcesByPriority[priority] = [];
        }

        sourcesByPriority[priority].push(source);
      });
    });

    return {
      // identity is taken from the chunk's preferred (first-tried) media
      id: chunk[0].id,
      role: chunk[0].role,
      sources: sourcesByPriority.flat(),
    };
  });
}
@@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) { // /createaccount is used by e.g. Tricky Spa native site const src = $(linkEl).find('img').attr('src'); - if (src.match('previews/')) { + if (/previews\//.test(src)) { // resource often serves full photo at a modifier URL anyway, add as primary source const highRes = src .replace('previews/', '') diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index 369b89e0..bd253a17 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); +const slugify = require('../utils/slugify'); + function extractTitle(originalTitle) { const titleComponents = originalTitle.split(' '); const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes @@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) { } const studioName = $('.watchpage-studioname').first().text().trim(); - release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase(); + release.studio = slugify(studioName, ''); return release; } @@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) { async function fetchScene(url, site) { const useGallery = true; + // TODO: fall back on screenshots when gallery is not available const res = useGallery ? await bhttp.get(`${url}/gallery#gallery`) : await bhttp.get(`${url}/screenshots#screenshots`); diff --git a/src/store-releases.js b/src/store-releases.js index 28da0269..02d8f7ec 100644 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -87,7 +87,7 @@ async function attachStudios(releases) { if (release.studio && studioBySlug[release.studio]) { return { ...release, - studio: release.studio, + studio: studioBySlug[release.studio], }; }