Added media limit sampling.

2020-04-11 22:49:37 +02:00 · 2020-04-11 22:49:37 +02:00 · fc58850e56
parent cb68319ac0
commit fc58850e56
6 changed files with 81 additions and 10 deletions
--- a/README.md
+++ b/README.md
@ -23,8 +23,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js
 ### Options
 `npm start -- --option value`

-Running `npm start` without any arguments will run the web server.
-
+* `--server`: Run the web server
 * `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file.
 * `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without cases or special characters. For example, Teens Like It Big is teenslikeitbig.
 * `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID.
@ -33,10 +32,23 @@ Running `npm start` without any arguments will run the web server.
 * `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages.
 * `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in.

-#### Developer options
+## Developers
+
+### Options
 * `--no-save`: Do not store retrieved information in local database, forcing re-fetch.
 * `--debug`: Show full error stack trace.

+### Generating thumbnails
+Ensure each tag or sfw category directory has a `thumbs` and `lazy` directory: `for dir in \*; do mkdir "$dir/thumbs" "$dir/lazy"; done`
+
+Use ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px lazy pre-loading images:
+
+* Generate thumbnails within tag or sfw directory: `mogrify -path thumbs -resize x240 -quality 90% \*.jpeg`
+* Generate lazy loading images within tag or sfw directory: `mogrify -path lazy -resize x90 -quality 90% \*.jpeg`
+
+* Generate thumbnails for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/thumbs" -resize x240 -quality 90% "$dir/\*.jpeg"; done`
+* Generate lazy loading images for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/lazy" -resize x90 -quality 90% "$dir/\*.jpeg"; done`
+
 ## Supported networks & sites
 768 sites on 62 networks, continuously expanding!

--- a/public/img/logos/legalporno/favicon.png
+++ b/public/img/logos/legalporno/favicon.png
--- a/src/media.js
+++ b/src/media.js
@ -1,13 +1,12 @@
 'use strict';

 const config = require('config');
-// const util = require('util');
 const Promise = require('bluebird');
 const fs = require('fs').promises;
 const path = require('path');
 const nanoid = require('nanoid/non-secure');
 const mime = require('mime');
-const fileType = require('file-type');
+// const fileType = require('file-type');
 const sharp = require('sharp');
 const blake2 = require('blake2');

@ -72,6 +71,60 @@ async function getThumbnail(buffer, height = config.media.thumbnailSize) {
    return null;
 }

+/**
+ * Reduce a list of medias to at most `limit` entries without discarding any
+ * source: the list is split into `limit` contiguous, near-equal chunks and
+ * every chunk is merged into a single media whose `sources` contain the
+ * sources of all medias in that chunk, so the extras serve as fallbacks.
+ *
+ * @param {Array<{id: *, role: *, sources: Array}>} medias - medias to sample; each one must carry a `sources` array
+ * @param {number} [limit=config.media.limit] - maximum number of medias to return
+ * @param {boolean} [preferLast=true] - within the final chunk, try the very last media first
+ * @returns {Array<{id: *, role: *, sources: Array}>} `medias` itself when it already fits the limit, otherwise one merged media per chunk
+ */
+function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
+    // limit media sets, use extras as fallbacks
+    if (medias.length <= limit) {
+        return medias;
+    }
+
+    const chunkSize = Math.floor(medias.length / limit);
+    const rest = medias.length - (limit * chunkSize);
+
+    // the first `rest` chunks take one extra media so the remainder is spread evenly
+    const chunks = Array.from(
+        { length: limit },
+        (value, index) => {
+            const start = (chunkSize * index) + Math.min(index, rest);
+
+            return medias.slice(
+                start,
+                start + chunkSize + (index < rest ? 1 : 0),
+            );
+        },
+    );
+
+    // flip the contents of the last chunk so the very last image (often the best cumshot) is tried first;
+    // reversing a copy of the chunk itself, as reversing the one-element list that holds it changes nothing
+    const lastPreferredChunks = preferLast
+        ? chunks.map((chunk, index) => (index === chunks.length - 1 ? [...chunk].reverse() : chunk))
+        : chunks;
+
+    const groupedMedias = lastPreferredChunks.map((chunk) => {
+        // merge chunked medias into single media with grouped fallback priorities,
+        // so the first sources of each media are preferred over all second sources, etc.
+        const sources = chunk
+            .reduce((accSources, media) => {
+                media.sources.forEach((source, index) => {
+                    if (!accSources[index]) {
+                        accSources.push([source]);
+                        return;
+                    }
+
+                    accSources[index].push(source);
+                });
+
+                return accSources;
+            }, [])
+            .flat();
+
+        // the merged media inherits id and role from the chunk's first (most preferred) media
+        return {
+            id: chunk[0].id,
+            role: chunk[0].role,
+            sources,
+        };
+    });
+
+    return groupedMedias;
+}
+
+
 function itemsByKey(items, key) {
    return items.reduce((acc, item) => ({ ...acc, [item[key]]: item }), {});
 }
@ -143,7 +196,7 @@ function toBaseMedias(rawMedias, role) {
        return [];
    }

-    return rawMedias.map((rawMedia) => {
+    const baseMedias = rawMedias.map((rawMedia) => {
        if (!rawMedia) {
            return null;
        }
@ -157,6 +210,10 @@ function toBaseMedias(rawMedias, role) {

        return baseSourceToBaseMedia(baseSource, role);
    }).filter(Boolean);
+
+    const sampledBaseMedias = sampleMedias(baseMedias);
+
+    return sampledBaseMedias;
 }

 async function findSourceDuplicates(baseMedias) {
@ -465,7 +522,6 @@ async function associateReleaseMedia(releases) {
        return;
    }

-    // TODO: media count limits
    // TODO: catch errors
    // TODO: stage by role

--- a/src/scrapers/gamma.js
+++ b/src/scrapers/gamma.js
@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) {
            // /createaccount is used by e.g. Tricky Spa native site
            const src = $(linkEl).find('img').attr('src');

-            if (src.match('previews/')) {
+            if (/previews\//.test(src)) {
                // resource often serves full photo at a modifier URL anyway, add as primary source
                const highRes = src
                    .replace('previews/', '')
--- a/src/scrapers/legalporno.js
+++ b/src/scrapers/legalporno.js
@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom');
 const cheerio = require('cheerio');
 const moment = require('moment');

+const slugify = require('../utils/slugify');
+
 function extractTitle(originalTitle) {
    const titleComponents = originalTitle.split(' ');
    const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) {
    }

    const studioName = $('.watchpage-studioname').first().text().trim();
-    release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
+    release.studio = slugify(studioName, '');

    return release;
 }
@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) {
 async function fetchScene(url, site) {
    const useGallery = true;

+    // TODO: fall back on screenshots when gallery is not available
    const res = useGallery
        ? await bhttp.get(`${url}/gallery#gallery`)
        : await bhttp.get(`${url}/screenshots#screenshots`);
--- a/src/store-releases.js
+++ b/src/store-releases.js
@ -87,7 +87,7 @@ async function attachStudios(releases) {
        if (release.studio && studioBySlug[release.studio]) {
            return {
                ...release,
-                studio: release.studio,
+                studio: studioBySlug[release.studio],
            };
        }