From fc58850e56d0e0029b5473cbadac5dc7249ea762 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 11 Apr 2020 22:49:37 +0200 Subject: [PATCH] Added media limit sampling. --- README.md | 18 +++++-- public/img/logos/legalporno/favicon.png | Bin 882 -> 1686 bytes src/media.js | 64 ++++++++++++++++++++++-- src/scrapers/gamma.js | 2 +- src/scrapers/legalporno.js | 5 +- src/store-releases.js | 2 +- 6 files changed, 81 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 87fc8235..3b208e11 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js ### Options `npm start -- --option value` -Running `npm start` without any arguments will run the web server. - +* `--server`: Run the web server * `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file. * `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without cases or special characters. For example, Teens Like It Big is teenslikeitbig. * `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID. @@ -33,10 +32,23 @@ Running `npm start` without any arguments will run the web server. * `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages. * `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in. -#### Developer options +## Developers + +### Options * `--no-save`: Do not store retrieved information in local database, forcing re-fetch. * `--debug`: Show full error stack trace. 
+### Generating thumbnails +Ensure each tag or sfw category directory has a `thumbs` and `lazy` directory: `for dir in *; do mkdir "$dir/thumbs" "$dir/lazy"; done` + +Use ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px lazy pre-loading images: + +* Generate thumbnails within tag or sfw directory: `mogrify -path thumbs -resize x240 -quality 90% *.jpeg` +* Generate lazy loading images within tag or sfw directory: `mogrify -path lazy -resize x90 -quality 90% *.jpeg` + +* Generate thumbnails for all tags or categories in `tags` or `sfw` directory: `for dir in *; do mogrify -path "$dir/thumbs" -resize x240 -quality 90% "$dir"/*.jpeg; done` +* Generate lazy loading images for all tags or categories in `tags` or `sfw` directory: `for dir in *; do mogrify -path "$dir/lazy" -resize x90 -quality 90% "$dir"/*.jpeg; done` + ## Supported networks & sites 768 sites on 62 networks, continuously expanding! diff --git a/public/img/logos/legalporno/favicon.png b/public/img/logos/legalporno/favicon.png index 14b597f1ad72e265898c6086bcf4c964f879794c..456ce8f1f1773a0680a4329cb43015a1f69bfade 100644 GIT binary patch literal 1686 zcmV;H25I?;P)EX>4Tx04R}tkv&MmKpe$i(@Kj}9qb_DkfAzR5EXIMDionYs1;guFuC*#ni!H4 z7e~Rh;NZt%)xpJCR|i)?5c~jfa&%I3krMxx6k5c1aNLh~_a1le0HIN3niU!YG~G5c zsic_8uZZDSgb+b5!idVuGG-+y4d3x~j{slq5;;BvB z;Ji;9V`W(-J|`YE>4LKlt6PRh$_2lA=kV>&0eSad^gZEa<4bO1wgWnpw> zWFU8GbZ8()Nlj2!fese{00eGHL_t(o!|j(#Oq^8|$A9k#%_ZA^7S`&@=#)QV1acN`QSp3$PO?1(HAy@GY?T_B-$8Zu02hEbiwo&Y@*<#F2+;vN4zvQ&>!h}AzuVn?;ZN_NT{BoUknO1z*#rWv zw!3T~gK@*ZAzgKK6+j(uN!RrX4WJB|cK8s>Iw}VNuLAmk0-(aZIXykY#KZ)%vvV{w z)DaGcJqz$_{_JumaMVr0SSGOJ=l}z2ix|v6zr~&W<4=xb=sEzu_0{s!lTUaSV3@}8 zIuePrN`MdMXUPz6iwh+{XE;J;ewQui zAh5OoMI5Nlg@4Ln5Fn(axv4R40ev@aEU#x~W`1(d?48Rq>-56w^d`?gI{-B2%w>9- zC0z$0XuE$Ot*v)^CLbFeUq(j=p-M|jFKaGAs&^AG&jj?bza3z&d$XhCK@J>f&#GZw z>*Ai>k)7u;8^qKP*x;_ki4t%(H zv*uKC1VR#`t_eW%?y;-{e;G>p1yB@)np!n7I=ULI45(`uK|w*m+MqMg;%0mhl<;oB zDm)ZZhLQDEz**o`pe+`wvpg11T?w{7jV4RFCY{gnwnoY=hL)q(=fgUX7=of 
z(chmSk@(}bM^qE=4bYjDS1tcb$OQhxxte`}Vf-q_|1y-la@hX<#D?z&nuZuMQTGBL z0ndfj;&ci43^)fk8KeZ=z>b8yP~BJoCW^*Tc)o9OzSD&60K5sD$|O7sB&ThgEmFD_ gX7m5Vru}#9ALwm`;ziCFVgLXD07*qoM6N<$f{N|^z5oCK delta 874 zcmV-w1C{)i4e|zmiBL{Q4GJ0x0000DNk~Le0000G0000G2nGNE03Y-JVE_ODg=s@W zP)S2WAaHVTW@&6?001bFeUUv#!%!53PgAub6$d+rI%KH2SP&I))G8FALZ}s5bufA9 zkI9(0l#KlZ@MGU7XLYMNiva*-s%6W^kR;sYZJ^2g6IemGF>$C=uzycN_L4<+|Hc*0%80|VK7E*K`_wo0; zeu-QPxi-MaF^@7d$gUs!4}Qc zat)aNB)!_yqDMf_HgIv>)RaBoatG*pGGtSBr64VTA)g1{&*+=7K<_Qkwd&2SagNgm zAWgGM-T()Oz;J=G*L>dH**dp>dm8im0U$ndhM;l}$p8QV24YJ`L;&FcG5{Y&5NR6# z000SaNLh0L04^f{04^f|c%?sf00007bV*G`2jl?;11cR?A)D|3000?uMObu0Z*6U5 zZgc>X`vDh!0ZBibksbPBymyXa~Nl9nNpYaFq zE3~DtpaMdo(FTGfRF`ancz_ujSU~XWe3MCbcXnppoqY%33TOcGM{CSu8O8`+*l=gQ zhz)0TlXVgR^k>txOTO=;ltL+mF@{>LhB3zaEv1`(;}Z$JoHS280Yqyp-ELPtquFfQ znB8p*7SLMb`~C)PrIcN-CAd5|1mI;h10bKz15l|{C=?1?dteh>?Csmb`)F+64&b^j z)oRuHTdmew5&@)?OiAeFB=mApPG5OEZM4WkG@Q+~{02E_R { + const start = (chunkSize * index) + Math.min(index, rest); + + return medias.slice( + start, + start + chunkSize + (index < rest ? 1 : 0), + ); + }, + ); + + // flip last chunk so the very last image (often the best cumshot) is tried first + const lastPreferredChunks = preferLast + ? chunks.slice(0, -1).concat(chunks.slice(-1).reverse()) + : chunks; + + const groupedMedias = lastPreferredChunks.map((chunk) => { + // merge chunked medias into single media with grouped fallback priorities, + // so the first sources of each media is preferred over all second sources, etc. 
+ const sources = chunk + .reduce((accSources, media) => { + media.sources.forEach((source, index) => { + if (!accSources[index]) { + accSources.push([source]); + return; + } + + accSources[index].push(source); + }); + + return accSources; + }, []) + .flat(); + + return { + id: chunk[0].id, + role: chunk[0].role, + sources, + }; + }); + + return groupedMedias; +} + function itemsByKey(items, key) { return items.reduce((acc, item) => ({ ...acc, [item[key]]: item }), {}); } @@ -143,7 +196,7 @@ function toBaseMedias(rawMedias, role) { return []; } - return rawMedias.map((rawMedia) => { + const baseMedias = rawMedias.map((rawMedia) => { if (!rawMedia) { return null; } @@ -157,6 +210,10 @@ function toBaseMedias(rawMedias, role) { return baseSourceToBaseMedia(baseSource, role); }).filter(Boolean); + + const sampledBaseMedias = sampleMedias(baseMedias); + + return sampledBaseMedias; } async function findSourceDuplicates(baseMedias) { @@ -465,7 +522,6 @@ async function associateReleaseMedia(releases) { return; } - // TODO: media count limits // TODO: catch errors // TODO: stage by role diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index c86b5f23..26f25c4e 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -42,7 +42,7 @@ function scrapePhotos(html, includeThumbnails = true) { // /createaccount is used by e.g. 
Tricky Spa native site const src = $(linkEl).find('img').attr('src'); - if (src.match('previews/')) { + if (/previews\//.test(src)) { // resource often serves full photo at a modifier URL anyway, add as primary source const highRes = src .replace('previews/', '') diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index 369b89e0..bd253a17 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -5,6 +5,8 @@ const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); const moment = require('moment'); +const slugify = require('../utils/slugify'); + function extractTitle(originalTitle) { const titleComponents = originalTitle.split(' '); const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes @@ -139,7 +141,7 @@ async function scrapeScene(html, url, site, useGallery) { } const studioName = $('.watchpage-studioname').first().text().trim(); - release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase(); + release.studio = slugify(studioName, ''); return release; } @@ -175,6 +177,7 @@ async function fetchLatest(site, page = 1) { async function fetchScene(url, site) { const useGallery = true; + // TODO: fall back on screenshots when gallery is not available const res = useGallery ? await bhttp.get(`${url}/gallery#gallery`) : await bhttp.get(`${url}/screenshots#screenshots`); diff --git a/src/store-releases.js b/src/store-releases.js index 28da0269..02d8f7ec 100644 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -87,7 +87,7 @@ async function attachStudios(releases) { if (release.studio && studioBySlug[release.studio]) { return { ...release, - studio: release.studio, + studio: studioBySlug[release.studio], }; }