Preserving Jules Jordan base photos in deep scrape.

This commit is contained in:
DebaucheryLibrarian
2023-07-29 23:59:17 +02:00
parent c3d4bf0e62
commit ab5b165c68
56 changed files with 140 additions and 24 deletions

View File

@@ -51,28 +51,16 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
const prefixedSrc = qu.prefixUrl(src, site.url);
if (src) {
return [
{
src: prefixedSrc.replace(/.jpg$/, '-full.jpg'),
referer: site.url,
verifyType: 'image', // sometimes returns 200 OK with text/html instead of 403
},
{
src: prefixedSrc.replace(/-1x.jpg$/, '-4x.jpg'),
referer: site.url,
verifyType: 'image',
},
{
src: prefixedSrc.replace(/-1x.jpg$/, '-2x.jpg'),
referer: site.url,
verifyType: 'image',
},
{
src: prefixedSrc,
referer: site.url,
verifyType: 'image',
},
];
return Array.from(new Set([
prefixedSrc.replace(/.jpg$/, '-full.jpg'),
prefixedSrc.replace(/-1x.jpg$/, '-4x.jpg'),
prefixedSrc.replace(/-1x.jpg$/, '-2x.jpg'),
prefixedSrc,
])).map((source) => ({
src: source,
referer: site.url,
verifyType: 'image',
}));
}
return null;
@@ -209,7 +197,21 @@ async function scrapeScene({ html, query }, context) {
if (argv.jjFullPhotos) {
release.photos = getPhotos(query, release, context);
} else {
release.photos = query.imgs('#images img');
// base release photos are usually better, but deep photos have additional thumbs
// the filenames are not chronological, so sorting after appending only worsens the mix
release.photos = [
...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [],
...query.imgs('#images img'),
].map((source) => Array.from(new Set([
source.replace(/.jpg$/, '-full.jpg'),
source.replace(/-1x.jpg$/, '-4x.jpg'),
source.replace(/-1x.jpg$/, '-2x.jpg'),
source,
])).map((fallbackSource) => ({
src: fallbackSource,
referer: context.entity.url,
verifyType: 'image',
})));
}
if (query.exists('.update_dvds a')) {