Added a mimetype verification option to media sources to ensure the server returned a plausible file. Added additional fallbacks to the Jules Jordan poster scraper for Amateur Allure.

This commit is contained in:
DebaucheryLibrarian 2020-09-17 02:30:58 +02:00
parent 6d1f83bc40
commit 1a8de4fcf6
9 changed files with 29 additions and 6 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 782 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 27 KiB

View File

@ -603,7 +603,7 @@ const tagPosters = [
['brunette', 0, 'Darcie Dolce for Playboy'], ['brunette', 0, 'Darcie Dolce for Playboy'],
['bondage', 0, 'Veronica Leal for Her Limit'], ['bondage', 0, 'Veronica Leal for Her Limit'],
['bukkake', 0, 'Jaye Summers in "Facialized 5" for HardX'], ['bukkake', 0, 'Jaye Summers in "Facialized 5" for HardX'],
['caucasian', 0, 'Remy Lacroix for HardX'], ['caucasian', 2, 'Kenzie Reeves for Bang'],
['creampie', 'poster', 'ALina Lopez in "Making Yourself Unforgettable" for Blacked'], ['creampie', 'poster', 'ALina Lopez in "Making Yourself Unforgettable" for Blacked'],
['cum-in-mouth', 1, 'Sarah Vandella in "Blow Bang Vandella" for HardX'], ['cum-in-mouth', 1, 'Sarah Vandella in "Blow Bang Vandella" for HardX'],
['cum-on-butt', 0, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'], ['cum-on-butt', 0, 'Jynx Maze in "Don\'t Make Me Beg 4" for Evil Angel'],
@ -681,6 +681,7 @@ const tagPhotos = [
['blonde', 2, 'Isabelle Deltore for Her Limit'], ['blonde', 2, 'Isabelle Deltore for Her Limit'],
['blowbang', 'poster', 'Marsha May in "Feeding Frenzy 12" for Jules Jordan'], ['blowbang', 'poster', 'Marsha May in "Feeding Frenzy 12" for Jules Jordan'],
// ['bukkake', 'poster', 'Mia Malkova in "Facialized 2" for HardX'], // ['bukkake', 'poster', 'Mia Malkova in "Facialized 2" for HardX'],
['caucasian', 0, 'Remy Lacroix for HardX'],
['caucasian', 1, 'Sheena Shaw for Brazzers'], ['caucasian', 1, 'Sheena Shaw for Brazzers'],
['da-tp', 6, 'Adriana Chechik in "Gangbang Me" for HardX'], ['da-tp', 6, 'Adriana Chechik in "Gangbang Me" for HardX'],
['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'], ['da-tp', 0, 'Natasha Teen in LegalPorno SZ2164'],

View File

@ -99,6 +99,9 @@ function toBaseSource(rawSource) {
baseSource.stream = rawSource.stream; baseSource.stream = rawSource.stream;
} }
// reject source if response mimetype does not match specified type
if (rawSource.verifyType) baseSource.verifyType = rawSource.verifyType;
if (rawSource.referer) baseSource.referer = rawSource.referer; if (rawSource.referer) baseSource.referer = rawSource.referer;
if (rawSource.host) baseSource.host = rawSource.host; if (rawSource.host) baseSource.host = rawSource.host;
if (rawSource.attempts) baseSource.attempts = rawSource.attempts; if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
@ -441,6 +444,8 @@ streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStr
}); });
async function fetchSource(source, baseMedia) { async function fetchSource(source, baseMedia) {
const maxAttempts = source.attempts || 3;
logger.silly(`Fetching media from ${source.src}`); logger.silly(`Fetching media from ${source.src}`);
// attempts // attempts
@ -470,6 +475,10 @@ async function fetchSource(source, baseMedia) {
const [type, subtype] = mimetype.split('/'); const [type, subtype] = mimetype.split('/');
const extension = mime.getExtension(mimetype); const extension = mime.getExtension(mimetype);
if (source.verifyType && source.verifyType !== type) {
throw Object.assign(new Error(`Type '${type}' does not match type '${source.verifyType}' specified by source`), { code: 'VERIFY_TYPE' });
}
return { return {
...source, ...source,
file: { file: {
@ -486,14 +495,15 @@ async function fetchSource(source, baseMedia) {
}; };
} catch (error) { } catch (error) {
hasher.end(); hasher.end();
const maxAttempts = source.attempts || 3;
logger.warn(`Failed attempt ${attempts}/${maxAttempts} to fetch ${source.src}: ${error.message}`); if (error.code !== 'VERIFY_TYPE') {
logger.warn(`Failed attempt ${attempts}/${maxAttempts} to fetch ${source.src}: ${error.message}`);
if (attempts < maxAttempts) { if (attempts < maxAttempts) {
await Promise.delay(1000); await Promise.delay(1000);
return attempt(attempts + 1); return attempt(attempts + 1);
}
} }
throw new Error(`Failed to fetch ${source.src}: ${error.message}`); throw new Error(`Failed to fetch ${source.src}: ${error.message}`);

View File

@ -160,10 +160,22 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
{ {
src: prefixedSrc.replace(/.jpg$/, '-full.jpg'), src: prefixedSrc.replace(/.jpg$/, '-full.jpg'),
referer: site.url, referer: site.url,
verifyType: 'image', // sometimes returns 200 OK with text/html instead of 403
},
{
src: prefixedSrc.replace(/-1x.jpg$/, '-4x.jpg'),
referer: site.url,
verifyType: 'image',
},
{
src: prefixedSrc.replace(/-1x.jpg$/, '-2x.jpg'),
referer: site.url,
verifyType: 'image',
}, },
{ {
src: prefixedSrc, src: prefixedSrc,
referer: site.url, referer: site.url,
verifyType: 'image',
}, },
]; ];
} }