diff --git a/src/argv.js b/src/argv.js
index 873501ab..78633b9b 100755
--- a/src/argv.js
+++ b/src/argv.js
@@ -142,6 +142,11 @@ const { argv } = yargs
 		type: 'boolean',
 		alias: 'redownload',
 	})
+	.option('force-media', {
+		describe: 'Force existing media to be redownloaded.',
+		type: 'boolean',
+		default: false,
+	})
 	.option('after', {
 		describe: 'Don\'t fetch scenes older than',
 		type: 'string',
diff --git a/src/media.js b/src/media.js
index 097ec169..43a22099 100755
--- a/src/media.js
+++ b/src/media.js
@@ -556,7 +556,7 @@ async function storeFile(media, options) {
 	const filedir = config.s3.enabled ? media.role : path.join(media.role, hashDir, hashSubDir);
 	const filepath = path.join(filedir, filename);
 
-	if (argv.force) {
+	if (argv.forceMedia) {
 		try {
 			// remove old file to in case rename() does not overwrite (possibly on NFS setups)
 			await fsPromises.unlink(path.join(config.media.path, filepath));
@@ -739,14 +739,14 @@ async function trySource(baseSource, existingMedias, baseMedia) {
 	const extractedSource = await extractSource(baseSource, existingMedias);
 	const existingSourceMedia = existingMedias.existingSourceMediaByUrl[extractedSource.src];
 
-	if (!argv.force && extractedSource.entry) {
+	if (!argv.forceMedia && extractedSource.entry) {
 		logger.silly(`Media page URL already in database, not extracting ${baseSource.url}`);
 
 		// media entry found during extraction, don't fetch
 		return extractedSource;
 	}
 
-	if (!argv.force && existingSourceMedia) {
+	if (!argv.forceMedia && existingSourceMedia) {
 		logger.silly(`Media source URL already in database, skipping ${baseSource.src}`);
 
 		// media entry found by source URL, don't fetch
@@ -843,7 +843,7 @@ async function storeMedias(baseMedias, options) {
 		{ concurrency: 100 }, // don't overload disk
 	);
 
-	if (argv.force) {
+	if (argv.forceMedia) {
 		// overwrite files in case image processing was changed
 		await Promise.map(
 			existingHashMedias,
diff --git a/src/scrapers/archangel.js b/src/scrapers/archangel.js
index 2f2b9fe4..622232c8 100755
--- a/src/scrapers/archangel.js
+++ b/src/scrapers/archangel.js
@@ -26,7 +26,12 @@ function scrapeAll(scenes) {
 			url: unprint.query.url(actorEl, null),
 		}));
 
-		release.poster = query.img('img.mainThumb');
+		const poster = query.img('img.mainThumb');
+
+		if (poster && !/images\/p\d+\.jpe?g/i.test(poster)) {
+			release.poster = poster;
+		}
+
 		release.photoCount = query.number('.timeDate');
 
 		release.entryId = getEntryId(release);
diff --git a/src/scrapers/traxxx.js b/src/scrapers/traxxx.js
index 2055808b..b9e32d69 100755
--- a/src/scrapers/traxxx.js
+++ b/src/scrapers/traxxx.js
@@ -9,6 +9,7 @@ const moment = require('moment');
 
 const knex = require('../knex');
 const capitalize = require('../utils/capitalize');
+const shuffle = require('../utils/shuffle');
 
 function random(array) {
 	return array[Math.floor(Math.random() * array.length)];
@@ -231,6 +232,16 @@ function actors(release) {
 	}));
 }
 
+async function beforeFetchLatest() {
+	const tags = await knex('tags')
+		.select('name')
+		.where('priority', '>', 7)
+		.orderByRaw('random()')
+		.pluck('name');
+
+	return { tags };
+}
+
 async function fetchLatest(entity, page, options) {
 	return Promise.all(Array.from({ length: 10000 }, async (value, index) => {
 		const release = {};
@@ -258,20 +269,18 @@ async function fetchLatest(entity, page, options) {
 			release.photos = photos.map((photo) => `http://${config.web.host}:${config.web.port}/img/${photo}?id=${nanoid()}`);
 		}
 
-		release.tags = await knex('tags')
-			.select('name')
-			.where('priority', '>', 7)
-			.orderByRaw('random()')
-			.limit(faker.datatype.number({ min: 15, max: 25 }))
-			.pluck('name');
+		release.tags = shuffle(options.beforeFetchLatest.tags).slice(0, faker.datatype.number({ min: 3, max: 20 }));
 
 		release.actors = [...actors(release), null]; // include empty actor to ensure proper handling
 
 		release.title = title(release);
+		console.log(release.entryId);
+
 		return release;
 	}));
 }
 
 module.exports = {
+	beforeFetchLatest,
 	fetchLatest,
 };
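
Note: src/scrapers/traxxx.js now requires `../utils/shuffle`, but the patch does not include that new file. A minimal sketch of what it might contain, assuming a non-mutating Fisher-Yates shuffle (the actual util may differ):

```js
'use strict';

// Fisher-Yates shuffle that returns a copy, leaving the input untouched.
// Non-mutating matters here: fetchLatest reshuffles the shared tag list
// from beforeFetchLatest once per generated release.
function shuffle(array) {
	const copy = array.slice();

	for (let i = copy.length - 1; i > 0; i -= 1) {
		const j = Math.floor(Math.random() * (i + 1)); // random index in [0, i]
		[copy[i], copy[j]] = [copy[j], copy[i]];
	}

	return copy;
}

module.exports = shuffle;
```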
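
Note: the `argv.forceMedia` checks in src/media.js rely on yargs' default camel-case expansion, which exposes `--force-media` as both `argv['force-media']` and `argv.forceMedia`. A quick way to confirm, assuming the option definition from src/argv.js above in a hypothetical check-flag.js:

```js
// node check-flag.js --force-media
const { argv } = require('yargs')
	.option('force-media', {
		describe: 'Force existing media to be redownloaded.',
		type: 'boolean',
		default: false,
	});

console.log(argv.forceMedia); // true when --force-media is passed, false otherwise
```

Splitting the flag out this way means a plain `--force` run no longer redownloads or reprocesses existing media unless `--force-media` is passed as well.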