Added S3 support for media files. Fixed MindGeek scraper for new poster data structure.

This commit is contained in:
DebaucheryLibrarian
2021-02-22 02:33:39 +01:00
parent 9a65d8c0eb
commit 37e39dc1ec
17 changed files with 152 additions and 79 deletions

View File

@@ -14,6 +14,7 @@ const ffmpeg = require('fluent-ffmpeg');
const sharp = require('sharp');
const blake2 = require('blake2');
const taskQueue = require('promise-task-queue');
const AWS = require('aws-sdk');
const logger = require('./logger')(__filename);
const argv = require('./argv');
@@ -25,6 +26,17 @@ const { get } = require('./utils/qu');
const pipeline = util.promisify(stream.pipeline);
const streamQueue = taskQueue();
// S3-compatible endpoint the client talks to; the hostname is hard-coded here rather than read from config
const endpoint = new AWS.Endpoint('s3.wasabisys.com');
// Shared S3 client used for media uploads. It is constructed at module load and reads
// config.s3.accessKey / config.s3.secretKey unconditionally.
// NOTE(review): this presumably requires config.s3 to be defined even when config.s3.enabled is false - confirm
const s3 = new AWS.S3({
// region: 'eu-central-1',
endpoint,
credentials: {
accessKeyId: config.s3.accessKey,
secretAccessKey: config.s3.secretKey,
},
});
function sampleMedias(medias, limit = argv.mediaLimit, preferLast = true) {
// limit media sets, use extras as fallbacks
if (medias.length <= limit) {
@@ -303,6 +315,58 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
throw new Error(`Could not extract source from ${baseSource.url}: ${res.status}`);
}
/**
 * Upload a locally stored media file to the configured S3 bucket, then delete the local copy.
 *
 * @param {string} filepath - Path relative to config.media.path; reused verbatim as the object key.
 * @param {object} media - Media entry; media.meta.mimetype is sent as the object's ContentType.
 * @returns {Promise<object>} The value the upload's promise resolves with.
 */
async function storeS3Object(filepath, media) {
	const localPath = path.join(config.media.path, filepath);
	const source = fs.createReadStream(localPath);

	// NOTE(review): callers pass the same media for the main file, thumbnail and lazy image,
	// so all three are uploaded with the same ContentType - confirm this is intended
	const uploadParams = {
		Bucket: config.s3.bucket,
		Body: source,
		Key: filepath,
		ContentType: media.meta.mimetype,
	};

	const uploadResult = await s3.upload(uploadParams).promise();

	// only reached when the upload did not throw, so a failed upload leaves the local file in place
	await fsPromises.unlink(localPath);

	return uploadResult;
}
/**
 * Write a processed image to its permanent location under config.media.path.
 *
 * Animated images (info.pages is truthy) are stored as WebP, everything else as JPEG.
 * The two branches are mutually exclusive: writing both would overwrite the WebP
 * with a JPEG at the same path.
 *
 * @param {object} image - Image pipeline exposing webp(), jpeg() and toFile().
 * @param {object} media - Media entry; not used by this function, kept for the call signature.
 * @param {object} info - Image metadata; info.pages marks an animated image.
 * @param {string} filepath - Destination path relative to config.media.path.
 * @param {boolean} isProcessed - When falsy nothing is written here.
 * @returns {Promise<void>}
 */
async function writeImage(image, media, info, filepath, isProcessed) {
	if (!isProcessed) {
		// nothing is written here for unprocessed images
		return;
	}

	const target = path.join(config.media.path, filepath);

	if (info.pages) {
		// convert animated image to WebP and write to permanent location
		await image
			.webp()
			.toFile(target);

		return;
	}

	// convert to JPEG and write to permanent location
	await image
		.jpeg()
		.toFile(target);
}
/**
 * Write a JPEG thumbnail of the image below config.media.path.
 *
 * The image is resized to a height of config.media.thumbnailSize without enlarging it,
 * then encoded at config.media.thumbnailQuality.
 *
 * @param {object} image - Image pipeline exposing resize(), jpeg() and toFile().
 * @param {string} thumbpath - Destination path relative to config.media.path.
 * @returns {Promise<*>} Whatever toFile() resolves with.
 */
async function writeThumbnail(image, thumbpath) {
	const { thumbnailSize, thumbnailQuality } = config.media;

	const resized = image.resize({
		height: thumbnailSize,
		withoutEnlargement: true,
	});

	const encoded = resized.jpeg({ quality: thumbnailQuality });
	const target = path.join(config.media.path, thumbpath);

	return encoded.toFile(target);
}
/**
 * Write the JPEG "lazy" variant of the image below config.media.path.
 *
 * The image is resized to a height of config.media.lazySize without enlarging it,
 * then encoded at config.media.lazyQuality.
 *
 * @param {object} image - Image pipeline exposing resize(), jpeg() and toFile().
 * @param {string} lazypath - Destination path relative to config.media.path.
 * @returns {Promise<*>} Whatever toFile() resolves with.
 */
async function writeLazy(image, lazypath) {
	const { lazySize, lazyQuality } = config.media;

	const resized = image.resize({
		height: lazySize,
		withoutEnlargement: true,
	});

	const encoded = resized.jpeg({ quality: lazyQuality });
	const target = path.join(config.media.path, lazypath);

	return encoded.toFile(target);
}
async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, filepath, options) {
logger.silly(`Storing permanent media files for ${media.id} from ${media.src} at ${filepath}`);
@@ -343,46 +407,28 @@ async function storeImageFile(media, hashDir, hashSubDir, filename, filedir, fil
});
}
if (isProcessed) {
if (info.pages) {
// convert animated image to WebP and write to permanent location
await image
.webp()
.toFile(path.join(config.media.path, filepath));
} else {
// convert to JPEG and write to permanent location
await image
.jpeg()
.toFile(path.join(config.media.path, filepath));
}
}
// generate thumbnail and lazy
await Promise.all([
image
.resize({
height: config.media.thumbnailSize,
withoutEnlargement: true,
})
.jpeg({ quality: config.media.thumbnailQuality })
.toFile(path.join(config.media.path, thumbpath)),
image
.resize({
height: config.media.lazySize,
withoutEnlargement: true,
})
.jpeg({ quality: config.media.lazyQuality })
.toFile(path.join(config.media.path, lazypath)),
writeImage(image, media, info, filepath, isProcessed),
writeThumbnail(image, thumbpath),
writeLazy(image, lazypath),
]);
if (isProcessed) {
// remove temp file
// file already stored, remove temporary file
await fsPromises.unlink(media.file.path);
} else {
// move temp file to permanent location
// image not processed, simply move temporary file to final location
await fsPromises.rename(media.file.path, path.join(config.media.path, filepath));
}
if (config.s3.enabled) {
await Promise.all([
storeS3Object(filepath, media),
storeS3Object(thumbpath, media),
storeS3Object(lazypath, media),
]);
}
logger.silly(`Stored thumbnail, lazy and permanent media file for ${media.id} from ${media.src} at ${filepath}`);
return {
@@ -521,7 +567,6 @@ async function fetchSource(source, baseMedia) {
try {
const tempFilePath = path.join(config.media.path, 'temp', `${baseMedia.id}`);
const tempFileTarget = fs.createWriteStream(tempFilePath);
const hashStream = new stream.PassThrough();
let size = 0;
@@ -648,6 +693,7 @@ function curateMediaEntry(media, index) {
path: media.file.path,
thumbnail: media.file.thumbnail,
lazy: media.file.lazy,
is_s3: config.s3.enabled,
index,
mime: media.meta.mimetype,
hash: media.meta.hash,

View File

@@ -13,7 +13,9 @@ const { cookieToData } = require('../utils/cookies');
function getThumbs(scene) {
if (scene.images.poster) {
return scene.images.poster.map(image => image.xl.url);
return Object.values(scene.images.poster) // can be { 0: {}, 1: {}, ... } instead of array
.filter(img => typeof img === 'object') // remove alternateText property
.map(image => image.xl.url);
}
if (scene.images.card_main_rect) {