Fixed Vixen scraper, using new token URL for trailers.

2020-04-26 03:51:59 +02:00
parent 2cfbd21560
commit 2ac879d276
5 changed files with 313 additions and 97 deletions
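
Only three of the five changed files appear below, and the Vixen scraper change itself is not among them. The fix boils down to the usual token dance: POST to a token endpoint and read the signed trailer URL out of the JSON response. A minimal sketch of that pattern, assuming a hypothetical endpoint path and response shape rather than the actual Vixen API, and using the queued post helper extended in this commit:

// sketch only: the endpoint path and response shape are assumptions,
// not the actual Vixen API
async function getTrailerUrl(siteUrl, videoId) {
	const res = await post(`${siteUrl}/api/__record_tknreq`, { video: videoId }, { origin: siteUrl });

	if (!res.ok) {
		return null;
	}

	// the signed, expiring trailer URL carried by the token response
	return res.json && res.json.data && res.json.data.url;
}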

src/utils/buffer.js (new file)

@@ -0,0 +1,118 @@
'use strict';

const bhttp = require('bhttp');
const Promise = require('bluebird');
const fsPromises = require('fs').promises;
const fs = require('fs');
const { PassThrough } = require('stream');
const blake2 = require('blake2');
const argv = require('yargs').argv;

const file = 'https://speed.hetzner.de/100MB.bin';
// const file = 'https://speed.hetzner.de/1GB.bin';
// const file = 'https://speed.hetzner.de/10GB.bin';

// resident set size in megabytes
function getMemoryUsage() {
	return process.memoryUsage().rss / (10 ** 6);
}

const stats = {
	peakMemoryUsage: getMemoryUsage(),
	done: false,
	downloads: {},
};

// redraws the status of every download in place, polling every second until
// each download reports a hash digest (a string) instead of a percentage (a number)
function render() {
	const downloads = Object.entries(stats.downloads);

	process.stdout.clearScreenDown();
	process.stdout.write(`peak memory: ${stats.peakMemoryUsage.toFixed(2)} MB\n`);

	downloads.forEach(([download, progress]) => {
		process.stdout.write(`${download}: ${progress}${typeof progress === 'string' ? '' : '%'}\n`);
	});

	process.stdout.moveCursor(0, -(downloads.length + 1));
	process.stdout.cursorTo(0);

	if (downloads.length === 0 || !downloads.every(([_label, download]) => typeof download === 'string')) {
		setTimeout(() => render(), 1000);
		return;
	}

	process.stdout.moveCursor(0, downloads.length + 1);
}

// tracks peak memory usage on every progress tick; once a hash is supplied,
// it replaces the percentage as the download's status
function setProgress(label, completedBytes, totalBytes, hash) {
	const memory = getMemoryUsage();

	stats.peakMemoryUsage = Math.max(memory, stats.peakMemoryUsage);
	stats.downloads[label] = hash || Math.round((completedBytes / totalBytes) * 100);
}

// buffers the entire response body in memory, then hashes and writes it
async function buffered(label) {
	const hash = new blake2.Hash('blake2b');

	const imageRes = await bhttp.get(file, {
		onDownloadProgress(completedBytes, totalBytes) {
			setProgress(label, completedBytes, totalBytes);
		},
	});

	hash.update(imageRes.body);
	setProgress(label, null, null, hash.digest('hex'));

	await fsPromises.writeFile(`/mnt/stor/Pictures/traxxx/temp/buffered-${label}.bin`, imageRes.body);
}

// pipes the response through a hash stream into the target file, so only
// small chunks are held in memory at any time
async function streamed(label) {
	const hash = new blake2.Hash('blake2b');
	hash.setEncoding('hex');

	const hashStream = new PassThrough();
	const targetStream = fs.createWriteStream(`/mnt/stor/Pictures/traxxx/temp/streamed-${label}.bin`);

	const imageRes = await bhttp.get(file, {
		stream: true,
	});

	const stream = imageRes
		.pipe(hashStream)
		.pipe(targetStream);

	imageRes.on('progress', (completedBytes, totalBytes) => {
		setProgress(label, completedBytes, totalBytes);
	});

	hashStream.on('data', (chunk) => {
		hash.write(chunk);
	});

	stream.on('finish', () => {
		hash.end();
		setProgress(label, null, null, hash.read());
	});
}

async function init() {
	const n = argv.n || 1;

	if (argv._.includes('stream')) {
		console.log('using streams');
		render();
		await Promise.map(Array.from({ length: n }), async (value, index) => streamed(index + 1));
		return;
	}

	if (argv._.includes('buffer')) {
		console.log('using buffers');
		render();
		await Promise.map(Array.from({ length: n }), async (value, index) => buffered(index + 1));
	}
}

init();
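
The script picks its mode from the positional argument and fans out with -n; buffered holds each full response body in memory before hashing and writing, while streamed should only ever hold chunk-sized buffers, so the peak-memory line makes the difference visible. For example:

node src/utils/buffer.js buffer -n 4
node src/utils/buffer.js stream -n 4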


@@ -45,7 +45,7 @@ queue.define('http', async ({
 	options = {},
 }) => {
 	if (body) {
-		logger.silly(`${method.toUpperCase()} ${url} with ${body}`);
+		logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)}`);
 	} else {
 		logger.silly(`${method.toUpperCase()} ${url}`);
 	}
@@ -73,8 +73,10 @@ queue.define('http', async ({
 	return {
 		...res,
 		originalRes: res,
 		html,
+		json,
+		pipe: res.pipe,
 		ok: res.statusCode >= 200 && res.statusCode <= 299,
 		code: res.statusCode,
 		status: res.statusCode,
 	};
@@ -85,7 +87,7 @@ queue.define('http', async ({
 async function get(url, headers, options) {
 	return queue.push('http', {
-		method: 'get',
+		method: 'GET',
 		url,
 		headers,
 		options,
 	});
@@ -94,6 +96,7 @@ async function get(url, headers, options) {
 async function post(url, body, headers, options) {
 	return queue.push('http', {
 		method: 'POST',
 		url,
 		body,
 		headers,
+		options,
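
With json and pipe exposed on the wrapper, callers can read a parsed body without reaching into originalRes. A brief usage sketch (the URL is a placeholder, and the surrounding async context is assumed):

// placeholder URL; res.json is the parsed body the wrapper now exposes
const res = await get('https://example.com/api/token', { accept: 'application/json' });

if (res.ok) {
	console.log(res.json);
}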

src/utils/stream.js (new file)

@@ -0,0 +1,55 @@
'use strict';

const config = require('config');
const { PassThrough } = require('stream');
const fs = require('fs');
const path = require('path');
const bhttp = require('bhttp');
const blake2 = require('blake2');
const sharp = require('sharp');

const url = 'https://thumbs.julesjordan.com/trial/content//upload/dl03/julesjordan/oil_overload_16_scene2//photos/alina_lopez_jules_jordan_com_77.jpg';

async function init() {
	const hash = new blake2.Hash('blake2b');
	hash.setEncoding('hex');

	const res = await bhttp.get(url, {
		stream: true,
	});

	const metaStream = sharp();
	const hashStream = new PassThrough();

	const target = fs.createWriteStream(path.join(config.media.path, 'temp', 'alina.jpg'));
	const thumbTarget = fs.createWriteStream(path.join(config.media.path, 'temp', 'alina_thumb.jpg'));

	hashStream.on('data', (chunk) => {
		hash.write(chunk);
	});

	// a cloned sharp pipeline resizes the same input into a thumbnail without
	// a second download or decode
	metaStream.clone()
		.resize(320)
		.pipe(thumbTarget);

	// single pass: response -> sharp -> hash stream -> full-size target file
	const stream = res
		.pipe(metaStream)
		.pipe(hashStream)
		.pipe(target);

	stream.on('finish', () => {
		hash.end();

		const digest = hash.read();
		console.log('stream', digest);
	});

	metaStream.on('info', (info) => {
		console.log('info', info);
	});

	const stats = await metaStream.stats();
	console.log('stats', stats);
}

init();
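
One caveat: init may resolve after stats() while the file writes are still in flight, since the digest only arrives in the finish handler. If a caller needs the digest, the event can be promisified; a sketch, not part of this commit:

// resolves with the hex digest once the final write stream has finished
function finishedDigest(stream, hash) {
	return new Promise((resolve, reject) => {
		stream.on('finish', () => {
			hash.end();
			resolve(hash.read());
		});

		stream.on('error', reject);
	});
}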