Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -12,99 +12,99 @@ const { PassThrough } = require('stream');
|
||||
const http = require('./http');
|
||||
|
||||
/**
 * Report the current resident set size of this process.
 *
 * @returns {number} `process.memoryUsage().rss` divided by 10^6, i.e. the
 *   RSS in (decimal) megabytes.
 */
function getMemoryUsage() {
	return process.memoryUsage().rss / (10 ** 6);
}
|
||||
|
||||
// Highest memory usage (RSS, in MB) observed so far; seeded with the usage at
// startup and raised via Math.max() after each download and each resize.
let peakMemoryUsage = getMemoryUsage();
|
||||
|
||||
/**
 * Download a single source image to a uniquely named temp file while hashing
 * its bytes with BLAKE2b.
 *
 * The response is requested with `stream: true`, the hashing PassThrough as a
 * transform and the temp file write stream as destination.
 * NOTE(review): presumably `./http` pipes the body through `transforms` into
 * `destination` and resolves once the transfer is done — confirm in ./http.
 *
 * @param {string} link - URL of the image to download.
 * @returns {Promise<{id: string, path: string, hash: string}>} The generated
 *   ID, the temp file path and the hex-encoded hash of the downloaded data.
 * @throws {Error} When the request fails or the response is not OK (the error
 *   message is the response status). The temp file is removed before the
 *   error is rethrown.
 */
async function fetchSource(link) {
	const id = nanoid();

	const hasher = new blake2.Hash('blake2b');
	hasher.setEncoding('hex');

	const tempFilePath = `/home/niels/Pictures/thumbs/temp/${id}.jpeg`;
	const tempFileStream = fs.createWriteStream(tempFilePath);
	const hashStream = new PassThrough();

	// Feed every chunk passing through the stream into the hasher.
	hashStream.on('data', chunk => hasher.write(chunk));

	try {
		const res = await http.get(link, null, {
			stream: true,
			transforms: [hashStream],
			destination: tempFileStream,
			timeout: 5000,
		});

		if (!res.ok) {
			throw new Error(res.status);
		}

		// Finalize the digest; with the 'hex' encoding set above, read() yields a hex string.
		hasher.end();
		const hash = hasher.read();

		const memoryUsage = getMemoryUsage();
		peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);

		console.log(`Stored ${tempFilePath}, memory usage: ${memoryUsage.toFixed(2)} MB`);

		return {
			id,
			path: tempFilePath,
			hash,
		};
	} catch (error) {
		// Release the file descriptor before removing the partial download.
		tempFileStream.destroy();

		try {
			await fsPromises.unlink(tempFilePath);
		} catch (unlinkError) {
			// The temp file may never have been created; any other cleanup
			// failure is reported, but must not mask the original error.
			if (unlinkError.code !== 'ENOENT') {
				console.log(`Failed to remove ${tempFilePath}: ${unlinkError.message}`);
			}
		}

		throw error;
	}
}
|
||||
|
||||
/**
 * Download every image listed in the links file, then write a JPEG copy and
 * a thumbnail for each successful download, logging memory usage along the
 * way and the peak usage plus total duration at the end.
 *
 * NOTE(review): `Promise.map` is not a native Promise method; presumably
 * `Promise` is bluebird, imported above this section — confirm.
 *
 * @returns {Promise<void>} Resolves once all images have been processed.
 *   Failed downloads are logged and skipped; a failure while resizing rejects.
 */
async function init() {
	const linksFile = await fsPromises.readFile('/home/niels/Pictures/photos', 'utf8');

	// One link per line; tolerate CRLF line endings and surrounding whitespace.
	const links = linksFile
		.split(/\r?\n/)
		.map(line => line.trim())
		.filter(Boolean);

	await fsPromises.mkdir('/home/niels/Pictures/thumbs/temp', { recursive: true });

	console.time('thumbs');

	// NOTE(review): no concurrency limit here, unlike the resize step below —
	// all downloads are started at once. Confirm this is intentional.
	const files = await Promise.map(links, async (link) => {
		try {
			return await fetchSource(link);
		} catch (error) {
			console.log(`Failed to fetch ${link}: ${error.message}`);

			// Mark the failed download so it can be filtered out below.
			return null;
		}
	});

	await Promise.map(files.filter(Boolean), async (file) => {
		const image = sharp(file.path).jpeg();

		const [{ width, height }, { size }] = await Promise.all([
			image.metadata(),
			fsPromises.stat(file.path),
		]);

		await Promise.all([
			image
				.toFile(`/home/niels/Pictures/thumbs/${file.hash}.jpeg`),
			// resize() configures the instance it is called on, so the thumbnail
			// is built from a clone to keep the full-size output independent.
			image
				.clone()
				.resize({
					height: config.media.thumbnailSize,
					withoutEnlargement: true,
				})
				.toFile(`/home/niels/Pictures/thumbs/${file.hash}_thumb.jpeg`),
		]);

		const memoryUsage = getMemoryUsage();
		peakMemoryUsage = Math.max(memoryUsage, peakMemoryUsage);

		console.log(`Resized ${file.id} (${width}, ${height}, ${size}), memory usage: ${memoryUsage.toFixed(2)} MB`);
	}, { concurrency: 10 });

	console.log(`Peak memory usage: ${peakMemoryUsage.toFixed(2)} MB`);
	console.timeEnd('thumbs');
}
|
||||
|
||||
// Run the script; report any failure instead of leaving the promise floating.
init().catch((error) => {
	console.error(error);
	process.exitCode = 1;
});
|
||||
|
||||
Reference in New Issue
Block a user