Media module saves files.
This commit is contained in:
parent
4eaacf5697
commit
9a712e7371
|
@ -43,6 +43,7 @@ async function mounted() {
|
|||
'double-vaginal',
|
||||
'da-tp',
|
||||
'dv-tp',
|
||||
'triple-anal',
|
||||
],
|
||||
oral: [
|
||||
'deepthroat',
|
||||
|
|
231
src/media.js
231
src/media.js
|
@ -1,9 +1,9 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const fs = require('fs');
|
||||
const fsPromises = require('fs').promises;
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
const mime = require('mime');
|
||||
|
@ -23,19 +23,19 @@ function getHash(buffer) {
|
|||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
async function getEntropy(buffer) {
|
||||
async function getEntropy(buffer, source) {
|
||||
try {
|
||||
const { entropy } = await sharp(buffer).stats();
|
||||
|
||||
return entropy;
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);
|
||||
logger.warn(`Failed to retrieve image entropy, using 7.5 for ${source.src}: ${error.message}`);
|
||||
|
||||
return 7.5;
|
||||
}
|
||||
}
|
||||
|
||||
async function getMeta(buffer) {
|
||||
async function getMeta(buffer, source) {
|
||||
try {
|
||||
const { width, height, size } = await sharp(buffer).metadata();
|
||||
|
||||
|
@ -45,7 +45,7 @@ async function getMeta(buffer) {
|
|||
size,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to retrieve image metadata: ${error.message}`);
|
||||
logger.warn(`Failed to retrieve image metadata from ${source.src}: ${error.message}`);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
@ -89,6 +89,10 @@ function toBaseSource(rawSource) {
|
|||
if (rawSource.referer) baseSource.referer = rawSource.referer;
|
||||
if (rawSource.host) baseSource.host = rawSource.host;
|
||||
|
||||
if (rawSource.copyright) baseSource.copyright = rawSource.copyright;
|
||||
if (rawSource.comment) baseSource.comment = rawSource.comment;
|
||||
if (rawSource.group) baseSource.group = rawSource.group;
|
||||
|
||||
return baseSource;
|
||||
}
|
||||
|
||||
|
@ -180,10 +184,21 @@ async function findSourceDuplicates(baseMedias) {
|
|||
}
|
||||
|
||||
async function findHashDuplicates(medias) {
|
||||
const mediaHashes = medias.map(media => media.file?.hash).filter(Boolean);
|
||||
const existingHashMedia = await knex('media').whereIn('hash', mediaHashes);
|
||||
const hashes = medias.map(media => media.meta?.hash || media.entry?.hash).filter(Boolean);
|
||||
|
||||
return itemsByKey(existingHashMedia, 'hash');
|
||||
const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
|
||||
const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
|
||||
|
||||
const uniqueHashMedia = medias.filter(media => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
|
||||
|
||||
const existingHashMedia = medias
|
||||
.filter(media => existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash])
|
||||
.map(media => ({
|
||||
...media,
|
||||
entry: existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash],
|
||||
}));
|
||||
|
||||
return { uniqueHashMedia, existingHashMedia };
|
||||
}
|
||||
|
||||
async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
||||
|
@ -198,7 +213,6 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
|||
return {
|
||||
...baseSource,
|
||||
entry: existingExtractMedia,
|
||||
src: existingExtractMedia.source,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -216,7 +230,57 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
|
|||
throw new Error(`Could not extract source from ${baseSource.url}: ${res.status}`);
|
||||
}
|
||||
|
||||
async function fetchSource(source, baseMedia, baseSourceIndex) {
|
||||
async function saveMedia(media) {
|
||||
const hashDir = media.meta.hash.slice(0, 2);
|
||||
const hashSubDir = media.meta.hash.slice(2, 4);
|
||||
const hashFilename = media.meta.hash.slice(4);
|
||||
|
||||
const filename = media.quality
|
||||
? `${hashFilename}_${media.quality}.${media.meta.extension}`
|
||||
: `${hashFilename}.${media.meta.extension}`;
|
||||
|
||||
const filedir = path.join(media.role, hashDir, hashSubDir);
|
||||
const filepath = path.join(filedir, filename);
|
||||
|
||||
if (media.meta.type === 'image') {
|
||||
const thumbnail = await getThumbnail(media.file.buffer);
|
||||
|
||||
const thumbdir = path.join(media.role, 'thumbs', hashDir, hashSubDir);
|
||||
const thumbpath = path.join(thumbdir, filename);
|
||||
|
||||
await Promise.all([
|
||||
fs.mkdir(path.join(config.media.path, filedir), { recursive: true }),
|
||||
fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true }),
|
||||
]);
|
||||
|
||||
await Promise.all([
|
||||
fs.writeFile(path.join(config.media.path, filepath), media.file.buffer),
|
||||
fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
|
||||
]);
|
||||
|
||||
return {
|
||||
...media,
|
||||
file: {
|
||||
// buffer is no longer needed, discard to free up memory
|
||||
path: filepath,
|
||||
thumbnail: thumbpath,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
await fs.mkdir(path.join(config.media.path, filedir), { recursive: true });
|
||||
await fs.writeFile(path.join(config.media.path, filepath), media.file.buffer);
|
||||
|
||||
return {
|
||||
...media,
|
||||
file: {
|
||||
// buffer is no longer needed, discard to free up memory
|
||||
path: filepath,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchSource(source) {
|
||||
logger.silly(`Fetching media from ${source.src}`);
|
||||
// attempts
|
||||
|
||||
|
@ -225,39 +289,32 @@ async function fetchSource(source, baseMedia, baseSourceIndex) {
|
|||
const { pathname } = new URL(source.src);
|
||||
const mimetype = mime.getType(pathname);
|
||||
const extension = mime.getExtension(mimetype);
|
||||
const isImage = /image/.test(mimetype);
|
||||
|
||||
const tempPath = path.join(config.media.path, 'temp', `${baseMedia.id}-${baseSourceIndex}.${extension}`);
|
||||
const type = mimetype.split('/')[0];
|
||||
|
||||
const res = await http.get(source.src, {
|
||||
...(source.referer && { referer: source.referer }),
|
||||
...(source.host && { host: source.host }),
|
||||
}, {
|
||||
stream: true,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Response ${res.status} not OK`);
|
||||
}
|
||||
|
||||
res.res.pipe(fs.createWriteStream(tempPath));
|
||||
|
||||
const buffer = res.body;
|
||||
|
||||
console.log(res.body);
|
||||
|
||||
const hash = getHash(buffer);
|
||||
const entropy = isImage ? await getEntropy(buffer) : null;
|
||||
const { size, width, height } = isImage ? await getMeta(buffer) : {};
|
||||
const hash = getHash(res.body);
|
||||
const entropy = type === 'image' ? await getEntropy(res.body) : null;
|
||||
const { size, width, height } = type === 'image' ? await getMeta(res.body) : {};
|
||||
|
||||
logger.silly(`Fetched media from ${source.src}`);
|
||||
|
||||
return {
|
||||
...source,
|
||||
file: {
|
||||
temp: tempPath,
|
||||
buffer: res.body,
|
||||
},
|
||||
meta: {
|
||||
mimetype,
|
||||
extension,
|
||||
type,
|
||||
hash,
|
||||
entropy,
|
||||
size,
|
||||
|
@ -270,7 +327,6 @@ async function fetchSource(source, baseMedia, baseSourceIndex) {
|
|||
|
||||
if (attempts < 3) {
|
||||
await Promise.delay(1000);
|
||||
|
||||
return attempt(attempts + 1);
|
||||
}
|
||||
|
||||
|
@ -287,16 +343,19 @@ async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)
|
|||
const existingSourceMedia = existingMedias.existingSourceMediaByUrl[extractedSource.src];
|
||||
|
||||
if (extractedSource.entry) {
|
||||
logger.silly(`Media page URL already in database, not extracting ${baseSource.url}`);
|
||||
|
||||
// media entry found during extraction, don't fetch
|
||||
return extractedSource;
|
||||
}
|
||||
|
||||
if (existingSourceMedia) {
|
||||
logger.silly(`Media source URL already in database, skipping ${baseSource.url}`);
|
||||
|
||||
// media entry found by source URL, don't fetch
|
||||
return {
|
||||
...baseSource,
|
||||
entry: existingSourceMedia,
|
||||
src: existingSourceMedia.source,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -311,10 +370,18 @@ async function fetchMedia(baseMedia, existingMedias) {
|
|||
Promise.reject(new Error()),
|
||||
);
|
||||
|
||||
return {
|
||||
if (source.entry) {
|
||||
// don't save media, already in database
|
||||
return {
|
||||
...baseMedia,
|
||||
...source,
|
||||
};
|
||||
}
|
||||
|
||||
return saveMedia({
|
||||
...baseMedia,
|
||||
...source,
|
||||
};
|
||||
});
|
||||
} catch (error) {
|
||||
logger.warn(error.message);
|
||||
|
||||
|
@ -322,40 +389,52 @@ async function fetchMedia(baseMedia, existingMedias) {
|
|||
}
|
||||
}
|
||||
|
||||
function saveMedia(media, existingHashMediaByHash) {
|
||||
const existingHashMedia = existingHashMediaByHash[media.file.hash];
|
||||
|
||||
if (existingHashMedia) {
|
||||
return {
|
||||
...media,
|
||||
entry: existingHashMedia,
|
||||
};
|
||||
function curateMediaEntry(media, index) {
|
||||
if (media.entry) {
|
||||
return media;
|
||||
}
|
||||
|
||||
const hashDir = media.file.hash.slice(0, 2);
|
||||
const hashSubDir = media.file.hash.slice(2, 4);
|
||||
const hashFilename = media.file.hash.slice(4);
|
||||
const curatedMediaEntry = {
|
||||
id: media.id,
|
||||
path: media.file.path,
|
||||
thumbnail: media.file.thumbnail,
|
||||
index,
|
||||
mime: media.meta.mimetype,
|
||||
hash: media.meta.hash,
|
||||
size: media.meta.size,
|
||||
width: media.meta.width,
|
||||
height: media.meta.height,
|
||||
entropy: media.meta.entropy,
|
||||
source: media.src,
|
||||
source_page: media.url,
|
||||
scraper: media.scraper,
|
||||
copyright: media.copyright,
|
||||
comment: media.comment,
|
||||
};
|
||||
|
||||
const filename = media.quality
|
||||
? `${hashFilename}_${media.quality}.${media.file.extension}`
|
||||
: `${hashFilename}.${media.file.extension}`;
|
||||
|
||||
const filedir = path.join(media.role, hashDir, hashSubDir);
|
||||
const filepath = path.join(filedir, filename);
|
||||
|
||||
console.log(filedir, filepath);
|
||||
|
||||
return media;
|
||||
return {
|
||||
...media,
|
||||
newEntry: true,
|
||||
entry: curatedMediaEntry,
|
||||
};
|
||||
}
|
||||
|
||||
async function storeMedias(baseMedias) {
|
||||
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
|
||||
|
||||
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
|
||||
const fetchedMedias = await Promise.map(baseMedias, async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }));
|
||||
|
||||
const existingHashMediaByHash = await findHashDuplicates(fetchedMedias);
|
||||
const savedMedias = await Promise.map(fetchedMedias, async fetchedMedia => saveMedia(fetchedMedia, existingHashMediaByHash));
|
||||
const savedMedias = await Promise.map(
|
||||
baseMedias,
|
||||
async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
|
||||
);
|
||||
|
||||
const { uniqueHashMedia, existingHashMedia } = await findHashDuplicates(savedMedias);
|
||||
|
||||
const newMediaWithEntries = uniqueHashMedia.map((media, index) => curateMediaEntry(media, index));
|
||||
const newMediaEntries = newMediaWithEntries.filter(media => !media.newEntry).map(media => media.entry);
|
||||
|
||||
await knex('media').insert(newMediaEntries);
|
||||
|
||||
return [...newMediaWithEntries, ...existingHashMedia];
|
||||
}
|
||||
|
||||
async function associateReleaseMedia(releases) {
|
||||
|
@ -363,14 +442,18 @@ async function associateReleaseMedia(releases) {
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: internal duplicate filtering
|
||||
// TODO: media count limits
|
||||
// TODO: catch errors
|
||||
|
||||
const baseMediasByReleaseId = releases.reduce((acc, release) => ({
|
||||
...acc,
|
||||
[release.id]: {
|
||||
poster: argv.images && argv.poster && toBaseMedias([release.poster], 'posters'),
|
||||
photos: argv.images && argv.photos && toBaseMedias(release.photos, 'photos').slice(0, 5),
|
||||
trailer: argv.videos && argv.trailer && toBaseMedias([release.trailer], 'trailers'),
|
||||
teaser: argv.videos && argv.teaser && toBaseMedias([release.teaser], 'teasers'),
|
||||
},
|
||||
[release.id]: [
|
||||
...(argv.images && argv.poster ? toBaseMedias([release.poster], 'posters') : []),
|
||||
...(argv.images && argv.photos ? toBaseMedias(release.photos, 'photos').slice(0, 5) : []),
|
||||
...(argv.videos && argv.trailer ? toBaseMedias([release.trailer], 'trailers') : []),
|
||||
...(argv.videos && argv.teaser ? toBaseMedias([release.teaser], 'teasers') : []),
|
||||
],
|
||||
}), {});
|
||||
|
||||
const baseMedias = Object.values(baseMediasByReleaseId)
|
||||
|
@ -378,7 +461,29 @@ async function associateReleaseMedia(releases) {
|
|||
.flat(2)
|
||||
.filter(Boolean);
|
||||
|
||||
await storeMedias(baseMedias);
|
||||
const storedMedias = await storeMedias(baseMedias);
|
||||
const storedMediasById = itemsByKey(storedMedias, 'id');
|
||||
|
||||
const associationsByRole = Object.entries(baseMediasByReleaseId).reduce((acc, [releaseId, releaseBaseMedias]) => {
|
||||
releaseBaseMedias.forEach((baseMedia) => {
|
||||
const media = storedMediasById[baseMedia.id];
|
||||
|
||||
if (!media) return;
|
||||
if (!acc[media.role]) acc[media.role] = [];
|
||||
|
||||
acc[media.role].push({
|
||||
release_id: releaseId,
|
||||
media_id: media.entry.id,
|
||||
});
|
||||
});
|
||||
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
console.log(util.inspect(associationsByRole, null, null));
|
||||
|
||||
await Promise.all(Object.entries(associationsByRole)
|
||||
.map(async ([role, associations]) => knex(`releases_${role}`).insert(associations)));
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
@ -72,10 +72,9 @@ queue.define('http', async ({
|
|||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||
|
||||
return {
|
||||
res,
|
||||
...res,
|
||||
html,
|
||||
json,
|
||||
pipe: res.pipe,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
|
|
Loading…
Reference in New Issue