Forked from DebaucheryLibrarian/traxxx

Commit 9a712e7371: "Media module saves files."
Parent commit: 4eaacf5697
@@ -43,6 +43,7 @@ async function mounted() {
     'double-vaginal',
     'da-tp',
     'dv-tp',
+    'triple-anal',
   ],
   oral: [
     'deepthroat',
src/media.js (231 lines changed)
@@ -1,9 +1,9 @@
 'use strict';

 const config = require('config');
+const util = require('util');
 const Promise = require('bluebird');
-const fs = require('fs');
-const fsPromises = require('fs').promises;
+const fs = require('fs').promises;
 const path = require('path');
 const nanoid = require('nanoid/non-secure');
 const mime = require('mime');
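Two things happen in the imports: util is pulled in for the util.inspect debug call added further down, and fs is rebound to the promise-based API for the whole module, so every remaining fs call must be awaited rather than given a callback. A minimal sketch of the distinction (illustrative only):

const fsCallback = require('fs');           // fsCallback.writeFile(file, data, cb)
const fsPromises = require('fs').promises;  // await fsPromises.writeFile(file, data)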
@@ -23,19 +23,19 @@ function getHash(buffer) {
   return hash.digest('hex');
 }

-async function getEntropy(buffer) {
+async function getEntropy(buffer, source) {
   try {
     const { entropy } = await sharp(buffer).stats();

     return entropy;
   } catch (error) {
-    logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);
+    logger.warn(`Failed to retrieve image entropy, using 7.5 for ${source.src}: ${error.message}`);

     return 7.5;
   }
 }

-async function getMeta(buffer) {
+async function getMeta(buffer, source) {
   try {
     const { width, height, size } = await sharp(buffer).metadata();
@@ -45,7 +45,7 @@ async function getMeta(buffer) {
       size,
     };
   } catch (error) {
-    logger.warn(`Failed to retrieve image metadata: ${error.message}`);
+    logger.warn(`Failed to retrieve image metadata from ${source.src}: ${error.message}`);

     return {};
   }
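Both helpers lean on sharp; the new source parameter only improves the log messages. For reference, a self-contained sketch of the calls they wrap (the 7.5 entropy fallback is this module's own convention for "assume a reasonably detailed image"):

const sharp = require('sharp');

async function describeImage(buffer) {
  // stats() decodes the image and reports per-channel statistics,
  // including a single greyscale entropy estimate.
  const { entropy } = await sharp(buffer).stats();

  // metadata() reads dimensions and size from the header alone.
  const { width, height, size } = await sharp(buffer).metadata();

  return { entropy, width, height, size };
}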
@@ -89,6 +89,10 @@ function toBaseSource(rawSource) {
   if (rawSource.referer) baseSource.referer = rawSource.referer;
   if (rawSource.host) baseSource.host = rawSource.host;

+  if (rawSource.copyright) baseSource.copyright = rawSource.copyright;
+  if (rawSource.comment) baseSource.comment = rawSource.comment;
+  if (rawSource.group) baseSource.group = rawSource.group;
+
   return baseSource;
 }
@@ -180,10 +184,21 @@ async function findSourceDuplicates(baseMedias) {
 }

 async function findHashDuplicates(medias) {
-  const mediaHashes = medias.map(media => media.file?.hash).filter(Boolean);
-  const existingHashMedia = await knex('media').whereIn('hash', mediaHashes);
-
-  return itemsByKey(existingHashMedia, 'hash');
+  const hashes = medias.map(media => media.meta?.hash || media.entry?.hash).filter(Boolean);
+
+  const existingHashMediaEntries = await knex('media').whereIn('hash', hashes);
+  const existingHashMediaEntriesByHash = itemsByKey(existingHashMediaEntries, 'hash');
+
+  const uniqueHashMedia = medias.filter(media => !media.entry && !existingHashMediaEntriesByHash[media.meta?.hash]);
+
+  const existingHashMedia = medias
+    .filter(media => existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash])
+    .map(media => ({
+      ...media,
+      entry: existingHashMediaEntriesByHash[media.entry?.hash || media.meta?.hash],
+    }));
+
+  return { uniqueHashMedia, existingHashMedia };
 }

 async function extractSource(baseSource, { existingExtractMediaByUrl }) {
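The reworked helper now partitions media by content hash instead of returning a bare hash-to-row lookup. Its contract, illustrated with invented hashes and IDs:

// Hypothetical input/output of findHashDuplicates:
const medias = [
  { id: 'a', meta: { hash: 'ff00ab' } }, // hash already present in the media table
  { id: 'b', meta: { hash: '1234cd' } }, // hash not seen before
];

const { uniqueHashMedia, existingHashMedia } = await findHashDuplicates(medias);
// uniqueHashMedia   -> [{ id: 'b', ... }]                   (still needs a DB row)
// existingHashMedia -> [{ id: 'a', ..., entry: mediaRow }]  (linked to the existing row)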
@@ -198,7 +213,6 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
     return {
       ...baseSource,
       entry: existingExtractMedia,
-      src: existingExtractMedia.source,
     };
   }
@@ -216,7 +230,57 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) {
   throw new Error(`Could not extract source from ${baseSource.url}: ${res.status}`);
 }

-async function fetchSource(source, baseMedia, baseSourceIndex) {
+async function saveMedia(media) {
+  const hashDir = media.meta.hash.slice(0, 2);
+  const hashSubDir = media.meta.hash.slice(2, 4);
+  const hashFilename = media.meta.hash.slice(4);
+
+  const filename = media.quality
+    ? `${hashFilename}_${media.quality}.${media.meta.extension}`
+    : `${hashFilename}.${media.meta.extension}`;
+
+  const filedir = path.join(media.role, hashDir, hashSubDir);
+  const filepath = path.join(filedir, filename);
+
+  if (media.meta.type === 'image') {
+    const thumbnail = await getThumbnail(media.file.buffer);
+
+    const thumbdir = path.join(media.role, 'thumbs', hashDir, hashSubDir);
+    const thumbpath = path.join(thumbdir, filename);
+
+    await Promise.all([
+      fs.mkdir(path.join(config.media.path, filedir), { recursive: true }),
+      fs.mkdir(path.join(config.media.path, thumbdir), { recursive: true }),
+    ]);
+
+    await Promise.all([
+      fs.writeFile(path.join(config.media.path, filepath), media.file.buffer),
+      fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
+    ]);
+
+    return {
+      ...media,
+      file: {
+        // buffer is no longer needed, discard to free up memory
+        path: filepath,
+        thumbnail: thumbpath,
+      },
+    };
+  }
+
+  await fs.mkdir(path.join(config.media.path, filedir), { recursive: true });
+  await fs.writeFile(path.join(config.media.path, filepath), media.file.buffer);
+
+  return {
+    ...media,
+    file: {
+      // buffer is no longer needed, discard to free up memory
+      path: filepath,
+    },
+  };
+}
+
+async function fetchSource(source) {
   logger.silly(`Fetching media from ${source.src}`);
   // attempts
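The new saveMedia shards files across two directory levels derived from the content hash, which keeps any single directory from accumulating an unbounded number of entries. A worked example with an invented hash and quality:

const path = require('path');

// Hypothetical values for illustration:
const hash = 'ab12cdef0123';
const hashDir = hash.slice(0, 2);     // 'ab'
const hashSubDir = hash.slice(2, 4);  // '12'
const filename = `${hash.slice(4)}_1080.jpeg`;

const filepath = path.join('photos', hashDir, hashSubDir, filename);
// -> 'photos/ab/12/cdef0123_1080.jpeg'
// Thumbnails for the same file land under 'photos/thumbs/ab/12/'.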
@@ -225,39 +289,32 @@ async function fetchSource(source, baseMedia, baseSourceIndex) {
   const { pathname } = new URL(source.src);
   const mimetype = mime.getType(pathname);
   const extension = mime.getExtension(mimetype);
-  const isImage = /image/.test(mimetype);
-
-  const tempPath = path.join(config.media.path, 'temp', `${baseMedia.id}-${baseSourceIndex}.${extension}`);
+  const type = mimetype.split('/')[0];

   const res = await http.get(source.src, {
     ...(source.referer && { referer: source.referer }),
     ...(source.host && { host: source.host }),
-  }, {
-    stream: true,
   });

   if (!res.ok) {
     throw new Error(`Response ${res.status} not OK`);
   }

-  res.res.pipe(fs.createWriteStream(tempPath));
-
-  const buffer = res.body;
-
-  console.log(res.body);
-
-  const hash = getHash(buffer);
-  const entropy = isImage ? await getEntropy(buffer) : null;
-  const { size, width, height } = isImage ? await getMeta(buffer) : {};
+  const hash = getHash(res.body);
+  const entropy = type === 'image' ? await getEntropy(res.body) : null;
+  const { size, width, height } = type === 'image' ? await getMeta(res.body) : {};

   logger.silly(`Fetched media from ${source.src}`);

   return {
     ...source,
     file: {
-      temp: tempPath,
+      buffer: res.body,
+    },
+    meta: {
       mimetype,
       extension,
+      type,
       hash,
       entropy,
       size,
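The streamed temp-file write is gone: the response body now stays in memory as a buffer, which saveMedia writes out later, trading memory for a simpler pipeline. Hashing therefore runs over the whole buffer at once. Only the tail of getHash is visible in this diff, but a typical shape would be (digest algorithm assumed, not confirmed by this commit):

const crypto = require('crypto');

// Assumed sketch of getHash; the diff shows only `hash.digest('hex')`.
function getHash(buffer) {
  return crypto.createHash('md5').update(buffer).digest('hex');
}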
@@ -270,7 +327,6 @@ async function fetchSource(source, baseMedia, baseSourceIndex) {

   if (attempts < 3) {
     await Promise.delay(1000);

     return attempt(attempts + 1);
   }
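The surrounding retry logic (context above) relies on bluebird's Promise.delay. The same pattern in isolation, with generic names standing in for the module's attempt() closure:

const Promise = require('bluebird');

// Generic retry-with-delay sketch matching the attempt() pattern:
async function withRetries(task, attempts = 1) {
  try {
    return await task();
  } catch (error) {
    if (attempts < 3) {
      await Promise.delay(1000); // wait a second before retrying
      return withRetries(task, attempts + 1);
    }

    throw error;
  }
}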
@@ -287,16 +343,19 @@ async function trySource(baseSource, existingMedias, baseMedia, baseSourceIndex)
   const existingSourceMedia = existingMedias.existingSourceMediaByUrl[extractedSource.src];

   if (extractedSource.entry) {
+    logger.silly(`Media page URL already in database, not extracting ${baseSource.url}`);
+
     // media entry found during extraction, don't fetch
     return extractedSource;
   }

   if (existingSourceMedia) {
+    logger.silly(`Media source URL already in database, skipping ${baseSource.url}`);
+
     // media entry found by source URL, don't fetch
     return {
       ...baseSource,
       entry: existingSourceMedia,
-      src: existingSourceMedia.source,
     };
   }
@@ -311,10 +370,18 @@ async function fetchMedia(baseMedia, existingMedias) {
       Promise.reject(new Error()),
     );

-    return {
+    if (source.entry) {
+      // don't save media, already in database
+      return {
+        ...baseMedia,
+        ...source,
+      };
+    }
+
+    return saveMedia({
       ...baseMedia,
       ...source,
-    };
+    });
   } catch (error) {
     logger.warn(error.message);
@@ -322,40 +389,52 @@ async function fetchMedia(baseMedia, existingMedias) {
   }
 }

-function saveMedia(media, existingHashMediaByHash) {
-  const existingHashMedia = existingHashMediaByHash[media.file.hash];
-
-  if (existingHashMedia) {
-    return {
-      ...media,
-      entry: existingHashMedia,
-    };
+function curateMediaEntry(media, index) {
+  if (media.entry) {
+    return media;
   }

-  const hashDir = media.file.hash.slice(0, 2);
-  const hashSubDir = media.file.hash.slice(2, 4);
-  const hashFilename = media.file.hash.slice(4);
-
-  const filename = media.quality
-    ? `${hashFilename}_${media.quality}.${media.file.extension}`
-    : `${hashFilename}.${media.file.extension}`;
-
-  const filedir = path.join(media.role, hashDir, hashSubDir);
-  const filepath = path.join(filedir, filename);
-
-  console.log(filedir, filepath);
-
-  return media;
+  const curatedMediaEntry = {
+    id: media.id,
+    path: media.file.path,
+    thumbnail: media.file.thumbnail,
+    index,
+    mime: media.meta.mimetype,
+    hash: media.meta.hash,
+    size: media.meta.size,
+    width: media.meta.width,
+    height: media.meta.height,
+    entropy: media.meta.entropy,
+    source: media.src,
+    source_page: media.url,
+    scraper: media.scraper,
+    copyright: media.copyright,
+    comment: media.comment,
+  };
+
+  return {
+    ...media,
+    newEntry: true,
+    entry: curatedMediaEntry,
+  };
 }

 async function storeMedias(baseMedias) {
-  await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
-
   const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
-  const fetchedMedias = await Promise.map(baseMedias, async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }));

-  const existingHashMediaByHash = await findHashDuplicates(fetchedMedias);
-  const savedMedias = await Promise.map(fetchedMedias, async fetchedMedia => saveMedia(fetchedMedia, existingHashMediaByHash));
+  const savedMedias = await Promise.map(
+    baseMedias,
+    async baseMedia => fetchMedia(baseMedia, { existingSourceMediaByUrl, existingExtractMediaByUrl }),
+  );
+
+  const { uniqueHashMedia, existingHashMedia } = await findHashDuplicates(savedMedias);
+
+  const newMediaWithEntries = uniqueHashMedia.map((media, index) => curateMediaEntry(media, index));
+  const newMediaEntries = newMediaWithEntries.filter(media => !media.newEntry).map(media => media.entry);
+
+  await knex('media').insert(newMediaEntries);
+
+  return [...newMediaWithEntries, ...existingHashMedia];
 }

 async function associateReleaseMedia(releases) {
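Taken together, storeMedias now runs the whole persistence pipeline in one place. Schematically (a condensed sketch of the intent, not the literal code):

// 1. URL-level dedupe: skip sources whose page or source URL is already known
const urls = await findSourceDuplicates(baseMedias);

// 2. Fetch and persist: download each source, hash it, write it to disk
const savedMedias = await Promise.map(baseMedias, media => fetchMedia(media, urls));

// 3. Hash-level dedupe: split into brand-new media and already-stored media
const { uniqueHashMedia, existingHashMedia } = await findHashDuplicates(savedMedias);

// 4. Build and insert `media` rows for the new items only, then return everything
await knex('media').insert(uniqueHashMedia.map((m, i) => curateMediaEntry(m, i).entry));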
@@ -363,14 +442,18 @@ async function associateReleaseMedia(releases) {
     return;
   }

+  // TODO: internal duplicate filtering
+  // TODO: media count limits
+  // TODO: catch errors
+
   const baseMediasByReleaseId = releases.reduce((acc, release) => ({
     ...acc,
-    [release.id]: {
-      poster: argv.images && argv.poster && toBaseMedias([release.poster], 'posters'),
-      photos: argv.images && argv.photos && toBaseMedias(release.photos, 'photos').slice(0, 5),
-      trailer: argv.videos && argv.trailer && toBaseMedias([release.trailer], 'trailers'),
-      teaser: argv.videos && argv.teaser && toBaseMedias([release.teaser], 'teasers'),
-    },
+    [release.id]: [
+      ...(argv.images && argv.poster ? toBaseMedias([release.poster], 'posters') : []),
+      ...(argv.images && argv.photos ? toBaseMedias(release.photos, 'photos').slice(0, 5) : []),
+      ...(argv.videos && argv.trailer ? toBaseMedias([release.trailer], 'trailers') : []),
+      ...(argv.videos && argv.teaser ? toBaseMedias([release.teaser], 'teasers') : []),
+    ],
   }), {});

   const baseMedias = Object.values(baseMediasByReleaseId)
@@ -378,7 +461,29 @@ async function associateReleaseMedia(releases) {
     .flat(2)
     .filter(Boolean);

-  await storeMedias(baseMedias);
+  const storedMedias = await storeMedias(baseMedias);
+  const storedMediasById = itemsByKey(storedMedias, 'id');
+
+  const associationsByRole = Object.entries(baseMediasByReleaseId).reduce((acc, [releaseId, releaseBaseMedias]) => {
+    releaseBaseMedias.forEach((baseMedia) => {
+      const media = storedMediasById[baseMedia.id];
+
+      if (!media) return;
+      if (!acc[media.role]) acc[media.role] = [];
+
+      acc[media.role].push({
+        release_id: releaseId,
+        media_id: media.entry.id,
+      });
+    });
+
+    return acc;
+  }, {});
+
+  console.log(util.inspect(associationsByRole, null, null));
+
+  await Promise.all(Object.entries(associationsByRole)
+    .map(async ([role, associations]) => knex(`releases_${role}`).insert(associations)));
 }

 module.exports = {
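Each role then maps onto its own join table (releases_posters, releases_photos, and so on). With invented IDs, associationsByRole would look roughly like:

// Hypothetical shape:
// {
//   posters: [{ release_id: '1', media_id: 'abc123' }],
//   photos:  [{ release_id: '1', media_id: 'def456' }, { release_id: '1', media_id: 'aa77bb' }],
// }
// Each role's array is inserted into the matching `releases_${role}` table.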
@@ -72,10 +72,9 @@ queue.define('http', async ({
   const json = Buffer.isBuffer(res.body) ? null : res.body;

   return {
-    res,
+    ...res,
     html,
     json,
-    pipe: res.pipe,
     ok: res.statusCode >= 200 && res.statusCode <= 299,
     code: res.statusCode,
     status: res.statusCode,
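Spreading ...res flattens the upstream response into the object the HTTP queue returns, which is what lets media.js read res.body directly and drop the removed pipe helper. The consumer-side difference, sketched:

// Before: nested response plus an explicit pipe helper for streaming.
//   const { res, pipe } = await http.get(url);
// After: response fields are spread flat and the body arrives buffered.
//   const { body, ok, status } = await http.get(url);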