Refactored media module for better duplicate handling, readability and DRY.
This commit is contained in:
parent
b9617c76a1
commit
c4d620c6ff
272
src/media.js
272
src/media.js
|
@ -46,109 +46,152 @@ async function createActorMediaDirectory(profile, actor) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Turn saved file descriptors into row objects for insertion into the `media` table.
 *
 * Falsy entries in `files` are skipped before indexing, so `index` (and the
 * optional avatar role) always refer to actual files.
 *
 * @param {Array<Object>} files - Descriptors carrying `filepath`, `thumbpath`, `mimetype`, `hash` and `source`.
 * @param {string} [domain='releases'] - Value stored in the `domain` column.
 * @param {string} [role='photo'] - Value stored in the `role` column.
 * @param {*} targetId - Value stored in the `target_id` column.
 * @param {boolean} [setAvatar=false] - When true, the first entry gets the role `'avatar'` instead of `role`.
 * @returns {Array<Object>} One row object per file, in input order.
 */
function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId, setAvatar = false) {
  return (files || [])
    .filter(Boolean)
    .map((file, index) => ({
      path: file.filepath,
      thumbnail: file.thumbpath,
      mime: file.mimetype,
      hash: file.hash,
      source: file.source,
      index,
      domain,
      target_id: targetId,
      role: setAvatar && index === 0 ? 'avatar' : role,
    }));
}
|
||||
|
||||
// before fetching
|
||||
/**
 * Drop source URLs that already have a `media` row for the given domains and roles.
 * Intended to run before fetching, so known sources are never downloaded again.
 *
 * A source repeated within `photos` is returned only once, and an empty input
 * returns immediately without querying the database.
 *
 * @param {Array<string>} photos - Source URLs to check.
 * @param {string|Array<string>} [domains=['releases']] - Domain(s) to match.
 * @param {string|Array<string>} [roles=['photo']] - Role(s) to match.
 * @param {string} identifier - Label used in the log message only.
 * @returns {Promise<Array<string>>} The sources not yet present, in first-seen order.
 */
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
  const uniqueSources = [...new Set(photos || [])];

  if (uniqueSources.length === 0) {
    return [];
  }

  const photoSourceEntries = await knex('media')
    .whereIn('source', uniqueSources)
    .whereIn('domain', [].concat(domains))
    .whereIn('role', [].concat(roles)); // accept string argument

  const knownSources = new Set(photoSourceEntries.map(entry => entry.source));
  const newPhotos = uniqueSources.filter(source => !knownSources.has(source));

  // count ignored sources rather than matched rows; one source may match several rows
  const ignoredCount = uniqueSources.length - newPhotos.length;

  if (ignoredCount > 0) {
    console.log(`Ignoring ${ignoredCount} ${roles} items already present by source for ${identifier}`);
  }

  return newPhotos;
}
|
||||
|
||||
// after fetching
|
||||
/**
 * Drop fetched files whose hash already has a `media` row for the given domains
 * and roles. Intended to run after fetching, when content hashes are known.
 *
 * Falsy entries (failed fetches) are removed before the hashes are read, files
 * repeating a hash within the same batch are reduced to the first occurrence,
 * and an empty input returns immediately without querying the database.
 *
 * @param {Array<Object|null>} files - Fetched file descriptors, each with a `hash`.
 * @param {string|Array<string>} [domains=['releases']] - Domain(s) to match.
 * @param {string|Array<string>} [roles=['photo']] - Role(s) to match.
 * @param {string} identifier - Label used in the log message only.
 * @returns {Promise<Array<Object>>} Files whose hash is not yet present.
 */
async function filterHashDuplicates(files, domains = ['releases'], roles = ['photo'], identifier) {
  const fetchedFiles = (files || []).filter(Boolean);

  if (fetchedFiles.length === 0) {
    return [];
  }

  const photoHashEntries = await knex('media')
    .whereIn('hash', fetchedFiles.map(file => file.hash))
    .whereIn('domain', [].concat(domains))
    .whereIn('role', [].concat(roles)); // accept string argument

  const storedHashes = new Set(photoHashEntries.map(entry => entry.hash));
  const storedCount = fetchedFiles.filter(file => storedHashes.has(file.hash)).length;

  if (storedCount > 0) {
    console.log(`Ignoring ${storedCount} ${roles} items already present by hash for ${identifier}`);
  }

  // start from the stored hashes so the same pass also catches repeats within this batch
  const seenHashes = new Set(storedHashes);

  return fetchedFiles.filter((file) => {
    if (seenHashes.has(file.hash)) {
      return false;
    }

    seenHashes.add(file.hash);
    return true;
  });
}
|
||||
|
||||
/**
 * Download a single photo and describe it, without writing anything to disk.
 *
 * Any failure (malformed URL, request error, non-200 response) is logged as a
 * warning and results in `null`, so one bad photo does not reject a whole batch.
 *
 * The MIME type is derived from the URL path; when the path gives no type, the
 * response's `content-type` header is used, and finally `'image/jpeg'`.
 *
 * @param {string} photoUrl - URL to fetch.
 * @param {number|null} index - Position in the batch, used in the warning message only.
 * @param {string} identifier - Label used in the warning message only.
 * @returns {Promise<Object|null>} `{ photo, mimetype, extension, hash, source }`, or `null` on failure.
 */
async function fetchPhoto(photoUrl, index, identifier) {
  try {
    // parsed inside the try so a malformed URL is reported like any other failure
    const { pathname } = new URL(photoUrl);
    const res = await bhttp.get(photoUrl);

    if (res.statusCode !== 200) {
      throw new Error(`Response ${res.statusCode} not OK`);
    }

    const contentType = res.headers && res.headers['content-type'];
    const headerType = contentType ? String(contentType).split(';')[0].trim() : null; // strip parameters such as charset
    const mimetype = mime.getType(pathname) || headerType || 'image/jpeg';

    return {
      photo: res.body,
      mimetype,
      extension: mime.getExtension(mimetype),
      hash: getHash(res.body),
      source: photoUrl,
    };
  } catch (error) {
    console.warn(`Failed to store photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`);

    return null;
  }
}
|
||||
|
||||
/**
 * Write fetched photos and their thumbnails below `config.media.path`.
 *
 * With `actorSlug`, files go to `actors/<slug>/` and are named from a timestamp
 * plus the file's index. Otherwise they go to
 * `releases/<network slug>/<site slug>/<releaseId>/` and are named `poster`
 * (when `isPoster`) or by 1-based index. The target directory is not created here.
 *
 * @param {Array<Object>} files - Fetched files carrying `photo` (contents) and `extension`.
 * @param {Object|null} release - Read for `site.network.slug` and `site.slug` when no `actorSlug` is given.
 * @param {*} releaseId - Converted with `toString()` for the release directory name.
 * @param {string} [actorSlug] - When set, store under the actor directory instead.
 * @param {boolean} [isPoster=false] - Name the release file `poster` instead of its index.
 * @returns {Promise<Array<Object>>} The input files extended with `thumbnail`, `filepath` and `thumbpath`.
 */
async function savePhotos(files, release, releaseId, actorSlug, isPoster = false) {
  if (!files || files.length === 0) {
    return [];
  }

  // one base timestamp per batch; adding the index keeps actor file names distinct
  const timestamp = new Date().getTime();

  const directory = actorSlug
    ? path.join('actors', actorSlug)
    : path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString());

  return Promise.map(files, async (file, index) => {
    const thumbnail = await getThumbnail(file.photo);

    const releaseName = isPoster ? 'poster' : index + 1;
    const basename = actorSlug ? timestamp + index : releaseName;

    const filepath = path.join(directory, `${basename}.${file.extension}`);
    const thumbpath = path.join(directory, `${basename}_thumb.${file.extension}`);

    await Promise.all([
      fs.writeFile(path.join(config.media.path, filepath), file.photo),
      fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
    ]);

    return {
      ...file,
      thumbnail,
      filepath,
      thumbpath,
    };
  });
}
|
||||
|
||||
/**
 * Fetch, de-duplicate and store the poster of a release.
 *
 * Stops early when the release has no poster, when the poster's source URL or
 * content hash already has a matching `media` row, or when the download fails.
 *
 * @param {Object} release - Read for `poster`, `title`, `site.name` and the site slugs used when saving.
 * @param {*} releaseId - Stored as `target_id` and used in the file path.
 * @returns {Promise<void>}
 */
async function storePoster(release, releaseId) {
  const identifier = `(${release.site.name}, ${releaseId}) "${release.title}"`;

  if (!release.poster) {
    console.warn(`No poster available for ${identifier}`);
    return;
  }

  const [newPoster] = await filterSourceDuplicates([release.poster], 'releases', 'poster', identifier);

  if (!newPoster) return;

  console.log(`Fetching poster for ${identifier}`);

  const metaFile = await fetchPhoto(release.poster, null, identifier);

  // fetchPhoto has already logged the failure
  if (!metaFile) return;

  const [uniquePoster] = await filterHashDuplicates([metaFile], 'releases', 'poster', identifier);

  if (!uniquePoster) return;

  const savedPosters = await savePhotos([uniquePoster], release, releaseId, null, true);

  await knex('media').insert(curatePhotoEntries(savedPosters, 'releases', 'poster', releaseId));
}
|
||||
|
||||
|
||||
/**
 * Fetch, de-duplicate and store the photos of a release.
 *
 * Sources that already have a matching `media` row are skipped before fetching,
 * failed downloads are dropped, and files whose hash already has a matching row
 * are skipped before saving. Nothing is inserted when no photo remains.
 *
 * @param {Object} release - Read for `photos`, `title`, `site.name` and the site slugs used when saving.
 * @param {*} releaseId - Stored as `target_id` and used in the file path.
 * @returns {Promise<void>}
 */
async function storePhotos(release, releaseId) {
  const identifier = `(${release.site.name}, ${releaseId}) "${release.title}"`;

  if (!release.photos || release.photos.length === 0) {
    console.warn(`No photos available for ${identifier}`);
    return;
  }

  const newPhotos = await filterSourceDuplicates(release.photos, 'releases', 'photo', identifier);

  if (newPhotos.length === 0) return;

  console.log(`Fetching ${newPhotos.length} photos for ${identifier}`);

  const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, identifier), {
    concurrency: 10,
  });

  // fetchPhoto yields null for failed downloads; drop those before reading hashes
  const fetchedFiles = metaFiles.filter(Boolean);

  if (fetchedFiles.length === 0) return;

  const uniquePhotos = await filterHashDuplicates(fetchedFiles, 'releases', 'photo', identifier);

  if (uniquePhotos.length === 0) return;

  const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);

  await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId));

  // report what was written, not what was attempted
  console.log(`Stored ${savedPhotos.length} photos for ${identifier}`);
}
|
||||
|
||||
async function storeTrailer(release, releaseId) {
|
||||
|
@ -185,69 +228,28 @@ async function storeAvatars(profile, actor) {
|
|||
return;
|
||||
}
|
||||
|
||||
console.log(`Storing ${profile.avatars.length} avatars for '${profile.name}'`);
|
||||
const newPhotos = await filterSourceDuplicates(profile.avatars, 'actors', ['avatar', 'photo'], actor.name);
|
||||
|
||||
const files = await Promise.map(profile.avatars, async (avatarUrl, index) => {
|
||||
try {
|
||||
const { pathname } = new URL(avatarUrl);
|
||||
const mimetype = mime.getType(pathname);
|
||||
if (newPhotos.length === 0) return;
|
||||
|
||||
const res = await bhttp.get(avatarUrl);
|
||||
console.log(`Fetching ${newPhotos.length} avatars for '${actor.name}'`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const thumbnail = await getThumbnail(res.body);
|
||||
const extension = mime.getExtension(mimetype);
|
||||
|
||||
const timestamp = new Date().getTime();
|
||||
|
||||
const filepath = path.join('actors', actor.slug, `${timestamp + index}.${extension}`);
|
||||
const thumbpath = path.join('actors', actor.slug, `${timestamp + index}_thumb.${extension}`);
|
||||
const hash = getHash(res.body);
|
||||
|
||||
await Promise.all([
|
||||
fs.writeFile(path.join(config.media.path, filepath), res.body),
|
||||
fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
|
||||
]);
|
||||
|
||||
return {
|
||||
filepath,
|
||||
thumbpath,
|
||||
mimetype,
|
||||
hash,
|
||||
source: avatarUrl,
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error(`Response ${res.statusCode} not OK`);
|
||||
} catch (error) {
|
||||
console.warn(`Failed to store avatar ${index + 1} for '${profile.name}': ${avatarUrl}`);
|
||||
|
||||
return null;
|
||||
}
|
||||
}, {
|
||||
const metaFiles = await Promise.map(newPhotos, async (photoUrl, index) => fetchPhoto(photoUrl, index, actor.name), {
|
||||
concurrency: 10,
|
||||
});
|
||||
|
||||
const avatars = files.filter(file => file);
|
||||
|
||||
const existingAvatars = await knex('media')
|
||||
.whereIn('hash', avatars.map(file => file.hash));
|
||||
|
||||
const newAvatars = avatars.filter(file => !existingAvatars.some(avatar => file.hash === avatar.hash));
|
||||
const hasAvatar = existingAvatars.some(avatar => avatar.role === 'avatar');
|
||||
|
||||
await knex('media')
|
||||
.insert(newAvatars.map((file, index) => ({
|
||||
path: file.filepath,
|
||||
thumbnail: file.thumbpath,
|
||||
mime: file.mimetype,
|
||||
hash: file.hash,
|
||||
source: file.source,
|
||||
index,
|
||||
domain: 'actors',
|
||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'actors', ['avatar', 'photo'], actor.name);
|
||||
const [savedPhotos, avatarEntry] = await Promise.all([
|
||||
savePhotos(uniquePhotos, null, null, actor.slug),
|
||||
knex('media').where({
|
||||
target_id: actor.id,
|
||||
role: index === 0 && !hasAvatar ? 'avatar' : 'photo',
|
||||
})));
|
||||
domain: 'actors',
|
||||
role: 'avatar',
|
||||
}).first(),
|
||||
]);
|
||||
|
||||
// if no avatar entry is present, curatePhotoEntries will store the first photo as avatar
|
||||
await knex('media').insert(curatePhotoEntries(savedPhotos, 'actors', 'photo', actor.id, !avatarEntry));
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
@ -225,13 +225,13 @@ async function storeReleaseAssets(release, releaseId) {
|
|||
|
||||
try {
|
||||
await Promise.all([
|
||||
associateTags(release, releaseId),
|
||||
// associateTags(release, releaseId),
|
||||
storePhotos(release, releaseId),
|
||||
storePoster(release, releaseId),
|
||||
storeTrailer(release, releaseId),
|
||||
]);
|
||||
} catch (error) {
|
||||
console.log(release, error);
|
||||
console.log(release.url, error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,8 +31,6 @@ async function findSite(url, release) {
|
|||
async function scrapeRelease(url, release, deep = false) {
|
||||
const site = await findSite(url, release);
|
||||
|
||||
console.log(url, site);
|
||||
|
||||
if (!site) {
|
||||
throw new Error('Could not find site in database');
|
||||
}
|
||||
|
|
12
src/tags.js
12
src/tags.js
|
@ -66,14 +66,10 @@ async function associateTags(release, releaseId) {
|
|||
? await matchTags(release.tags) // scraper returned raw tags
|
||||
: release.tags; // tags already matched by scraper
|
||||
|
||||
try {
|
||||
await knex('tags_associated').insert(tags.map(tagId => ({
|
||||
tag_id: tagId,
|
||||
release_id: releaseId,
|
||||
})));
|
||||
} catch (error) {
|
||||
console.log(release, error);
|
||||
}
|
||||
await knex('tags_associated').insert(tags.map(tagId => ({
|
||||
tag_id: tagId,
|
||||
release_id: releaseId,
|
||||
})));
|
||||
}
|
||||
|
||||
async function fetchTags(queryObject, groupsQueryObject, limit = 100) {
|
||||
|
|
Loading…
Reference in New Issue