Fixed off-by-one in photo plucker. Fixed source duplicate photo function not handling fallback sources.
This commit is contained in:
15
src/media.js
15
src/media.js
@@ -28,7 +28,7 @@ function pluckPhotos(photos, release, specifiedLimit) {
|
||||
|
||||
const plucked = [1]
|
||||
.concat(
|
||||
Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))),
|
||||
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
|
||||
);
|
||||
|
||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
||||
@@ -78,12 +78,14 @@ function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId
|
||||
// before fetching
|
||||
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
|
||||
const photoSourceEntries = await knex('media')
|
||||
.whereIn('source', photos)
|
||||
.whereIn('source', photos.flat())
|
||||
.whereIn('domain', [].concat(domains))
|
||||
.whereIn('role', [].concat(roles)); // accept string argument
|
||||
|
||||
const photoSources = new Set(photoSourceEntries.map(photo => photo.source));
|
||||
const newPhotos = photos.filter(source => !photoSources.has(source));
|
||||
const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided?
|
||||
? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match
|
||||
: !photoSources.has(source)));
|
||||
|
||||
if (photoSourceEntries.length > 0) {
|
||||
console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
|
||||
@@ -135,7 +137,7 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||
|
||||
throw new Error(`Response ${res.statusCode} not OK`);
|
||||
} catch (error) {
|
||||
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`);
|
||||
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`);
|
||||
|
||||
if (attempt < 3) {
|
||||
await Promise.delay(1000);
|
||||
@@ -202,7 +204,6 @@ async function storePhotos(release, releaseId) {
|
||||
}
|
||||
|
||||
const pluckedPhotos = pluckPhotos(release.photos, release);
|
||||
|
||||
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
|
||||
if (newPhotos.length === 0) return;
|
||||
@@ -216,7 +217,9 @@ async function storePhotos(release, releaseId) {
|
||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||
|
||||
await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId));
|
||||
const curatedPhotoEntries = curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId);
|
||||
|
||||
await knex('media').insert(curatedPhotoEntries);
|
||||
|
||||
console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
}
|
||||
|
||||
@@ -18,13 +18,13 @@ function scrapePhotos(html) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.map((photoIndex, photoElement) => {
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
||||
return [src.replace('thumbs/', 'photos/'), src];
|
||||
})
|
||||
.toArray();
|
||||
});
|
||||
|
||||
return photos;
|
||||
}
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
|
||||
/**
 * Pick at most `limit` photos, evenly distributed across the full set.
 *
 * The first photo is always included; the remaining `limit - 1` picks are
 * spread over the rest of the set so that the last pick lands on the final
 * photo. Positions are computed 1-based and converted to array indexes when
 * the photos are looked up.
 *
 * @param {Array} photos - Full list of photo entries to choose from.
 * @param {*} release - Unused here; kept so existing callers keep working.
 * @param {number} [specifiedLimit] - Maximum number of photos to return;
 *   falls back to `config.media.limit` when omitted or falsy.
 * @returns {Array} The original `photos` array when it already fits within
 *   the limit, otherwise a new array of at most `limit` photos.
 */
function pluckPhotos(photos, release, specifiedLimit) {
  const limit = specifiedLimit || config.media.limit;

  if (photos.length <= limit) {
    return photos;
  }

  // Position 1 is seeded explicitly, so only `limit - 1` further positions are
  // generated; generating `limit` of them returned one photo too many.
  // When limit is 1 the generated array is empty and the division never runs.
  const plucked = [1]
    .concat(
      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
    );

  // remove duplicates, may happen when photo total and photo limit are close
  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]);
}
|
||||
|
||||
module.exports = pluckPhotos;
|
||||
Reference in New Issue
Block a user