Fixed off-by-one in photo plucker. Fixed source duplicate photo function not handling fallback sources.

This commit is contained in:
2019-12-12 04:04:35 +01:00
parent dbaf1a9a9c
commit 0b819713b5
5 changed files with 30 additions and 36 deletions

View File

@@ -28,7 +28,7 @@ function pluckPhotos(photos, release, specifiedLimit) {
const plucked = [1]
.concat(
Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))),
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
);
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
@@ -78,12 +78,14 @@ function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId
// before fetching
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
const photoSourceEntries = await knex('media')
.whereIn('source', photos)
.whereIn('source', photos.flat())
.whereIn('domain', [].concat(domains))
.whereIn('role', [].concat(roles)); // accept string argument
const photoSources = new Set(photoSourceEntries.map(photo => photo.source));
const newPhotos = photos.filter(source => !photoSources.has(source));
const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided?
? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match
: !photoSources.has(source)));
if (photoSourceEntries.length > 0) {
console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
@@ -135,7 +137,7 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
throw new Error(`Response ${res.statusCode} not OK`);
} catch (error) {
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`);
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`);
if (attempt < 3) {
await Promise.delay(1000);
@@ -202,7 +204,6 @@ async function storePhotos(release, releaseId) {
}
const pluckedPhotos = pluckPhotos(release.photos, release);
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
if (newPhotos.length === 0) return;
@@ -216,7 +217,9 @@ async function storePhotos(release, releaseId) {
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId));
const curatedPhotoEntries = curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId);
await knex('media').insert(curatedPhotoEntries);
console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
}

View File

@@ -18,13 +18,13 @@ function scrapePhotos(html) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
.map((photoIndex, photoElement) => {
.toArray()
.map((photoElement) => {
const src = $(photoElement).attr('src');
// high res often available in photos/ directory, but not always, provide original as fallback
return [src.replace('thumbs/', 'photos/'), src];
})
.toArray();
});
return photos;
}

View File

@@ -1,21 +0,0 @@
'use strict';
const config = require('config');
/**
 * Pick at most `limit` photos, evenly distributed over the full set.
 *
 * When the set already fits within the limit it is returned untouched (same
 * array reference). Otherwise the first photo is always picked and, for a
 * limit of 2 or more, the last photo as well; the remaining picks are spread
 * evenly in between.
 *
 * @param {Array} photos - Photo entries to pick from.
 * @param {Object} release - Release the photos belong to; not read by this function.
 * @param {number} [specifiedLimit] - Maximum number of photos to return; any
 *   falsy value falls back to `config.media.limit`.
 * @returns {Array} The picked photo entries, in their original order.
 */
function pluckPhotos(photos, release, specifiedLimit) {
    const limit = specifiedLimit || config.media.limit;

    if (photos.length <= limit) {
        return photos;
    }

    // 1-based positions: position 1 is seeded explicitly, so only `limit - 1`
    // further positions are generated, spread over `limit - 1` equal intervals
    // so that the final position lands on the last photo. Generating `limit`
    // extra positions here would return `limit + 1` photos.
    const plucked = [1]
        .concat(
            Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
        );

    // remove duplicates, may happen when photo total and photo limit are close
    return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]);
}
module.exports = pluckPhotos;