forked from DebaucheryLibrarian/traxxx
Fixed an off-by-one error in the photo plucker. Fixed the source-duplicate photo filter not handling fallback sources.
This commit is contained in:
parent
dbaf1a9a9c
commit
0b819713b5
|
@ -1992,6 +1992,17 @@
|
|||
"resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz",
|
||||
"integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg="
|
||||
},
|
||||
"array.prototype.flat": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.2.tgz",
|
||||
"integrity": "sha512-VXjh7lAL4KXKF2hY4FnEW9eRW6IhdvFW1sN/JwLbmECbCgACCnBHNyP3lFiYuttr0jxRN9Bsc5+G27dMseSWqQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"define-properties": "^1.1.3",
|
||||
"es-abstract": "^1.15.0",
|
||||
"function-bind": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"asn1": {
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz",
|
||||
|
@ -4534,22 +4545,23 @@
|
|||
"dev": true
|
||||
},
|
||||
"eslint-plugin-import": {
|
||||
"version": "2.18.2",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.18.2.tgz",
|
||||
"integrity": "sha512-5ohpsHAiUBRNaBWAF08izwUGlbrJoJJ+W9/TBwsGoR1MnlgfwMIKrFeSjWbt6moabiXW9xNvtFz+97KHRfI4HQ==",
|
||||
"version": "2.19.1",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.19.1.tgz",
|
||||
"integrity": "sha512-x68131aKoCZlCae7rDXKSAQmbT5DQuManyXo2sK6fJJ0aK5CWAkv6A6HJZGgqC8IhjQxYPgo6/IY4Oz8AFsbBw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"array-includes": "^3.0.3",
|
||||
"array.prototype.flat": "^1.2.1",
|
||||
"contains-path": "^0.1.0",
|
||||
"debug": "^2.6.9",
|
||||
"doctrine": "1.5.0",
|
||||
"eslint-import-resolver-node": "^0.3.2",
|
||||
"eslint-module-utils": "^2.4.0",
|
||||
"eslint-module-utils": "^2.4.1",
|
||||
"has": "^1.0.3",
|
||||
"minimatch": "^3.0.4",
|
||||
"object.values": "^1.1.0",
|
||||
"read-pkg-up": "^2.0.0",
|
||||
"resolve": "^1.11.0"
|
||||
"resolve": "^1.12.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"doctrine": {
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
"eslint-config-airbnb": "^17.1.1",
|
||||
"eslint-config-airbnb-base": "^13.2.0",
|
||||
"eslint-loader": "^2.2.1",
|
||||
"eslint-plugin-import": "^2.18.2",
|
||||
"eslint-plugin-import": "^2.19.1",
|
||||
"eslint-plugin-jsx-a11y": "^6.2.3",
|
||||
"eslint-plugin-react": "^7.17.0",
|
||||
"eslint-plugin-vue": "^6.0.1",
|
||||
|
|
15
src/media.js
15
src/media.js
|
@ -28,7 +28,7 @@ function pluckPhotos(photos, release, specifiedLimit) {
|
|||
|
||||
const plucked = [1]
|
||||
.concat(
|
||||
Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))),
|
||||
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
|
||||
);
|
||||
|
||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
||||
|
@ -78,12 +78,14 @@ function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId
|
|||
// before fetching
|
||||
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
|
||||
const photoSourceEntries = await knex('media')
|
||||
.whereIn('source', photos)
|
||||
.whereIn('source', photos.flat())
|
||||
.whereIn('domain', [].concat(domains))
|
||||
.whereIn('role', [].concat(roles)); // accept string argument
|
||||
|
||||
const photoSources = new Set(photoSourceEntries.map(photo => photo.source));
|
||||
const newPhotos = photos.filter(source => !photoSources.has(source));
|
||||
const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided?
|
||||
? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match
|
||||
: !photoSources.has(source)));
|
||||
|
||||
if (photoSourceEntries.length > 0) {
|
||||
console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
|
||||
|
@ -135,7 +137,7 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
|||
|
||||
throw new Error(`Response ${res.statusCode} not OK`);
|
||||
} catch (error) {
|
||||
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`);
|
||||
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`);
|
||||
|
||||
if (attempt < 3) {
|
||||
await Promise.delay(1000);
|
||||
|
@ -202,7 +204,6 @@ async function storePhotos(release, releaseId) {
|
|||
}
|
||||
|
||||
const pluckedPhotos = pluckPhotos(release.photos, release);
|
||||
|
||||
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
|
||||
if (newPhotos.length === 0) return;
|
||||
|
@ -216,7 +217,9 @@ async function storePhotos(release, releaseId) {
|
|||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||
|
||||
await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId));
|
||||
const curatedPhotoEntries = curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId);
|
||||
|
||||
await knex('media').insert(curatedPhotoEntries);
|
||||
|
||||
console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||
}
|
||||
|
|
|
@ -18,13 +18,13 @@ function scrapePhotos(html) {
|
|||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.map((photoIndex, photoElement) => {
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
||||
return [src.replace('thumbs/', 'photos/'), src];
|
||||
})
|
||||
.toArray();
|
||||
});
|
||||
|
||||
return photos;
|
||||
}
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
|
||||
/**
 * Pick at most `limit` photos, evenly distributed across the set. The first
 * photo is always included, and the last photo is included whenever more than
 * one photo is picked.
 *
 * @param {Array} photos - all available photos, in their original order
 * @param {Object} release - not read by this function; kept so existing callers keep working
 * @param {number} [specifiedLimit] - maximum number of photos to return; when falsy, config.media.limit is used
 * @returns {Array} `photos` itself when it already fits the limit, otherwise a new array with the plucked photos
 */
function pluckPhotos(photos, release, specifiedLimit) {
  const limit = specifiedLimit || config.media.limit;

  if (photos.length <= limit) {
    return photos;
  }

  // Position 1 occupies the first slot, so only limit - 1 further evenly spaced
  // positions are generated (the last one lands on photos.length). Generating
  // `limit` positions here would return limit + 1 photos.
  const plucked = [1].concat(
    Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
  );

  // Positions are 1-based. The Set removes duplicates, which may happen when
  // the photo total and the photo limit are close.
  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]);
}
|
||||
|
||||
module.exports = pluckPhotos;
|
Loading…
Reference in New Issue