forked from DebaucheryLibrarian/traxxx
Fixed off-by-one in photo plucker. Fixed source duplicate photo function not handling fallback sources.
This commit is contained in:
parent
dbaf1a9a9c
commit
0b819713b5
|
@ -1992,6 +1992,17 @@
|
||||||
"resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz",
|
"resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz",
|
||||||
"integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg="
|
"integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg="
|
||||||
},
|
},
|
||||||
|
"array.prototype.flat": {
|
||||||
|
"version": "1.2.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.2.tgz",
|
||||||
|
"integrity": "sha512-VXjh7lAL4KXKF2hY4FnEW9eRW6IhdvFW1sN/JwLbmECbCgACCnBHNyP3lFiYuttr0jxRN9Bsc5+G27dMseSWqQ==",
|
||||||
|
"dev": true,
|
||||||
|
"requires": {
|
||||||
|
"define-properties": "^1.1.3",
|
||||||
|
"es-abstract": "^1.15.0",
|
||||||
|
"function-bind": "^1.1.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
"asn1": {
|
"asn1": {
|
||||||
"version": "0.2.4",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz",
|
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz",
|
||||||
|
@ -4534,22 +4545,23 @@
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"eslint-plugin-import": {
|
"eslint-plugin-import": {
|
||||||
"version": "2.18.2",
|
"version": "2.19.1",
|
||||||
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.18.2.tgz",
|
"resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.19.1.tgz",
|
||||||
"integrity": "sha512-5ohpsHAiUBRNaBWAF08izwUGlbrJoJJ+W9/TBwsGoR1MnlgfwMIKrFeSjWbt6moabiXW9xNvtFz+97KHRfI4HQ==",
|
"integrity": "sha512-x68131aKoCZlCae7rDXKSAQmbT5DQuManyXo2sK6fJJ0aK5CWAkv6A6HJZGgqC8IhjQxYPgo6/IY4Oz8AFsbBw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"array-includes": "^3.0.3",
|
"array-includes": "^3.0.3",
|
||||||
|
"array.prototype.flat": "^1.2.1",
|
||||||
"contains-path": "^0.1.0",
|
"contains-path": "^0.1.0",
|
||||||
"debug": "^2.6.9",
|
"debug": "^2.6.9",
|
||||||
"doctrine": "1.5.0",
|
"doctrine": "1.5.0",
|
||||||
"eslint-import-resolver-node": "^0.3.2",
|
"eslint-import-resolver-node": "^0.3.2",
|
||||||
"eslint-module-utils": "^2.4.0",
|
"eslint-module-utils": "^2.4.1",
|
||||||
"has": "^1.0.3",
|
"has": "^1.0.3",
|
||||||
"minimatch": "^3.0.4",
|
"minimatch": "^3.0.4",
|
||||||
"object.values": "^1.1.0",
|
"object.values": "^1.1.0",
|
||||||
"read-pkg-up": "^2.0.0",
|
"read-pkg-up": "^2.0.0",
|
||||||
"resolve": "^1.11.0"
|
"resolve": "^1.12.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"doctrine": {
|
"doctrine": {
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
"eslint-config-airbnb": "^17.1.1",
|
"eslint-config-airbnb": "^17.1.1",
|
||||||
"eslint-config-airbnb-base": "^13.2.0",
|
"eslint-config-airbnb-base": "^13.2.0",
|
||||||
"eslint-loader": "^2.2.1",
|
"eslint-loader": "^2.2.1",
|
||||||
"eslint-plugin-import": "^2.18.2",
|
"eslint-plugin-import": "^2.19.1",
|
||||||
"eslint-plugin-jsx-a11y": "^6.2.3",
|
"eslint-plugin-jsx-a11y": "^6.2.3",
|
||||||
"eslint-plugin-react": "^7.17.0",
|
"eslint-plugin-react": "^7.17.0",
|
||||||
"eslint-plugin-vue": "^6.0.1",
|
"eslint-plugin-vue": "^6.0.1",
|
||||||
|
|
15
src/media.js
15
src/media.js
|
@ -28,7 +28,7 @@ function pluckPhotos(photos, release, specifiedLimit) {
|
||||||
|
|
||||||
const plucked = [1]
|
const plucked = [1]
|
||||||
.concat(
|
.concat(
|
||||||
Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))),
|
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
|
||||||
);
|
);
|
||||||
|
|
||||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
||||||
|
@ -78,12 +78,14 @@ function curatePhotoEntries(files, domain = 'releases', role = 'photo', targetId
|
||||||
// before fetching
|
// before fetching
|
||||||
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
|
async function filterSourceDuplicates(photos, domains = ['releases'], roles = ['photo'], identifier) {
|
||||||
const photoSourceEntries = await knex('media')
|
const photoSourceEntries = await knex('media')
|
||||||
.whereIn('source', photos)
|
.whereIn('source', photos.flat())
|
||||||
.whereIn('domain', [].concat(domains))
|
.whereIn('domain', [].concat(domains))
|
||||||
.whereIn('role', [].concat(roles)); // accept string argument
|
.whereIn('role', [].concat(roles)); // accept string argument
|
||||||
|
|
||||||
const photoSources = new Set(photoSourceEntries.map(photo => photo.source));
|
const photoSources = new Set(photoSourceEntries.map(photo => photo.source));
|
||||||
const newPhotos = photos.filter(source => !photoSources.has(source));
|
const newPhotos = photos.filter(source => (Array.isArray(source) // fallbacks provided?
|
||||||
|
? !source.some(sourceX => photoSources.has(sourceX)) // ensure none of the sources match
|
||||||
|
: !photoSources.has(source)));
|
||||||
|
|
||||||
if (photoSourceEntries.length > 0) {
|
if (photoSourceEntries.length > 0) {
|
||||||
console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
|
console.log(`Ignoring ${photoSourceEntries.length} ${roles} items already present by source for ${identifier}`);
|
||||||
|
@ -135,7 +137,7 @@ async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
|
||||||
|
|
||||||
throw new Error(`Response ${res.statusCode} not OK`);
|
throw new Error(`Response ${res.statusCode} not OK`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} (${photoUrl}) for ${identifier}: ${error}`);
|
console.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${identifier} (${photoUrl}): ${error}`);
|
||||||
|
|
||||||
if (attempt < 3) {
|
if (attempt < 3) {
|
||||||
await Promise.delay(1000);
|
await Promise.delay(1000);
|
||||||
|
@ -202,7 +204,6 @@ async function storePhotos(release, releaseId) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const pluckedPhotos = pluckPhotos(release.photos, release);
|
const pluckedPhotos = pluckPhotos(release.photos, release);
|
||||||
|
|
||||||
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
|
|
||||||
if (newPhotos.length === 0) return;
|
if (newPhotos.length === 0) return;
|
||||||
|
@ -216,7 +217,9 @@ async function storePhotos(release, releaseId) {
|
||||||
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);
|
||||||
|
|
||||||
await knex('media').insert(curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId));
|
const curatedPhotoEntries = curatePhotoEntries(savedPhotos, 'releases', 'photo', releaseId);
|
||||||
|
|
||||||
|
await knex('media').insert(curatedPhotoEntries);
|
||||||
|
|
||||||
console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
console.log(`Stored ${newPhotos.length} photos for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,13 +18,13 @@ function scrapePhotos(html) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
|
||||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||||
.map((photoIndex, photoElement) => {
|
.toArray()
|
||||||
|
.map((photoElement) => {
|
||||||
const src = $(photoElement).attr('src');
|
const src = $(photoElement).attr('src');
|
||||||
|
|
||||||
// high res often available in photos/ directory, but not always, provide original as fallback
|
// high res often available in photos/ directory, but not always, provide original as fallback
|
||||||
return [src.replace('thumbs/', 'photos/'), src];
|
return [src.replace('thumbs/', 'photos/'), src];
|
||||||
})
|
});
|
||||||
.toArray();
|
|
||||||
|
|
||||||
return photos;
|
return photos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const config = require('config');
|
|
||||||
|
|
||||||
// pick {photoLimit} photos evenly distributed photos from a set with {photoTotal} photos, return array of indexes starting at 1
|
|
||||||
function pluckPhotos(photos, release, specifiedLimit) {
|
|
||||||
const limit = specifiedLimit || config.media.limit;
|
|
||||||
|
|
||||||
if (photos.length <= limit) {
|
|
||||||
return photos;
|
|
||||||
}
|
|
||||||
|
|
||||||
const plucked = [1]
|
|
||||||
.concat(
|
|
||||||
Array.from({ length: limit }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))),
|
|
||||||
);
|
|
||||||
|
|
||||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = pluckPhotos;
|
|
Loading…
Reference in New Issue