Added Fantasy Massage sites. Improved Private scraper, added movie link.

This commit is contained in:
2020-02-08 04:52:32 +01:00
parent ff8ab2fe09
commit d2cb74a252
5 changed files with 143 additions and 95 deletions

View File

@@ -38,9 +38,15 @@ function pluckPhotos(photos, specifiedLimit) {
}
/**
 * Measure the entropy of an image via sharp's channel statistics.
 * Entropy is used downstream as a quality filter (photos below a
 * configured threshold are discarded).
 *
 * @param {Buffer} buffer - Raw image data.
 * @returns {Promise<number>} The entropy reported by sharp, or the
 *   permissive fallback 7.5 when the image cannot be analyzed, so a
 *   stats failure never drops an otherwise-valid photo.
 */
async function getEntropy(buffer) {
	try {
		const { entropy } = await sharp(buffer).stats();

		return entropy;
	} catch (error) {
		logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);

		return 7.5;
	}
}
async function createThumbnail(buffer) {
@@ -80,7 +86,7 @@ function curatePhotoEntries(files) {
}));
}
async function findDuplicates(photos, identifier, prop = null, label) {
async function findDuplicates(photos, identifier, prop = null) {
const duplicates = await knex('media')
.whereIn(identifier, photos.flat().map((photo) => {
if (prop) return photo[prop];
@@ -99,14 +105,6 @@ async function findDuplicates(photos, identifier, prop = null, label) {
return !duplicateLookup.has((prop && source[prop]) || (source.src && source));
});
if (duplicates.length > 0) {
logger.info(`${duplicates.length} media items already present by ${identifier} for ${label}`);
}
if (originals.length > 0) {
logger.info(`Fetching ${originals.length} new media items for ${label}`);
}
return [duplicates, originals];
}
@@ -228,6 +226,8 @@ async function storePhotos(photos, {
const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);
logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`);
const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
concurrency: 10,
}).filter(photo => photo && photo.entropy > entropyFilter);
@@ -235,6 +235,8 @@ async function storePhotos(photos, {
const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);
logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`);
const savedPhotos = await savePhotos(hashOriginals, {
domain,
role,