Refactored 21sextury scraper.

2019-12-09 05:00:49 +01:00
parent d874c508de
commit 04a89efa58
52 changed files with 2621 additions and 2068 deletions


@@ -10,6 +10,7 @@ const sharp = require('sharp');
 const blake2 = require('blake2');
 const knex = require('./knex');
+const pluckPhotos = require('./utils/pluck-photos');
 
 function getHash(buffer) {
   const hash = blake2.createHash('blake2b', { digestLength: 24 });
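The rest of getHash falls outside this hunk. For context, a helper built on the blake2 package normally just feeds the buffer into the hash and returns a hex digest; the following is a sketch of that pattern, not the file's actual code:

function getHash(buffer) {
  // blake2b with a 24-byte digest, matching the options in the hunk above
  const hash = blake2.createHash('blake2b', { digestLength: 24 });

  hash.update(buffer);

  return hash.digest('hex');
}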
@@ -94,10 +95,10 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho
 }
 
 async function fetchPhoto(photoUrl, index, identifier) {
+  const { pathname } = new URL(photoUrl);
+  const mimetype = mime.getType(pathname);
+
   try {
-    const { pathname } = new URL(photoUrl);
-    const mimetype = mime.getType(pathname);
-
     const res = await bhttp.get(photoUrl);
 
     if (res.statusCode === 200) {
@@ -176,7 +177,11 @@ async function storePhotos(release, releaseId) {
     return;
   }
 
-  const newPhotos = await filterSourceDuplicates(release.photos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
+  const pluckedPhotos = pluckPhotos(release.photos, release);
+
+  console.log(release.photos, pluckedPhotos);
+
+  const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
 
   if (newPhotos.length === 0) return;
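The implementation of ./utils/pluck-photos is not part of this diff. Judging from the call pluckPhotos(release.photos, release), it reduces a release's photo set to a limited selection before duplicate filtering and storage. A minimal sketch of such a helper; the limit of 10 and the even-spacing strategy are assumptions, not details taken from this commit:

// utils/pluck-photos.js (hypothetical sketch, not the module added by this commit)
const defaultLimit = 10; // assumed cap on photos stored per release

function pluckPhotos(photos, release, limit = defaultLimit) {
  // release is accepted because the caller passes it; a real implementation
  // might derive a per-site limit from it, but this sketch ignores it
  if (photos.length <= limit) {
    return photos;
  }

  // pick evenly spaced photos, always including the first and the last
  const interval = (photos.length - 1) / (limit - 1);

  return Array.from({ length: limit }, (_, index) => photos[Math.round(index * interval)]);
}

module.exports = pluckPhotos;

With 30 photos and the default limit, this keeps 10 photos spread from photos[0] through photos[29], so the stored set samples the whole gallery rather than just its first entries.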