Fixed Porn Pros scraper. Added various Score site logos.
@@ -25,10 +25,11 @@ exports.up = knex => Promise.resolve()
       table.integer('index');
       table.string('mime');
 
+      table.string('hash');
       table.string('type');
       table.string('quality', 6);
+      table.float('entropy');
 
-      table.string('hash');
       table.text('comment');
       table.string('source', 1000);
 
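Context for the new entropy column: later in this commit, the media module computes an entropy value for every fetched image via sharp and stores it with the file, so low-information placeholder images (blank thumbnails, generic clipart) can be filtered out. A minimal illustrative sketch using the same sharp call the diffs below introduce; isLikelyRealPhoto is a stand-in name, and the 2.5 threshold is the entropyFilter default from the legacy storePhotos:

const sharp = require('sharp');

// Sketch only: mirrors getEntropy in src/media.js further down.
// Entropy near 0 means a flat, information-poor image; real photos
// typically score well above the 2.5 cutoff used as entropyFilter.
async function isLikelyRealPhoto(buffer, threshold = 2.5) {
  const { entropy } = await sharp(buffer).stats();
  return entropy > threshold;
}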
[Binary image diffs: several dozen new Score site logo PNGs added and a handful of existing logos updated; the diff viewer emitted only file-size metadata here, omitted.]
@@ -4481,6 +4481,8 @@ const sites = [
     slug: 'milfbundle',
     url: 'https://www.milfbundle.com',
     network: 'score',
+    show: false,
+    scrape: false,
   },
   {
     name: 'Teaming Cock',

@@ -4543,10 +4545,12 @@ const sites = [
     network: 'score',
   },
   {
-    name: 'Definition Porn Videos',
+    name: 'Porn Mega Load',
     slug: 'pornmegaload',
     url: 'https://www.pornmegaload.com',
     network: 'score',
+    show: false,
+    scrape: false,
   },
   {
     name: 'SaRennas World',

@@ -4630,13 +4634,13 @@ const sites = [
   },
   {
     name: 'Tawny Peaks',
-    slug: 'tawny',
+    slug: 'tawnypeaks',
     url: 'https://www.bigboobbundle.com/tawny-peaks',
     network: 'score',
   },
   {
     name: 'Tiffany Towers',
-    slug: 'tiffany',
+    slug: 'tiffanytowers',
     url: 'https://www.bigboobbundle.com/tiffany-towers',
     network: 'score',
   },
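The new show: false / scrape: false flags mark the Score bundle and placeholder sites that should neither be listed nor scraped. A hypothetical knex sketch of how a scrape run might honor the flag; the join shape is the one the logo script at the end of this commit uses, but the 'scrape' column name is assumed from the seed keys:

// Hypothetical query; 'scrape' as a column on sites is an assumption.
const scrapeableScoreSites = await knex('sites')
  .join('networks', 'networks.id', 'sites.network_id')
  .where('networks.slug', 'score')
  .whereNot('sites.scrape', false);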
@@ -12,7 +12,7 @@ const scrapers = require('./scrapers/scrapers');
 const whereOr = require('./utils/where-or');
 const resolvePlace = require('./utils/resolve-place');
 const slugify = require('./utils/slugify');
-const { createMediaDirectory, storePhotos } = require('./media');
+const { createMediaDirectory, storePhotos } = require('./media_legacy');
 
 async function curateActor(actor) {
   const [aliases, avatar, photos, social] = await Promise.all([
src/media.js
@@ -2,39 +2,32 @@
 
 const config = require('config');
 const Promise = require('bluebird');
-const path = require('path');
-const fs = require('fs-extra');
 const bhttp = require('bhttp');
 const mime = require('mime');
 const sharp = require('sharp');
 const blake2 = require('blake2');
 
-const logger = require('./logger')(__filename);
+const logger = require('./logger');
 const knex = require('./knex');
-const upsert = require('./utils/upsert');
-const { ex } = require('./utils/q');
 
 function getHash(buffer) {
   const hash = blake2.createHash('blake2b', { digestLength: 24 });
 
   hash.update(buffer);
 
   return hash.digest('hex');
 }
 
-function pluckPhotos(photos, specifiedLimit) {
+function pluckItems(items, specifiedLimit) {
   const limit = specifiedLimit || config.media.limit;
 
-  if (photos.length <= limit) {
-    return photos;
-  }
+  if (items.length <= limit) return items;
 
   const plucked = [1]
     .concat(
-      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
+      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))),
     );
 
-  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
+  return Array.from(new Set(plucked)).map(itemIndex => items[itemIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
 }
 
 async function getEntropy(buffer) {
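pluckItems (renamed from pluckPhotos now that it also feeds non-photo media) keeps the first item and then evenly spaced picks up to the limit, deduplicating indices. A worked illustration with an explicit limit, independent of config.media.limit:

const items = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'];

// limit 4 -> indices [1, round(8/3), round(16/3), round(24/3)] = [1, 3, 5, 8]
pluckItems(items, 4); // => ['a', 'c', 'e', 'h']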
@@ -49,337 +42,74 @@ async function getEntropy(buffer) {
   }
 }
 
-async function createThumbnail(buffer) {
+async function fetchItem(source, index, existingItemsBySource, attempt = 1) {
   try {
-    const thumbnail = sharp(buffer)
-      .resize({
-        height: config.media.thumbnailSize,
-        withoutEnlargement: true,
-      })
-      .jpeg({
-        quality: config.media.thumbnailQuality,
-      })
-      .toBuffer();
-
-    return thumbnail;
-  } catch (error) {
-    logger.error(`Failed to create thumbnail: ${error.message}`);
-  }
-
-  return null;
-}
-
-async function createMediaDirectory(domain, subpath) {
-  const filepath = path.join(config.media.path, domain, subpath);
-
-  await fs.mkdir(filepath, { recursive: true });
-  return filepath;
-}
-
-function curatePhotoEntries(files) {
-  return files.map((file, index) => ({
-    path: file.filepath,
-    thumbnail: file.thumbpath,
-    mime: file.mimetype,
-    hash: file.hash,
-    source: file.source,
-    index,
-  }));
-}
-
-async function findDuplicates(photos, identifier, prop = null) {
-  const duplicates = await knex('media')
-    .whereIn(identifier, photos.flat().map((photo) => {
-      if (prop) return photo[prop];
-      if (photo.src) return photo.src;
-
-      return photo;
-    }));
-
-  const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier]));
-
-  const originals = photos.filter((source) => {
     if (Array.isArray(source)) {
-      return !source.some(sourceX => duplicateLookup.has((prop && sourceX[prop]) || (sourceX.src && sourceX)));
+      // fallbacks provided
+      return source.reduce((outcome, sourceX) => outcome.catch(async () => {
+        const item = await fetchItem(sourceX, index, existingItemsBySource);
+
+        if (item) {
+          return item;
+        }
+
+        throw new Error(`Item not available: ${source}`);
+      }), Promise.reject(new Error()));
     }
 
-    return !duplicateLookup.has((prop && source[prop]) || (source.src && source));
-  });
+    if (existingItemsBySource[source]) {
+      return existingItemsBySource[source];
+    }
 
-  return [duplicates, originals];
-}
-
-async function extractPhoto(source) {
-  const res = await bhttp.get(source.src);
-
-  if (res.statusCode === 200) {
-    const { q } = ex(res.body.toString());
-
-    return source.extract(q);
-  }
-
-  return null;
-}
-
-async function fetchPhoto(photoUrl, index, label, attempt = 1) {
-  if (photoUrl.src && photoUrl.extract) {
-    // source links to page containing a (presumably) tokenized photo
-    const photo = await extractPhoto(photoUrl);
-
-    return fetchPhoto(photo, index, label);
-  }
-
-  if (Array.isArray(photoUrl)) {
-    return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
-      const photo = await fetchPhoto(url, index, label);
-
-      if (photo) {
-        return photo;
-      }
-
-      throw new Error('Photo not available');
-    }), Promise.reject(new Error()));
-  }
-
-  try {
-    const { pathname } = new URL(photoUrl);
-    const res = await bhttp.get(photoUrl);
+    const res = await bhttp.get(source);
 
     if (res.statusCode === 200) {
+      const { pathname } = new URL(source);
       const mimetype = mime.getType(pathname);
       const extension = mime.getExtension(mimetype);
       const hash = getHash(res.body);
       const entropy = await getEntropy(res.body);
 
       return {
-        photo: res.body,
+        file: res.body,
         mimetype,
         extension,
         hash,
         entropy,
-        source: photoUrl,
+        source,
       };
     }
 
     throw new Error(`Response ${res.statusCode} not OK`);
   } catch (error) {
-    logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);
-
-    if (attempt < 3) {
-      await Promise.delay(5000);
-      return fetchPhoto(photoUrl, index, label, attempt + 1);
+    if (attempt <= 3) {
+      return fetchItem(source, index, existingItemsBySource, attempt + 1);
     }
 
-    return null;
+    throw new Error(`Failed to fetch media from ${source}: ${error}`);
   }
 }
 
-async function savePhotos(files, {
-  domain = 'release',
-  subpath,
-  role = 'photo',
-  naming = 'index',
-}) {
-  return Promise.map(files, async (file, index) => {
-    try {
-      const timestamp = new Date().getTime();
-      const thumbnail = await createThumbnail(file.photo);
-
-      const filename = naming === 'index'
-        ? `${file.role || role}${index + 1}`
-        : `${timestamp + index}`;
-
-      const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`);
-      const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`);
-
-      await Promise.all([
-        fs.writeFile(path.join(config.media.path, filepath), file.photo),
-        fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
-      ]);
-
-      return {
-        ...file,
-        thumbnail,
-        filepath,
-        thumbpath,
-      };
-    } catch (error) {
-      logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`);
-      return null;
-    }
-  });
+async function fetchItems(itemSources, existingItemsBySource) {
+  return Promise.map(itemSources, async (source, index) => fetchItem(source, index, existingItemsBySource));
 }
 
-async function storePhotos(photos, {
-  domain = 'release',
-  role = 'photo',
-  naming = 'index',
-  targetId,
-  subpath,
-  primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
-  entropyFilter = 2.5, // filter out fallback avatars and other generic clipart
-}, label) {
-  if (!photos || photos.length === 0) {
-    logger.info(`No ${role}s available for ${label}`);
-    return;
-  }
+async function storeReleaseMedia(releases, {
+  type = 'poster',
+} = {}) {
+  const pluckedSources = releases.map(release => pluckItems(release[type]));
+  const existingSourceItems = await knex('media').whereIn('source', pluckedSources.flat());
+  const existingItemsBySource = existingSourceItems.reduce((acc, item) => ({ ...acc, [item.source]: item }), {});
 
-  const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
-  const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);
+  const fetchedItems = await fetchItems(pluckedSources, existingItemsBySource);
+  const existingHashItems = await knex('media').whereIn('hash', fetchedItems.map(item => item.hash));
+  const existingItemsByHash = existingHashItems.reduce((acc, item) => ({ ...acc, [item.hash]: item }), {});
 
-  logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`);
+  const newItems = fetchedItems.filter(item => !existingItemsByHash[item.hash]);
 
-  const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
-    concurrency: 10,
-  }).filter(photo => photo && photo.entropy > entropyFilter);
-
-  const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
-  const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);
-
-  logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`);
-
-  const savedPhotos = await savePhotos(hashOriginals, {
-    domain,
-    role,
-    targetId,
-    subpath,
-    naming,
-  });
-
-  const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean), domain, role, targetId);
-
-  const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');
-  const photoEntries = Array.isArray(newPhotos)
-    ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
-    : [...sourceDuplicates, ...hashDuplicates];
-
-  const photoAssociations = photoEntries
-    .map(photoEntry => ({
-      [`${domain}_id`]: targetId,
-      media_id: photoEntry.id,
-    }));
-
-  if (primaryRole) {
-    // store one photo as a 'primary' photo, such as an avatar or cover
-    const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
-      .where(`${domain}_id`, targetId)
-      .first();
-
-    if (primaryPhoto) {
-      const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);
-
-      await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);
-      return;
-    }
-
-    await Promise.all([
-      upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
-      upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
-    ]);
-
-    return;
-  }
-
-  await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
-}
-
-/*
-async function storeReleasePhotos(releases, label) {
-  const sources = releases.map(release => pluckPhotos(release.photos)).flat();
-  const uniqueSources = Array.from(new Set(sources));
-
-  const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source', null, label);
-
-  const metaFiles = await Promise.map(
-    sourceOriginals,
-    async (photoUrl, index) => fetchPhoto(photoUrl, index, label),
-    { concurrency: 10 },
-  )
-    .filter(photo => photo);
-
-  const hashUniques = Object.values(metaFiles.reduce((acc, file) => {
-    if (!acc[file.hash]) acc[file.hash] = file;
-
-    return acc;
-  }, {}));
-
-  const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash', label);
-
-  const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => {
-    acc[file.source] = file.hash;
-    return acc;
-  }, {});
-
-  const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat();
-
-  console.log(associations);
-}
-*/
-
-async function storeTrailer(trailers, {
-  domain = 'releases',
-  role = 'trailer',
-  targetId,
-  subpath,
-}, label) {
-  // support scrapers supplying multiple qualities
-  const trailer = Array.isArray(trailers)
-    ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0]
-    : trailers;
-
-  if (!trailer || !trailer.src) {
-    logger.info(`No ${role} available for ${label}`);
-    return;
-  }
-
-  const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);
-
-  const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
-    const { pathname } = new URL(trailerX.src);
-    const mimetype = trailerX.type || mime.getType(pathname);
-
-    const res = await bhttp.get(trailerX.src);
-    const hash = getHash(res.body);
-    const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);
-
-    return {
-      trailer: res.body,
-      path: filepath,
-      mime: mimetype,
-      source: trailerX.src,
-      quality: trailerX.quality || null,
-      hash,
-    };
-  });
-
-  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);
-
-  const newTrailers = await knex('media')
-    .insert(hashOriginals.map(trailerX => ({
-      path: trailerX.path,
-      mime: trailerX.mime,
-      source: trailerX.source,
-      quality: trailerX.quality,
-      hash: trailerX.hash,
-      type: role,
-    })))
-    .returning('*');
-
-  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));
-
-  const trailerEntries = Array.isArray(newTrailers)
-    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
-    : [...sourceDuplicates, ...hashDuplicates];
-
-  await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({
-    release_id: targetId,
-    media_id: trailerEntry.id,
-  })), ['release_id', 'media_id']);
+  console.log(fetchedItems, existingHashItems, existingItemsByHash, newItems);
 }
 
 module.exports = {
-  createMediaDirectory,
-  storePhotos,
-  // storeReleasePhotos,
-  storeTrailer,
+  storeReleaseMedia,
 };
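The Array.isArray branch of the new fetchItem chains fallback sources through a reduce seeded with a rejected promise: each candidate's fetch only runs inside the previous step's .catch, so the first success short-circuits the rest. The idiom in isolation; candidates and fetchOne are stand-ins, not names from this codebase:

// Sequential fallback over async candidates; first fulfilled fetch wins.
const item = await candidates.reduce(
  (outcome, candidate) => outcome.catch(() => fetchOne(candidate)),
  Promise.reject(new Error('seed rejection, caught by the first candidate')),
);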
@@ -0,0 +1,385 @@
+'use strict';
+
+const config = require('config');
+const Promise = require('bluebird');
+const path = require('path');
+const fs = require('fs-extra');
+const bhttp = require('bhttp');
+const mime = require('mime');
+const sharp = require('sharp');
+const blake2 = require('blake2');
+
+const logger = require('./logger')(__filename);
+const knex = require('./knex');
+const upsert = require('./utils/upsert');
+const { ex } = require('./utils/q');
+
+function getHash(buffer) {
+  const hash = blake2.createHash('blake2b', { digestLength: 24 });
+
+  hash.update(buffer);
+
+  return hash.digest('hex');
+}
+
+function pluckPhotos(photos, specifiedLimit) {
+  const limit = specifiedLimit || config.media.limit;
+
+  if (photos.length <= limit) {
+    return photos;
+  }
+
+  const plucked = [1]
+    .concat(
+      Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))),
+    );
+
+  return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
+}
+
+async function getEntropy(buffer) {
+  try {
+    const { entropy } = await sharp(buffer).stats();
+
+    return entropy;
+  } catch (error) {
+    logger.warn(`Failed to retrieve image entropy, using 7.5: ${error.message}`);
+
+    return 7.5;
+  }
+}
+
+async function createThumbnail(buffer) {
+  try {
+    const thumbnail = sharp(buffer)
+      .resize({
+        height: config.media.thumbnailSize,
+        withoutEnlargement: true,
+      })
+      .jpeg({
+        quality: config.media.thumbnailQuality,
+      })
+      .toBuffer();
+
+    return thumbnail;
+  } catch (error) {
+    logger.error(`Failed to create thumbnail: ${error.message}`);
+  }
+
+  return null;
+}
+
+async function createMediaDirectory(domain, subpath) {
+  const filepath = path.join(config.media.path, domain, subpath);
+
+  await fs.mkdir(filepath, { recursive: true });
+  return filepath;
+}
+
+function curatePhotoEntries(files) {
+  return files.map((file, index) => ({
+    path: file.filepath,
+    thumbnail: file.thumbpath,
+    mime: file.mimetype,
+    hash: file.hash,
+    source: file.source,
+    index,
+  }));
+}
+
+async function findDuplicates(photos, identifier, prop = null) {
+  const duplicates = await knex('media')
+    .whereIn(identifier, photos.flat().map((photo) => {
+      if (prop) return photo[prop];
+      if (photo.src) return photo.src;
+
+      return photo;
+    }));
+
+  const duplicateLookup = new Set(duplicates.map(photo => photo[prop || identifier]));
+
+  const originals = photos.filter((source) => {
+    if (Array.isArray(source)) {
+      return !source.some(sourceX => duplicateLookup.has((prop && sourceX[prop]) || (sourceX.src && sourceX)));
+    }
+
+    return !duplicateLookup.has((prop && source[prop]) || (source.src && source));
+  });
+
+  return [duplicates, originals];
+}
+
+async function extractPhoto(source) {
+  const res = await bhttp.get(source.src);
+
+  if (res.statusCode === 200) {
+    const { q } = ex(res.body.toString());
+
+    return source.extract(q);
+  }
+
+  return null;
+}
+
+async function fetchPhoto(photoUrl, index, label, attempt = 1) {
+  if (photoUrl.src && photoUrl.extract) {
+    // source links to page containing a (presumably) tokenized photo
+    const photo = await extractPhoto(photoUrl);
+
+    return fetchPhoto(photo, index, label);
+  }
+
+  if (Array.isArray(photoUrl)) {
+    return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
+      const photo = await fetchPhoto(url, index, label);
+
+      if (photo) {
+        return photo;
+      }
+
+      throw new Error('Photo not available');
+    }), Promise.reject(new Error()));
+  }
+
+  try {
+    const { pathname } = new URL(photoUrl);
+    const res = await bhttp.get(photoUrl);
+
+    if (res.statusCode === 200) {
+      const mimetype = mime.getType(pathname);
+      const extension = mime.getExtension(mimetype);
+      const hash = getHash(res.body);
+      const entropy = await getEntropy(res.body);
+
+      return {
+        photo: res.body,
+        mimetype,
+        extension,
+        hash,
+        entropy,
+        source: photoUrl,
+      };
+    }
+
+    throw new Error(`Response ${res.statusCode} not OK`);
+  } catch (error) {
+    logger.warn(`Failed attempt ${attempt}/3 to fetch photo ${index + 1} for ${label} (${photoUrl}): ${error}`);
+
+    if (attempt < 3) {
+      await Promise.delay(5000);
+      return fetchPhoto(photoUrl, index, label, attempt + 1);
+    }
+
+    return null;
+  }
+}
+
+async function savePhotos(files, {
+  domain = 'release',
+  subpath,
+  role = 'photo',
+  naming = 'index',
+}) {
+  return Promise.map(files, async (file, index) => {
+    try {
+      const timestamp = new Date().getTime();
+      const thumbnail = await createThumbnail(file.photo);
+
+      const filename = naming === 'index'
+        ? `${file.role || role}${index + 1}`
+        : `${timestamp + index}`;
+
+      const filepath = path.join(`${domain}s`, subpath, `${filename}.${file.extension}`);
+      const thumbpath = path.join(`${domain}s`, subpath, `${filename}_thumb.${file.extension}`);
+
+      await Promise.all([
+        fs.writeFile(path.join(config.media.path, filepath), file.photo),
+        fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
+      ]);
+
+      return {
+        ...file,
+        thumbnail,
+        filepath,
+        thumbpath,
+      };
+    } catch (error) {
+      logger.error(`Failed to store ${domain} ${role} to ${subpath}: ${error.message}`);
+      return null;
+    }
+  });
+}
+
+async function storePhotos(photos, {
+  domain = 'release',
+  role = 'photo',
+  naming = 'index',
+  targetId,
+  subpath,
+  primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
+  entropyFilter = 2.5, // filter out fallback avatars and other generic clipart
+}, label) {
+  if (!photos || photos.length === 0) {
+    logger.info(`No ${role}s available for ${label}`);
+    return;
+  }
+
+  const pluckedPhotos = pluckPhotos(Array.from(new Set(photos))); // pre-filter link duplicates, limit total per configuration
+  const [sourceDuplicates, sourceOriginals] = await findDuplicates(pluckedPhotos, 'source', null, label);
+
+  logger.info(`Fetching ${sourceOriginals.length} new ${role}s, ${sourceDuplicates.length} already present by source for ${label}`);
+
+  const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
+    concurrency: 10,
+  }).filter(photo => photo && photo.entropy > entropyFilter);
+
+  const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
+  const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);
+
+  logger.info(`Saving ${hashOriginals.length} new ${role}s, ${hashDuplicates.length} already present by hash for ${label}`);
+
+  const savedPhotos = await savePhotos(hashOriginals, {
+    domain,
+    role,
+    targetId,
+    subpath,
+    naming,
+  });
+
+  const curatedPhotoEntries = curatePhotoEntries(savedPhotos.filter(Boolean), domain, role, targetId);
+
+  const newPhotos = await knex('media').insert(curatedPhotoEntries).returning('*');
+  const photoEntries = Array.isArray(newPhotos)
+    ? [...sourceDuplicates, ...hashDuplicates, ...newPhotos]
+    : [...sourceDuplicates, ...hashDuplicates];
+
+  const photoAssociations = photoEntries
+    .map(photoEntry => ({
+      [`${domain}_id`]: targetId,
+      media_id: photoEntry.id,
+    }));
+
+  if (primaryRole) {
+    // store one photo as a 'primary' photo, such as an avatar or cover
+    const primaryPhoto = await knex(`${domain}s_${primaryRole}s`)
+      .where(`${domain}_id`, targetId)
+      .first();
+
+    if (primaryPhoto) {
+      const remainingAssociations = photoAssociations.filter(association => association.media_id !== primaryPhoto.media_id);
+
+      await upsert(`${domain}s_${role}s`, remainingAssociations, [`${domain}_id`, 'media_id']);
+      return;
+    }
+
+    await Promise.all([
+      upsert(`${domain}s_${primaryRole}s`, photoAssociations.slice(0, 1), [`${domain}_id`, 'media_id']),
+      upsert(`${domain}s_${role}s`, photoAssociations.slice(1), [`${domain}_id`, 'media_id']),
+    ]);
+
+    return;
+  }
+
+  await upsert(`${domain}s_${role}s`, photoAssociations, [`${domain}_id`, 'media_id']);
+}
+
+/*
+async function storeReleasePhotos(releases, label) {
+  const sources = releases.map(release => pluckPhotos(release.photos)).flat();
+  const uniqueSources = Array.from(new Set(sources));
+
+  const [sourceDuplicates, sourceOriginals] = await findDuplicates(uniqueSources, 'source', null, label);
+
+  const metaFiles = await Promise.map(
+    sourceOriginals,
+    async (photoUrl, index) => fetchPhoto(photoUrl, index, label),
+    { concurrency: 10 },
+  )
+    .filter(photo => photo);
+
+  const hashUniques = Object.values(metaFiles.reduce((acc, file) => {
+    if (!acc[file.hash]) acc[file.hash] = file;
+
+    return acc;
+  }, {}));
+
+  const [hashDuplicates, hashOriginals] = await findDuplicates(hashUniques, 'hash', 'hash', label);
+
+  const sourceHashes = metaFiles.concat(sourceDuplicates).reduce((acc, file) => {
+    acc[file.source] = file.hash;
+    return acc;
+  }, {});
+
+  const associations = releases.map(release => release.photos.map(source => [release.id, sourceHashes[source]])).flat();
+
+  console.log(associations);
+}
+*/
+
+async function storeTrailer(trailers, {
+  domain = 'releases',
+  role = 'trailer',
+  targetId,
+  subpath,
+}, label) {
+  // support scrapers supplying multiple qualities
+  const trailer = Array.isArray(trailers)
+    ? trailers.find(trailerX => config.media.trailerQuality.includes(trailerX.quality)) || trailers[0]
+    : trailers;
+
+  if (!trailer || !trailer.src) {
+    logger.info(`No ${role} available for ${label}`);
+    return;
+  }
+
+  const [sourceDuplicates, sourceOriginals] = await findDuplicates([trailer], 'source', 'src', label);
+
+  const metaFiles = await Promise.map(sourceOriginals, async (trailerX) => {
+    const { pathname } = new URL(trailerX.src);
+    const mimetype = trailerX.type || mime.getType(pathname);
+
+    const res = await bhttp.get(trailerX.src);
+    const hash = getHash(res.body);
+    const filepath = path.join(domain, subpath, `${role}${trailerX.quality ? `_${trailerX.quality}` : ''}.${mime.getExtension(mimetype)}`);
+
+    return {
+      trailer: res.body,
+      path: filepath,
+      mime: mimetype,
+      source: trailerX.src,
+      quality: trailerX.quality || null,
+      hash,
+    };
+  });
+
+  const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);
+
+  const newTrailers = await knex('media')
+    .insert(hashOriginals.map(trailerX => ({
+      path: trailerX.path,
+      mime: trailerX.mime,
+      source: trailerX.source,
+      quality: trailerX.quality,
+      hash: trailerX.hash,
+      type: role,
+    })))
+    .returning('*');
+
+  await Promise.all(hashOriginals.map(trailerX => fs.writeFile(path.join(config.media.path, trailerX.path), trailerX.trailer)));
+
+  const trailerEntries = Array.isArray(newTrailers)
+    ? [...sourceDuplicates, ...hashDuplicates, ...newTrailers]
+    : [...sourceDuplicates, ...hashDuplicates];
+
+  await upsert(`releases_${role}s`, trailerEntries.map(trailerEntry => ({
+    release_id: targetId,
+    media_id: trailerEntry.id,
+  })), ['release_id', 'media_id']);
+}
+
+module.exports = {
+  createMediaDirectory,
+  storePhotos,
+  // storeReleasePhotos,
+  storeTrailer,
+};
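The new file above (required as './media_legacy', so presumably src/media_legacy.js) is a verbatim copy of the old module, kept so actors.js and scrape-releases.js continue working against the old storePhotos/storeTrailer API while media.js is rewritten around storeReleaseMedia. A usage sketch of the legacy entry point with assumed argument values; the real call sites live in actors.js and scrape-releases.js and are not shown in full here:

// Assumed example values; domain 'actor' implies association tables
// like actors_photos and actors_avatars, per the template strings above.
await storePhotos(actor.photos, {
  domain: 'actor',
  role: 'photo',
  targetId: actor.id,
  subpath: actor.slug,
  primaryRole: 'avatar', // first stored photo becomes the avatar if none exists
}, actor.name);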
@@ -10,12 +10,20 @@ const argv = require('./argv');
 const whereOr = require('./utils/where-or');
 const { associateTags } = require('./tags');
 const { associateActors, scrapeBasicActors } = require('./actors');
+/*
 const {
   createMediaDirectory,
   storePhotos,
   // storeReleasePhotos,
   storeTrailer,
+  storeReleaseMedia,
 } = require('./media');
+*/
+const {
+  createMediaDirectory,
+  storePhotos,
+  storeTrailer,
+} = require('./media_legacy');
 const { fetchSites, findSiteByUrl } = require('./sites');
 const slugify = require('./utils/slugify');
 

@@ -331,6 +339,8 @@ function accumulateMovies(releases) {
 async function storeReleaseAssets(releases) {
   // await storeReleasePhotos(releases);
+
+  // return storeReleaseMedia(releases);
 
   await Promise.map(releases, async (release) => {
     const subpath = `${release.site.network.slug}/${release.site.slug}/${release.id}/`;
     const identifier = `"${release.title}" (${release.id})`;
@@ -8,7 +8,7 @@ function scrapeLatest(html, site) {
   const { document } = new JSDOM(html).window;
   const { origin } = new URL(site.url);
 
-  const videos = document.querySelectorAll('.video-releases-list').slice(-1)[0];
+  const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
 
   return Array.from(videos.querySelectorAll('.card'), (scene) => {
     const release = { site };
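The actual Porn Pros fix: document.querySelectorAll returns a NodeList, which has forEach but no slice, so the old line threw a TypeError at runtime. Converting to a real array first restores the intended "take the last release list" behavior:

// NodeList -> Array, after which Array methods are available.
const lists = Array.from(document.querySelectorAll('.video-releases-list'));
const latest = lists.slice(-1)[0]; // last matching container on the page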
@@ -0,0 +1,37 @@
+'use strict';
+
+const Promise = require('bluebird');
+const bhttp = require('bhttp');
+const fs = require('fs-extra');
+const knex = require('../knex');
+
+async function init() {
+  const sites = await knex('sites')
+    .select('networks.name', 'sites.slug')
+    .join('networks', 'networks.id', 'sites.network_id')
+    .where('networks.slug', 'score');
+
+  await Promise.map(sites, async (site) => {
+    const url = `https://cdn77.scoreuniverse.com/${site.slug}/images/logo.png`;
+
+    console.log(url);
+
+    const res = await bhttp.get(url, {
+      responseTimeout: 5000,
+    });
+
+    if (res.statusCode === 200) {
+      console.log(`Saving logo for ${site.slug}`);
+
+      await fs.writeFile(`./score/${site.slug}.png`, res.body);
+    }
+
+    console.log(`No logo found for ${site.slug}`);
+  }, {
+    concurrency: 10,
+  });
+
+  knex.destroy();
+}
+
+init();
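One quirk worth noting in this one-off logo fetcher (its path is not shown in the diff): the success branch does not return, so "No logo found" is logged even after a successful save. A suggested adjustment, not part of the commit, should the log output ever matter:

if (res.statusCode === 200) {
  console.log(`Saving logo for ${site.slug}`);
  await fs.writeFile(`./score/${site.slug}.png`, res.body);
  return; // suggested: skip the "No logo found" log on success
}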