Ignoring undefined video entropy.

ThePendulum 2020-02-28 03:56:58 +01:00
parent 1f5b935beb
commit f1f33080f6
9 changed files with 9436 additions and 7545 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -300,7 +300,7 @@ exports.up = knex => Promise.resolve()
table.string('url');
table.string('platform');
-table.integer('actor_id', 8)
+table.integer('actor_id', 12)
.notNullable()
.references('id')
.inTable('actors');
@@ -369,7 +369,7 @@ exports.up = knex => Promise.resolve()
.references('id')
.inTable('releases');
-table.integer('actor_id', 8)
+table.integer('actor_id', 12)
.notNullable()
.references('id')
.inTable('actors');
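Note: both migration hunks widen the length argument on the actor_id foreign key. A minimal sketch of the resulting column definition (table name hypothetical); on PostgreSQL the length argument to integer is ignored and a plain integer is emitted, so the change likely only matters on dialects with integer display widths:

exports.up = knex => knex.schema.createTable('actors_profiles', (table) => {
  // 12 is a display-width hint; PostgreSQL ignores it entirely
  table.integer('actor_id', 12)
    .notNullable()
    .references('id')
    .inTable('actors');
});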

package-lock.json (generated)

@@ -6927,14 +6927,6 @@
"object-visit": "^1.0.0"
}
},
"markov-strings": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/markov-strings/-/markov-strings-2.1.0.tgz",
"integrity": "sha512-bJDOilLc7fUllWmGj0kZ6sJ0vAnP4IOkZ/KfYSduhKmKwW3iljvs8ffn2JsozZysmYvNkLPZLIAeoytfV7Cz9w==",
"requires": {
"lodash": "^4.17.14"
}
},
"md5.js": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",


@@ -79,6 +79,7 @@
"cheerio": "^1.0.0-rc.3",
"cli-confirm": "^1.0.1",
"config": "^3.2.5",
"csv-stringify": "^5.3.6",
"dayjs": "^1.8.21",
"express": "^4.17.1",
"express-promise-router": "^3.0.3",
@@ -91,7 +92,6 @@
"knex": "^0.20.10",
"knex-migrate": "^1.7.4",
"longjohn": "^0.2.12",
"markov-strings": "^2.1.0",
"mime": "^2.4.4",
"moment": "^2.24.0",
"nanoid": "^2.1.11",


@@ -1,12 +1,12 @@
const Promise = require('bluebird');
const casual = require('casual');
const fs = require('fs-extra');
// const Markov = require('markov-strings').default;
const nanoid = require('nanoid');
+const stringify = Promise.promisify(require('csv-stringify'));
const path = require('path');
const capitalize = require('../src/utils/capitalize');
const slugify = require('../src/utils/slugify');
const chunk = require('../src/utils/chunk');
const n = 100000;
@@ -40,42 +40,6 @@ exports.seed = async knex => Promise.resolve()
const source = await fs.readFile('./assets/titles/titles', 'utf8');
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
-/*
-const markov = new Markov(titles, {
-  maxLength: 100,
-  minWords: 4,
-  stateSize: 2,
-});
-
-markov.buildCorpus();
-
-const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => {
-  try {
-    const title = await markov.generateAsync({
-      maxTries: 100,
-      prng: Math.random,
-      filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()),
-    });
-
-    const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' ');
-
-    console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`);
-    console.log(title.refs.map(ref => ref.string));
-
-    return title;
-  } catch (error) {
-    console.log(error.message);
-    return null;
-  }
-}, { concurrency: 10 });
-
-const results = attempts.filter(Boolean).map(result => result.string);
-
-console.log(results.join('\n'));
-
-return results;
-*/
return titles;
})
.then(async (titles) => {
@@ -86,7 +50,6 @@ exports.seed = async knex => Promise.resolve()
]);
console.log('sites', sites.length);
console.time('releases');
const releases = Array.from({ length: n }, () => {
@@ -98,7 +61,7 @@ exports.seed = async knex => Promise.resolve()
title,
slug: slugify(title, { limit: 50 }),
site_id: site.id,
-date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000),
+date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000).toISOString(),
batch: 'dummy',
};
});
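Note: the release date is now serialized up front, presumably because the rows are destined for a CSV file (see the next hunks) rather than the knex driver, so the seed controls the timestamp format instead of relying on csv-stringify's default Date casting. Hypothetical illustration:

new Date(0).toISOString(); // '1970-01-01T00:00:00.000Z', parseable by PostgreSQL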
@@ -121,10 +84,12 @@ exports.seed = async knex => Promise.resolve()
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
-const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 });
-const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 });
console.log('unique actors', uniqueActors.length);
-console.log('ids', releaseIds.length, actorIds.length);
+const releaseIds = releases.map((release, index) => index + 1);
+const actorIds = uniqueActors.map((actor, index) => index + 1);
+console.log('ids', releases.length, actorIds.length);
const actorAssociations = releaseIds.map((releaseId) => {
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
@@ -143,11 +108,29 @@ exports.seed = async knex => Promise.resolve()
media_id: casual.random_value(media).id,
}));
console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length);
+const [releasesCsv, actorsCsv, releaseActorsCsv, releaseTagsCsv, releasePostersCsv] = await Promise.all([
+  stringify(releases, { headers: true }),
+  stringify(actors, { headers: true }),
+  stringify(actorAssociations, { headers: true }),
+  stringify(tagAssociations, { headers: true }),
+  stringify(posterAssociations, { headers: true }),
+]);
-await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations)));
-await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations)));
-await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations)));
+const releasesPath = path.join('/tmp', 'releases.csv');
+const actorsPath = path.join('/tmp', 'actors.csv');
+const releaseActorsPath = path.join('/tmp', 'releases_actors.csv');
+const releaseTagsPath = path.join('/tmp', 'releases_tags.csv');
+const releasePostersPath = path.join('/tmp', 'releases_posters.csv');
-await updateReleasesSearch(knex);
+await Promise.all([
+  fs.writeFile(releasesPath, releasesCsv),
+  fs.writeFile(actorsPath, actorsCsv),
+  fs.writeFile(releaseActorsPath, releaseActorsCsv),
+  fs.writeFile(releaseTagsPath, releaseTagsCsv),
+  fs.writeFile(releasePostersPath, releasePostersCsv),
+]);
+// console.log(releasesCsv);
+// await updateReleasesSearch(knex);
});
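Note: with the knex inserts removed, the seed now only writes CSVs, and the sequential ids (index + 1) assume the files are later bulk-loaded into empty tables whose serial keys start at 1. A hypothetical follow-up import, not part of this commit, using PostgreSQL COPY through a configured knex instance (COPY FROM a server-side file needs suitable privileges, and each column list must match the CSV's column order):

async function importDummyData(knex) {
  // parent tables first, join tables after
  await knex.raw(`COPY releases (title, slug, site_id, date, batch)
    FROM '/tmp/releases.csv' WITH (FORMAT csv, HEADER true)`);
  // actors.csv loads the same way, then the join tables:
  await knex.raw(`COPY releases_actors (release_id, actor_id)
    FROM '/tmp/releases_actors.csv' WITH (FORMAT csv, HEADER true)`);
}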


@@ -140,10 +140,10 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
mimetype,
extension,
hash,
-entropy,
-size,
-width,
-height,
+entropy: entropy || null,
+size: size || null,
+width: width || null,
+height: height || null,
quality: source.quality || null,
source: originalSource?.src || originalSource || source.src || source,
scraper: source.scraper,
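Note: this is the hunk the commit message refers to: metadata fields that stay undefined for video files (entropy is an image-analysis metric) are coalesced to null, so the database receives NULL instead of an undefined binding. Be aware that || also nullifies legitimate zeros, which nullish coalescing would preserve:

const entropy = undefined;
entropy || null; // null
0 || null;       // null, a genuine 0 is lost
0 ?? null;       // 0, only null/undefined fall through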


@@ -391,7 +391,7 @@ async function updateReleasesSearch(releaseIds) {
GROUP BY releases.id, sites.name, sites.slug;
`, [releaseIds]);
-if (documents.row.length > 0) {
+if (documents.row?.length > 0) {
const query = knex('releases_search').insert(documents.rows).toString();
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
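Note: the optional chain turns a missing row property into a skipped branch instead of a TypeError, since undefined > 0 is false. (The next line reads documents.rows, so row here may itself be a typo; on a node-postgres result, which exposes rows, row?.length would always be undefined and the guard would never pass.)

const documents = { rows: [] };
documents.row?.length > 0;        // undefined > 0 → false, no throw
(documents.row || []).length > 0; // equivalent pre-ES2020 guard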


@@ -1,12 +1,12 @@
'use strict';
-// const Promise = require('bluebird');
+const Promise = require('bluebird');
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
-const logger = require('../logger');
+const logger = require('../logger')(__filename);
const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
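Note: the logger import now calls a factory with the caller's filename, a common pattern for per-file log labels. A minimal sketch of what such a module might look like (hypothetical, not this project's actual logger):

const path = require('path');

// logger.js: export a factory so each module gets a labelled logger
module.exports = filename => {
  const label = path.basename(filename, '.js');

  return {
    info: (...args) => console.log(`[${label}]`, ...args),
    error: (...args) => console.error(`[${label}]`, ...args),
  };
};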