Ignoring undefined video entropy.
This commit is contained in:
parent
1f5b935beb
commit
f1f33080f6
10268
assets/titles/titles
10268
assets/titles/titles
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -300,7 +300,7 @@ exports.up = knex => Promise.resolve()
|
|||
table.string('url');
|
||||
table.string('platform');
|
||||
|
||||
table.integer('actor_id', 8)
|
||||
table.integer('actor_id', 12)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('actors');
|
||||
|
@ -369,7 +369,7 @@ exports.up = knex => Promise.resolve()
|
|||
.references('id')
|
||||
.inTable('releases');
|
||||
|
||||
table.integer('actor_id', 8)
|
||||
table.integer('actor_id', 12)
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('actors');
|
||||
|
|
|
@ -6927,14 +6927,6 @@
|
|||
"object-visit": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"markov-strings": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/markov-strings/-/markov-strings-2.1.0.tgz",
|
||||
"integrity": "sha512-bJDOilLc7fUllWmGj0kZ6sJ0vAnP4IOkZ/KfYSduhKmKwW3iljvs8ffn2JsozZysmYvNkLPZLIAeoytfV7Cz9w==",
|
||||
"requires": {
|
||||
"lodash": "^4.17.14"
|
||||
}
|
||||
},
|
||||
"md5.js": {
|
||||
"version": "1.3.5",
|
||||
"resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",
|
||||
|
|
|
@ -79,6 +79,7 @@
|
|||
"cheerio": "^1.0.0-rc.3",
|
||||
"cli-confirm": "^1.0.1",
|
||||
"config": "^3.2.5",
|
||||
"csv-stringify": "^5.3.6",
|
||||
"dayjs": "^1.8.21",
|
||||
"express": "^4.17.1",
|
||||
"express-promise-router": "^3.0.3",
|
||||
|
@ -91,7 +92,6 @@
|
|||
"knex": "^0.20.10",
|
||||
"knex-migrate": "^1.7.4",
|
||||
"longjohn": "^0.2.12",
|
||||
"markov-strings": "^2.1.0",
|
||||
"mime": "^2.4.4",
|
||||
"moment": "^2.24.0",
|
||||
"nanoid": "^2.1.11",
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
const Promise = require('bluebird');
|
||||
const casual = require('casual');
|
||||
const fs = require('fs-extra');
|
||||
// const Markov = require('markov-strings').default;
|
||||
const nanoid = require('nanoid');
|
||||
const stringify = Promise.promisify(require('csv-stringify'));
|
||||
const path = require('path');
|
||||
|
||||
const capitalize = require('../src/utils/capitalize');
|
||||
const slugify = require('../src/utils/slugify');
|
||||
const chunk = require('../src/utils/chunk');
|
||||
|
||||
const n = 100000;
|
||||
|
||||
|
@ -40,42 +40,6 @@ exports.seed = async knex => Promise.resolve()
|
|||
const source = await fs.readFile('./assets/titles/titles', 'utf8');
|
||||
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
|
||||
|
||||
/*
|
||||
const markov = new Markov(titles, {
|
||||
maxLength: 100,
|
||||
minWords: 4,
|
||||
stateSize: 2,
|
||||
});
|
||||
|
||||
markov.buildCorpus();
|
||||
|
||||
const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => {
|
||||
try {
|
||||
const title = await markov.generateAsync({
|
||||
maxTries: 100,
|
||||
prng: Math.random,
|
||||
filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()),
|
||||
});
|
||||
|
||||
const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' ');
|
||||
console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`);
|
||||
console.log(title.refs.map(ref => ref.string));
|
||||
|
||||
return title;
|
||||
} catch (error) {
|
||||
console.log(error.message);
|
||||
|
||||
return null;
|
||||
}
|
||||
}, { concurrency: 10 });
|
||||
|
||||
const results = attempts.filter(Boolean).map(result => result.string);
|
||||
|
||||
console.log(results.join('\n'));
|
||||
|
||||
return results;
|
||||
*/
|
||||
|
||||
return titles;
|
||||
})
|
||||
.then(async (titles) => {
|
||||
|
@ -86,7 +50,6 @@ exports.seed = async knex => Promise.resolve()
|
|||
]);
|
||||
|
||||
console.log('sites', sites.length);
|
||||
|
||||
console.time('releases');
|
||||
|
||||
const releases = Array.from({ length: n }, () => {
|
||||
|
@ -98,7 +61,7 @@ exports.seed = async knex => Promise.resolve()
|
|||
title,
|
||||
slug: slugify(title, { limit: 50 }),
|
||||
site_id: site.id,
|
||||
date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000),
|
||||
date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000).toISOString(),
|
||||
batch: 'dummy',
|
||||
};
|
||||
});
|
||||
|
@ -121,10 +84,12 @@ exports.seed = async knex => Promise.resolve()
|
|||
|
||||
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
|
||||
|
||||
const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 });
|
||||
const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 });
|
||||
console.log('unique actors', uniqueActors.length);
|
||||
|
||||
console.log('ids', releaseIds.length, actorIds.length);
|
||||
const releaseIds = releases.map((release, index) => index + 1);
|
||||
const actorIds = uniqueActors.map((actor, index) => index + 1);
|
||||
|
||||
console.log('ids', releases.length, actorIds.length);
|
||||
|
||||
const actorAssociations = releaseIds.map((releaseId) => {
|
||||
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
|
||||
|
@ -143,11 +108,29 @@ exports.seed = async knex => Promise.resolve()
|
|||
media_id: casual.random_value(media).id,
|
||||
}));
|
||||
|
||||
console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length);
|
||||
const [releasesCsv, actorsCsv, releaseActorsCsv, releaseTagsCsv, releasePostersCsv] = await Promise.all([
|
||||
stringify(releases, { headers: true }),
|
||||
stringify(actors, { headers: true }),
|
||||
stringify(actorAssociations, { headers: true }),
|
||||
stringify(tagAssociations, { headers: true }),
|
||||
stringify(posterAssociations, { headers: true }),
|
||||
]);
|
||||
|
||||
await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations)));
|
||||
await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations)));
|
||||
await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations)));
|
||||
const releasesPath = path.join('/tmp', 'releases.csv');
|
||||
const actorsPath = path.join('/tmp', 'actors.csv');
|
||||
const releaseActorsPath = path.join('/tmp', 'releases_actors.csv');
|
||||
const releaseTagsPath = path.join('/tmp', 'releases_tags.csv');
|
||||
const releasePostersPath = path.join('/tmp', 'releases_posters.csv');
|
||||
|
||||
await updateReleasesSearch(knex);
|
||||
await Promise.all([
|
||||
fs.writeFile(releasesPath, releasesCsv),
|
||||
fs.writeFile(actorsPath, actorsCsv),
|
||||
fs.writeFile(releaseActorsPath, releaseActorsCsv),
|
||||
fs.writeFile(releaseTagsPath, releaseTagsCsv),
|
||||
fs.writeFile(releasePostersPath, releasePostersCsv),
|
||||
]);
|
||||
|
||||
// console.log(releasesCsv);
|
||||
|
||||
// await updateReleasesSearch(knex);
|
||||
});
|
||||
|
|
|
@ -140,10 +140,10 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
|||
mimetype,
|
||||
extension,
|
||||
hash,
|
||||
entropy,
|
||||
size,
|
||||
width,
|
||||
height,
|
||||
entropy: entropy || null,
|
||||
size: size || null,
|
||||
width: width || null,
|
||||
height: height || null,
|
||||
quality: source.quality || null,
|
||||
source: originalSource?.src || originalSource || source.src || source,
|
||||
scraper: source.scraper,
|
||||
|
|
|
@ -391,7 +391,7 @@ async function updateReleasesSearch(releaseIds) {
|
|||
GROUP BY releases.id, sites.name, sites.slug;
|
||||
`, [releaseIds]);
|
||||
|
||||
if (documents.row.length > 0) {
|
||||
if (documents.row?.length > 0) {
|
||||
const query = knex('releases_search').insert(documents.rows).toString();
|
||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||
}
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
'use strict';
|
||||
|
||||
// const Promise = require('bluebird');
|
||||
const Promise = require('bluebird');
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const logger = require('../logger');
|
||||
const logger = require('../logger')(__filename);
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
|
|
Loading…
Reference in New Issue