Ignoring undefined video entropy.
This commit is contained in:
parent
1f5b935beb
commit
f1f33080f6
10268
assets/titles/titles
10268
assets/titles/titles
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -300,7 +300,7 @@ exports.up = knex => Promise.resolve()
|
||||||
table.string('url');
|
table.string('url');
|
||||||
table.string('platform');
|
table.string('platform');
|
||||||
|
|
||||||
table.integer('actor_id', 8)
|
table.integer('actor_id', 12)
|
||||||
.notNullable()
|
.notNullable()
|
||||||
.references('id')
|
.references('id')
|
||||||
.inTable('actors');
|
.inTable('actors');
|
||||||
|
@ -369,7 +369,7 @@ exports.up = knex => Promise.resolve()
|
||||||
.references('id')
|
.references('id')
|
||||||
.inTable('releases');
|
.inTable('releases');
|
||||||
|
|
||||||
table.integer('actor_id', 8)
|
table.integer('actor_id', 12)
|
||||||
.notNullable()
|
.notNullable()
|
||||||
.references('id')
|
.references('id')
|
||||||
.inTable('actors');
|
.inTable('actors');
|
||||||
|
|
|
@ -6927,14 +6927,6 @@
|
||||||
"object-visit": "^1.0.0"
|
"object-visit": "^1.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"markov-strings": {
|
|
||||||
"version": "2.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/markov-strings/-/markov-strings-2.1.0.tgz",
|
|
||||||
"integrity": "sha512-bJDOilLc7fUllWmGj0kZ6sJ0vAnP4IOkZ/KfYSduhKmKwW3iljvs8ffn2JsozZysmYvNkLPZLIAeoytfV7Cz9w==",
|
|
||||||
"requires": {
|
|
||||||
"lodash": "^4.17.14"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"md5.js": {
|
"md5.js": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
"resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz",
|
||||||
|
|
|
@ -79,6 +79,7 @@
|
||||||
"cheerio": "^1.0.0-rc.3",
|
"cheerio": "^1.0.0-rc.3",
|
||||||
"cli-confirm": "^1.0.1",
|
"cli-confirm": "^1.0.1",
|
||||||
"config": "^3.2.5",
|
"config": "^3.2.5",
|
||||||
|
"csv-stringify": "^5.3.6",
|
||||||
"dayjs": "^1.8.21",
|
"dayjs": "^1.8.21",
|
||||||
"express": "^4.17.1",
|
"express": "^4.17.1",
|
||||||
"express-promise-router": "^3.0.3",
|
"express-promise-router": "^3.0.3",
|
||||||
|
@ -91,7 +92,6 @@
|
||||||
"knex": "^0.20.10",
|
"knex": "^0.20.10",
|
||||||
"knex-migrate": "^1.7.4",
|
"knex-migrate": "^1.7.4",
|
||||||
"longjohn": "^0.2.12",
|
"longjohn": "^0.2.12",
|
||||||
"markov-strings": "^2.1.0",
|
|
||||||
"mime": "^2.4.4",
|
"mime": "^2.4.4",
|
||||||
"moment": "^2.24.0",
|
"moment": "^2.24.0",
|
||||||
"nanoid": "^2.1.11",
|
"nanoid": "^2.1.11",
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const casual = require('casual');
|
const casual = require('casual');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
// const Markov = require('markov-strings').default;
|
|
||||||
const nanoid = require('nanoid');
|
const nanoid = require('nanoid');
|
||||||
|
const stringify = Promise.promisify(require('csv-stringify'));
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
const capitalize = require('../src/utils/capitalize');
|
const capitalize = require('../src/utils/capitalize');
|
||||||
const slugify = require('../src/utils/slugify');
|
const slugify = require('../src/utils/slugify');
|
||||||
const chunk = require('../src/utils/chunk');
|
|
||||||
|
|
||||||
const n = 100000;
|
const n = 100000;
|
||||||
|
|
||||||
|
@ -40,42 +40,6 @@ exports.seed = async knex => Promise.resolve()
|
||||||
const source = await fs.readFile('./assets/titles/titles', 'utf8');
|
const source = await fs.readFile('./assets/titles/titles', 'utf8');
|
||||||
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
|
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
|
||||||
|
|
||||||
/*
|
|
||||||
const markov = new Markov(titles, {
|
|
||||||
maxLength: 100,
|
|
||||||
minWords: 4,
|
|
||||||
stateSize: 2,
|
|
||||||
});
|
|
||||||
|
|
||||||
markov.buildCorpus();
|
|
||||||
|
|
||||||
const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => {
|
|
||||||
try {
|
|
||||||
const title = await markov.generateAsync({
|
|
||||||
maxTries: 100,
|
|
||||||
prng: Math.random,
|
|
||||||
filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()),
|
|
||||||
});
|
|
||||||
|
|
||||||
const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' ');
|
|
||||||
console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`);
|
|
||||||
console.log(title.refs.map(ref => ref.string));
|
|
||||||
|
|
||||||
return title;
|
|
||||||
} catch (error) {
|
|
||||||
console.log(error.message);
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}, { concurrency: 10 });
|
|
||||||
|
|
||||||
const results = attempts.filter(Boolean).map(result => result.string);
|
|
||||||
|
|
||||||
console.log(results.join('\n'));
|
|
||||||
|
|
||||||
return results;
|
|
||||||
*/
|
|
||||||
|
|
||||||
return titles;
|
return titles;
|
||||||
})
|
})
|
||||||
.then(async (titles) => {
|
.then(async (titles) => {
|
||||||
|
@ -86,7 +50,6 @@ exports.seed = async knex => Promise.resolve()
|
||||||
]);
|
]);
|
||||||
|
|
||||||
console.log('sites', sites.length);
|
console.log('sites', sites.length);
|
||||||
|
|
||||||
console.time('releases');
|
console.time('releases');
|
||||||
|
|
||||||
const releases = Array.from({ length: n }, () => {
|
const releases = Array.from({ length: n }, () => {
|
||||||
|
@ -98,7 +61,7 @@ exports.seed = async knex => Promise.resolve()
|
||||||
title,
|
title,
|
||||||
slug: slugify(title, { limit: 50 }),
|
slug: slugify(title, { limit: 50 }),
|
||||||
site_id: site.id,
|
site_id: site.id,
|
||||||
date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000),
|
date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000).toISOString(),
|
||||||
batch: 'dummy',
|
batch: 'dummy',
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
@ -121,10 +84,12 @@ exports.seed = async knex => Promise.resolve()
|
||||||
|
|
||||||
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
|
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
|
||||||
|
|
||||||
const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 });
|
console.log('unique actors', uniqueActors.length);
|
||||||
const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 });
|
|
||||||
|
|
||||||
console.log('ids', releaseIds.length, actorIds.length);
|
const releaseIds = releases.map((release, index) => index + 1);
|
||||||
|
const actorIds = uniqueActors.map((actor, index) => index + 1);
|
||||||
|
|
||||||
|
console.log('ids', releases.length, actorIds.length);
|
||||||
|
|
||||||
const actorAssociations = releaseIds.map((releaseId) => {
|
const actorAssociations = releaseIds.map((releaseId) => {
|
||||||
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
|
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
|
||||||
|
@ -143,11 +108,29 @@ exports.seed = async knex => Promise.resolve()
|
||||||
media_id: casual.random_value(media).id,
|
media_id: casual.random_value(media).id,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length);
|
const [releasesCsv, actorsCsv, releaseActorsCsv, releaseTagsCsv, releasePostersCsv] = await Promise.all([
|
||||||
|
stringify(releases, { headers: true }),
|
||||||
|
stringify(actors, { headers: true }),
|
||||||
|
stringify(actorAssociations, { headers: true }),
|
||||||
|
stringify(tagAssociations, { headers: true }),
|
||||||
|
stringify(posterAssociations, { headers: true }),
|
||||||
|
]);
|
||||||
|
|
||||||
await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations)));
|
const releasesPath = path.join('/tmp', 'releases.csv');
|
||||||
await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations)));
|
const actorsPath = path.join('/tmp', 'actors.csv');
|
||||||
await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations)));
|
const releaseActorsPath = path.join('/tmp', 'releases_actors.csv');
|
||||||
|
const releaseTagsPath = path.join('/tmp', 'releases_tags.csv');
|
||||||
|
const releasePostersPath = path.join('/tmp', 'releases_posters.csv');
|
||||||
|
|
||||||
await updateReleasesSearch(knex);
|
await Promise.all([
|
||||||
|
fs.writeFile(releasesPath, releasesCsv),
|
||||||
|
fs.writeFile(actorsPath, actorsCsv),
|
||||||
|
fs.writeFile(releaseActorsPath, releaseActorsCsv),
|
||||||
|
fs.writeFile(releaseTagsPath, releaseTagsCsv),
|
||||||
|
fs.writeFile(releasePostersPath, releasePostersCsv),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// console.log(releasesCsv);
|
||||||
|
|
||||||
|
// await updateReleasesSearch(knex);
|
||||||
});
|
});
|
||||||
|
|
|
@ -140,10 +140,10 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
||||||
mimetype,
|
mimetype,
|
||||||
extension,
|
extension,
|
||||||
hash,
|
hash,
|
||||||
entropy,
|
entropy: entropy || null,
|
||||||
size,
|
size: size || null,
|
||||||
width,
|
width: width || null,
|
||||||
height,
|
height: height || null,
|
||||||
quality: source.quality || null,
|
quality: source.quality || null,
|
||||||
source: originalSource?.src || originalSource || source.src || source,
|
source: originalSource?.src || originalSource || source.src || source,
|
||||||
scraper: source.scraper,
|
scraper: source.scraper,
|
||||||
|
|
|
@ -391,7 +391,7 @@ async function updateReleasesSearch(releaseIds) {
|
||||||
GROUP BY releases.id, sites.name, sites.slug;
|
GROUP BY releases.id, sites.name, sites.slug;
|
||||||
`, [releaseIds]);
|
`, [releaseIds]);
|
||||||
|
|
||||||
if (documents.row.length > 0) {
|
if (documents.row?.length > 0) {
|
||||||
const query = knex('releases_search').insert(documents.rows).toString();
|
const query = knex('releases_search').insert(documents.rows).toString();
|
||||||
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
await knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
// const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const bhttp = require('bhttp');
|
const bhttp = require('bhttp');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const logger = require('../logger');
|
const logger = require('../logger')(__filename);
|
||||||
const { heightToCm } = require('../utils/convert');
|
const { heightToCm } = require('../utils/convert');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue