forked from DebaucheryLibrarian/traxxx
Added fake data and Markov experiments.
This commit is contained in:
@@ -1,11 +1,16 @@
|
||||
const txtgen = require('txtgen');
|
||||
const Promise = require('bluebird');
|
||||
const casual = require('casual');
|
||||
const fs = require('fs-extra');
|
||||
// const Markov = require('markov-strings').default;
|
||||
const nanoid = require('nanoid');
|
||||
|
||||
const capitalize = require('../src/utils/capitalize');
|
||||
const slugify = require('../src/utils/slugify');
|
||||
const chunk = require('../src/utils/chunk');
|
||||
|
||||
async function updateReleasesSearch(releaseIds, knex) {
|
||||
const n = 100000;
|
||||
|
||||
async function updateReleasesSearch(knex) {
|
||||
const documents = await knex.raw(`
|
||||
SELECT
|
||||
releases.id as release_id,
|
||||
@@ -19,10 +24,10 @@ async function updateReleasesSearch(releaseIds, knex) {
|
||||
) as document
|
||||
FROM releases
|
||||
LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
|
||||
JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
||||
JOIN sites ON releases.site_id = sites.id
|
||||
LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
|
||||
LEFT JOIN sites ON releases.site_id = sites.id
|
||||
LEFT JOIN actors ON local_actors.actor_id = actors.id
|
||||
JOIN tags ON local_tags.tag_id = tags.id
|
||||
LEFT JOIN tags ON local_tags.tag_id = tags.id
|
||||
GROUP BY releases.id, sites.name, sites.slug;
|
||||
`);
|
||||
|
||||
@@ -32,14 +37,60 @@ async function updateReleasesSearch(releaseIds, knex) {
|
||||
|
||||
exports.seed = async knex => Promise.resolve()
|
||||
.then(async () => {
|
||||
const source = await fs.readFile('./assets/titles/titles', 'utf8');
|
||||
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
|
||||
|
||||
/*
|
||||
const markov = new Markov(titles, {
|
||||
maxLength: 100,
|
||||
minWords: 4,
|
||||
stateSize: 2,
|
||||
});
|
||||
|
||||
markov.buildCorpus();
|
||||
|
||||
const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => {
|
||||
try {
|
||||
const title = await markov.generateAsync({
|
||||
maxTries: 100,
|
||||
prng: Math.random,
|
||||
filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()),
|
||||
});
|
||||
|
||||
const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' ');
|
||||
console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`);
|
||||
console.log(title.refs.map(ref => ref.string));
|
||||
|
||||
return title;
|
||||
} catch (error) {
|
||||
console.log(error.message);
|
||||
|
||||
return null;
|
||||
}
|
||||
}, { concurrency: 10 });
|
||||
|
||||
const results = attempts.filter(Boolean).map(result => result.string);
|
||||
|
||||
console.log(results.join('\n'));
|
||||
|
||||
return results;
|
||||
*/
|
||||
|
||||
return titles;
|
||||
})
|
||||
.then(async (titles) => {
|
||||
const [sites, tags, media] = await Promise.all([
|
||||
knex('sites').select('*'),
|
||||
knex('tags').select('*').where('alias_for', null),
|
||||
knex('media').select('*'),
|
||||
]);
|
||||
|
||||
const releases = Array.from({ length: 1000 }, () => {
|
||||
const title = txtgen.sentence();
|
||||
console.log('sites', sites.length);
|
||||
|
||||
console.time('releases');
|
||||
|
||||
const releases = Array.from({ length: n }, () => {
|
||||
const title = casual.random_element(titles);
|
||||
const site = casual.random_value(sites);
|
||||
|
||||
return {
|
||||
@@ -52,7 +103,10 @@ exports.seed = async knex => Promise.resolve()
|
||||
};
|
||||
});
|
||||
|
||||
const actors = Array.from({ length: 100 }, () => {
|
||||
console.timeEnd('releases');
|
||||
console.time('actors');
|
||||
|
||||
const actors = Array.from({ length: Math.round(n / 20) }, () => {
|
||||
const name = capitalize(casual.full_name);
|
||||
const slug = slugify(name);
|
||||
|
||||
@@ -63,10 +117,14 @@ exports.seed = async knex => Promise.resolve()
|
||||
};
|
||||
});
|
||||
|
||||
console.timeEnd('actors');
|
||||
|
||||
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
|
||||
|
||||
const releaseIds = await knex('releases').insert(releases).returning('id');
|
||||
const actorIds = await knex('actors').insert(uniqueActors).returning('id');
|
||||
const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 });
|
||||
const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 });
|
||||
|
||||
console.log('ids', releaseIds.length, actorIds.length);
|
||||
|
||||
const actorAssociations = releaseIds.map((releaseId) => {
|
||||
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
|
||||
@@ -85,11 +143,11 @@ exports.seed = async knex => Promise.resolve()
|
||||
media_id: casual.random_value(media).id,
|
||||
}));
|
||||
|
||||
await Promise.all([
|
||||
knex('releases_actors').insert(actorAssociations),
|
||||
knex('releases_tags').insert(tagAssociations),
|
||||
knex('releases_posters').insert(posterAssociations),
|
||||
]);
|
||||
console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length);
|
||||
|
||||
await updateReleasesSearch(releaseIds, knex);
|
||||
await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations)));
|
||||
await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations)));
|
||||
await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations)));
|
||||
|
||||
await updateReleasesSearch(knex);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user