traxxx/seeds/ignore/99_fake.js

154 lines
5.9 KiB
JavaScript
Raw Normal View History

const Promise = require('bluebird');
2020-02-26 23:38:11 +00:00
const casual = require('casual');
const fs = require('fs-extra');
// const Markov = require('markov-strings').default;
2020-02-26 23:38:11 +00:00
const nanoid = require('nanoid');
const capitalize = require('../src/utils/capitalize');
const slugify = require('../src/utils/slugify');
const chunk = require('../src/utils/chunk');
2020-02-26 23:38:11 +00:00
// Number of fake releases the seed generates; the number of candidate fake actors is derived from it (n / 20).
const n = 100000;
/**
 * Rebuilds the full-text search document of every release and upserts it into `releases_search`.
 *
 * The document combines the release title, the site name and slug, the release date
 * (dashes replaced by spaces) and the names of all associated actors and tags.
 *
 * Every part is wrapped in COALESCE: in PostgreSQL, concatenating NULL with `||` yields NULL,
 * so a release without actors, tags, date or site would otherwise end up with a NULL document.
 * The aggregates use DISTINCT because joining actors and tags at the same time multiplies
 * the rows of a release (actors x tags), which would repeat every name in the document.
 *
 * @param {Function} knex - Knex instance, used both as a query builder and for raw SQL.
 * @returns {Promise<Object|null>} Result of the upsert query, or `null` when there are no releases to index.
 */
async function updateReleasesSearch(knex) {
  const documents = await knex.raw(`
    SELECT
      releases.id AS release_id,
      to_tsvector(
        COALESCE(releases.title, '') || ' ' ||
        COALESCE(sites.name, '') || ' ' ||
        COALESCE(sites.slug, '') || ' ' ||
        COALESCE(replace(CAST(releases.date AS VARCHAR), '-', ' '), '') || ' ' ||
        COALESCE(string_agg(DISTINCT actors.name, ' '), '') || ' ' ||
        COALESCE(string_agg(DISTINCT tags.name, ' '), '')
      ) AS document
    FROM releases
    LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id
    LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id
    LEFT JOIN sites ON releases.site_id = sites.id
    LEFT JOIN actors ON local_actors.actor_id = actors.id
    LEFT JOIN tags ON local_tags.tag_id = tags.id
    GROUP BY releases.id, sites.name, sites.slug;
  `);

  if (documents.rows.length === 0) {
    // Without rows there is no insert statement to append the ON CONFLICT clause to,
    // so the upsert below would be sent as invalid SQL.
    return null;
  }

  // The insert is compiled to a string so the upsert clause can be appended to it.
  const query = knex('releases_search').insert(documents.rows).toString();

  return knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`);
}
exports.seed = async knex => Promise.resolve()
.then(async () => {
const source = await fs.readFile('./assets/titles/titles', 'utf8');
const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean);
/*
const markov = new Markov(titles, {
maxLength: 100,
minWords: 4,
stateSize: 2,
});
markov.buildCorpus();
const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => {
try {
const title = await markov.generateAsync({
maxTries: 100,
prng: Math.random,
filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()),
});
const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' ');
console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`);
console.log(title.refs.map(ref => ref.string));
return title;
} catch (error) {
console.log(error.message);
return null;
}
}, { concurrency: 10 });
const results = attempts.filter(Boolean).map(result => result.string);
console.log(results.join('\n'));
return results;
*/
return titles;
})
.then(async (titles) => {
2020-02-26 23:38:11 +00:00
const [sites, tags, media] = await Promise.all([
knex('sites').select('*'),
knex('tags').select('*').where('alias_for', null),
knex('media').select('*'),
]);
console.log('sites', sites.length);
console.time('releases');
const releases = Array.from({ length: n }, () => {
const title = casual.random_element(titles);
2020-02-26 23:38:11 +00:00
const site = casual.random_value(sites);
return {
entry_id: nanoid(),
title,
slug: slugify(title, { limit: 50 }),
site_id: site.id,
date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000),
batch: 'dummy',
};
});
console.timeEnd('releases');
console.time('actors');
const actors = Array.from({ length: Math.round(n / 20) }, () => {
2020-02-26 23:38:11 +00:00
const name = capitalize(casual.full_name);
const slug = slugify(name);
return {
name,
slug,
gender: casual.random_element(['male', 'female']),
};
});
console.timeEnd('actors');
2020-02-26 23:38:11 +00:00
const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {}));
const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 });
const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 });
console.log('ids', releaseIds.length, actorIds.length);
2020-02-26 23:38:11 +00:00
const actorAssociations = releaseIds.map((releaseId) => {
const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds));
return Array.from(new Set(releaseActorIds)).map(actorId => ({ release_id: releaseId, actor_id: actorId }));
}).flat();
const tagAssociations = releaseIds.map((releaseId) => {
const releaseTags = Array.from({ length: Math.floor(Math.random() * 20) }, () => casual.random_value(tags));
return Array.from(new Set(releaseTags)).map(tag => ({ release_id: releaseId, tag_id: tag.id }));
}).flat();
const posterAssociations = releaseIds.map(releaseId => ({
release_id: releaseId,
media_id: casual.random_value(media).id,
}));
console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length);
await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations)));
await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations)));
await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations)));
2020-02-26 23:38:11 +00:00
await updateReleasesSearch(knex);
2020-02-26 23:38:11 +00:00
});