const Promise = require('bluebird'); const casual = require('casual'); const fs = require('fs-extra'); // const Markov = require('markov-strings').default; const nanoid = require('nanoid'); const capitalize = require('../src/utils/capitalize'); const slugify = require('../src/utils/slugify'); const chunk = require('../src/utils/chunk'); const n = 100000; async function updateReleasesSearch(knex) { const documents = await knex.raw(` SELECT releases.id as release_id, to_tsvector( releases.title || ' ' || sites.name || ' ' || sites.slug || ' ' || replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' || string_agg(actors.name, ' ') || ' ' || string_agg(tags.name, ' ') ) as document FROM releases LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN sites ON releases.site_id = sites.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN tags ON local_tags.tag_id = tags.id GROUP BY releases.id, sites.name, sites.slug; `); const query = knex('releases_search').insert(documents.rows).toString(); return knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`); } exports.seed = async knex => Promise.resolve() .then(async () => { const source = await fs.readFile('./assets/titles/titles', 'utf8'); const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean); /* const markov = new Markov(titles, { maxLength: 100, minWords: 4, stateSize: 2, }); markov.buildCorpus(); const attempts = await Promise.map(Array.from({ length: n * 2 }), async (value, index) => { try { const title = await markov.generateAsync({ maxTries: 100, prng: Math.random, filter: result => result.score >= 10 && result.refs.length > 3 && !result.refs.map(ref => ref.string.trim()).includes(result.string.trim()), }); const done = Math.round(((index + 1) / (n * 2)) * 100).toString().padStart(3, ' '); console.log(`${done}% Generated title ${index + 1}/${n * 2}: ${title.string}`); console.log(title.refs.map(ref => ref.string)); return title; } catch (error) { console.log(error.message); return null; } }, { concurrency: 10 }); const results = attempts.filter(Boolean).map(result => result.string); console.log(results.join('\n')); return results; */ return titles; }) .then(async (titles) => { const [sites, tags, media] = await Promise.all([ knex('sites').select('*'), knex('tags').select('*').where('alias_for', null), knex('media').select('*'), ]); console.log('sites', sites.length); console.time('releases'); const releases = Array.from({ length: n }, () => { const title = casual.random_element(titles); const site = casual.random_value(sites); return { entry_id: nanoid(), title, slug: slugify(title, { limit: 50 }), site_id: site.id, date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000), batch: 'dummy', }; }); console.timeEnd('releases'); console.time('actors'); const actors = Array.from({ length: Math.round(n / 20) }, () => { const name = capitalize(casual.full_name); const slug = slugify(name); return { name, slug, gender: casual.random_element(['male', 'female']), }; }); console.timeEnd('actors'); const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {})); const releaseIds = await Promise.map(chunk(releases, 100), async releasesChunk => knex('releases').insert(releasesChunk).returning('id'), { concurrency: 1 }); const actorIds = await Promise.map(chunk(uniqueActors, 100), async actorsChunk => knex('actors').insert(actorsChunk).returning('id'), { concurrency: 1 }); console.log('ids', releaseIds.length, actorIds.length); const actorAssociations = releaseIds.map((releaseId) => { const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds)); return Array.from(new Set(releaseActorIds)).map(actorId => ({ release_id: releaseId, actor_id: actorId })); }).flat(); const tagAssociations = releaseIds.map((releaseId) => { const releaseTags = Array.from({ length: Math.floor(Math.random() * 20) }, () => casual.random_value(tags)); return Array.from(new Set(releaseTags)).map(tag => ({ release_id: releaseId, tag_id: tag.id })); }).flat(); const posterAssociations = releaseIds.map(releaseId => ({ release_id: releaseId, media_id: casual.random_value(media).id, })); console.log('associations', actorAssociations.length, tagAssociations.length, posterAssociations.length); await Promise.all(chunk(actorAssociations, 10).map(async associations => knex('releases_actors').insert(associations))); await Promise.all(chunk(tagAssociations, 10).map(async associations => knex('releases_tags').insert(associations))); await Promise.all(chunk(posterAssociations, 10).map(async associations => knex('releases_posters').insert(associations))); await updateReleasesSearch(knex); });