const Promise = require('bluebird'); const casual = require('casual'); const fs = require('fs-extra'); const nanoid = require('nanoid'); const stringify = Promise.promisify(require('csv-stringify')); const path = require('path'); const capitalize = require('../src/utils/capitalize'); const slugify = require('../src/utils/slugify'); const n = 100000; async function updateReleasesSearch(knex) { const documents = await knex.raw(` SELECT releases.id as release_id, to_tsvector( releases.title || ' ' || sites.name || ' ' || sites.slug || ' ' || replace(CAST(releases.date AS VARCHAR), '-', ' ') || ' ' || string_agg(actors.name, ' ') || ' ' || string_agg(tags.name, ' ') ) as document FROM releases LEFT JOIN releases_actors AS local_actors ON local_actors.release_id = releases.id LEFT JOIN releases_tags AS local_tags ON local_tags.release_id = releases.id LEFT JOIN sites ON releases.site_id = sites.id LEFT JOIN actors ON local_actors.actor_id = actors.id LEFT JOIN tags ON local_tags.tag_id = tags.id GROUP BY releases.id, sites.name, sites.slug; `); const query = knex('releases_search').insert(documents.rows).toString(); return knex.raw(`${query} ON CONFLICT (release_id) DO UPDATE SET document = EXCLUDED.document`); } exports.seed = async knex => Promise.resolve() .then(async () => { const source = await fs.readFile('./assets/titles/titles', 'utf8'); const titles = source.split('\n').slice(0, -1).map(title => title.trim()).filter(Boolean); return titles; }) .then(async (titles) => { const [sites, tags, media] = await Promise.all([ knex('sites').select('*'), knex('tags').select('*').where('alias_for', null), knex('media').select('*'), ]); console.log('sites', sites.length); console.time('releases'); const releases = Array.from({ length: n }, () => { const title = casual.random_element(titles); const site = casual.random_value(sites); return { entry_id: nanoid(), title, slug: slugify(title, { limit: 50 }), site_id: site.id, date: new Date(Math.random() * (new Date().getTime() - 1500000000000) + 1500000000000).toISOString(), batch: 'dummy', }; }); console.timeEnd('releases'); console.time('actors'); const actors = Array.from({ length: Math.round(n / 20) }, () => { const name = capitalize(casual.full_name); const slug = slugify(name); return { name, slug, gender: casual.random_element(['male', 'female']), }; }); console.timeEnd('actors'); const uniqueActors = Object.values(actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor }), {})); console.log('unique actors', uniqueActors.length); const releaseIds = releases.map((release, index) => index + 1); const actorIds = uniqueActors.map((actor, index) => index + 1); console.log('ids', releases.length, actorIds.length); const actorAssociations = releaseIds.map((releaseId) => { const releaseActorIds = Array.from({ length: Math.floor(Math.random() * 3) + 1 }, () => casual.random_value(actorIds)); return Array.from(new Set(releaseActorIds)).map(actorId => ({ release_id: releaseId, actor_id: actorId })); }).flat(); const tagAssociations = releaseIds.map((releaseId) => { const releaseTags = Array.from({ length: Math.floor(Math.random() * 20) }, () => casual.random_value(tags)); return Array.from(new Set(releaseTags)).map(tag => ({ release_id: releaseId, tag_id: tag.id })); }).flat(); const posterAssociations = releaseIds.map(releaseId => ({ release_id: releaseId, media_id: casual.random_value(media).id, })); const [releasesCsv, actorsCsv, releaseActorsCsv, releaseTagsCsv, releasePostersCsv] = await Promise.all([ stringify(releases, { headers: true }), stringify(actors, { headers: true }), stringify(actorAssociations, { headers: true }), stringify(tagAssociations, { headers: true }), stringify(posterAssociations, { headers: true }), ]); const releasesPath = path.join('/tmp', 'releases.csv'); const actorsPath = path.join('/tmp', 'actors.csv'); const releaseActorsPath = path.join('/tmp', 'releases_actors.csv'); const releaseTagsPath = path.join('/tmp', 'releases_tags.csv'); const releasePostersPath = path.join('/tmp', 'releases_posters.csv'); await Promise.all([ fs.writeFile(releasesPath, releasesCsv), fs.writeFile(actorsPath, actorsCsv), fs.writeFile(releaseActorsPath, releaseActorsCsv), fs.writeFile(releaseTagsPath, releaseTagsCsv), fs.writeFile(releasePostersPath, releasePostersCsv), ]); // console.log(releasesCsv); // await updateReleasesSearch(knex); });