'use strict'; const config = require('config'); const logger = require('./logger')(__filename); const argv = require('./argv'); const knex = require('./knex'); const whereOr = require('./utils/where-or'); async function curateSite(site, includeParameters = false) { const tags = await knex('sites_tags') .select('tags.*', 'sites_tags.inherit') .where('site_id', site.id) .join('tags', 'tags.id', 'sites_tags.tag_id'); const parameters = JSON.parse(site.parameters); return { id: site.id, name: site.name, url: site.url, description: site.description, slug: site.slug, tags, independent: !!parameters && parameters.independent, parameters: includeParameters ? parameters : null, network: { id: site.network_id, name: site.network_name, description: site.network_description, slug: site.network_slug, url: site.network_url, parameters: includeParameters ? JSON.parse(site.network_parameters) : null, }, }; } function curateSites(sites, includeParameters) { return Promise.all(sites.map(async site => curateSite(site, includeParameters))); } function destructConfigNetworks(networks = []) { return networks.reduce((acc, network) => { if (Array.isArray(network)) { // network specifies sites return { ...acc, sites: [...acc.sites, ...network[1]], }; } return { ...acc, networks: [...acc.networks, network], }; }, { networks: [], sites: [], }); } async function findSiteByUrl(url) { const { origin, hostname, pathname } = new URL(url); // const domain = hostname.replace(/www.|tour./, ''); const dirUrl = `${origin}${pathname.split('/').slice(0, 2).join('/')}`; // allow for sites on URI directory const site = await knex('sites') .leftJoin('networks', 'sites.network_id', 'networks.id') .select( 'sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) .where('sites.url', url) .orWhere('sites.url', origin) .orWhere('sites.url', origin.replace(/www\.|tour\./, '')) .orWhere('sites.url', `https://www.${hostname}`) .orWhere('sites.url', `http://www.${hostname}`) .orWhere('sites.url', dirUrl) // .orWhere('sites.url', 'like', `%${domain}`) .first(); if (site) { const curatedSite = curateSite(site, true); return curatedSite; } return null; } function sitesByNetwork(sites) { const networks = sites.reduce((acc, site) => { if (acc[site.network.slug]) { acc[site.network.slug].sites = acc[site.network.slug].sites.concat(site); return acc; } acc[site.network.slug] = { ...site.network, sites: [site], }; return acc; }, {}); return Object.values(networks); } async function fetchSitesFromArgv() { const rawSites = await knex('sites') .select( 'sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) .where('sites.scrape', true) .whereIn('sites.slug', argv.sites || []) .orWhereIn('networks.slug', argv.networks || []) .leftJoin('networks', 'sites.network_id', 'networks.id'); const curatedSites = await curateSites(rawSites, true); logger.info(`Found ${curatedSites.length} sites in database`); return sitesByNetwork(curatedSites); } async function fetchSitesFromConfig() { const included = destructConfigNetworks(config.include); const excluded = destructConfigNetworks(config.exclude); const rawSites = await knex('sites') .select( 'sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) .leftJoin('networks', 'sites.network_id', 'networks.id') .where('sites.scrape', true) .where((builder) => { if (config.include) { builder .whereIn('sites.slug', included.sites) .orWhereIn('networks.slug', included.networks); } }) .whereNot((builder) => { builder .whereIn('sites.slug', excluded.sites) .orWhereIn('networks.slug', excluded.networks); }); const curatedSites = await curateSites(rawSites, true); logger.info(`Found ${curatedSites.length} sites in database`); return sitesByNetwork(curatedSites); } async function fetchIncludedSites() { if (argv.networks || argv.sites) { return fetchSitesFromArgv(); } return fetchSitesFromConfig(); } async function fetchSites(queryObject) { const sites = await knex('sites') .where(builder => whereOr(queryObject, 'sites', builder)) .select( 'sites.*', 'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters', ) .leftJoin('networks', 'sites.network_id', 'networks.id') .limit(100); return curateSites(sites); } async function fetchSitesFromReleases() { const sites = await knex('releases') .select('site_id', '') .leftJoin('sites', 'sites.id', 'releases.site_id') .groupBy('sites.id') .limit(100); return curateSites(sites); } module.exports = { curateSites, fetchIncludedSites, fetchSites, fetchSitesFromConfig, fetchSitesFromArgv, fetchSitesFromReleases, findSiteByUrl, };