Major refactor: cleaned up site scrape module, fixed and cleaned up release scrape module. Removed old CLI code.

This commit is contained in:
2019-11-16 03:33:36 +01:00
parent b07f88d023
commit b489c8fc33
35 changed files with 595 additions and 847 deletions

View File

@@ -1,9 +1,12 @@
'use strict';
const config = require('config');
const argv = require('./argv');
const knex = require('./knex');
const whereOr = require('./utils/where-or');
async function curateSite(site) {
async function curateSite(site, includeParameters = false) {
const parameters = JSON.parse(site.parameters);
return {
@@ -13,12 +16,14 @@ async function curateSite(site) {
description: site.description,
slug: site.slug,
independent: !!parameters && parameters.independent,
parameters: includeParameters ? JSON.parse(site.parameters) : null,
network: {
id: site.network_id,
name: site.network_name,
description: site.network_description,
slug: site.network_slug,
url: site.network_url,
parameters: includeParameters ? JSON.parse(site.network_parameters) : null,
},
};
}
@@ -27,12 +32,85 @@ function curateSites(sites) {
return Promise.all(sites.map(async site => curateSite(site)));
}
/**
 * Split a mixed include list into plain network entries and site entries.
 *
 * An array entry is treated as a tuple whose second element lists sites;
 * those sites are collected and the tuple's first element is not recorded.
 * Any other entry is collected as a network.
 *
 * @param {Array} networks - Include list mixing plain entries and tuples.
 * @returns {{networks: Array, sites: Array}} Collected networks and sites, in input order.
 */
function destructConfigNetworks(networks) {
    const collected = {
        networks: [],
        sites: [],
    };

    for (const entry of networks) {
        if (Array.isArray(entry)) {
            // network specifies sites
            collected.sites.push(...entry[1]);
        } else {
            collected.networks.push(entry);
        }
    }

    return collected;
}
/**
 * Find the site whose stored URL ends with the hostname of the given URL.
 *
 * @param {string} url - Absolute URL; `new URL()` throws a TypeError when it cannot be parsed.
 * @returns {Promise<Object|null>} The curated site with parameters included, or null when no row matches.
 */
async function findSiteByUrl(url) {
    const { hostname } = new URL(url);

    // The dot must be escaped: an unescaped dot matches any character, so a host
    // such as "wwwx.example.com" would lose its first four characters.
    const domain = hostname.replace(/^www\./, '');

    const site = await knex('sites')
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .select(
            'sites.*',
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        // Suffix match: anything may precede the domain in the stored URL.
        .where('sites.url', 'like', `%${domain}`)
        .first();

    if (site) {
        return curateSite(site, true);
    }

    return null;
}
/**
 * Fetch the sites selected on the command line, matching either the site slugs
 * in `argv.sites` or the network slugs in `argv.networks`.
 *
 * @returns {Promise<Object[]>} Curated sites with site and network parameters included.
 */
async function fetchSitesFromArgv() {
    const rawSites = await knex('sites')
        .select(
            'sites.*',
            // Same network columns as the other site queries in this module, so
            // the curated network object is fully populated.
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        .whereIn('sites.slug', argv.sites || [])
        .orWhereIn('networks.slug', argv.networks || [])
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    // curateSites(sites) accepts only the list, so a second argument passed to
    // it is dropped; call curateSite directly to actually include parameters.
    return Promise.all(rawSites.map(site => curateSite(site, true)));
}
/**
 * Fetch the sites selected by `config.include`: sites listed explicitly plus
 * every site belonging to an included network.
 *
 * @returns {Promise<Object[]>} Curated sites with site and network parameters included.
 */
async function fetchSitesFromConfig() {
    const included = destructConfigNetworks(config.include);

    const networks = await knex('networks').select('id').whereIn('slug', included.networks || []);
    const networkIds = networks.map(network => network.id);

    const rawSites = await knex('sites')
        .select(
            'sites.*',
            // network_parameters must be selected: curateSite() runs JSON.parse on
            // it when parameters are requested, and JSON.parse(undefined) throws.
            'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
        )
        .whereIn('sites.slug', included.sites || [])
        .orWhereIn('sites.network_id', networkIds)
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    // curateSites(sites) accepts only the list, so a second argument passed to
    // it is dropped; call curateSite directly to actually include parameters.
    return Promise.all(rawSites.map(site => curateSite(site, true)));
}
/**
 * Resolve the sites to process: a command-line selection (`argv.networks` or
 * `argv.sites`) takes precedence; otherwise the configured selection is used.
 *
 * @returns {Promise<Object[]>} Whatever the chosen fetcher resolves to.
 */
async function fetchIncludedSites() {
    const hasCliSelection = Boolean(argv.networks || argv.sites);

    return hasCliSelection
        ? fetchSitesFromArgv()
        : fetchSitesFromConfig();
}
async function fetchSites(queryObject) {
const sites = await knex('sites')
.where(builder => whereOr(queryObject, 'sites', builder))
.select(
'sites.*',
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as networks_description',
'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
)
.leftJoin('networks', 'sites.network_id', 'networks.id')
.limit(100);
@@ -51,6 +129,11 @@ async function fetchSitesFromReleases() {
}
// Functions exposed to other modules for curating and looking up sites.
module.exports = {
curateSites,
fetchIncludedSites,
fetchSites,
fetchSitesFromConfig,
fetchSitesFromArgv,
fetchSitesFromReleases,
findSiteByUrl,
};