2019-11-10 03:20:22 +00:00
'use strict';

// Third-party configuration loader.
const config = require('config');

// Project-local modules; the logger factory is called with this file's path.
const logger = require('./logger')(__filename);
const argv = require('./argv');
const knex = require('./knex');
const whereOr = require('./utils/where-or');
2019-11-10 03:20:22 +00:00
2019-11-16 02:33:36 +00:00
/**
 * Turn a raw `sites` row, carrying the aliased `network_*` columns, into the
 * curated site object returned by this module.
 *
 * The site's tags are loaded with a separate query on `sites_tags` joined to
 * `tags`.
 *
 * @param {Object} site - Row from `sites` with `network_*` columns attached.
 * @param {boolean} [includeParameters=false] - When falsy, both the site's and
 *   the network's `parameters` are returned as `null`.
 * @returns {Promise<Object>} Curated site with `tags` and a nested `network`.
 */
async function curateSite(site, includeParameters = false) {
    const tags = await knex('sites_tags')
        .select('tags.*', 'sites_tags.inherit')
        .where('site_id', site.id)
        .join('tags', 'tags.id', 'sites_tags.tag_id');

    return {
        id: site.id,
        name: site.name,
        url: site.url,
        description: site.description,
        slug: site.slug,
        tags,
        // Coerce the whole expression: `!!a && b` only coerced `a`, so the
        // flag could leak `undefined` or a raw value instead of a boolean.
        independent: Boolean(site.parameters && site.parameters.independent),
        parameters: includeParameters ? site.parameters : null,
        network: {
            id: site.network_id,
            name: site.network_name,
            description: site.network_description,
            slug: site.network_slug,
            url: site.network_url,
            parameters: includeParameters ? site.network_parameters : null,
        },
    };
}
2019-11-16 22:37:33 +00:00
/**
 * Curate a list of raw site rows concurrently.
 *
 * @param {Object[]} sites - Raw `sites` rows.
 * @param {boolean} [includeParameters] - Forwarded to `curateSite` for every row.
 * @returns {Promise<Object[]>} Curated sites, in the same order as `sites`.
 */
function curateSites(sites, includeParameters) {
    // curateSite already returns a promise, so no extra async wrapper is needed.
    return Promise.all(sites.map(site => curateSite(site, includeParameters)));
}
2020-02-01 01:26:00 +00:00
/**
 * Split a config include/exclude list into network entries and site entries.
 *
 * Each entry is either a plain value, collected under `networks`, or an
 * array whose second element is a list of sites, appended to `sites`.
 *
 * @param {Array} [networks=[]] - Entries from the config include/exclude list.
 * @returns {{ networks: Array, sites: Array }} The separated entries.
 */
function destructConfigNetworks(networks = []) {
    const result = { networks: [], sites: [] };

    for (const entry of networks) {
        if (Array.isArray(entry)) {
            // network specifies sites; tolerate an entry without a site list
            const [, siteSlugs] = entry;
            result.sites.push(...(siteSlugs || []));
        } else {
            result.networks.push(entry);
        }
    }

    return result;
}
/**
 * Look up the site a URL belongs to and return it curated, parameters included.
 *
 * Besides the URL itself, several variants are tried against `sites.url`:
 * the bare origin, the origin without a `www.`/`tour.` prefix, the host with
 * a `www.` prefix over https and http, and the origin plus the first path
 * segment.
 *
 * @param {string} url - Absolute URL; `new URL(url)` throws if it cannot be parsed.
 * @returns {Promise<Object|null>} The curated site, or `null` when no row matches.
 */
async function findSiteByUrl(url) {
    const { origin, hostname, pathname } = new URL(url);
    // allow for sites on URI directory
    const dirUrl = `${origin}${pathname.split('/').slice(0, 2).join('/')}`;

    const site = await knex('sites')
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .select(
            'sites.*',
            'networks.name as network_name',
            'networks.slug as network_slug',
            'networks.url as network_url',
            'networks.description as network_description',
            'networks.parameters as network_parameters',
        )
        .where('sites.url', url)
        .orWhere('sites.url', origin)
        .orWhere('sites.url', origin.replace(/www\.|tour\./, ''))
        .orWhere('sites.url', `https://www.${hostname}`)
        .orWhere('sites.url', `http://www.${hostname}`)
        .orWhere('sites.url', dirUrl)
        .first();

    return site ? curateSite(site, true) : null;
}
2019-12-04 20:58:08 +00:00
/**
 * Group curated sites under their network.
 *
 * @param {Object[]} sites - Curated sites, each carrying a `network` object with a `slug`.
 * @returns {Object[]} One object per distinct network slug: the network's own
 *   fields plus a `sites` array holding the sites that belong to it.
 */
function sitesByNetwork(sites) {
    const grouped = {};

    for (const site of sites) {
        const key = site.network.slug;
        const existing = grouped[key];

        if (existing) {
            existing.sites = existing.sites.concat(site);
        } else {
            // First site seen for this network: seed the group from the network fields.
            grouped[key] = { ...site.network, sites: [site] };
        }
    }

    return Object.values(grouped);
}
2019-11-16 02:33:36 +00:00
/**
 * Fetch the sites selected on the command line (`argv.sites` and
 * `argv.networks`), curate them with parameters included, and group them by
 * network.
 *
 * @returns {Promise<Object[]>} Networks, each with its selected `sites`.
 */
async function fetchSitesFromArgv() {
    const rawSites = await knex('sites')
        .select(
            'sites.*',
            'networks.name as network_name',
            'networks.slug as network_slug',
            'networks.url as network_url',
            'networks.description as network_description',
            'networks.parameters as network_parameters',
        )
        .whereIn('sites.slug', argv.sites || [])
        .orWhereIn('networks.slug', argv.networks || [])
        // NOTE(review): these clauses are chained without a grouping callback,
        // so the scrape filter presumably binds only to the network branch
        // (a OR (b AND c)); sites named explicitly would then bypass it.
        // Confirm whether that is intended before grouping the OR.
        .where('sites.scrape', true)
        .leftJoin('networks', 'sites.network_id', 'networks.id');

    const curatedSites = await curateSites(rawSites, true);
    logger.info(`Found ${curatedSites.length} sites in database`);

    return sitesByNetwork(curatedSites);
}
/**
 * Fetch the sites selected by the configuration, curate them with parameters
 * included, and group them by network.
 *
 * `config.include` and `config.exclude` are each split into site and network
 * slugs. The include filter is only applied when `config.include` is set;
 * the exclude filter is always applied.
 *
 * @returns {Promise<Object[]>} Networks, each with its selected `sites`.
 */
async function fetchSitesFromConfig() {
    const included = destructConfigNetworks(config.include);
    const excluded = destructConfigNetworks(config.exclude);

    const rawSites = await knex('sites')
        .select(
            'sites.*',
            'networks.name as network_name',
            'networks.slug as network_slug',
            'networks.url as network_url',
            'networks.description as network_description',
            'networks.parameters as network_parameters',
        )
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .where((builder) => {
            // Without an include list the group stays empty and nothing is filtered here.
            if (config.include) {
                builder
                    .whereIn('sites.slug', included.sites)
                    .orWhereIn('networks.slug', included.networks);
            }
        })
        .whereNot((builder) => {
            builder
                .whereIn('sites.slug', excluded.sites)
                .orWhereIn('networks.slug', excluded.networks);
        });

    const curatedSites = await curateSites(rawSites, true);
    logger.info(`Found ${curatedSites.length} sites in database`);

    return sitesByNetwork(curatedSites);
}
/**
 * Fetch the sites to process: the command-line selection when `argv.networks`
 * or `argv.sites` is given, otherwise the selection from the configuration.
 *
 * @returns {Promise<Object[]>} Whatever the chosen fetcher resolves to.
 */
async function fetchIncludedSites() {
    const hasCliSelection = Boolean(argv.networks || argv.sites);

    return hasCliSelection ? fetchSitesFromArgv() : fetchSitesFromConfig();
}
2019-11-13 02:14:24 +00:00
/**
 * Fetch at most 100 sites matching a query object and curate them.
 *
 * The filter is built by the project's `whereOr` helper from `queryObject`
 * for the `sites` table. No `includeParameters` argument is passed on, so
 * `curateSite` falls back to its default and parameters come back as `null`.
 *
 * @param {Object} queryObject - Filter description handed to `whereOr`.
 * @returns {Promise<Object[]>} Curated sites.
 */
async function fetchSites(queryObject) {
    const sites = await knex('sites')
        .where(builder => whereOr(queryObject, 'sites', builder))
        .select(
            'sites.*',
            'networks.name as network_name',
            'networks.slug as network_slug',
            'networks.url as network_url',
            'networks.description as network_description',
            'networks.parameters as network_parameters',
        )
        .leftJoin('networks', 'sites.network_id', 'networks.id')
        .limit(100);

    return curateSites(sites);
}
/**
 * Fetch at most 100 sites referenced by rows in `releases` and curate them.
 *
 * @returns {Promise<Object[]>} Curated sites (parameters are not included).
 */
async function fetchSitesFromReleases() {
    const sites = await knex('releases')
        // The original selected `site_id` plus an empty column name, which is
        // not a valid column and leaves out every site field curateSite reads.
        // NOTE(review): the network_* columns are still not joined here, so the
        // nested `network` fields will be undefined - confirm what callers need.
        .select('sites.*')
        .leftJoin('sites', 'sites.id', 'releases.site_id')
        .groupBy('sites.id')
        .limit(100);

    return curateSites(sites);
}
module . exports = {
2019-11-16 02:33:36 +00:00
curateSites ,
fetchIncludedSites ,
2019-11-10 03:20:22 +00:00
fetchSites ,
2019-11-16 02:33:36 +00:00
fetchSitesFromConfig ,
fetchSitesFromArgv ,
2019-11-10 03:20:22 +00:00
fetchSitesFromReleases ,
2019-11-16 02:33:36 +00:00
findSiteByUrl ,
2019-11-10 03:20:22 +00:00
} ;