Added bash CLI shorthand. Properly set parameters in site results. Removed obsolete URL restriction from Reality Kings scraper.
This commit is contained in:
parent
5c551bade2
commit
2857387441
|
@ -1,5 +1,3 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const upsert = require('../src/utils/upsert');
|
const upsert = require('../src/utils/upsert');
|
||||||
|
|
||||||
function getSites(networksMap) {
|
function getSites(networksMap) {
|
||||||
|
|
|
@ -7,7 +7,6 @@ const initServer = require('./web/server');
|
||||||
const scrapeSites = require('./scrape-sites');
|
const scrapeSites = require('./scrape-sites');
|
||||||
const scrapeRelease = require('./scrape-release');
|
const scrapeRelease = require('./scrape-release');
|
||||||
|
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
if (argv.url) {
|
if (argv.url) {
|
||||||
await scrapeRelease(argv.url);
|
await scrapeRelease(argv.url);
|
||||||
|
@ -17,7 +16,7 @@ async function init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (argv.networks || argv.sites) {
|
if (argv.scrape || argv.networks || argv.sites) {
|
||||||
await scrapeSites();
|
await scrapeSites();
|
||||||
knex.destroy();
|
knex.destroy();
|
||||||
|
|
||||||
|
|
|
@ -5,13 +5,17 @@ const yargs = require('yargs');
|
||||||
|
|
||||||
const { argv } = yargs
|
const { argv } = yargs
|
||||||
.command('npm start')
|
.command('npm start')
|
||||||
|
.option('scrape', {
|
||||||
|
describe: 'Scrape sites and networks defined in configuration',
|
||||||
|
type: 'boolean',
|
||||||
|
})
|
||||||
.option('networks', {
|
.option('networks', {
|
||||||
describe: 'Networks to scrape (overrides config)',
|
describe: 'Networks to scrape (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'network',
|
alias: 'network',
|
||||||
})
|
})
|
||||||
.option('sites', {
|
.option('sites', {
|
||||||
describe: 'Sites to scrape (overrides config)',
|
describe: 'Sites to scrape (overrides configuration)',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: 'site',
|
alias: 'site',
|
||||||
})
|
})
|
||||||
|
|
|
@ -20,6 +20,7 @@ async function findSite(url, release) {
|
||||||
if (network) {
|
if (network) {
|
||||||
return {
|
return {
|
||||||
...network,
|
...network,
|
||||||
|
network,
|
||||||
isFallback: true,
|
isFallback: true,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,11 +134,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
if (site.isFallback) {
|
const entryId = url.match(/\d+/)[0];
|
||||||
throw new Error('Cannot fetch scene details from this resource');
|
|
||||||
}
|
|
||||||
|
|
||||||
const entryId = url.split('/').slice(-1)[0];
|
|
||||||
|
|
||||||
const cookieJar = new CookieJar();
|
const cookieJar = new CookieJar();
|
||||||
const session = bhttp.session({ cookieJar });
|
const session = bhttp.session({ cookieJar });
|
||||||
|
|
|
@ -16,7 +16,7 @@ async function curateSite(site, includeParameters = false) {
|
||||||
description: site.description,
|
description: site.description,
|
||||||
slug: site.slug,
|
slug: site.slug,
|
||||||
independent: !!parameters && parameters.independent,
|
independent: !!parameters && parameters.independent,
|
||||||
parameters: includeParameters ? JSON.parse(site.parameters) : null,
|
parameters: includeParameters ? parameters : null,
|
||||||
network: {
|
network: {
|
||||||
id: site.network_id,
|
id: site.network_id,
|
||||||
name: site.network_name,
|
name: site.network_name,
|
||||||
|
@ -28,8 +28,8 @@ async function curateSite(site, includeParameters = false) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function curateSites(sites) {
|
function curateSites(sites, includeParameters) {
|
||||||
return Promise.all(sites.map(async site => curateSite(site)));
|
return Promise.all(sites.map(async site => curateSite(site, includeParameters)));
|
||||||
}
|
}
|
||||||
|
|
||||||
function destructConfigNetworks(networks) {
|
function destructConfigNetworks(networks) {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
const fetchScene = require('../fetch-scene');
|
const fetchScene = require('../scrape-release');
|
||||||
|
|
||||||
const argv = require('../argv');
|
const argv = require('../argv');
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue