From 2857387441998ff3685f28830cf6521a4a4eb409 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 16 Nov 2019 23:37:33 +0100 Subject: [PATCH] Added bash CLI shorthand. Properly setting parameters in site results. Removed obsolete URL restriction from Reality Kings scraper. --- seeds/01_sites.js | 2 -- src/app.js | 3 +-- src/argv.js | 8 ++++++-- src/scrape-release.js | 1 + src/scrapers/realitykings.js | 6 +----- src/sites.js | 6 +++--- src/utils/rename.js | 2 +- 7 files changed, 13 insertions(+), 15 deletions(-) diff --git a/seeds/01_sites.js b/seeds/01_sites.js index 936eba23..200fbd9c 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -1,5 +1,3 @@ -'use strict'; - const upsert = require('../src/utils/upsert'); function getSites(networksMap) { diff --git a/src/app.js b/src/app.js index 0274f4ac..c3fb9703 100644 --- a/src/app.js +++ b/src/app.js @@ -7,7 +7,6 @@ const initServer = require('./web/server'); const scrapeSites = require('./scrape-sites'); const scrapeRelease = require('./scrape-release'); - async function init() { if (argv.url) { await scrapeRelease(argv.url); @@ -17,7 +16,7 @@ async function init() { } - if (argv.networks || argv.sites) { + if (argv.scrape || argv.networks || argv.sites) { await scrapeSites(); knex.destroy(); diff --git a/src/argv.js b/src/argv.js index bcea4381..00ed5c51 100644 --- a/src/argv.js +++ b/src/argv.js @@ -5,13 +5,17 @@ const yargs = require('yargs'); const { argv } = yargs .command('npm start') + .option('scrape', { + describe: 'Scrape sites and networks defined in configuration', + type: 'boolean', + }) .option('networks', { - describe: 'Networks to scrape (overrides config)', + describe: 'Networks to scrape (overrides configuration)', type: 'array', alias: 'network', }) .option('sites', { - describe: 'Sites to scrape (overrides config)', + describe: 'Sites to scrape (overrides configuration)', type: 'array', alias: 'site', }) diff --git a/src/scrape-release.js b/src/scrape-release.js index 368a64b5..4b44ae5c 100644 --- a/src/scrape-release.js +++ b/src/scrape-release.js @@ -20,6 +20,7 @@ async function findSite(url, release) { if (network) { return { ...network, + network, isFallback: true, }; } diff --git a/src/scrapers/realitykings.js b/src/scrapers/realitykings.js index 11b1acad..747c320e 100644 --- a/src/scrapers/realitykings.js +++ b/src/scrapers/realitykings.js @@ -134,11 +134,7 @@ async function fetchLatest(site, page = 1) { } async function fetchScene(url, site) { - if (site.isFallback) { - throw new Error('Cannot fetch scene details from this resource'); - } - - const entryId = url.split('/').slice(-1)[0]; + const entryId = url.match(/\d+/)[0]; const cookieJar = new CookieJar(); const session = bhttp.session({ cookieJar }); diff --git a/src/sites.js b/src/sites.js index 46f3d110..b2c9218d 100644 --- a/src/sites.js +++ b/src/sites.js @@ -16,7 +16,7 @@ async function curateSite(site, includeParameters = false) { description: site.description, slug: site.slug, independent: !!parameters && parameters.independent, - parameters: includeParameters ? JSON.parse(site.parameters) : null, + parameters: includeParameters ? parameters : null, network: { id: site.network_id, name: site.network_name, @@ -28,8 +28,8 @@ async function curateSite(site, includeParameters = false) { }; } -function curateSites(sites) { - return Promise.all(sites.map(async site => curateSite(site))); +function curateSites(sites, includeParameters) { + return Promise.all(sites.map(async site => curateSite(site, includeParameters))); } function destructConfigNetworks(networks) { diff --git a/src/utils/rename.js b/src/utils/rename.js index a4346887..bed1e5ed 100644 --- a/src/utils/rename.js +++ b/src/utils/rename.js @@ -3,7 +3,7 @@ const path = require('path'); const Promise = require('bluebird'); const fs = require('fs-extra'); -const fetchScene = require('../fetch-scene'); +const fetchScene = require('../scrape-release'); const argv = require('../argv');