Added bash CLI shorthand. Properly set parameters in site results. Removed obsolete URL restriction from Reality Kings scraper.

ThePendulum 2019-11-16 23:37:33 +01:00
parent 5c551bade2
commit 2857387441
7 changed files with 13 additions and 15 deletions

View File

@@ -1,5 +1,3 @@
-'use strict';
-
 const upsert = require('../src/utils/upsert');
 
 function getSites(networksMap) {

View File

@@ -7,7 +7,6 @@ const initServer = require('./web/server');
 const scrapeSites = require('./scrape-sites');
 const scrapeRelease = require('./scrape-release');
-
 
 async function init() {
     if (argv.url) {
         await scrapeRelease(argv.url);
@@ -17,7 +16,7 @@ async function init() {
     }
 
-    if (argv.networks || argv.sites) {
+    if (argv.scrape || argv.networks || argv.sites) {
         await scrapeSites();
     }
 
     knex.destroy();
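The widened condition is the bash CLI shorthand the commit message refers to: a bare --scrape flag now starts a scrape of everything defined in the configuration, with no network or site names required. Assuming the app is launched through the npm start command registered with yargs (see the next file), the invocations below are illustrative and the names are made up:

// All three now reach scrapeSites(); previously a bare --scrape did nothing:
//   npm start -- --scrape                -> argv.scrape === true
//   npm start -- --network somenetwork   -> argv.networks === ['somenetwork']
//   npm start -- --site somesite         -> argv.sites === ['somesite']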

View File

@@ -5,13 +5,17 @@ const yargs = require('yargs');
 const { argv } = yargs
     .command('npm start')
+    .option('scrape', {
+        describe: 'Scrape sites and networks defined in configuration',
+        type: 'boolean',
+    })
     .option('networks', {
-        describe: 'Networks to scrape (overrides config)',
+        describe: 'Networks to scrape (overrides configuration)',
         type: 'array',
         alias: 'network',
     })
     .option('sites', {
-        describe: 'Sites to scrape (overrides config)',
+        describe: 'Sites to scrape (overrides configuration)',
         type: 'array',
         alias: 'site',
     })
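For context, a self-contained sketch of how yargs resolves these options; the option names and describe strings come from the diff above, while the file name and test values are illustrative:

'use strict';

const yargs = require('yargs');

const { argv } = yargs
    .option('scrape', {
        describe: 'Scrape sites and networks defined in configuration',
        type: 'boolean',
    })
    .option('sites', {
        describe: 'Sites to scrape (overrides configuration)',
        type: 'array',
        alias: 'site',
    });

// `node demo.js --scrape` sets argv.scrape to true;
// `node demo.js --site foo bar` sets argv.sites to ['foo', 'bar'],
// since the alias maps the singular --site onto the sites array option.
console.log(argv.scrape, argv.sites);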

View File

@@ -20,6 +20,7 @@ async function findSite(url, release) {
     if (network) {
         return {
             ...network,
+            network,
             isFallback: true,
         };
     }
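The added shorthand nests the complete network object under a network key while its fields are also spread onto the top level, so a fallback result carries the same nested network that a regular site row would. A small sketch of the resulting shape, with hypothetical field values:

const network = { id: 12, name: 'Some Network', slug: 'somenetwork' }; // hypothetical row

const fallbackSite = {
    ...network,   // id, name and slug are copied onto the top level
    network,      // ES2015 shorthand for network: network
    isFallback: true,
};

// fallbackSite.slug === 'somenetwork' and fallbackSite.network.slug === 'somenetwork'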

View File

@@ -134,11 +134,7 @@ async function fetchLatest(site, page = 1) {
 }
 
 async function fetchScene(url, site) {
-    if (site.isFallback) {
-        throw new Error('Cannot fetch scene details from this resource');
-    }
-
-    const entryId = url.split('/').slice(-1)[0];
+    const entryId = url.match(/\d+/)[0];
 
     const cookieJar = new CookieJar();
     const session = bhttp.session({ cookieJar });
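This is the removed URL restriction from the commit message: fallback resources are no longer rejected, and the entry ID is taken as the first run of digits anywhere in the URL rather than as the last path segment. A quick demonstration with hypothetical Reality Kings-style URLs:

// Only the digit run matters now, wherever it sits in the path.
const urls = [
    'https://www.realitykings.com/scene/4148671/some-scene-slug', // ID mid-path
    'https://www.realitykings.com/some-scene-slug/4148671',       // ID as last segment
];

urls.forEach((url) => {
    const entryId = url.match(/\d+/)[0]; // first digit run in the URL
    console.log(entryId);                // '4148671' in both cases
});

// The old url.split('/').slice(-1)[0] only handled the second form;
// the regex lifts that restriction on accepted scene URLs.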

View File

@@ -16,7 +16,7 @@ async function curateSite(site, includeParameters = false) {
         description: site.description,
         slug: site.slug,
         independent: !!parameters && parameters.independent,
-        parameters: includeParameters ? JSON.parse(site.parameters) : null,
+        parameters: includeParameters ? parameters : null,
         network: {
             id: site.network_id,
             name: site.network_name,
@@ -28,8 +28,8 @@
     };
 }
 
-function curateSites(sites) {
-    return Promise.all(sites.map(async site => curateSite(site)));
+function curateSites(sites, includeParameters) {
+    return Promise.all(sites.map(async site => curateSite(site, includeParameters)));
 }
 
 function destructConfigNetworks(networks) {
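This is the "properly set parameters" part of the commit message. The independent line above shows curateSite already has a parsed parameters value in scope, so running JSON.parse(site.parameters) a second time was redundant, and curateSites previously dropped the includeParameters flag on the bulk path. A sketch of the corrected flow, assuming the up-front parse that the independent line implies:

async function curateSite(site, includeParameters = false) {
    // Assumed: parameters is parsed once from the raw database column.
    const parameters = site.parameters ? JSON.parse(site.parameters) : null;

    return {
        independent: !!parameters && parameters.independent,
        parameters: includeParameters ? parameters : null, // reuse the parsed value
    };
}

// The flag now survives the bulk path as well:
function curateSites(sites, includeParameters) {
    return Promise.all(sites.map(async site => curateSite(site, includeParameters)));
}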

View File

@@ -3,7 +3,7 @@
 const path = require('path');
 const Promise = require('bluebird');
 const fs = require('fs-extra');
-const fetchScene = require('../fetch-scene');
+const fetchScene = require('../scrape-release');
 const argv = require('../argv');