Added teaser support. Added Score network with scraper for Scoreland. Improved q. Added assets.

This commit is contained in:
2020-02-02 05:14:58 +01:00
parent 14e5695b6e
commit a97c6defca
52 changed files with 4291 additions and 3435 deletions

View File

@@ -3,11 +3,21 @@
const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Turn a protocol-relative URL (one that starts with `//`) into an absolute one.
 *
 * @param {string} url - Value to inspect; anything that does not start with
 *   `//` is handed back untouched.
 * @param {string} [protocol='https'] - Scheme to prepend, without the colon.
 *   A falsy value disables prefixing altogether.
 * @returns {string} `<protocol>:<url>` for protocol-relative input, otherwise
 *   `url` exactly as it was passed in.
 */
function prefixProtocol(url, protocol = 'https') {
  // the pattern is only evaluated when a protocol was actually supplied
  const isProtocolRelative = Boolean(protocol) && /^\/\//.test(url);

  return isProtocolRelative ? `${protocol}:${url}` : url;
}
function q(context, selector, attrArg, trim = true) {
const attr = attrArg === true ? 'textContent' : attrArg;
if (attr) {
const value = context.querySelector(selector)?.[attr];
const value = selector
? context.querySelector(selector)?.[attr]
: context[attr];
return trim ? value?.trim() : value;
}
@@ -30,12 +40,14 @@ function qmeta(context, selector, attrArg = 'content', trim = true) {
}
function qdate(context, selector, format, match, attr = 'textContent') {
const dateString = context.querySelector(selector)?.[attr];
const dateString = selector
? context.querySelector(selector)?.[attr]
: context[attr];
if (!dateString) return null;
if (match) {
const dateStamp = dateString.match(match);
const dateStamp = dateString.trim().match(match);
if (dateStamp) return moment.utc(dateStamp[0], format).toDate();
return null;
@@ -44,20 +56,41 @@ function qdate(context, selector, format, match, attr = 'textContent') {
return moment.utc(dateString.trim(), format).toDate();
}
function qimages(context, selector = 'img', attr = 'src') {
return qall(context, selector, attr);
/**
 * Query a single image reference via `q` and make a protocol-relative result
 * absolute via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `q` unchanged.
 * @param {string} [selector='img'] - Forwarded to `q` unchanged.
 * @param {string} [attr='src'] - Forwarded to `q`; when falsy, the `q` result
 *   is returned without any protocol handling.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol`.
 * @returns {*} The `q` result, run through `prefixProtocol` when `attr` is truthy.
 */
function qimage(context, selector = 'img', attr = 'src', protocol = 'https') {
  const result = q(context, selector, attr);

  if (!attr) {
    // no attribute means q output will be HTML element
    return result;
  }

  return prefixProtocol(result, protocol);
}
function qurls(context, selector = 'a', attr = 'href') {
return qall(context, selector, attr);
/**
 * Query every image reference via `qall` and make protocol-relative results
 * absolute via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `qall` unchanged.
 * @param {string} [selector='img'] - Forwarded to `qall` unchanged.
 * @param {string} [attr='src'] - Forwarded to `qall`; when falsy, the `qall`
 *   result is returned as-is.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol` for each entry.
 * @returns {Array} The `qall` result, with each entry run through
 *   `prefixProtocol` when `attr` is truthy.
 */
function qimages(context, selector = 'img', attr = 'src', protocol = 'https') {
  const found = qall(context, selector, attr);

  if (!attr) {
    return found;
  }

  return found.map(source => prefixProtocol(source, protocol));
}
function qposter(context, selector = 'video', attr = 'poster') {
return q(context, selector, attr);
/**
 * Query a single link via `q` and make a protocol-relative result absolute
 * via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `q` unchanged.
 * @param {string} [selector='a'] - Forwarded to `q` unchanged.
 * @param {string} [attr='href'] - Forwarded to `q`; when falsy, the `q` result
 *   is returned without any protocol handling.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol`.
 * @returns {*} The `q` result, run through `prefixProtocol` when `attr` is truthy.
 */
function qurl(context, selector = 'a', attr = 'href', protocol = 'https') {
  const link = q(context, selector, attr);

  if (!attr) {
    return link;
  }

  return prefixProtocol(link, protocol);
}
function qtrailer(context, selector = 'source', attr = 'src') {
return q(context, selector, attr);
/**
 * Query every link via `qall` and make protocol-relative results absolute
 * via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `qall` unchanged.
 * @param {string} [selector='a'] - Forwarded to `qall` unchanged.
 * @param {string} [attr='href'] - Forwarded to `qall`; when falsy, the `qall`
 *   result is returned as-is.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol` for each entry.
 * @returns {Array} The `qall` result, with each entry run through
 *   `prefixProtocol` when `attr` is truthy.
 */
function qurls(context, selector = 'a', attr = 'href', protocol = 'https') {
  const links = qall(context, selector, attr);

  if (!attr) {
    return links;
  }

  return links.map(link => prefixProtocol(link, protocol));
}
/**
 * Query a poster reference via `q` and make a protocol-relative result
 * absolute via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `q` unchanged.
 * @param {string} [selector='video'] - Forwarded to `q` unchanged.
 * @param {string} [attr='poster'] - Forwarded to `q`; when falsy, the `q`
 *   result is returned without any protocol handling.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol`.
 * @returns {*} The `q` result, run through `prefixProtocol` when `attr` is truthy.
 */
function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') {
  const result = q(context, selector, attr);

  if (!attr) {
    return result;
  }

  return prefixProtocol(result, protocol);
}
/**
 * Query a trailer reference via `q` and make a protocol-relative result
 * absolute via `prefixProtocol`.
 *
 * @param {object} context - Forwarded to `q` unchanged.
 * @param {string} [selector='source'] - Forwarded to `q` unchanged.
 * @param {string} [attr='src'] - Forwarded to `q`; when falsy, the `q` result
 *   is returned without any protocol handling.
 * @param {string} [protocol='https'] - Forwarded to `prefixProtocol`.
 * @returns {*} The `q` result, run through `prefixProtocol` when `attr` is truthy.
 */
function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') {
  const result = q(context, selector, attr);

  if (!attr) {
    return result;
  }

  return prefixProtocol(result, protocol);
}
function qlength(context, selector, attr = 'textContent') {
@@ -77,20 +110,24 @@ const funcs = {
q,
qall,
qdate,
qimage,
qimages,
qposter,
qlength,
qmeta,
qtrailer,
qurls,
qurl,
qa: qall,
qd: qdate,
qi: qimages,
qi: qimage,
qis: qimages,
qp: qposter,
ql: qlength,
qm: qmeta,
qt: qtrailer,
qu: qurls,
qu: qurl,
qus: qurls,
};
function ctx(element, window) {
@@ -110,18 +147,29 @@ function ctx(element, window) {
};
}
function ctxa(context, selector) {
return Array.from(context.querySelectorAll(selector)).map(element => ctx(element));
/**
 * Wrap every element matching `selector` with `ctx`.
 *
 * @param {object} context - Node whose `querySelectorAll` is used for the lookup.
 * @param {string} selector - Selector passed to `querySelectorAll`.
 * @param {object} [window] - Forwarded as the second argument of every `ctx` call.
 * @returns {Array} One `ctx` result per matched element, in match order.
 */
function ctxa(context, selector, window) {
  const matches = Array.from(context.querySelectorAll(selector));
  const wrap = element => ctx(element, window);

  return matches.map(wrap);
}
function ex(html) {
/**
 * Parse an HTML string with JSDOM and wrap the result with `ctx`.
 *
 * @param {string} html - Markup handed to the `JSDOM` constructor.
 * @param {string} [selector] - When truthy, the first element matching it is
 *   wrapped instead of the whole document.
 * @returns {*} Whatever `ctx` returns for the chosen root and the JSDOM window.
 */
function ex(html, selector) {
  const { window } = new JSDOM(html);
  const { document } = window;

  // without a selector the whole document acts as the root
  const root = selector ? document.querySelector(selector) : document;

  return ctx(root, window);
}
/**
 * Parse an HTML string with JSDOM and wrap every element matching `selector`
 * by delegating to `ctxa`.
 *
 * @param {string} html - Markup handed to the `JSDOM` constructor.
 * @param {string} selector - Forwarded to `ctxa`.
 * @returns {*} Whatever `ctxa` returns for the parsed document.
 */
function exa(html, selector) {
  const dom = new JSDOM(html);
  const { window } = dom;

  return ctxa(window.document, selector, window);
}
module.exports = {
ex,
exa,
ctx,
ctxa,
...funcs,

View File

@@ -1,7 +1,7 @@
'use strict';
function slugify(string, encode = false) {
const slug = string.trim().toLowerCase().match(/\w+/g).join('-');
/**
 * Build a lowercase slug from the word-character runs of a string.
 *
 * @param {string} string - Text to slugify.
 * @param {boolean} [encode=false] - When truthy, the slug is passed through `encodeURI`.
 * @param {string} [delimiter='-'] - Separator placed between the word runs.
 * @returns {string} The joined slug; an empty string when the input contains
 *   no word characters at all.
 */
function slugify(string, encode = false, delimiter = '-') {
  // match() returns null rather than an empty array when nothing matches,
  // e.g. for '' or punctuation-only input, so fall back to an empty list
  const words = string.trim().toLowerCase().match(/\w+/g) || [];
  const slug = words.join(delimiter);

  return encode ? encodeURI(slug) : slug;
}