Major API change for 'q', renamed to 'qu', refactored modules. Fixed Gamma URL entry ID regex.
This commit is contained in:
303
src/utils/q.js
303
src/utils/q.js
@@ -1,304 +1,5 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const http = require('./http');
|
||||
const qu = require('./qu');
|
||||
|
||||
/**
 * Strip leading/trailing whitespace and collapse inner whitespace runs to one space.
 *
 * @param {string} str - Text to normalise.
 * @returns {?string} Normalised text, or null for any falsy input.
 */
function trim(str) {
  return str
    ? str.trim().replace(/\s+/g, ' ')
    : null;
}
|
||||
|
||||
/**
 * Parse a date from text with a moment format, optionally isolating the date first.
 *
 * @param {string} dateString - Raw text containing the date.
 * @param {string} format - Format handed to moment.utc.
 * @param {RegExp|string} [match] - When given, only the first match of this pattern is parsed.
 * @returns {?Date} Parsed date, or null for empty input, no pattern match, or an invalid date.
 */
function extractDate(dateString, format, match) {
  // trim() returns null for falsy input, which made the .match() call below throw
  if (!dateString) return null;

  const cleaned = trim(dateString);
  let candidate = cleaned;

  if (match) {
    const dateStamp = cleaned.match(match);

    if (!dateStamp) return null;

    [candidate] = dateStamp;
  }

  const date = moment.utc(candidate, format);

  return date.isValid() ? date.toDate() : null;
}
|
||||
|
||||
/**
 * Render a date with a moment output format.
 *
 * @param {*} date - Value handed to moment().
 * @param {string} format - Output format.
 * @param {string} [inputFormat] - Format used to parse `date` when supplied.
 * @returns {string} Whatever moment's format() produces.
 */
function formatDate(date, format, inputFormat) {
  const parsed = inputFormat
    ? moment(date, inputFormat)
    : moment(date);

  return parsed.format(format);
}
|
||||
|
||||
/**
 * Turn a protocol-relative URL (`//host/path`) into an absolute one.
 *
 * @param {string} url - URL to inspect; anything not starting with `//` is returned as is.
 * @param {string} [protocol='https'] - Scheme to prepend; a falsy value disables prefixing.
 * @returns {string} The prefixed URL, or the input unchanged.
 */
function prefixProtocol(url, protocol = 'https') {
  const needsPrefix = Boolean(protocol) && /^\/\//.test(url);

  return needsPrefix ? `${protocol}:${url}` : url;
}
|
||||
|
||||
/**
 * Query a single element, or read one of its properties/attributes.
 *
 * @param {Object} context - Node to search in (must expose querySelector when a selector is given).
 * @param {string} [selector] - CSS selector; when omitted the context itself is the target.
 * @param {string|boolean} [attrArg] - Property or attribute name to read; `true` means textContent;
 *   falsy returns the target element itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of non-empty string results.
 * @returns {*} The target element, or the property value falling back to the attribute value.
 */
function q(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const target = selector ? context.querySelector(selector) : context;

  if (!attr) return target;

  // Fall back to the attribute map of the target itself. The previous no-selector branch
  // looked up `attributes` on the property value, which threw for empty strings and
  // never found the attribute.
  const value = target?.[attr] || target?.attributes?.[attr]?.value;

  // only non-empty strings can be trimmed; other values are returned untouched
  const shouldTrim = applyTrim && typeof value === 'string' && value !== '';

  return shouldTrim ? trim(value) : value;
}
|
||||
|
||||
/**
 * Query all matching elements, or read one property/attribute from each.
 *
 * @param {Object} context - Node exposing querySelectorAll.
 * @param {string} selector - CSS selector.
 * @param {string|boolean} [attrArg] - Property or attribute name to read; `true` means textContent;
 *   falsy returns the elements themselves.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of non-empty string results.
 * @returns {Array} Matched elements, or one value per element.
 */
function qall(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const elements = Array.from(context.querySelectorAll(selector));

  if (!attr) return elements;

  return elements.map((el) => {
    const prop = el[attr];
    // like q(): fall back to the attribute when the property is missing, keeping the
    // original property value (e.g. '') when there is no attribute either
    const value = prop || (el.attributes?.[attr]?.value ?? prop);

    return applyTrim && value && typeof value === 'string' ? trim(value) : value;
  });
}
|
||||
|
||||
/**
 * Return the inner HTML of the first element matching the selector.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @returns {*} innerHTML of the element, or the falsy lookup result when nothing matched.
 */
function qhtml(context, selector) {
  const target = q(context, selector, null, true);

  if (!target) return target;

  return target.innerHTML;
}
|
||||
|
||||
/**
 * Collect the direct text nodes of an element, ignoring text inside child elements.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of each text node.
 * @param {boolean} [filter=true] - Drop falsy (empty) entries.
 * @returns {?Array} One entry per `#text` child, or null when no element was found.
 */
function qtexts(context, selector, applyTrim = true, filter = true) {
  const parent = q(context, selector, null, applyTrim);

  if (!parent) return null;

  const collected = [];

  Array.from(parent.childNodes).forEach((node) => {
    if (node.nodeName === '#text') {
      const value = applyTrim ? trim(node.textContent) : node.textContent;

      if (!filter || value) collected.push(value);
    }
  });

  return collected;
}
|
||||
|
||||
/**
 * Join the direct text nodes of an element into one string.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of the parts and of the joined result.
 * @returns {?string} Space-joined text, or null when no element was found.
 */
function qtext(context, selector, applyTrim = true) {
  const parts = qtexts(context, selector, applyTrim, true);

  if (!parts) return null;

  const joined = parts.join(' ');

  if (applyTrim) return trim(joined);

  return joined;
}
|
||||
|
||||
/**
 * Read a value from a <meta> tag.
 *
 * @param {Object} context - Node to search in.
 * @param {string} selector - Either a full `meta[...]` selector or just the bracket contents.
 * @param {string|boolean} [attrArg='content'] - Property/attribute to read.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of the result.
 * @returns {*} Result of q() for the resolved meta selector.
 */
function qmeta(context, selector, attrArg = 'content', applyTrim = true) {
  // selectors that already contain meta[...] are used verbatim, anything else is wrapped
  const metaSelector = /meta\[.*\]/.test(selector)
    ? selector
    : `meta[${selector}]`;

  return q(context, metaSelector, attrArg, applyTrim);
}
|
||||
|
||||
/**
 * Read a date from an element and parse it with extractDate().
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector.
 * @param {string} format - moment format of the date text.
 * @param {RegExp|string} [match] - Pattern isolating the date inside the text.
 * @param {string} [attr='textContent'] - Property/attribute holding the date text.
 * @returns {?Date} Parsed date, or null when no text was found or parsing failed.
 */
function qdate(context, selector, format, match, attr = 'textContent') {
  const dateString = q(context, selector, attr, true);

  return dateString
    ? extractDate(dateString, format, match)
    : null;
}
|
||||
|
||||
/**
 * Read an image URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function qimage(context, selector = 'img', attr = 'src', protocol = 'https') {
  const source = q(context, selector, attr);

  // without an attribute q() hands back the element itself, which must not be prefixed
  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read the URLs of all matching images, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw qall() result when `attr` is falsy.
 */
function qimages(context, selector = 'img', attr = 'src', protocol = 'https') {
  const matches = qall(context, selector, attr);

  if (!attr) return matches;

  return matches.map(entry => prefixProtocol(entry, protocol));
}
|
||||
|
||||
/**
 * Read a link URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function qurl(context, selector = 'a', attr = 'href', protocol = 'https') {
  const link = q(context, selector, attr);

  if (!attr) return link;

  return prefixProtocol(link, protocol);
}
|
||||
|
||||
/**
 * Read the URLs of all matching links, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw qall() result when `attr` is falsy.
 */
function qurls(context, selector = 'a', attr = 'href', protocol = 'https') {
  const links = qall(context, selector, attr);

  if (!attr) return links;

  return links.map(link => prefixProtocol(link, protocol));
}
|
||||
|
||||
/**
 * Read a video poster URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='video'] - CSS selector.
 * @param {string} [attr='poster'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') {
  const source = q(context, selector, attr);

  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read a trailer source URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') {
  const source = q(context, selector, attr);

  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read all trailer source URLs, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw qall() result when `attr` is falsy.
 */
function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') {
  const sources = qall(context, selector, attr);

  if (!attr) return sources;

  return sources.map(source => prefixProtocol(source, protocol));
}
|
||||
|
||||
/**
 * Read a `[hh:]mm:ss` style duration from an element and convert it to seconds.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector.
 * @param {RegExp|string} [match] - Pattern isolating the duration; defaults to `(\d+:)?\d+:\d+`.
 * @param {string} [attr='textContent'] - Property/attribute holding the duration text.
 * @returns {?number} Seconds as computed by moment.duration, or null when nothing matched.
 */
function qlength(context, selector, match, attr = 'textContent') {
  const raw = q(context, selector, attr);

  if (!raw) return null;

  const found = raw.match(match || /(\d+:)?\d+:\d+/);

  if (!found) return null;

  // left-pad with an hours segment so `mm:ss` becomes `00:mm:ss`, then keep the last three parts
  const stamp = ['00', ...found[0].split(':')].slice(-3).join(':');

  return moment.duration(stamp).asSeconds();
}
|
||||
|
||||
// Query helpers keyed by full name and by short alias. init() wraps each entry so it can be
// called without an explicit context, and module.exports spreads the raw functions.
const funcs = {
  q,
  qa: qall,
  qall,
  qd: qdate,
  qdate,
  qh: qhtml,
  qhtml,
  qi: qimage,
  qimage,
  qimages,
  qis: qimages,
  ql: qlength,
  qlength,
  qm: qmeta,
  qmeta,
  qp: qposter,
  qposter,
  qs: qall,
  qt: qtrailer,
  qtext,
  qtexts,
  qtrailer,
  qtrailers,
  qts: qtrailers,
  qtx: qtext,
  qtxs: qtexts,
  qtxt: qtext,
  qtxts: qtexts,
  qu: qurl,
  qurl,
  qurls,
  qus: qurls,
};
|
||||
|
||||
/**
 * Wrap an element in a query context whose helpers default to that element.
 *
 * @param {Object} element - Element or document to wrap.
 * @param {Object} [window] - Owning window; enables passing another element as first helper argument.
 * @returns {?Object} Context with element/el/html/text, optional window/document and all helpers,
 *   or null when no element was given.
 */
function init(element, window) {
  if (!element) return null;

  const contextFuncs = {};

  Object.entries(funcs).forEach(([key, func]) => {
    contextFuncs[key] = (...args) => {
      // a leading HTMLElement argument overrides the default context
      if (window && args[0] instanceof window.HTMLElement) {
        return func(...args);
      }

      return func(element, ...args);
    };
  });

  const windowProps = window && {
    window,
    document: window.document,
  };

  return {
    element,
    el: element,
    html: element.outerHTML || element.body.outerHTML,
    text: trim(element.textContent),
    ...windowProps,
    ...contextFuncs,
  };
}
|
||||
|
||||
/**
 * Build one query context per element.
 *
 * @param {Array|Object} context - Array of elements, or a node to run the selector against.
 * @param {string} [selector] - CSS selector; only used when `context` is not an array.
 * @param {Object} [window] - Window passed on to init().
 * @returns {Array} One init() result per element.
 */
function initAll(context, selector, window) {
  const elements = Array.isArray(context)
    ? context
    : Array.from(context.querySelectorAll(selector));

  return elements.map(element => init(element, window));
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and wrap the document, or one element of it, in a query context.
 *
 * @param {string} html - Markup to parse.
 * @param {string} [selector] - CSS selector of the element to wrap; omitted wraps the document.
 * @returns {?Object} init() result; null when the selector matched nothing.
 */
function extract(html, selector) {
  const { window } = new JSDOM(html);
  const root = selector
    ? window.document.querySelector(selector)
    : window.document;

  return init(root, window);
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and wrap every element matching the selector.
 *
 * @param {string} html - Markup to parse.
 * @param {string} selector - CSS selector of the elements to wrap.
 * @returns {Array} One query context per match.
 */
function extractAll(html, selector) {
  const dom = new JSDOM(html);

  return initAll(dom.window.document, selector, dom.window);
}
|
||||
|
||||
/**
 * Fetch a page and wrap its parsed DOM in query context(s).
 *
 * @param {string} url - Address to request.
 * @param {string} [selector] - CSS selector scoping the context(s).
 * @param {Object} [headers] - Request headers passed to http.get.
 * @param {boolean} [all=false] - Build one context per match instead of a single context.
 * @returns {Promise<Object>} `{ item, items, res, ok, status }`; `ok` is true only for status 200.
 */
async function get(url, selector, headers, all = false) {
  const res = await http.get(url, { headers });

  if (res.statusCode !== 200) {
    return {
      item: null,
      items: [],
      res,
      ok: false,
      status: res.statusCode,
    };
  }

  const item = all
    ? extractAll(res.body.toString(), selector)
    : extract(res.body.toString(), selector);

  // with `all` the extracted value already is the list; otherwise wrap the single context
  const items = all ? item : [item];

  return {
    item,
    items,
    res,
    ok: true,
    status: res.statusCode,
  };
}
|
||||
|
||||
/**
 * Fetch a page and build one query context per element matching the selector.
 *
 * @param {string} url - Address to request.
 * @param {string} selector - CSS selector of the elements to wrap.
 * @param {Object} [headers] - Request headers.
 * @returns {Promise<Object>} Same shape as get(), called with its fourth argument set to true.
 */
async function getAll(url, selector, headers) {
  const queryAll = true;

  return get(url, selector, headers, queryAll);
}
|
||||
|
||||
module.exports = {
|
||||
extractDate,
|
||||
extract,
|
||||
extractAll,
|
||||
init,
|
||||
initAll,
|
||||
formatDate,
|
||||
get,
|
||||
getAll,
|
||||
context: init,
|
||||
contextAll: initAll,
|
||||
ed: extractDate,
|
||||
ex: extract,
|
||||
exa: extractAll,
|
||||
fd: formatDate,
|
||||
ctx: init,
|
||||
ctxa: initAll,
|
||||
geta: getAll,
|
||||
edate: extractDate,
|
||||
fdate: formatDate,
|
||||
...funcs,
|
||||
};
|
||||
module.exports = qu;
|
||||
|
||||
346
src/utils/qu.js
Normal file
346
src/utils/qu.js
Normal file
@@ -0,0 +1,346 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const http = require('./http');
|
||||
|
||||
/**
 * Normalise whitespace: trim both ends and squash inner runs into a single space.
 *
 * @param {string} str - Text to clean.
 * @returns {?string} Cleaned text; null when the input is falsy.
 */
function trim(str) {
  if (str) {
    const stripped = str.trim();

    return stripped.replace(/\s+/g, ' ');
  }

  return null;
}
|
||||
|
||||
/**
 * Parse a date from text with a moment format, optionally isolating the date first.
 *
 * @param {string} dateString - Raw text containing the date.
 * @param {string} format - Format handed to moment.utc.
 * @param {RegExp|string} [match] - When given, only the first match of this pattern is parsed.
 * @returns {?Date} Parsed date, or null for empty input, no pattern match, or an invalid date.
 */
function extractDate(dateString, format, match) {
  // trim() returns null for falsy input, which made the .match() call below throw
  if (!dateString) return null;

  const cleaned = trim(dateString);
  let candidate = cleaned;

  if (match) {
    const dateStamp = cleaned.match(match);

    if (!dateStamp) return null;

    [candidate] = dateStamp;
  }

  const dateValue = moment.utc(candidate, format);

  return dateValue.isValid() ? dateValue.toDate() : null;
}
|
||||
|
||||
/**
 * Render a date with a moment output format.
 *
 * @param {*} dateValue - Value handed to moment().
 * @param {string} format - Output format.
 * @param {string} [inputFormat] - Format used to parse `dateValue` when supplied.
 * @returns {string} Whatever moment's format() produces.
 */
function formatDate(dateValue, format, inputFormat) {
  const parsed = inputFormat
    ? moment(dateValue, inputFormat)
    : moment(dateValue);

  return parsed.format(format);
}
|
||||
|
||||
/**
 * Turn a protocol-relative URL (`//host/path`) into an absolute one.
 *
 * @param {string} urlValue - URL to inspect; anything not starting with `//` is returned as is.
 * @param {string} [protocol='https'] - Scheme to prepend; a falsy value disables prefixing.
 * @returns {string} The prefixed URL, or the input unchanged.
 */
function prefixProtocol(urlValue, protocol = 'https') {
  const needsPrefix = Boolean(protocol) && /^\/\//.test(urlValue);

  return needsPrefix ? `${protocol}:${urlValue}` : urlValue;
}
|
||||
|
||||
/**
 * Query a single element, or read one of its properties/attributes.
 *
 * @param {Object} context - Node to search in (must expose querySelector when a selector is given).
 * @param {string} [selector] - CSS selector; when omitted the context itself is the target.
 * @param {string|boolean} [attrArg] - Property or attribute name to read; `true` means textContent;
 *   falsy returns the target element itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of non-empty string results.
 * @returns {*} The target element, or the property value falling back to the attribute value.
 */
function q(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const target = selector ? context.querySelector(selector) : context;

  if (!attr) return target;

  // Fall back to the attribute map of the target itself. The previous no-selector branch
  // looked up `attributes` on the property value, which threw for empty strings and
  // never found the attribute.
  const value = target?.[attr] || target?.attributes?.[attr]?.value;

  // only non-empty strings can be trimmed; other values are returned untouched
  const shouldTrim = applyTrim && typeof value === 'string' && value !== '';

  return shouldTrim ? trim(value) : value;
}
|
||||
|
||||
/**
 * Query all matching elements, or read one property/attribute from each.
 *
 * @param {Object} context - Node exposing querySelectorAll.
 * @param {string} selector - CSS selector.
 * @param {string|boolean} [attrArg] - Property or attribute name to read; `true` means textContent;
 *   falsy returns the elements themselves.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of non-empty string results.
 * @returns {Array} Matched elements, or one value per element.
 */
function all(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const elements = Array.from(context.querySelectorAll(selector));

  if (!attr) return elements;

  return elements.map((el) => {
    const prop = el[attr];
    // like q(): fall back to the attribute when the property is missing, keeping the
    // original property value (e.g. '') when there is no attribute either
    const value = prop || (el.attributes?.[attr]?.value ?? prop);

    return applyTrim && value && typeof value === 'string' ? trim(value) : value;
  });
}
|
||||
|
||||
/**
 * Tell whether the selector matches anything inside the context.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; when omitted q() returns the context itself.
 * @returns {boolean} True when q() yields a truthy result.
 */
function exists(context, selector) {
  const match = q(context, selector);

  return Boolean(match);
}
|
||||
|
||||
/**
 * Return the inner HTML of the first element matching the selector.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @returns {*} innerHTML of the element, or the falsy lookup result when nothing matched.
 */
function content(context, selector) {
  const target = q(context, selector, null, true);

  if (!target) return target;

  return target.innerHTML;
}
|
||||
|
||||
/**
 * Collect the direct text nodes of an element, ignoring text inside child elements.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of each text node.
 * @param {boolean} [filter=true] - Drop falsy (empty) entries.
 * @returns {?Array} One entry per `#text` child, or null when no element was found.
 */
function texts(context, selector, applyTrim = true, filter = true) {
  const parent = q(context, selector, null, applyTrim);

  if (!parent) return null;

  const collected = [];

  Array.from(parent.childNodes).forEach((node) => {
    if (node.nodeName === '#text') {
      const value = applyTrim ? trim(node.textContent) : node.textContent;

      if (!filter || value) collected.push(value);
    }
  });

  return collected;
}
|
||||
|
||||
/**
 * Join the direct text nodes of an element into one string.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector; omitted means the context itself.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of the parts and of the joined result.
 * @returns {?string} Space-joined text, or null when no element was found.
 */
function text(context, selector, applyTrim = true) {
  const parts = texts(context, selector, applyTrim, true);

  if (!parts) return null;

  const joined = parts.join(' ');

  if (applyTrim) return trim(joined);

  return joined;
}
|
||||
|
||||
/**
 * Read a value from a <meta> tag.
 *
 * @param {Object} context - Node to search in.
 * @param {string} selector - Either a full `meta[...]` selector or just the bracket contents.
 * @param {string|boolean} [attrArg='content'] - Property/attribute to read.
 * @param {boolean} [applyTrim=true] - Normalise whitespace of the result.
 * @returns {*} Result of q() for the resolved meta selector.
 */
function meta(context, selector, attrArg = 'content', applyTrim = true) {
  // selectors that already contain meta[...] are used verbatim, anything else is wrapped
  const metaSelector = /meta\[.*\]/.test(selector)
    ? selector
    : `meta[${selector}]`;

  return q(context, metaSelector, attrArg, applyTrim);
}
|
||||
|
||||
/**
 * Read a date from an element and parse it with extractDate().
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector.
 * @param {string} format - moment format of the date text.
 * @param {RegExp|string} [match] - Pattern isolating the date inside the text.
 * @param {string} [attr='textContent'] - Property/attribute holding the date text.
 * @returns {?Date} Parsed date, or null when no text was found or parsing failed.
 */
function date(context, selector, format, match, attr = 'textContent') {
  const dateString = q(context, selector, attr, true);

  return dateString
    ? extractDate(dateString, format, match)
    : null;
}
|
||||
|
||||
/**
 * Read an image URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function image(context, selector = 'img', attr = 'src', protocol = 'https') {
  const source = q(context, selector, attr);

  // without an attribute q() hands back the element itself, which must not be prefixed
  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read the URLs of all matching images, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw all() result when `attr` is falsy.
 */
function images(context, selector = 'img', attr = 'src', protocol = 'https') {
  const matches = all(context, selector, attr);

  if (!attr) return matches;

  return matches.map(entry => prefixProtocol(entry, protocol));
}
|
||||
|
||||
/**
 * Read a link URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function url(context, selector = 'a', attr = 'href', protocol = 'https') {
  const link = q(context, selector, attr);

  if (!attr) return link;

  return prefixProtocol(link, protocol);
}
|
||||
|
||||
/**
 * Read the URLs of all matching links, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw all() result when `attr` is falsy.
 */
function urls(context, selector = 'a', attr = 'href', protocol = 'https') {
  const links = all(context, selector, attr);

  if (!attr) return links;

  return links.map(link => prefixProtocol(link, protocol));
}
|
||||
|
||||
/**
 * Read a video poster URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='video'] - CSS selector.
 * @param {string} [attr='poster'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function poster(context, selector = 'video', attr = 'poster', protocol = 'https') {
  const source = q(context, selector, attr);

  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read a video source URL and make protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property/attribute holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {*} The URL, or the raw q() result when `attr` is falsy.
 */
function video(context, selector = 'source', attr = 'src', protocol = 'https') {
  const source = q(context, selector, attr);

  if (!attr) return source;

  return prefixProtocol(source, protocol);
}
|
||||
|
||||
/**
 * Read all video source URLs, making protocol-relative URLs absolute.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {string} [protocol='https'] - Scheme used for `//` URLs.
 * @returns {Array} URLs, or the raw all() result when `attr` is falsy.
 */
function videos(context, selector = 'source', attr = 'src', protocol = 'https') {
  const sources = all(context, selector, attr);

  if (!attr) return sources;

  return sources.map(source => prefixProtocol(source, protocol));
}
|
||||
|
||||
/**
 * Read a `[hh:]mm:ss` style duration from an element and convert it to seconds.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector] - CSS selector.
 * @param {RegExp|string} [match] - Pattern isolating the duration; defaults to `(\d+:)?\d+:\d+`.
 * @param {string} [attr='textContent'] - Property/attribute holding the duration text.
 * @returns {?number} Seconds as computed by moment.duration, or null when nothing matched.
 */
function duration(context, selector, match, attr = 'textContent') {
  const raw = q(context, selector, attr);

  if (!raw) return null;

  const found = raw.match(match || /(\d+:)?\d+:\d+/);

  if (!found) return null;

  // left-pad with an hours segment so `mm:ss` becomes `00:mm:ss`, then keep the last three parts
  const stamp = ['00', ...found[0].split(':')].slice(-3).join(':');

  return moment.duration(stamp).asSeconds();
}
|
||||
|
||||
// Old q-prefixed helper names mapped onto the renamed implementations. These are spread
// directly onto every context and onto module.exports.
const legacyFuncs = {
  q,
  qa: all,
  qall: all,
  qd: date,
  qdate: date,
  qh: content,
  qhtml: content,
  qi: image,
  qimage: image,
  qimages: images,
  qis: images,
  ql: duration,
  qlength: duration,
  qm: meta,
  qmeta: meta,
  qp: poster,
  qposter: poster,
  qs: all,
  qt: video,
  qtext: text,
  qtexts: texts,
  qtrailer: video,
  qtrailers: videos,
  qts: videos,
  qtx: text,
  qtxs: texts,
  qtxt: text,
  qtxts: texts,
  // the former `qu: url` alias is disabled: `qu` is the key under which init() and
  // module.exports expose the new helper namespace
  qurl: url,
  qurls: urls,
  qus: urls,
};
|
||||
|
||||
// Helper set exposed under the `qu` key, including several aliases for the same function
// (body/content/inner, dur/duration/length, img/image, imgs/images, trailer/video).
const quFuncs = {
  all,
  body: content,
  content,
  date,
  dur: duration,
  duration,
  exists,
  image,
  images,
  img: image,
  imgs: images,
  inner: content,
  length: duration,
  meta,
  poster,
  q,
  text,
  texts,
  trailer: video,
  url,
  urls,
  video,
  videos,
};
|
||||
|
||||
/**
 * Bind every helper of a function table to a default element.
 *
 * @param {Object} funcs - Map of helper name to `(context, ...args)` function.
 * @param {Object} element - Context used when the caller does not pass one.
 * @param {Object} [window] - Owning window, used to recognise an explicit HTMLElement argument.
 * @returns {Object} Map with the same keys whose functions no longer need a context argument.
 */
function bindContextFuncs(funcs, element, window) {
  return Object.fromEntries(Object.entries(funcs).map(([key, func]) => [
    key,
    (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context
      ? func(...args)
      : func(element, ...args)),
  ]));
}

/**
 * Wrap an element in a query context whose helpers default to that element.
 *
 * @param {Object} element - Element or document to wrap.
 * @param {Object} [window] - Owning window; enables passing another element as first helper argument.
 * @returns {?Object} Context with element/el/html/text, optional window/document, the legacy
 *   q-prefixed helpers at the top level and the new helpers under `qu`; null without an element.
 */
function init(element, window) {
  if (!element) return null;

  return {
    element,
    el: element,
    // nodes without outerHTML fall back to their body; tolerate nodes that have neither
    html: element.outerHTML || element.body?.outerHTML,
    text: trim(element.textContent),
    ...(window && {
      window,
      document: window.document,
    }),
    ...bindContextFuncs(legacyFuncs, element, window),
    qu: bindContextFuncs(quFuncs, element, window),
  };
}
|
||||
|
||||
/**
 * Build one query context per element.
 *
 * @param {Array|Object} context - Array of elements, or a node to run the selector against.
 * @param {string} [selector] - CSS selector; only used when `context` is not an array.
 * @param {Object} [window] - Window passed on to init().
 * @returns {Array} One init() result per element.
 */
function initAll(context, selector, window) {
  const elements = Array.isArray(context)
    ? context
    : Array.from(context.querySelectorAll(selector));

  return elements.map(element => init(element, window));
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and wrap the document, or one element of it, in a query context.
 *
 * @param {string} htmlValue - Markup to parse.
 * @param {string} [selector] - CSS selector of the element to wrap; omitted wraps the document.
 * @returns {?Object} init() result; null when the selector matched nothing.
 */
function extract(htmlValue, selector) {
  const { window } = new JSDOM(htmlValue);
  const root = selector
    ? window.document.querySelector(selector)
    : window.document;

  return init(root, window);
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and wrap every element matching the selector.
 *
 * @param {string} htmlValue - Markup to parse.
 * @param {string} selector - CSS selector of the elements to wrap.
 * @returns {Array} One query context per match.
 */
function extractAll(htmlValue, selector) {
  const dom = new JSDOM(htmlValue);

  return initAll(dom.window.document, selector, dom.window);
}
|
||||
|
||||
/**
 * Fetch a page and wrap its parsed DOM in query context(s).
 *
 * @param {string} urlValue - Address to request.
 * @param {string} [selector] - CSS selector scoping the context(s).
 * @param {Object} [headers] - Request headers passed to http.get.
 * @param {boolean} [queryAll=false] - Build one context per match instead of a single context.
 * @returns {Promise<Object>} `{ item, items, res, ok, status }`; `ok` is true only for status 200.
 *   `items` is always an array: the list itself with `queryAll`, otherwise `[item]`.
 */
async function get(urlValue, selector, headers, queryAll = false) {
  const res = await http.get(urlValue, {
    headers,
  });

  if (res.statusCode === 200) {
    const item = queryAll
      ? extractAll(res.body.toString(), selector)
      : extract(res.body.toString(), selector);

    return {
      item,
      // Test the queryAll flag here. The bare name `all` resolves to the module-level
      // all() helper, which is always truthy and left single items unwrapped.
      items: queryAll ? item : [item],
      res,
      ok: true,
      status: res.statusCode,
    };
  }

  return {
    item: null,
    items: [],
    res,
    ok: false,
    status: res.statusCode,
  };
}
|
||||
|
||||
/**
 * Fetch a page and build one query context per element matching the selector.
 *
 * @param {string} urlValue - Address to request.
 * @param {string} selector - CSS selector of the elements to wrap.
 * @param {Object} [headers] - Request headers.
 * @returns {Promise<Object>} Same shape as get(), called with its fourth argument set to true.
 */
async function getAll(urlValue, selector, headers) {
  const queryAll = true;

  return get(urlValue, selector, headers, queryAll);
}
|
||||
|
||||
module.exports = {
|
||||
extractDate,
|
||||
extract,
|
||||
extractAll,
|
||||
init,
|
||||
initAll,
|
||||
formatDate,
|
||||
get,
|
||||
getAll,
|
||||
context: init,
|
||||
contextAll: initAll,
|
||||
ed: extractDate,
|
||||
ex: extract,
|
||||
exa: extractAll,
|
||||
fd: formatDate,
|
||||
ctx: init,
|
||||
ctxa: initAll,
|
||||
geta: getAll,
|
||||
edate: extractDate,
|
||||
fdate: formatDate,
|
||||
qu: quFuncs,
|
||||
...legacyFuncs,
|
||||
};
|
||||
Reference in New Issue
Block a user