traxxx/src/utils/q.js

305 lines
7.5 KiB
JavaScript
Raw Normal View History

'use strict';
const { JSDOM } = require('jsdom');
const moment = require('moment');
const http = require('./http');
/**
 * Normalise whitespace in a string: strip both ends and collapse every
 * internal run of whitespace (spaces, tabs, newlines) into a single space.
 *
 * @param {?string} str - Text to normalise.
 * @returns {?string} The normalised text, or null when `str` is falsy
 *   (null, undefined or ''). A whitespace-only string yields ''.
 */
function trim(str) {
  if (!str) return null;

  return str.trim().replace(/\s+/g, ' ');
}
/**
 * Parse a date out of a string as UTC.
 *
 * @param {?string} dateString - Raw text containing the date.
 * @param {string|string[]} format - Format passed to moment.utc().
 * @param {RegExp} [match] - When given, only the first match of this
 *   pattern inside the trimmed string is handed to moment.
 * @returns {?Date} The parsed date, or null when the input is empty, the
 *   pattern does not match, or moment reports the result as invalid.
 */
function extractDate(dateString, format, match) {
  // trim() returns null for empty input; calling .match() on that used to
  // throw, while the pattern-less path already resolved to null
  if (!dateString) return null;

  const text = trim(dateString);
  let stamp = text;

  if (match) {
    const found = text.match(match);
    if (!found) return null;

    [stamp] = found;
  }

  const date = moment.utc(stamp, format);

  return date.isValid() ? date.toDate() : null;
}
/**
 * Render a date through moment's formatter.
 *
 * @param {*} date - Anything moment() accepts as input.
 * @param {string} format - Output format handed to moment's format().
 * @param {string} [inputFormat] - When given, `date` is parsed with this
 *   format instead of moment's default parsing.
 * @returns {string} The formatted date.
 */
function formatDate(date, format, inputFormat) {
  const parsed = inputFormat
    ? moment(date, inputFormat)
    : moment(date);

  return parsed.format(format);
}
/**
 * Turn a protocol-relative URL (`//host/path`) into an absolute one.
 *
 * @param {*} url - URL to inspect; returned untouched unless it starts with `//`.
 * @param {?string} [protocol='https'] - Scheme to prepend; a falsy value
 *   disables prefixing altogether.
 * @returns {*} `${protocol}:${url}` for protocol-relative URLs, otherwise `url`.
 */
function prefixProtocol(url, protocol = 'https') {
  // nothing to do without a protocol or for URLs that are not protocol-relative
  if (!protocol || !/^\/\//.test(url)) {
    return url;
  }

  return `${protocol}:${url}`;
}
/**
 * Query a single element, or read one value from it.
 *
 * @param {Object} context - Node to search in; used as the target itself
 *   when no selector is given.
 * @param {?string} [selector] - CSS selector resolved with querySelector().
 * @param {string|boolean} [attrArg] - Property or attribute name to read;
 *   `true` is shorthand for 'textContent'. Falsy returns the element.
 * @param {boolean} [applyTrim=true] - Pass truthy values through trim().
 * @returns {*} The matched element (or `context`) when no attribute is
 *   requested; otherwise the property value, falling back to the HTML
 *   attribute of the same name. Undefined when neither exists.
 */
function q(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  // resolve the target once instead of querying the DOM for every lookup
  const target = selector ? context.querySelector(selector) : context;

  if (!attr) return target;

  // the attribute fallback must be read from the target's own attribute map;
  // a Document has no `attributes`, hence the optional chain
  const value = target?.[attr] || target?.attributes?.[attr]?.value;

  return applyTrim && value ? trim(value) : value;
}
/**
 * Query every element matching a selector, or read one value from each.
 *
 * @param {Object} context - Node to search in with querySelectorAll().
 * @param {string} selector - CSS selector.
 * @param {string|boolean} [attrArg] - Property or attribute name to read
 *   from each element; `true` is shorthand for 'textContent'. Falsy
 *   returns the elements themselves.
 * @param {boolean} [applyTrim=true] - Pass truthy values through trim().
 * @returns {Array} The matched elements, or one value per element.
 */
function qall(context, selector, attrArg, applyTrim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const elements = Array.from(context.querySelectorAll(selector));

  if (!attr) return elements;

  return elements.map((el) => {
    const prop = el[attr];
    // when the element has no such DOM property, fall back to the HTML
    // attribute of the same name, as q() does; values that do exist as a
    // property (including '') are returned exactly as before
    const value = prop ?? el.attributes?.[attr]?.value ?? prop;

    return applyTrim && value ? trim(value) : value;
  });
}
/**
 * Read the inner HTML of the first element matching a selector.
 *
 * @param {Object} context - Node to search in.
 * @param {?string} selector - CSS selector; falsy targets `context` itself.
 * @returns {*} The element's innerHTML, or the falsy q() result when no
 *   element was found.
 */
function qhtml(context, selector) {
  const target = q(context, selector, null, true);

  if (!target) return target;

  return target.innerHTML;
}
/**
 * Collect the text of an element's direct text-node children, leaving out
 * text nested inside child elements.
 *
 * @param {Object} context - Node to search in.
 * @param {?string} selector - CSS selector; falsy targets `context` itself.
 * @param {boolean} [applyTrim=true] - Pass each text through trim().
 * @param {boolean} [filter=true] - Drop falsy entries such as empty strings
 *   and the nulls trim() produces.
 * @returns {?Array} One entry per text node, or null when no element matched.
 */
function qtexts(context, selector, applyTrim = true, filter = true) {
  const parent = q(context, selector, null, applyTrim);

  if (!parent) return null;

  const texts = [];

  Array.from(parent.childNodes).forEach((child) => {
    if (child.nodeName === '#text') {
      texts.push(applyTrim ? trim(child.textContent) : child.textContent);
    }
  });

  if (!filter) return texts;

  return texts.filter(Boolean);
}
/**
 * Join the non-empty direct text nodes of an element into one string,
 * separated by single spaces.
 *
 * @param {Object} context - Node to search in.
 * @param {?string} selector - CSS selector; falsy targets `context` itself.
 * @param {boolean} [applyTrim=true] - Trim each part and the joined result.
 * @returns {?string} The combined text, or null when no element matched.
 */
function qtext(context, selector, applyTrim = true) {
  const parts = qtexts(context, selector, applyTrim, true);

  if (!parts) return null;

  const joined = parts.join(' ');

  if (!applyTrim) return joined;

  return trim(joined);
}
/**
 * Read a value from a <meta> tag.
 *
 * @param {Object} context - Node to search in.
 * @param {string} selector - Either a full `meta[...]` selector, or just the
 *   attribute condition, which is then wrapped as `meta[<selector>]`.
 * @param {string|boolean} [attrArg='content'] - Property or attribute to read.
 * @param {boolean} [applyTrim=true] - Pass the value through trim().
 * @returns {*} Whatever q() returns for the resolved selector.
 */
function qmeta(context, selector, attrArg = 'content', applyTrim = true) {
  const isFullSelector = /meta\[.*\]/.test(selector);
  const metaSelector = isFullSelector ? selector : `meta[${selector}]`;

  return q(context, metaSelector, attrArg, applyTrim);
}
/**
 * Read a value from an element and parse it as a date via extractDate().
 *
 * @param {Object} context - Node to search in.
 * @param {?string} selector - CSS selector; falsy targets `context` itself.
 * @param {string|string[]} format - Date format forwarded to extractDate().
 * @param {RegExp} [match] - Pattern forwarded to extractDate().
 * @param {string|boolean} [attr='textContent'] - Property or attribute
 *   holding the date text.
 * @returns {?Date} The extractDate() result, or null when nothing was read.
 */
function qdate(context, selector, format, match, attr = 'textContent') {
  const rawDate = q(context, selector, attr, true);

  return rawDate
    ? extractDate(rawDate, format, match)
    : null;
}
/**
 * Read an image URL from the first matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property or attribute holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {*} The URL run through prefixProtocol(), or the raw q() result
 *   when `attr` is falsy.
 */
function qimage(context, selector = 'img', attr = 'src', protocol = 'https') {
  const result = q(context, selector, attr);

  // without an attribute q() hands back the element itself, not a URL
  if (!attr) return result;

  return prefixProtocol(result, protocol);
}
/**
 * Read image URLs from every matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {Array} URLs run through prefixProtocol(), or the raw qall()
 *   result when `attr` is falsy.
 */
function qimages(context, selector = 'img', attr = 'src', protocol = 'https') {
  const results = qall(context, selector, attr);

  // without an attribute qall() returns elements, which need no prefixing
  if (!attr) return results;

  return results.map(source => prefixProtocol(source, protocol));
}
/**
 * Read a link URL from the first matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property or attribute holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {*} The URL run through prefixProtocol(), or the raw q() result
 *   when `attr` is falsy.
 */
function qurl(context, selector = 'a', attr = 'href', protocol = 'https') {
  const result = q(context, selector, attr);

  // without an attribute q() hands back the element itself, not a URL
  if (!attr) return result;

  return prefixProtocol(result, protocol);
}
/**
 * Read link URLs from every matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='a'] - CSS selector.
 * @param {string} [attr='href'] - Property holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {Array} URLs run through prefixProtocol(), or the raw qall()
 *   result when `attr` is falsy.
 */
function qurls(context, selector = 'a', attr = 'href', protocol = 'https') {
  const results = qall(context, selector, attr);

  // without an attribute qall() returns elements, which need no prefixing
  if (!attr) return results;

  return results.map(link => prefixProtocol(link, protocol));
}
/**
 * Read a poster URL from the first matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='video'] - CSS selector.
 * @param {string} [attr='poster'] - Property or attribute holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {*} The URL run through prefixProtocol(), or the raw q() result
 *   when `attr` is falsy.
 */
function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') {
  const result = q(context, selector, attr);

  // without an attribute q() hands back the element itself, not a URL
  if (!attr) return result;

  return prefixProtocol(result, protocol);
}
/**
 * Read a trailer URL from the first matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property or attribute holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {*} The URL run through prefixProtocol(), or the raw q() result
 *   when `attr` is falsy.
 */
function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') {
  const result = q(context, selector, attr);

  // without an attribute q() hands back the element itself, not a URL
  if (!attr) return result;

  return prefixProtocol(result, protocol);
}
/**
 * Read trailer URLs from every matching element.
 *
 * @param {Object} context - Node to search in.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property holding the URL.
 * @param {?string} [protocol='https'] - Scheme for protocol-relative URLs.
 * @returns {Array} URLs run through prefixProtocol(), or the raw qall()
 *   result when `attr` is falsy.
 */
function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') {
  const results = qall(context, selector, attr);

  // without an attribute qall() returns elements, which need no prefixing
  if (!attr) return results;

  return results.map(source => prefixProtocol(source, protocol));
}
/**
 * Read a colon-separated duration from an element and convert it to seconds.
 *
 * @param {Object} context - Node to search in.
 * @param {?string} selector - CSS selector; falsy targets `context` itself.
 * @param {RegExp} [match] - Pattern locating the duration in the text;
 *   defaults to one matching `m:s` with an optional leading `h:`.
 * @param {string|boolean} [attr='textContent'] - Property or attribute to read.
 * @returns {?number} Duration in seconds, or null when nothing was read or
 *   the pattern did not match.
 */
function qlength(context, selector, match, attr = 'textContent') {
  const rawDuration = q(context, selector, attr);

  if (!rawDuration) return null;

  const pattern = match || /(\d+:)?\d+:\d+/;
  const found = rawDuration.match(pattern);

  if (!found) return null;

  // lead with '00' and keep the last three parts, so a two-part match gains
  // a zero first segment before being handed to moment.duration()
  const parts = ['00', ...found[0].split(':')].slice(-3);

  return moment.duration(parts.join(':')).asSeconds();
}
// Table of query helpers under their full names and short aliases.
// init() wraps every entry so it is bound to an element, and the whole table
// is also spread into module.exports.
const funcs = {
q,
// aliases for qall: qa, qs
qa: qall,
qall,
qd: qdate,
qdate,
qh: qhtml,
qhtml,
qi: qimage,
qimage,
qimages,
qis: qimages,
ql: qlength,
qlength,
qm: qmeta,
qmeta,
qp: qposter,
qposter,
qs: qall,
// qt and qts refer to trailers; the text helpers use qtx/qtxt and qtxs/qtxts
qt: qtrailer,
qtext,
qtexts,
qtrailer,
qtrailers,
qts: qtrailers,
qtx: qtext,
qtxs: qtexts,
qtxt: qtext,
qtxts: qtexts,
qu: qurl,
qurl,
qurls,
qus: qurls,
};
/**
 * Wrap an element in an object exposing every helper from `funcs`, each
 * pre-bound to that element.
 *
 * @param {?Object} element - Element or document to wrap.
 * @param {Object} [window] - Window the element belongs to. When given it is
 *   exposed on the result (with its document) and lets a bound helper accept
 *   a different context as its first argument.
 * @returns {?Object} Null when `element` is falsy; otherwise an object with
 *   `element`/`el`, `html`, `text`, optionally `window`/`document`, and the
 *   bound helpers.
 */
function init(element, window) {
  if (!element) return null;

  // dynamically attach methods with context; built in a single pass rather
  // than re-spreading the accumulator for every helper
  const contextFuncs = Object.fromEntries(Object.entries(funcs).map(([key, func]) => [
    key,
    (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context
      ? func(...args)
      : func(element, ...args)),
  ]));

  return {
    element,
    el: element,
    // a node without outerHTML is expected to expose a body, as a Document does
    html: element.outerHTML || element.body.outerHTML,
    text: trim(element.textContent),
    ...(window && {
      window,
      document: window.document,
    }),
    ...contextFuncs,
  };
}
/**
 * Wrap several elements with init().
 *
 * @param {Object|Array} context - Either an array of elements to wrap
 *   directly, or a node to search in with querySelectorAll().
 * @param {string} [selector] - CSS selector; unused when `context` is an array.
 * @param {Object} [window] - Window forwarded to init() for every element.
 * @returns {Array} One init() result per element.
 */
function initAll(context, selector, window) {
  const elements = Array.isArray(context)
    ? context
    : Array.from(context.querySelectorAll(selector));

  return elements.map(element => init(element, window));
}
/**
 * Parse an HTML string with JSDOM and wrap the document, or one element of
 * it, with init().
 *
 * @param {string} html - Markup to parse.
 * @param {string} [selector] - When given, the first matching element is
 *   wrapped instead of the whole document.
 * @returns {?Object} The init() result; null when the selector matched nothing.
 */
function extract(html, selector) {
  const { window } = new JSDOM(html);
  const { document } = window;
  const root = selector ? document.querySelector(selector) : document;

  return init(root, window);
}
/**
 * Parse an HTML string with JSDOM and wrap every element matching a
 * selector via initAll().
 *
 * @param {string} html - Markup to parse.
 * @param {string} selector - CSS selector for the elements to wrap.
 * @returns {Array} One init() result per matching element.
 */
function extractAll(html, selector) {
  const dom = new JSDOM(html);

  return initAll(dom.window.document, selector, dom.window);
}
/**
 * Fetch a page through the local http module and, on a 200 response, parse
 * its body into query contexts.
 *
 * @param {string} url - Address to request.
 * @param {string} [selector] - CSS selector forwarded to extract()/extractAll().
 * @param {Object} [headers] - Request headers.
 * @param {boolean} [all=false] - Use extractAll() instead of extract().
 * @returns {Promise<Object>} `{ item, items, res, ok, status }`. On status 200
 *   `item` is the extraction result and `items` is always an array; on any
 *   other status `item` is null, `items` is empty and `ok` is false.
 */
async function get(url, selector, headers, all = false) {
  const res = await http.get(url, { headers });
  const status = res.statusCode;

  if (status !== 200) {
    return {
      item: null,
      items: [],
      res,
      ok: false,
      status,
    };
  }

  const html = res.body.toString();
  const item = all ? extractAll(html, selector) : extract(html, selector);

  return {
    item,
    // extractAll() already yields an array; a single item gets wrapped
    items: all ? item : [item],
    res,
    ok: true,
    status,
  };
}
/**
 * Fetch a page and wrap every element matching the selector; shorthand for
 * get() with `all` enabled.
 *
 * @param {string} url - Address to request.
 * @param {string} selector - CSS selector for the elements to wrap.
 * @param {Object} [headers] - Request headers.
 * @returns {Promise<Object>} The get() result.
 */
async function getAll(url, selector, headers) {
  const all = true;

  return get(url, selector, headers, all);
}
// Public API: the extraction, context and fetch helpers under their full
// names, followed by short aliases for the same functions, plus every query
// helper from `funcs`.
module.exports = {
extractDate,
extract,
extractAll,
init,
initAll,
formatDate,
get,
getAll,
// aliases
context: init,
contextAll: initAll,
ed: extractDate,
ex: extract,
exa: extractAll,
fd: formatDate,
ctx: init,
ctxa: initAll,
geta: getAll,
edate: extractDate,
fdate: formatDate,
// query helpers and their aliases
...funcs,
};