// traxxx/src/utils/qu.js
//
// 606 lines
// 13 KiB
// JavaScript

'use strict';
const { JSDOM } = require('jsdom');
const moment = require('moment');
const http = require('./http');
// Console object obtained by calling the ./virtual-console export with this file's path;
// it is passed to every JSDOM instance constructed in this module.
const virtualConsole = require('./virtual-console')(__filename);
// Window of an empty, standalone JSDOM document. init() uses its HTMLElement constructor
// for an instanceof check on the first argument of the legacy helpers.
const { window: globalWindow } = new JSDOM('', { virtualConsole });
/**
 * Strips leading/trailing whitespace and collapses every internal whitespace run
 * into a single space.
 *
 * @param {*} str - value to normalise
 * @returns {*} the normalised string, or the input untouched when it is not a string
 */
function trim(str) {
  return typeof str === 'string'
    ? str.trim().replace(/\s+/g, ' ')
    : str;
}
/**
 * Parses a date out of a string with moment.utc.
 *
 * @param {string} dateString - raw text containing the date
 * @param {string|string[]} format - format handed to moment.utc
 * @param {RegExp|string} [match] - when given, only the first match of this pattern
 *   inside the trimmed string is parsed
 * @returns {Date|null} the parsed date, or null when the pattern does not match or
 *   moment reports the value as invalid
 */
function extractDate(dateString, format, match) {
  let candidate = trim(dateString);

  if (match) {
    const found = candidate.match(match);

    if (!found) {
      return null;
    }

    [candidate] = found;
  }

  const parsed = moment.utc(candidate, format);

  if (!parsed.isValid()) {
    return null;
  }

  return parsed.toDate();
}
/**
 * Formats a date value with moment.
 *
 * @param {*} dateValue - anything moment() accepts
 * @param {string} format - output format passed to moment's format()
 * @param {string|string[]} [inputFormat] - when given, dateValue is parsed with this format
 * @returns {string} the formatted date as produced by moment
 */
function formatDate(dateValue, format, inputFormat) {
  const parsed = inputFormat
    ? moment(dateValue, inputFormat)
    : moment(dateValue);

  return parsed.format(format);
}
/**
 * Converts a clock-style duration found inside a string to seconds.
 *
 * @param {string} durationString - text containing the duration
 * @param {RegExp|string} [match] - pattern locating the duration; defaults to
 *   `[h:]m:s` style timestamps
 * @returns {number|null} seconds as computed by moment.duration, or null when the
 *   pattern does not match
 */
function durationToSeconds(durationString, match) {
  const pattern = match || /(\d+:)?\d+:\d+/;
  const found = durationString.match(pattern);

  if (!found) {
    return null;
  }

  // pad with a leading '00' so that moment always receives three segments
  const parts = found[0].split(/[:hm]/);
  const clock = ['00', ...parts].slice(-3).join(':');

  return moment.duration(clock).asSeconds();
}
/**
 * Turns a possibly relative or protocol-relative URL into an absolute one.
 *
 * @param {string} urlValue - URL as found in the document
 * @param {string} [origin] - origin prepended to relative URLs
 * @param {string} [protocol='https'] - protocol prepended to `//host/...` URLs
 * @returns {string|null} the prefixed URL, the input when nothing can be prepended,
 *   or null for an empty input
 */
function prefixUrl(urlValue, origin, protocol = 'https') {
  if (!urlValue) {
    return null;
  }

  const startsWith = (pattern) => pattern.test(urlValue);

  if (startsWith(/^http/)) {
    return urlValue; // already absolute
  }

  if (protocol && startsWith(/^\/\//)) {
    return `${protocol}:${urlValue}`;
  }

  if (!origin) {
    return urlValue; // nothing left to prepend
  }

  if (startsWith(/^\//)) {
    return `${origin}${urlValue}`;
  }

  if (startsWith(/^\.\//)) {
    return `${origin}${urlValue.slice(1)}`; // drop the leading dot
  }

  return `${origin}/${urlValue}`;
}
/**
 * Core query helper: selects an element and optionally reads a property or attribute.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector; when omitted the context itself is used
 * @param {string|boolean} [attrArg] - property/attribute to read; `true` means textContent;
 *   when falsy the element itself is returned
 * @param {boolean} [applyTrim=true] - normalise whitespace of string results
 * @returns {*} the element, the value read from it, or null when a document is
 *   queried without a selector
 */
function q(context, selector, attrArg, applyTrim = true) {
  if (!selector && context.nodeName === '#document') {
    return null;
  }

  const target = selector ? context.querySelector(selector) : context;
  const attr = attrArg === true ? 'textContent' : attrArg;

  if (!attr) {
    return target;
  }

  let value;

  if (selector) {
    // prefer the DOM property, fall back to the attribute of the same name
    value = target?.[attr] || target?.attributes[attr]?.value;
  } else {
    value = target[attr] || target.getAttribute(attr);
  }

  if (applyTrim && typeof value === 'string') {
    return trim(value);
  }

  return value;
}
/**
 * Selects every element matching the selector, optionally reading a value from each.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @param {string|boolean} [attrArg] - property/attribute to read; `true` means textContent
 * @param {boolean} [applyTrim=true] - forwarded to q() for each element
 * @returns {Array} the matching elements, or the values read from them
 */
function all(context, selector, attrArg, applyTrim = true) {
  const matches = Array.from(context.querySelectorAll(selector));
  const attr = attrArg === true ? 'textContent' : attrArg;

  if (!attr) {
    return matches;
  }

  return matches.map((el) => q(el, null, attr, applyTrim));
}
/**
 * Tells whether q() finds something for the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @returns {boolean} true when q() returns a truthy value
 */
function exists(context, selector) {
  const found = q(context, selector);

  return Boolean(found);
}
/**
 * Counts the elements matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @returns {number} number of matches, 0 when there are none
 */
function count(context, selector) {
  const { length } = all(context, selector) ?? {};

  return length || 0;
}
/**
 * Reads the textContent of the first element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {boolean} [applyTrim=true] - normalise whitespace of the result
 * @returns {*} whatever q() yields for the textContent property
 */
function content(context, selector, applyTrim = true) {
  const property = 'textContent';

  return q(context, selector, property, applyTrim);
}
/**
 * Reads the textContent of every element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @param {boolean} [applyTrim] - forwarded to all(); leaving it undefined lets all()
 *   apply its own default
 * @returns {Array} whatever all() yields for the textContent property
 */
function contents(context, selector, applyTrim) {
  const property = 'textContent';

  return all(context, selector, property, applyTrim);
}
/**
 * Returns the innerHTML of the first element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @returns {*} the element's innerHTML, or the falsy q() result when nothing is found
 */
function html(context, selector) {
  const el = q(context, selector, null, true);

  if (!el) {
    return el;
  }

  return el.innerHTML;
}
/**
 * Parses the innerHTML of the first element matching the selector as JSON.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @returns {*} the parsed value, or null when the element is missing or its content
 *   is not valid JSON
 */
function json(context, selector) {
  const holder = q(context, selector, null, true);
  let parsed = null;

  try {
    parsed = JSON.parse(holder?.innerHTML);
  } catch (error) {
    // missing element or malformed JSON; fall through with null
  }

  return parsed;
}
/**
 * Parses the innerHTML of every element matching the selector as JSON.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @returns {Array} one entry per element: the parsed value, or null when parsing fails
 */
function jsons(context, selector) {
  const parseInner = (el) => {
    try {
      return JSON.parse(el?.innerHTML);
    } catch (error) {
      return null;
    }
  };

  return all(context, selector, null, true).map((el) => parseInner(el));
}
/**
 * Returns the innerHTML of every element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @returns {Array} the innerHTML of each match, in document order as returned by all()
 */
function htmls(context, selector) {
  return all(context, selector, null, true).map(({ innerHTML }) => innerHTML);
}
/**
 * Collects the direct text nodes of the first element matching the selector,
 * ignoring text nested inside child elements.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {boolean} [applyTrim=true] - normalise whitespace of each text node
 * @param {boolean} [filter=true] - drop text nodes that end up empty
 * @returns {string[]|null} the text node contents, or null when no element is found
 */
function texts(context, selector, applyTrim = true, filter = true) {
  const parent = q(context, selector, null, applyTrim);

  if (!parent) {
    return null;
  }

  const collected = [];

  Array.from(parent.childNodes).forEach((child) => {
    if (child.nodeName !== '#text') {
      return;
    }

    const value = applyTrim ? trim(child.textContent) : child.textContent;

    if (!filter || value) {
      collected.push(value);
    }
  });

  return collected;
}
/**
 * Joins the direct text nodes of the first matching element into one string.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {boolean} [applyTrim=true] - normalise whitespace of the nodes and the joined result
 * @returns {string|null} the space-joined text, or null when texts() finds no element
 */
function text(context, selector, applyTrim = true) {
  const fragments = texts(context, selector, applyTrim, true);

  if (!fragments) {
    return null;
  }

  const joined = fragments.join(' ');

  if (applyTrim) {
    return trim(joined);
  }

  return joined;
}
/**
 * Rewrites an element's inline style so that CSS function arguments are not padded
 * with whitespace, e.g. `url( image.png )` becomes `url(image.png)`.
 *
 * jsdom appears to have a bug where it ignores inline CSS attributes set to a
 * function() containing spaces, e.g. url( image.png ).
 *
 * Every opening and closing parenthesis is handled on its own, so several functions
 * in one declaration block, one-sided padding and nested parentheses are all
 * normalised. Elements without a style attribute are left untouched.
 *
 * @param {Element} el - element whose style attribute is normalised in place
 * @returns {void}
 */
function removeStyleFunctionSpaces(el) {
  const inlineStyle = el.getAttribute('style');

  if (typeof inlineStyle !== 'string') {
    return; // no style attribute, nothing to normalise
  }

  const normalised = inlineStyle
    .replace(/\(\s+/g, '(')
    .replace(/\s+\)/g, ')');

  el.setAttribute('style', normalised);
}
/**
 * Reads the inline style of the first element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {string} [styleAttr] - single style property to return
 * @returns {*} the requested property, the whole style object when no property is
 *   named, or null when the element is missing or has no style attribute
 */
function style(context, selector, styleAttr) {
  const el = q(context, selector);

  if (!el?.hasAttribute('style')) {
    return null;
  }

  removeStyleFunctionSpaces(el);

  if (styleAttr) {
    return el.style[styleAttr];
  }

  return el.style;
}
/**
 * Reads the inline style of every element matching the selector.
 *
 * Elements without a style attribute are not passed to removeStyleFunctionSpaces(),
 * which reads the attribute as a string; their (empty) style is still reported so
 * that the result lines up with the matched elements.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - CSS selector
 * @param {string} [styleAttr] - single style property to return for each element
 * @returns {Array} per element, the requested property or the whole style object
 */
function styles(context, selector, styleAttr) {
  return Array.from(context.querySelectorAll(selector), (el) => {
    if (el.hasAttribute('style')) {
      removeStyleFunctionSpaces(el);
    }

    return styleAttr ? el.style[styleAttr] : el.style;
  });
}
/**
 * Reads a number from the first element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {RegExp|string|null} [match] - pattern isolating the number inside the value;
 *   pass a falsy value to convert the whole value
 * @param {string} [attr='textContent'] - property/attribute to read
 * @returns {number|null} the number, or null when the value is missing, the pattern
 *   does not match, or the result is not a number
 */
function number(context, selector, match = /\d+(\.\d*)?/, attr = 'textContent') {
  const value = q(context, selector, attr);

  if (!value) {
    return null;
  }

  // DOM properties may be non-strings (e.g. a numeric width), so coerce before matching
  const candidate = match
    ? String(value).match(match)?.[0]
    : value;

  if (candidate === undefined) {
    return null; // pattern did not match; avoid returning NaN
  }

  const parsed = Number(candidate);

  return Number.isNaN(parsed) ? null : parsed;
}
/**
 * Reads a value from a <meta> element.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} selector - either a full `meta[...]` selector or just the attribute
 *   filter, which is then wrapped in `meta[...]`
 * @param {string|boolean} [attrArg='content'] - property/attribute to read
 * @param {boolean} [applyTrim=true] - normalise whitespace of the result
 * @returns {*} whatever q() yields for the resolved selector
 */
function meta(context, selector, attrArg = 'content', applyTrim = true) {
  const isFullSelector = /meta\[.*\]/.test(selector);
  const metaSelector = isFullSelector ? selector : `meta[${selector}]`;

  return q(context, metaSelector, attrArg, applyTrim);
}
/**
 * Reads a date from the first element matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {string|string[]} format - format forwarded to extractDate()
 * @param {RegExp|string} [match] - pattern forwarded to extractDate()
 * @param {string} [attr='textContent'] - property/attribute holding the date text
 * @returns {Date|null} the result of extractDate(), or null when no text is found
 */
function date(context, selector, format, match, attr = 'textContent') {
  const dateString = q(context, selector, attr, true);

  return dateString
    ? extractDate(dateString, format, match)
    : null;
}
/**
 * Converts a relative timestamp such as "3 days ago" into an absolute date.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {RegExp|string} [match] - pattern whose first capture group is the amount and
 *   whose second capture group is the period
 * @param {string} [attr='textContent'] - property/attribute holding the text
 * @returns {{date: Date, precision: string}|null} the current UTC time minus the
 *   matched amount, together with the period stripped of a trailing "s"; null when
 *   the text is missing or the pattern yields no amount and period
 */
function dateAgo(context, selector, match = /(\d+)\s*(\w+)/, attr = 'textContent') {
  // the fourth argument of q() is the applyTrim flag, not an attribute name
  const timeString = q(context, selector, attr, true);

  if (!timeString) {
    return null;
  }

  const timeMatch = timeString.match(match);

  if (!timeMatch) {
    return null;
  }

  const [amount, period] = timeMatch.slice(1);

  if (amount === undefined || period === undefined) {
    return null; // custom pattern without the two expected capture groups
  }

  const thenDate = moment.utc().subtract(Number(amount), period);

  return {
    date: thenDate.toDate(),
    precision: period.replace(/s$/, ''),
  };
}
/**
 * Resolves the URL of the first image matching the selector.
 *
 * The explicit attribute (when given) is tried first, then `data-src`, then `src`.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='img'] - CSS selector
 * @param {string} [attr] - preferred attribute holding the URL
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {string|null} the result of prefixUrl() for the first non-empty candidate
 */
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
  const candidates = attr ? [attr, 'data-src', 'src'] : ['data-src', 'src'];
  let source;

  candidates.some((candidate) => {
    source = q(context, selector, candidate);
    return source; // stop at the first attribute that yields a value
  });

  return prefixUrl(source, origin, protocol);
}
/**
 * Resolves the URLs of every image matching the selector.
 *
 * When no attribute is given, the attribute is chosen once, based on the first
 * match: `data-src` if it has a value there, otherwise `src`. The same attribute is
 * then read from all matches.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='img'] - CSS selector
 * @param {string} [attr] - attribute holding the URL
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {Array} the result of prefixUrl() for each match
 */
function images(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
  let attribute = attr;

  if (!attribute) {
    attribute = q(context, selector, 'data-src') && 'data-src';
  }

  if (!attribute) {
    attribute = q(context, selector, 'src') && 'src';
  }

  return all(context, selector, attribute)
    .map((source) => prefixUrl(source, origin, protocol));
}
/**
 * Resolves the URL of the first link matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='a'] - CSS selector
 * @param {string} [attr='href'] - property/attribute holding the URL
 * @param {{origin?: string, protocol?: string, object?: boolean}} [options] - origin
 *   and protocol are forwarded to prefixUrl(); `object` requests a URL instance
 * @returns {string|URL|null} the prefixed URL, as a URL object when requested
 */
function url(context, selector = 'a', attr = 'href', { origin, protocol = 'https', object = false } = {}) {
  const rawUrl = q(context, selector, attr);
  const prefixedUrl = prefixUrl(rawUrl, origin, protocol);

  if (!prefixedUrl || !object) {
    return prefixedUrl;
  }

  return new URL(prefixedUrl);
}
/**
 * Resolves the URLs of every link matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='a'] - CSS selector
 * @param {string} [attr='href'] - property/attribute holding the URL; when falsy the
 *   elements themselves are returned
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {Array} the prefixed URLs, or the raw all() result when attr is falsy
 */
function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'https' } = {}) {
  const found = all(context, selector, attr);

  if (!attr) {
    return found;
  }

  return found.map((rawUrl) => prefixUrl(rawUrl, origin, protocol));
}
/**
 * Parses a srcset-style attribute into URLs ordered from largest to smallest.
 *
 * Candidates are ranked by the numeric part of their descriptor (`2x`, `1.5x`,
 * `480w`), highest first. Candidates without a descriptor are labelled 'fallback'
 * and, like any descriptor without a leading number, always sort last. The
 * comparator is consistent in both argument orders, so the outcome does not depend
 * on the order in which the sort algorithm compares entries.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {string} [attr='srcset'] - property/attribute holding the source set
 * @param {{origin?: string, protocol?: string, includeDescriptor?: boolean}} [options] -
 *   origin and protocol are forwarded to prefixUrl(); includeDescriptor returns
 *   `{ descriptor, url }` objects instead of plain URLs
 * @returns {Array|null} the ordered sources, or null when the attribute is empty
 */
function sourceSet(context, selector, attr = 'srcset', options = {}) {
  const srcset = q(context, selector, attr);

  if (!srcset) {
    return null;
  }

  // parseFloat keeps fractional densities such as 1.5x apart from 1x
  const rank = (source) => Number.parseFloat(source.descriptor);

  const sources = srcset
    .split(/\s*,\s*/)
    .map((source) => {
      const [link, descriptor] = source.split(' ');

      if (!link) {
        return null;
      }

      return {
        descriptor: descriptor || 'fallback',
        url: prefixUrl(link, options.origin, options.protocol),
      };
    })
    .filter(Boolean)
    .sort((sourceA, sourceB) => {
      const rankA = rank(sourceA);
      const rankB = rank(sourceB);
      const unrankedA = Number.isNaN(rankA);
      const unrankedB = Number.isNaN(rankB);

      if (unrankedA || unrankedB) {
        // unranked entries (fallback) go last; two unranked entries keep their order
        return Number(unrankedA) - Number(unrankedB);
      }

      return rankB - rankA;
    });

  if (options.includeDescriptor) {
    return sources;
  }

  return sources.map((source) => source.url);
}
/**
 * Resolves the poster URL of the first video matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='video'] - CSS selector
 * @param {string} [attr='poster'] - property/attribute holding the URL; when falsy the
 *   raw q() result is returned
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {*} the prefixed URL, or the raw q() result when attr is falsy
 */
function poster(context, selector = 'video', attr = 'poster', { origin, protocol = 'https' } = {}) {
  const found = q(context, selector, attr);

  if (!attr) {
    return found;
  }

  return prefixUrl(found, origin, protocol);
}
/**
 * Resolves the URL of the first video source matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='source'] - CSS selector
 * @param {string} [attr='src'] - property/attribute holding the URL; when falsy the
 *   raw q() result is returned
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {*} the prefixed URL, or the raw q() result when attr is falsy
 */
function video(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
  const found = q(context, selector, attr);

  if (!attr) {
    return found;
  }

  return prefixUrl(found, origin, protocol);
}
/**
 * Resolves the URLs of every video source matching the selector.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector='source'] - CSS selector
 * @param {string} [attr='src'] - property/attribute holding the URL; when falsy the
 *   elements themselves are returned
 * @param {{origin?: string, protocol?: string}} [options] - forwarded to prefixUrl()
 * @returns {Array} the prefixed URLs, or the raw all() result when attr is falsy
 */
function videos(context, selector = 'source', attr = 'src', { origin, protocol = 'https' } = {}) {
  const found = all(context, selector, attr);

  if (!attr) {
    return found;
  }

  return found.map((source) => prefixUrl(source, origin, protocol));
}
/**
 * Reads a duration in seconds from the first element matching the selector.
 *
 * Clock-style values (`1:02:03`) are handled by durationToSeconds(). When that yields
 * nothing, the text is read as a unit-suffixed timestamp such as `1H30M15S`, `90M`
 * or a bare number of seconds. A trailing number without a unit counts as seconds;
 * a number followed by H or M is never mistaken for seconds.
 *
 * @param {Element|Document} context - node to query from
 * @param {string} [selector] - CSS selector
 * @param {RegExp|string} [match] - pattern forwarded to durationToSeconds()
 * @param {string} [attr='textContent'] - property/attribute holding the text
 * @returns {number|null} the duration in seconds, or null when nothing can be parsed
 */
function duration(context, selector, match, attr = 'textContent') {
  const durationString = q(context, selector, attr);

  if (!durationString) {
    return null;
  }

  const clockSeconds = durationToSeconds(durationString, match);

  if (clockSeconds) {
    return clockSeconds;
  }

  // hours, minutes and seconds are each optional, but at least one number is required;
  // spelling out the alternatives keeps "30M" from being consumed as a bare seconds value
  const timestampMatch = durationString.match(/\d+H\s*(\d+M)?\s*(\d+S?)?|\d+M\s*(\d+S?)?|\d+S?/i);

  if (!timestampMatch) {
    return null;
  }

  const [timestamp] = timestampMatch;
  const hours = timestamp.match(/(\d+)H/i)?.[1] || 0;
  const minutes = timestamp.match(/(\d+)M/i)?.[1] || 0;
  const seconds = timestamp.match(/(\d+)(S|$)/i)?.[1] || 0;

  return (Number(hours) * 3600) + (Number(minutes) * 60) + Number(seconds);
}
// Short, q-prefixed aliases for the query helpers above. init() wraps each of them so
// the context element is supplied automatically, and module.exports spreads them onto
// the top level of the module.
const legacyFuncs = {
q,
qa: all,
qall: all,
qd: date,
qdate: date,
qh: html,
qhtml: html,
qi: image,
qimage: image,
qimages: images,
qis: images,
ql: duration,
qlength: duration,
qm: meta,
qmeta: meta,
qp: poster,
qposter: poster,
qs: all,
qt: video,
qtext: text,
qtexts: texts,
qtrailer: video,
qtrailers: videos,
qts: videos,
qtx: text,
qtxs: texts,
qtxt: text,
qtxts: texts,
// qu: url,
// NOTE(review): the alias above is disabled, presumably because `qu` is the key under
// which init() and module.exports expose the quFuncs namespace; confirm before enabling.
qurl: url,
qurls: urls,
qus: urls,
};
// Query helpers under their full names plus abbreviations. Exposed as `qu` and `query`
// by module.exports, and as context-bound copies under the same keys by init().
const quFuncs = {
all,
cnt: content,
cnts: contents,
content,
contents,
count,
date,
dateAgo,
dur: duration,
duration,
el: q,
element: q,
exists,
html,
htmls,
image,
images,
img: image,
imgs: images,
json,
jsons,
length: duration,
meta,
num: number,
number,
poster,
q,
sourceSet,
sources: sourceSet,
srcs: sourceSet,
srcset: sourceSet,
style,
styles,
text,
texts,
trailer: video,
url,
urls,
video,
videos,
};
/**
 * Builds a query context around an element: the element itself, its HTML and text,
 * and copies of every helper in legacyFuncs and quFuncs that use the element as
 * their default context.
 *
 * A bound helper may still be given another DOM node as its first argument, in which
 * case that node is queried instead. Nodes are recognised by their nodeType, because
 * an instanceof check against this module's own window fails for elements that
 * belong to a different JSDOM window.
 *
 * @param {Element|Document} context - node to wrap, or to select from
 * @param {string} [selector] - CSS selector picking the element inside the context
 * @param {Window} [window] - window to expose on the result, together with its document
 * @returns {object|null} the query context, or null when the context or element is missing
 */
function init(context, selector, window) {
  if (!context) {
    return null;
  }

  const element = selector ? context.querySelector(selector) : context;

  if (!element) {
    return null;
  }

  const isNode = (value) => value instanceof globalWindow.HTMLElement
    || value?.nodeType !== undefined;

  // dynamically attach methods with context, allowing for a different context as first argument
  const bindContext = (funcs) => Object.fromEntries(
    Object.entries(funcs).map(([key, func]) => [
      key,
      (...args) => (isNode(args[0]) ? func(...args) : func(element, ...args)),
    ]),
  );

  const legacyContextFuncs = bindContext(legacyFuncs);
  const quContextFuncs = bindContext(quFuncs);

  return {
    element,
    el: element,
    // a document has no outerHTML of its own, so fall back to its body
    html: element.outerHTML || element.body.outerHTML,
    text: trim(element.textContent),
    ...(window && {
      window,
      document: window.document,
    }),
    ...legacyContextFuncs,
    qu: quContextFuncs,
    query: quContextFuncs,
  };
}
/**
 * Builds a query context for each element of a list.
 *
 * @param {Array|Element|Document} context - either an array of elements, or a node
 *   whose descendants matching the selector are used
 * @param {string} [selector] - CSS selector; ignored when context is an array
 * @param {Window} [window] - forwarded to init()
 * @returns {Array} the result of init() for every element
 */
function initAll(context, selector, window) {
  const elements = Array.isArray(context)
    ? context
    : Array.from(context.querySelectorAll(selector));

  return elements.map((element) => init(element, null, window));
}
/**
 * Parses an HTML string with JSDOM and builds a query context on the result.
 *
 * @param {string} htmlValue - HTML to parse
 * @param {string} [selector] - CSS selector picking the element to wrap
 * @param {object} [options] - extra JSDOM options, merged over the module's virtual console
 * @returns {object|null} the result of init() for the parsed document
 */
function extract(htmlValue, selector, options) {
  const dom = new JSDOM(htmlValue, { virtualConsole, ...options });

  return init(dom.window.document, selector, dom.window);
}
/**
 * Parses an HTML string with JSDOM and builds a query context for every element
 * matching the selector.
 *
 * @param {string} htmlValue - HTML to parse
 * @param {string} selector - CSS selector picking the elements to wrap
 * @param {object} [options] - extra JSDOM options, merged over the module's virtual console
 * @returns {Array} the result of initAll() for the parsed document
 */
function extractAll(htmlValue, selector, options) {
  const dom = new JSDOM(htmlValue, { virtualConsole, ...options });

  return initAll(dom.window.document, selector, dom.window);
}
/**
 * Performs an HTTP request through the http module and wraps the returned document
 * in one or more query contexts.
 *
 * The response is spread into the result last, so any response property with the
 * same name as a key set here takes precedence.
 *
 * @param {string} [method='get'] - name of the http method to call
 * @param {string} urlValue - URL to request
 * @param {*} body - request body, only sent for 'post'
 * @param {string} [selector] - CSS selector picking the element(s) to wrap
 * @param {object} [headers] - request headers
 * @param {object} [options] - extra options for the http module
 * @param {boolean} [queryAll=false] - wrap every match (initAll) instead of the first (init)
 * @returns {Promise<object>} `item` holds the init()/initAll() result and `items` is
 *   always a list: the contexts themselves for queryAll, otherwise the single item
 *   wrapped in an array. On a failed response `item` is null and `items` is empty.
 */
async function request(method = 'get', urlValue, body, selector, headers, options, queryAll = false) {
  const res = await (method === 'post'
    ? http.post(urlValue, body, { ...options, headers })
    : http[method](urlValue, {
      ...options,
      headers,
      parse: true,
    }));

  if (!res.ok) {
    return {
      item: null,
      items: [],
      res,
      ok: false,
      status: res.statusCode,
      ...res,
    };
  }

  const item = queryAll
    ? initAll(res.document, selector, res.window)
    : init(res.document, selector, res.window);

  return {
    item,
    // only a single-item query needs wrapping; initAll() already returns a list
    items: queryAll ? item : [item],
    res,
    ok: true,
    status: res.statusCode,
    ...res,
  };
}
/**
 * GETs a page and wraps the first element matching the selector.
 *
 * @param {string} urlValue - URL to request
 * @param {string} [selector] - CSS selector
 * @param {object} [headers] - request headers
 * @param {object} [options] - extra options for the http module
 * @returns {Promise<object>} the result of request()
 */
async function get(urlValue, selector, headers, options) {
  const body = null;
  const queryAll = false;

  return request('get', urlValue, body, selector, headers, options, queryAll);
}
/**
 * POSTs a body and wraps the first element of the response matching the selector.
 *
 * @param {string} urlValue - URL to request
 * @param {*} body - request body
 * @param {string} [selector] - CSS selector
 * @param {object} [headers] - request headers
 * @param {object} [options] - extra options for the http module
 * @returns {Promise<object>} the result of request()
 */
async function post(urlValue, body, selector, headers, options) {
  const queryAll = false;

  return request('post', urlValue, body, selector, headers, options, queryAll);
}
/**
 * GETs a page and wraps every element matching the selector.
 *
 * @param {string} urlValue - URL to request
 * @param {string} selector - CSS selector
 * @param {object} [headers] - request headers
 * @param {object} [options] - extra options for the http module
 * @returns {Promise<object>} the result of request()
 */
async function getAll(urlValue, selector, headers, options) {
  const body = null;
  const queryAll = true;

  return request('get', urlValue, body, selector, headers, options, queryAll);
}
/**
 * POSTs a body and wraps every element of the response matching the selector.
 *
 * @param {string} urlValue - URL to request
 * @param {*} body - request body
 * @param {string} selector - CSS selector
 * @param {object} [headers] - request headers
 * @param {object} [options] - extra options for the http module
 * @returns {Promise<object>} the result of request()
 */
async function postAll(urlValue, body, selector, headers, options) {
  const queryAll = true;

  return request('post', urlValue, body, selector, headers, options, queryAll);
}
/**
 * Creates a session through the http module.
 *
 * @param {object} [headers] - forwarded to http.session()
 * @param {object} [options] - forwarded to http.session()
 * @returns {*} whatever http.session() returns
 */
function session(headers, options) {
  const httpSession = http.session(headers, options);

  return httpSession;
}
// Public interface: parsing and request helpers under their full names and short
// aliases, the quFuncs namespace as `qu`/`query`, and every legacy alias at the top level.
module.exports = {
extractDate,
extract,
extractAll,
durationToSeconds,
init,
initAll,
formatDate,
get,
getAll,
http,
// aliases of the functions above
fetch: get,
fetchAll: getAll,
context: init,
contextAll: initAll,
ed: extractDate,
ex: extract,
exa: extractAll,
fd: formatDate,
parseDate: extractDate,
ctx: init,
ctxa: initAll,
geta: getAll,
qu: quFuncs,
query: quFuncs,
post,
postAll,
prefixUrl,
session,
// spread last, so a legacy alias sharing a name with a key above would replace it
...legacyFuncs,
};