'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const bhttp = require('bhttp'); function prefixProtocol(url, protocol = 'https') { if (protocol && /^\/\//.test(url)) { return `${protocol}:${url}`; } return url; } function q(context, selector, attrArg, trim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { const value = selector ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value : context[attr] || context[attr]?.attributes[attr]?.value; return trim ? value?.trim() : value; } return selector ? context.querySelector(selector) : context; } function qall(context, selector, attrArg, trim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { return Array.from(context.querySelectorAll(selector), el => (trim ? el[attr]?.trim() : el[attr])); } return Array.from(context.querySelectorAll(selector)); } function qtext(context, selector, trim = true) { const el = q(context, selector, null, trim); if (!el) return null; const text = Array.from(el.childNodes) .filter(node => node.nodeName === '#text') .map(node => (trim ? node.textContent : node.textContent.trim())) .join(' '); if (trim) return text.trim(); return text; } function qmeta(context, selector, attrArg = 'content', trim = true) { if (/meta\[.*\]/.test(selector)) { return q(context, selector, attrArg, trim); } return q(context, `meta[${selector}]`, attrArg, trim); } function date(dateString, format, match) { if (match) { const dateStamp = dateString.trim().match(match); if (dateStamp) return moment.utc(dateStamp[0], format).toDate(); return null; } return moment.utc(dateString.trim(), format).toDate(); } function qdate(context, selector, format, match, attr = 'textContent') { const dateString = q(context, selector, attr, true); if (!dateString) return null; return date(dateString, format, match); } function qimage(context, selector = 'img', attr = 'src', protocol = 'https') { const image = q(context, selector, attr); // no attribute means q output will be HTML element return attr ? prefixProtocol(image, protocol) : image; } function qimages(context, selector = 'img', attr = 'src', protocol = 'https') { const images = qall(context, selector, attr); return attr ? images.map(image => prefixProtocol(image, protocol)) : images; } function qurl(context, selector = 'a', attr = 'href', protocol = 'https') { const url = q(context, selector, attr); return attr ? prefixProtocol(url, protocol) : url; } function qurls(context, selector = 'a', attr = 'href', protocol = 'https') { const urls = qall(context, selector, attr); return attr ? urls.map(url => prefixProtocol(url, protocol)) : urls; } function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') { const poster = q(context, selector, attr); return attr ? prefixProtocol(poster, protocol) : poster; } function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') { const trailer = q(context, selector, attr); return attr ? prefixProtocol(trailer, protocol) : trailer; } function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') { const trailers = qall(context, selector, attr); return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers; } function qlength(context, selector, match, attr = 'textContent') { const durationString = q(context, selector, attr); if (!durationString) return null; const duration = durationString.match(match || /(\d+:)?\d+:\d+/); if (duration) { const segments = ['00'].concat(duration[0].split(':')).slice(-3); return moment.duration(segments.join(':')).asSeconds(); } return null; } const funcs = { q, qall, qdate, qimage, qimages, qposter, qlength, qmeta, qtext, qtrailer, qtrailers, qurl, qurls, qa: qall, qd: qdate, qi: qimage, qis: qimages, qp: qposter, ql: qlength, qm: qmeta, qt: qtrailer, qts: qtrailers, qtx: qtext, qu: qurl, qus: qurls, }; function init(element, window) { if (!element) return null; const contextFuncs = Object.entries(funcs) // dynamically attach methods with context .reduce((acc, [key, func]) => ({ ...acc, [key]: (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context ? func(...args) : func(element, ...args)), }), {}); return { element, html: element.outerHTML || element.body.outerHTML, ...(window && { window, document: window.document, }), ...contextFuncs, }; } function initAll(context, selector, window) { return Array.from(context.querySelectorAll(selector)) .map(element => init(element, window)); } function extract(html, selector) { const { window } = new JSDOM(html); if (selector) { return init(window.document.querySelector(selector), window); } return init(window.document, window); } function extractAll(html, selector) { const { window } = new JSDOM(html); return initAll(window.document, selector, window); } async function get(url, selector, headers, all = false) { const res = await bhttp.get(url, { headers, }); if (res.statusCode === 200) { return all ? extractAll(res.body.toString(), selector) : extract(res.body.toString(), selector); } return null; } async function getAll(url, selector, headers) { return get(url, selector, headers, true); } module.exports = { date, extract, extractAll, init, initAll, get, getAll, context: init, contextAll: initAll, d: date, ex: extract, exa: extractAll, ctx: init, ctxa: initAll, geta: getAll, ...funcs, };