'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const http = require('./http'); function trim(str) { if (!str) return null; return str.trim().replace(/\s+/g, ' '); } function extractDate(dateString, format, match) { if (match) { const dateStamp = trim(dateString).match(match); if (dateStamp) { const date = moment.utc(dateStamp[0], format); return date.isValid() ? date.toDate() : null; } return null; } const date = moment.utc(trim(dateString), format); return date.isValid() ? date.toDate() : null; } function formatDate(date, format, inputFormat) { if (inputFormat) return moment(date, inputFormat).format(format); return moment(date).format(format); } function prefixProtocol(url, protocol = 'https') { if (protocol && /^\/\//.test(url)) { return `${protocol}:${url}`; } return url; } function q(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { const value = selector ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value : context[attr] || context[attr]?.attributes[attr]?.value; return applyTrim && value ? trim(value) : value; } return selector ? context.querySelector(selector) : context; } function qall(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { return Array.from(context.querySelectorAll(selector), el => (applyTrim && el[attr] ? trim(el[attr]) : el[attr])); } return Array.from(context.querySelectorAll(selector)); } function qhtml(context, selector) { const el = q(context, selector, null, true); return el && el.innerHTML; } function qtexts(context, selector, applyTrim = true, filter = true) { const el = q(context, selector, null, applyTrim); if (!el) return null; const nodes = Array.from(el.childNodes) .filter(node => node.nodeName === '#text') .map(node => (applyTrim ? trim(node.textContent) : node.textContent)); return filter ? nodes.filter(Boolean) : nodes; } function qtext(context, selector, applyTrim = true) { const nodes = qtexts(context, selector, applyTrim, true); if (!nodes) return null; const text = nodes.join(' '); return applyTrim ? trim(text) : text; } function qmeta(context, selector, attrArg = 'content', applyTrim = true) { if (/meta\[.*\]/.test(selector)) { return q(context, selector, attrArg, applyTrim); } return q(context, `meta[${selector}]`, attrArg, applyTrim); } function qdate(context, selector, format, match, attr = 'textContent') { const dateString = q(context, selector, attr, true); if (!dateString) return null; return extractDate(dateString, format, match); } function qimage(context, selector = 'img', attr = 'src', protocol = 'https') { const image = q(context, selector, attr); // no attribute means q output will be HTML element return attr ? prefixProtocol(image, protocol) : image; } function qimages(context, selector = 'img', attr = 'src', protocol = 'https') { const images = qall(context, selector, attr); return attr ? images.map(image => prefixProtocol(image, protocol)) : images; } function qurl(context, selector = 'a', attr = 'href', protocol = 'https') { const url = q(context, selector, attr); return attr ? prefixProtocol(url, protocol) : url; } function qurls(context, selector = 'a', attr = 'href', protocol = 'https') { const urls = qall(context, selector, attr); return attr ? urls.map(url => prefixProtocol(url, protocol)) : urls; } function qposter(context, selector = 'video', attr = 'poster', protocol = 'https') { const poster = q(context, selector, attr); return attr ? prefixProtocol(poster, protocol) : poster; } function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https') { const trailer = q(context, selector, attr); return attr ? prefixProtocol(trailer, protocol) : trailer; } function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') { const trailers = qall(context, selector, attr); return attr ? trailers.map(trailer => prefixProtocol(trailer, protocol)) : trailers; } function qlength(context, selector, match, attr = 'textContent') { const durationString = q(context, selector, attr); if (!durationString) return null; const duration = durationString.match(match || /(\d+:)?\d+:\d+/); if (duration) { const segments = ['00'].concat(duration[0].split(':')).slice(-3); return moment.duration(segments.join(':')).asSeconds(); } return null; } const funcs = { q, qa: qall, qall, qd: qdate, qdate, qh: qhtml, qhtml, qi: qimage, qimage, qimages, qis: qimages, ql: qlength, qlength, qm: qmeta, qmeta, qp: qposter, qposter, qs: qall, qt: qtrailer, qtext, qtexts, qtrailer, qtrailers, qts: qtrailers, qtx: qtext, qtxs: qtexts, qtxt: qtext, qtxts: qtexts, qu: qurl, qurl, qurls, qus: qurls, }; function init(element, window) { if (!element) return null; const contextFuncs = Object.entries(funcs) // dynamically attach methods with context .reduce((acc, [key, func]) => ({ ...acc, [key]: (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context ? func(...args) : func(element, ...args)), }), {}); return { element, el: element, html: element.outerHTML || element.body.outerHTML, text: trim(element.textContent), ...(window && { window, document: window.document, }), ...contextFuncs, }; } function initAll(context, selector, window) { if (Array.isArray(context)) { return context.map(element => init(element, window)); } return Array.from(context.querySelectorAll(selector)) .map(element => init(element, window)); } function extract(html, selector) { const { window } = new JSDOM(html); if (selector) { return init(window.document.querySelector(selector), window); } return init(window.document, window); } function extractAll(html, selector) { const { window } = new JSDOM(html); return initAll(window.document, selector, window); } async function get(url, selector, headers, all = false) { const res = await http.get(url, { headers, }); if (res.statusCode === 200) { const item = all ? extractAll(res.body.toString(), selector) : extract(res.body.toString(), selector); return { item, items: all ? item : [item], res, ok: true, status: res.statusCode, }; } return { item: null, items: [], res, ok: false, status: res.statusCode, }; } async function getAll(url, selector, headers) { return get(url, selector, headers, true); } module.exports = { extractDate, extract, extractAll, init, initAll, formatDate, get, getAll, context: init, contextAll: initAll, ed: extractDate, ex: extract, exa: extractAll, fd: formatDate, ctx: init, ctxa: initAll, geta: getAll, edate: extractDate, fdate: formatDate, ...funcs, };