'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const http = require('./http'); function trim(str) { if (!str) return null; return str.trim().replace(/\s+/g, ' '); } function extractDate(dateString, format, match) { if (match) { const dateStamp = trim(dateString).match(match); if (dateStamp) { const dateValue = moment.utc(dateStamp[0], format); return dateValue.isValid() ? dateValue.toDate() : null; } return null; } const dateValue = moment.utc(trim(dateString), format); return dateValue.isValid() ? dateValue.toDate() : null; } function formatDate(dateValue, format, inputFormat) { if (inputFormat) { return moment(dateValue, inputFormat).format(format); } return moment(dateValue).format(format); } function prefixProtocol(urlValue, protocol = 'https') { if (protocol && /^\/\//.test(urlValue)) { return `${protocol}:${urlValue}`; } return urlValue; } function q(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { const value = selector ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value : context[attr] || context[attr]?.attributes[attr]?.value; return applyTrim && value ? trim(value) : value; } return selector ? context.querySelector(selector) : context; } function all(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; if (attr) { return Array.from(context.querySelectorAll(selector), el => (applyTrim && el[attr] ? trim(el[attr]) : el[attr])); } return Array.from(context.querySelectorAll(selector)); } function exists(context, selector) { return !!q(context, selector); } function html(context, selector) { const el = q(context, selector, null, true); return el && el.innerHTML; } function texts(context, selector, applyTrim = true, filter = true) { const el = q(context, selector, null, applyTrim); if (!el) return null; const nodes = Array.from(el.childNodes) .filter(node => node.nodeName === '#text') .map(node => (applyTrim ? trim(node.textContent) : node.textContent)); return filter ? nodes.filter(Boolean) : nodes; } function text(context, selector, applyTrim = true) { const nodes = texts(context, selector, applyTrim, true); if (!nodes) return null; const textValue = nodes.join(' '); return applyTrim ? trim(textValue) : textValue; } function meta(context, selector, attrArg = 'content', applyTrim = true) { if (/meta\[.*\]/.test(selector)) { return q(context, selector, attrArg, applyTrim); } return q(context, `meta[${selector}]`, attrArg, applyTrim); } function date(context, selector, format, match, attr = 'textContent') { const dateString = q(context, selector, attr, true); if (!dateString) return null; return extractDate(dateString, format, match); } function image(context, selector = 'img', attr = 'src', protocol = 'https') { const imageEl = q(context, selector, attr); // no attribute means q output will be HTML element return attr ? prefixProtocol(imageEl, protocol) : imageEl; } function images(context, selector = 'img', attr = 'src', protocol = 'https') { const imageEls = all(context, selector, attr); return attr ? imageEls.map(imageEl => prefixProtocol(imageEl, protocol)) : imageEls; } function url(context, selector = 'a', attr = 'href', protocol = 'https') { const urlEl = q(context, selector, attr); return attr ? prefixProtocol(urlEl, protocol) : urlEl; } function urls(context, selector = 'a', attr = 'href', protocol = 'https') { const urlEls = all(context, selector, attr); return attr ? urlEls.map(urlEl => prefixProtocol(urlEl, protocol)) : urlEls; } function poster(context, selector = 'video', attr = 'poster', protocol = 'https') { const posterEl = q(context, selector, attr); return attr ? prefixProtocol(posterEl, protocol) : posterEl; } function video(context, selector = 'source', attr = 'src', protocol = 'https') { const trailerEl = q(context, selector, attr); return attr ? prefixProtocol(trailerEl, protocol) : trailerEl; } function videos(context, selector = 'source', attr = 'src', protocol = 'https') { const trailerEls = all(context, selector, attr); return attr ? trailerEls.map(trailerEl => prefixProtocol(trailerEl, protocol)) : trailerEls; } function duration(context, selector, match, attr = 'textContent') { const durationString = q(context, selector, attr); if (!durationString) return null; const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/); if (durationMatch) { const segments = ['00'].concat(durationMatch[0].split(':')).slice(-3); return moment.duration(segments.join(':')).asSeconds(); } return null; } const legacyFuncs = { q, qa: all, qall: all, qd: date, qdate: date, qh: html, qhtml: html, qi: image, qimage: image, qimages: images, qis: images, ql: duration, qlength: duration, qm: meta, qmeta: meta, qp: poster, qposter: poster, qs: all, qt: video, qtext: text, qtexts: texts, qtrailer: video, qtrailers: videos, qts: videos, qtx: text, qtxs: texts, qtxt: text, qtxts: texts, // qu: url, qurl: url, qurls: urls, qus: urls, }; const quFuncs = { all, html, date, dur: duration, duration, exists, image, images, img: image, imgs: images, length: duration, meta, poster, q, text, texts, trailer: video, url, urls, video, videos, }; function init(element, window) { if (!element) return null; const legacyContextFuncs = Object.entries(legacyFuncs) // dynamically attach methods with context .reduce((acc, [key, func]) => ({ ...acc, [key]: (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context ? func(...args) : func(element, ...args)), }), {}); const quContextFuncs = Object.entries(quFuncs) // dynamically attach methods with context .reduce((acc, [key, func]) => ({ ...acc, [key]: (...args) => (window && args[0] instanceof window.HTMLElement // allow for different context ? func(...args) : func(element, ...args)), }), {}); return { element, el: element, html: element.outerHTML || element.body.outerHTML, text: trim(element.textContent), ...(window && { window, document: window.document, }), ...legacyContextFuncs, qu: quContextFuncs, }; } function initAll(context, selector, window) { if (Array.isArray(context)) { return context.map(element => init(element, window)); } return Array.from(context.querySelectorAll(selector)) .map(element => init(element, window)); } function extract(htmlValue, selector) { const { window } = new JSDOM(htmlValue); if (selector) { return init(window.document.querySelector(selector), window); } return init(window.document, window); } function extractAll(htmlValue, selector) { const { window } = new JSDOM(htmlValue); return initAll(window.document, selector, window); } async function get(urlValue, selector, headers, options, queryAll = false) { const res = await http.get(urlValue, headers); if (res.statusCode === 200) { const item = queryAll ? extractAll(res.body.toString(), selector) : extract(res.body.toString(), selector); return { item, items: all ? item : [item], res, ok: true, status: res.statusCode, }; } return { item: null, items: [], res, ok: false, status: res.statusCode, }; } async function getAll(urlValue, selector, headers, options) { return get(urlValue, selector, headers, options, true); } module.exports = { extractDate, extract, extractAll, init, initAll, formatDate, get, getAll, context: init, contextAll: initAll, ed: extractDate, ex: extract, exa: extractAll, fd: formatDate, parseDate: extractDate, ctx: init, ctxa: initAll, geta: getAll, qu: quFuncs, ...legacyFuncs, };