From dc187a9a3a97d5f1e4fdda58810828757031a12a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 19 Dec 2021 23:03:44 +0100 Subject: [PATCH] Added execute method to qu, removed runScripts from Gamma's fetchMovie to observe effect on memory usage. --- src/scrapers/gamma.js | 10 ++--- src/utils/qu.js | 93 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index da338d6d..9ddc1abb 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -463,7 +463,9 @@ async function fetchMovieTrailer(release) { async function scrapeMovie({ query, el }, window, url, entity, options) { const release = {}; - const rawData = window.dataLayer[0]?.dvdDetails; + + const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']); + const rawData = dataLayer?.[0]?.dvdDetails; const data = rawData.dvdId && rawData; // dvdDetails is mostly empty in some cache states release.entryId = new URL(url).pathname.match(/\/(\d+)(\/|$)/)?.[1]; @@ -750,11 +752,7 @@ async function fetchScene(url, site, baseRelease, options) { } async function fetchMovie(url, channel, baseRelease, options) { - const res = await qu.get(url, null, null, { - extract: { - runScripts: 'dangerously', - }, - }); + const res = await qu.get(url, null, null); if (res.ok) { return scrapeMovie(res.item, res.window, url, channel, options); diff --git a/src/utils/qu.js b/src/utils/qu.js index 0c7b54fe..1989c650 100644 --- a/src/utils/qu.js +++ b/src/utils/qu.js @@ -84,32 +84,69 @@ function prefixUrl(urlValue, origin, protocol = 'https') { return urlValue; } +function iterateXPathResult(iterator, results = []) { + const element = iterator.iterateNext(); + + if (element) { + return iterateXPathResult(iterator, [...results, element]); + } + + return results; +} + +function getElements(context, selector, first = false) { + if (!selector) { + return context; + } + + if (/^\/\//.test(selector)) { + // XPath selector + const iterator = globalWindow.document.evaluate(selector, context, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null); + + if (first) { + return iterator.iterateNext(); + } + + return iterateXPathResult(iterator); + } + + if (first) { + return context.querySelector(selector); + } + + return Array.from(context.querySelectorAll(selector)); +} + function q(context, selector, attrArg, applyTrim = true) { if (!selector && context.nodeName === '#document') { return null; } const attr = attrArg === true ? 'textContent' : attrArg; + const element = getElements(context, selector, true); + + if (!element) { + return null; + } if (attr) { - const value = selector - ? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value - : context[attr] || context.getAttribute(attr); + const value = element[attr] || element.getAttribute(attr); return applyTrim && typeof value === 'string' ? trim(value) : value; } - return selector ? context.querySelector(selector) : context; + return element; } function all(context, selector, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; + const elements = getElements(context, selector); if (attr) { - return Array.from(context.querySelectorAll(selector), (el) => q(el, null, attr, applyTrim)); + return elements.map((el) => q(el, null, attr, applyTrim)); } - return Array.from(context.querySelectorAll(selector)); + return elements; } function exists(context, selector) { @@ -134,6 +171,42 @@ function html(context, selector) { return el && el.innerHTML; } +function htmls(context, selector) { + const els = all(context, selector, null, true); + + return els.map((el) => el.innerHTML); +} + +function execute(context, selector = 'script') { + const scripts = htmls(context, selector); + const originalGlobal = Object.fromEntries(Object.entries(global)); + + const errors = scripts?.reduce((accErrors, script) => { + try { + Function(script)(); /* eslint-disable-line no-new-func */ + + return accErrors; + } catch (error) { + // the script failed + return [...accErrors, error]; + } + }, []); + + const data = Object.fromEntries(Object.entries(global).filter(([key, value]) => { + if (originalGlobal[key] !== value) { + delete global[key]; + return true; + } + + return false; + })); + + return { + ...data, + errors, + }; +} + function json(context, selector) { const el = q(context, selector, null, true); @@ -156,12 +229,6 @@ function jsons(context, selector) { }); } -function htmls(context, selector) { - const els = all(context, selector, null, true); - - return els.map((el) => el.innerHTML); -} - function texts(context, selector, applyTrim = true, filter = true) { const el = q(context, selector, null, applyTrim); if (!el) return null; @@ -429,6 +496,8 @@ const quFuncs = { duration, el: q, element: q, + execute, + exec: execute, exists, html, htmls,