Added execute method to qu, removed runScripts from Gamma's fetchMovie to observe effect on memory usage.
This commit is contained in:
parent
5f89c6e14c
commit
dc187a9a3a
|
@ -463,7 +463,9 @@ async function fetchMovieTrailer(release) {
|
|||
|
||||
async function scrapeMovie({ query, el }, window, url, entity, options) {
|
||||
const release = {};
|
||||
const rawData = window.dataLayer[0]?.dvdDetails;
|
||||
|
||||
const { dataLayer } = query.exec('//script[contains(text(), "dataLayer")]', ['dataLayer']);
|
||||
const rawData = dataLayer?.[0]?.dvdDetails;
|
||||
const data = rawData.dvdId && rawData; // dvdDetails is mostly empty in some cache states
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/(\d+)(\/|$)/)?.[1];
|
||||
|
@ -750,11 +752,7 @@ async function fetchScene(url, site, baseRelease, options) {
|
|||
}
|
||||
|
||||
async function fetchMovie(url, channel, baseRelease, options) {
|
||||
const res = await qu.get(url, null, null, {
|
||||
extract: {
|
||||
runScripts: 'dangerously',
|
||||
},
|
||||
});
|
||||
const res = await qu.get(url, null, null);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeMovie(res.item, res.window, url, channel, options);
|
||||
|
|
|
@ -84,32 +84,69 @@ function prefixUrl(urlValue, origin, protocol = 'https') {
|
|||
return urlValue;
|
||||
}
|
||||
|
||||
/**
 * Drain an XPath-style node iterator into an array.
 *
 * @param {{ iterateNext: Function }} iterator - Object whose iterateNext()
 *   returns the next node, or a falsy value once exhausted.
 * @param {Array} [results=[]] - Nodes collected so far; copied, never mutated.
 * @returns {Array} A new array holding `results` followed by every node the
 *   iterator still had to offer, in iteration order.
 */
function iterateXPathResult(iterator, results = []) {
	const collected = [...results];

	// A plain loop instead of recursion: one stack frame and one array
	// regardless of how many nodes the expression matched.
	for (let element = iterator.iterateNext(); element; element = iterator.iterateNext()) {
		collected.push(element);
	}

	return collected;
}
|
||||
|
||||
/**
 * Resolve a CSS or XPath selector against a context node.
 *
 * A selector is treated as XPath when it starts with `/`, `./`, `../` or `(`;
 * none of these can begin a valid CSS selector. Everything else goes through
 * querySelector / querySelectorAll on the context.
 *
 * @param {object} context - Node (or document) to search in.
 * @param {string} [selector] - CSS selector or XPath expression. When falsy,
 *   `context` itself is returned unchanged.
 * @param {boolean} [first=false] - Return only the first match instead of all.
 * @returns {*} `context` when no selector is given; otherwise the first match
 *   (or null) when `first` is set, or an array of all matches.
 */
function getElements(context, selector, first = false) {
	if (!selector) {
		return context;
	}

	// Absolute (`/`, `//`), relative (`./`, `../`) and parenthesised XPath.
	if (/^(?:\.{0,2}\/|\()/.test(selector)) {
		// globalWindow is defined outside this block; it supplies the document
		// used for evaluation and the XPathResult constants.
		const iterator = globalWindow.document.evaluate(selector, context, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);

		return first
			? iterator.iterateNext()
			: iterateXPathResult(iterator);
	}

	return first
		? context.querySelector(selector)
		: Array.from(context.querySelectorAll(selector));
}
|
||||
|
||||
/**
 * Query a single element, or one value read from it.
 *
 * @param {object} context - Node to search in.
 * @param {string} [selector] - CSS or XPath selector, resolved by getElements.
 *   When omitted, `context` itself is the element, unless it is a document
 *   node, in which case null is returned.
 * @param {string|boolean} [attrArg] - Property or attribute name to read;
 *   `true` is shorthand for 'textContent'. When falsy the element is returned.
 * @param {boolean} [applyTrim=true] - Pass string values through trim().
 * @returns {*} The matched element, the requested value, or null when nothing
 *   matched.
 */
function q(context, selector, attrArg, applyTrim = true) {
	if (!selector && context.nodeName === '#document') {
		return null;
	}

	const attr = attrArg === true ? 'textContent' : attrArg;
	const element = getElements(context, selector, true);

	if (!element) {
		return null;
	}

	if (!attr) {
		return element;
	}

	// Prefer the DOM property and fall back to the raw attribute. XPath can
	// match nodes without getAttribute (e.g. text nodes), hence the optional call.
	const value = element[attr] || element.getAttribute?.(attr);

	return applyTrim && typeof value === 'string' ? trim(value) : value;
}
|
||||
|
||||
/**
 * Query every element matching a selector, or one value from each of them.
 *
 * @param {object} context - Node to search in.
 * @param {string} selector - CSS or XPath selector, resolved by getElements.
 * @param {string|boolean} [attrArg] - Property or attribute name to read from
 *   each match; `true` is shorthand for 'textContent'. When falsy the matched
 *   elements themselves are returned.
 * @param {boolean} [applyTrim=true] - Forwarded to q() for each value.
 * @returns {Array} Matched elements, or the value q() yields for each of them.
 */
function all(context, selector, attrArg, applyTrim = true) {
	const attr = attrArg === true ? 'textContent' : attrArg;
	const elements = getElements(context, selector);

	if (!attr) {
		return elements;
	}

	// Each match becomes its own context, so q() reads the value off it directly.
	return elements.map((el) => q(el, null, attr, applyTrim));
}
|
||||
|
||||
function exists(context, selector) {
|
||||
|
@ -134,6 +171,42 @@ function html(context, selector) {
|
|||
return el && el.innerHTML;
|
||||
}
|
||||
|
||||
/**
 * Collect the inner HTML of every element matching a selector.
 *
 * @param {object} context - Node to search in.
 * @param {string} selector - Selector handed to all().
 * @returns {Array} The innerHTML of each match, in match order.
 */
function htmls(context, selector) {
	return all(context, selector, null, true).map((el) => el.innerHTML);
}
|
||||
|
||||
/**
 * Run the scripts matched by a selector and report the globals they changed.
 *
 * The enumerable own properties of `global` are snapshotted, every script is
 * run, and any property that is new or holds a different value afterwards is
 * collected. `global` is then put back the way it was found: new properties
 * are removed, overwritten ones get their previous value back, and properties
 * a script deleted are re-created.
 *
 * SECURITY: the script text is executed with full access to this process's
 * global scope. Only use it on markup from sources you are prepared to trust.
 *
 * @param {object} context - Node to search in.
 * @param {string} [selector='script'] - Selector handed to htmls().
 * @returns {object} The globals the scripts set or changed, plus `errors`: an
 *   array of whatever the failing scripts threw (empty when all ran cleanly).
 *   A script global named `errors` is shadowed by that array.
 */
function execute(context, selector = 'script') {
	const scripts = htmls(context, selector) || [];
	const snapshot = new Map(Object.entries(global));
	const errors = [];

	scripts.forEach((script) => {
		try {
			Function(script)(); /* eslint-disable-line no-new-func */
		} catch (error) {
			// the script failed; keep going so one bad script does not hide the rest
			errors.push(error);
		}
	});

	const data = {};

	Object.entries(global).forEach(([key, value]) => {
		const existed = snapshot.has(key);

		// Object.is keeps a pre-existing NaN from being reported as a change.
		if (existed && Object.is(snapshot.get(key), value)) {
			return;
		}

		data[key] = value;

		// Reflect.* reports failure by return value instead of throwing, so a
		// property that cannot be reset does not abort the clean-up.
		if (existed) {
			Reflect.set(global, key, snapshot.get(key));
		} else {
			Reflect.deleteProperty(global, key);
		}
	});

	// Re-create anything a script removed from the global object.
	snapshot.forEach((value, key) => {
		if (!Object.prototype.hasOwnProperty.call(global, key)) {
			Reflect.set(global, key, value);
		}
	});

	return {
		...data,
		errors,
	};
}
|
||||
|
||||
function json(context, selector) {
|
||||
const el = q(context, selector, null, true);
|
||||
|
||||
|
@ -156,12 +229,6 @@ function jsons(context, selector) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Collect the inner HTML of every element matching a selector.
 *
 * @param {object} context - Node to search in.
 * @param {string} selector - Selector handed to all().
 * @returns {Array} The innerHTML of each match, in match order.
 */
function htmls(context, selector) {
	return all(context, selector, null, true).map((el) => el.innerHTML);
}
|
||||
|
||||
function texts(context, selector, applyTrim = true, filter = true) {
|
||||
const el = q(context, selector, null, applyTrim);
|
||||
if (!el) return null;
|
||||
|
@ -429,6 +496,8 @@ const quFuncs = {
|
|||
duration,
|
||||
el: q,
|
||||
element: q,
|
||||
execute,
|
||||
exec: execute,
|
||||
exists,
|
||||
html,
|
||||
htmls,
|
||||
|
|
Loading…
Reference in New Issue