Added Fame Digital. Added actor release scraping to DDF Network. Improved q and Gamma scraper.

This commit is contained in:
2020-02-06 23:15:28 +01:00
parent db14eaa5f9
commit 6e1de52a40
42 changed files with 752 additions and 168 deletions

View File

@@ -17,8 +17,8 @@ function q(context, selector, attrArg, trim = true) {
if (attr) {
const value = selector
? context.querySelector(selector)?.[attr]
: context[attr];
? context.querySelector(selector)?.[attr] || context.querySelector(selector)?.attributes[attr]?.value
: context[attr] || context[attr]?.attributes[attr]?.value;
return trim ? value?.trim() : value;
}
@@ -50,16 +50,14 @@ function qtext(context, selector, trim = true) {
}
/**
 * Query an attribute of a meta element through q.
 *
 * The selector may be given in full (containing `meta[...]`) or as a bare
 * attribute condition such as `name="description"`, which is wrapped into
 * `meta[name="description"]` before querying.
 *
 * @param context - passed through to q unchanged as the query root
 * @param {string} selector - full meta selector or bare attribute condition
 * @param {string} [attrArg='content'] - attribute to read, passed through to q
 * @param {boolean} [trim=true] - trim flag, passed through to q
 * @returns whatever q returns for the resolved selector
 */
function qmeta(context, selector, attrArg = 'content', trim = true) {
  // A selector that already targets a meta element is used untouched.
  if (/meta\[.*\]/.test(selector)) {
    return q(context, selector, attrArg, trim);
  }

  // Anything else is treated as the attribute condition of a meta element.
  // (Previously an unconditional early return made this branch unreachable.)
  return q(context, `meta[${selector}]`, attrArg, trim);
}
function qdate(context, selector, format, match, attr = 'textContent') {
const dateString = selector
? context.querySelector(selector)?.[attr]
: context[attr];
if (!dateString) return null;
function date(dateString, format, match) {
if (match) {
const dateStamp = dateString.trim().match(match);
@@ -70,6 +68,14 @@ function qdate(context, selector, format, match, attr = 'textContent') {
return moment.utc(dateString.trim(), format).toDate();
}
/**
 * Query a date string through q and convert it with date.
 *
 * @param context - passed through to q as the query root
 * @param selector - passed through to q
 * @param format - passed through to date
 * @param match - passed through to date
 * @param {string} [attr='textContent'] - attribute or property q should read
 * @returns null when q yields a falsy value, otherwise the result of date
 */
function qdate(context, selector, format, match, attr = 'textContent') {
  // Trimming is always requested from q for date strings.
  const rawDate = q(context, selector, attr, true);

  return rawDate ? date(rawDate, format, match) : null;
}
function qimage(context, selector = 'img', attr = 'src', protocol = 'https') {
const image = q(context, selector, attr);
@@ -107,6 +113,12 @@ function qtrailer(context, selector = 'source', attr = 'src', protocol = 'https'
return attr ? prefixProtocol(trailer, protocol) : trailer;
}
/**
 * Query every trailer source under the context via qall.
 *
 * @param context - passed through to qall as the query root
 * @param {string} [selector='source'] - passed through to qall
 * @param {string} [attr='src'] - attribute qall should read; when falsy the
 *   qall result is returned without protocol prefixing
 * @param {string} [protocol='https'] - passed to prefixProtocol for each entry
 * @returns the qall result, with each entry run through prefixProtocol when
 *   attr is truthy
 */
function qtrailers(context, selector = 'source', attr = 'src', protocol = 'https') {
  const sources = qall(context, selector, attr);

  // Without an attribute there is nothing to prefix; hand back the raw result.
  if (!attr) return sources;

  return sources.map(source => prefixProtocol(source, protocol));
}
function qlength(context, selector, attr = 'textContent') {
const durationString = q(context, selector, attr);
@@ -133,8 +145,9 @@ const funcs = {
qmeta,
qtext,
qtrailer,
qurls,
qtrailers,
qurl,
qurls,
qa: qall,
qd: qdate,
qi: qimage,
@@ -143,6 +156,7 @@ const funcs = {
ql: qlength,
qm: qmeta,
qt: qtrailer,
qts: qtrailers,
qtx: qtext,
qu: qurl,
qus: qurls,
@@ -161,6 +175,7 @@ function init(element, window) {
return {
element,
html: element.outerHTML || element.body.outerHTML,
...(window && {
window,
document: window.document,
@@ -209,6 +224,7 @@ async function getAll(url, selector, headers) {
}
module.exports = {
date,
extract,
extractAll,
init,
@@ -217,6 +233,7 @@ module.exports = {
getAll,
context: init,
contextAll: initAll,
d: date,
ex: extract,
exa: extractAll,
ctx: init,