forked from DebaucheryLibrarian/traxxx
Added q scraping helper. Added Perfect Gonzo scraper.
This commit is contained in:
@@ -1,23 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
|
||||
/**
 * Find the first element under `context` that matches a CSS selector.
 *
 * @param {Element|Document} context - Node exposing `querySelector`.
 * @param {string} selector - CSS selector to look up.
 * @returns {*} Whatever `context.querySelector` returns for the selector.
 */
function q(context, selector) {
  const firstMatch = context.querySelector(selector);

  return firstMatch;
}
|
||||
|
||||
/**
 * Collect every element under `context` that matches a CSS selector.
 *
 * @param {Element|Document} context - Node exposing `querySelectorAll`.
 * @param {string} selector - CSS selector to look up.
 * @returns {Array} The matches copied into a plain array.
 */
function qa(context, selector) {
  const matches = context.querySelectorAll(selector);

  return Array.from(matches);
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and return the document together with
 * query helpers that are pre-bound to it.
 *
 * @param {string} html - Markup handed to the JSDOM constructor.
 * @returns {{document: *, q: Function, qa: Function}} The parsed document,
 *   plus `q(selector)` and `qa(selector)` that search that document.
 */
function ex(html) {
  const dom = new JSDOM(html);
  const { document } = dom.window;

  return {
    document,
    q: selector => q(document, selector),
    qa: selector => qa(document, selector),
  };
}
|
||||
|
||||
// The module's only export is the `ex` function.
module.exports = ex;
|
||||
107
src/utils/q.js
Normal file
107
src/utils/q.js
Normal file
@@ -0,0 +1,107 @@
|
||||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Query a single element, or read one property from it.
 *
 * @param {Element|Document} context - Node exposing `querySelector`.
 * @param {string} selector - CSS selector passed to `context.querySelector`.
 * @param {string|boolean} [attrArg] - Name of the property to read from the
 *   match; `true` is shorthand for `'textContent'`. When falsy, the matched
 *   element itself is returned.
 * @param {boolean} [trim=true] - Trim the value when it is a string.
 * @returns {*} The matched element (or `null`) when no property is requested;
 *   otherwise the property value, or `null` when nothing matches.
 */
function q(context, selector, attrArg, trim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const element = context.querySelector(selector);

  if (!attr) {
    return element;
  }

  // A selector that matches nothing yields null instead of a TypeError from
  // reading a property of null.
  if (!element) {
    return null;
  }

  const value = element[attr];

  // Only strings can be trimmed; any other value (e.g. undefined for a
  // property the element does not have) is passed through untouched.
  return trim && typeof value === 'string' ? value.trim() : value;
}
|
||||
|
||||
/**
 * Query every matching element, or read one property from each of them.
 *
 * @param {Element|Document} context - Node exposing `querySelectorAll`.
 * @param {string} selector - CSS selector passed to `context.querySelectorAll`.
 * @param {string|boolean} [attrArg] - Name of the property to read from each
 *   match; `true` is shorthand for `'textContent'`. When falsy, the elements
 *   themselves are returned.
 * @param {boolean} [trim=true] - Call `trim()` on each non-nullish value.
 * @returns {Array} The matched elements, or one property value per element.
 */
function qall(context, selector, attrArg, trim = true) {
  const property = attrArg === true ? 'textContent' : attrArg;
  const matches = Array.from(context.querySelectorAll(selector));

  if (!property) {
    return matches;
  }

  return matches.map((node) => {
    const raw = node[property];

    return trim ? raw?.trim() : raw;
  });
}
|
||||
|
||||
/**
 * Read a date from an element and parse it as UTC with moment.
 *
 * @param {Element|Document} context - Node exposing `querySelector`.
 * @param {string} selector - CSS selector of the element holding the date.
 * @param {string} format - Format passed to `moment.utc`.
 * @param {RegExp|string} [match] - Optional pattern used to pull the date out
 *   of surrounding text; the first match is what gets parsed. Without it, the
 *   trimmed property value is parsed as a whole.
 * @param {string} [attr='textContent'] - Property of the element to read.
 * @returns {Date|null} The parsed date, or `null` when the element is
 *   missing, the property is not a string, `match` finds nothing, or the text
 *   does not parse to a valid date.
 */
function qdate(context, selector, format, match, attr = 'textContent') {
  // Optional chaining keeps a selector that matches nothing from throwing.
  const dateString = context.querySelector(selector)?.[attr];

  if (typeof dateString !== 'string') {
    return null;
  }

  const dateStamp = match
    ? dateString.match(match)?.[0]
    : dateString.trim();

  if (!dateStamp) {
    return null;
  }

  const date = moment.utc(dateStamp, format);

  // Return null rather than an Invalid Date object, so both code paths signal
  // failure the same way.
  return date.isValid() ? date.toDate() : null;
}
|
||||
|
||||
/**
 * Collect one property from every matching element, defaulting to the `src`
 * of each `img`. Delegates to `qall`.
 *
 * @param {Element|Document} context - Node to search within.
 * @param {string} [selector='img'] - CSS selector of the image elements.
 * @param {string} [attr='src'] - Property to read from each match.
 * @returns {Array} Whatever `qall` returns for these arguments.
 */
function qimages(context, selector = 'img', attr = 'src') {
  const sources = qall(context, selector, attr);

  return sources;
}
|
||||
|
||||
/**
 * Read one property from the first matching element, defaulting to the
 * `poster` of the first `video`. Delegates to `q`.
 *
 * @param {Element|Document} context - Node to search within.
 * @param {string} [selector='video'] - CSS selector of the video element.
 * @param {string} [attr='poster'] - Property to read from the match.
 * @returns {*} Whatever `q` returns for these arguments.
 */
function qposter(context, selector = 'video', attr = 'poster') {
  const poster = q(context, selector, attr);

  return poster;
}
|
||||
|
||||
/**
 * Read one property from the first matching element, defaulting to the `src`
 * of the first `source`. Delegates to `q`.
 *
 * @param {Element|Document} context - Node to search within.
 * @param {string} [selector='source'] - CSS selector of the source element.
 * @param {string} [attr='src'] - Property to read from the match.
 * @returns {*} Whatever `q` returns for these arguments.
 */
function qtrailer(context, selector = 'source', attr = 'src') {
  const trailer = q(context, selector, attr);

  return trailer;
}
|
||||
|
||||
/**
 * Read a duration written as `m:ss` or `h:mm:ss` from an element and convert
 * it to seconds.
 *
 * @param {Element|Document} context - Node to search within.
 * @param {string} selector - CSS selector of the element holding the duration.
 * @param {string} [attr='textContent'] - Property to read, fetched through `q`.
 * @returns {number|null} Duration in seconds, or `null` when `q` does not
 *   yield a string or the text contains no colon-separated duration.
 */
function qlength(context, selector, attr = 'textContent') {
  const durationString = q(context, selector, attr);

  // `q` hands back the element itself when `attr` is falsy, and may hand back
  // a non-string value; neither can be pattern-matched.
  if (typeof durationString !== 'string') {
    return null;
  }

  const duration = durationString.match(/(\d+:)?\d+:\d+/);

  if (!duration) {
    return null;
  }

  // Left-pad with a zero hours segment so `m:ss` becomes `00:m:ss`; slice(-3)
  // drops the padding again when hours were already present.
  const segments = ['00', ...duration[0].split(':')].slice(-3);

  return moment.duration(segments.join(':')).asSeconds();
}
|
||||
|
||||
// Registry of the query helpers. `ctx` binds each entry to an element, and
// the same entries are spread into the module's exports.
const funcs = {
  // full names
  q,
  qall,
  qdate,
  qimages,
  qposter,
  qlength,
  qtrailer,
  // short aliases pointing at the same functions
  qa: qall,
  qd: qdate,
  qi: qimages,
  qp: qposter,
  ql: qlength,
  qt: qtrailer,
};
|
||||
|
||||
/**
 * Wrap an element in an object that exposes every helper in `funcs` with the
 * element pre-applied as its first (`context`) argument.
 *
 * @param {Element|Document} element - Node the helpers should search within.
 * @returns {Object} `{ element, ...helpers }`, where each helper takes the
 *   remaining arguments of the matching `funcs` entry.
 */
function ctx(element) {
  // Build the bound helpers in a single pass; spreading an accumulator inside
  // reduce would re-copy every earlier key on each iteration.
  const contextFuncs = Object.fromEntries(
    Object.entries(funcs).map(([key, func]) => [key, (...args) => func(element, ...args)]),
  );

  return {
    element,
    ...contextFuncs,
  };
}
|
||||
|
||||
/**
 * Find every element matching a selector and wrap each one with `ctx`.
 *
 * @param {Element|Document} context - Node exposing `querySelectorAll`.
 * @param {string} selector - CSS selector of the elements to wrap.
 * @returns {Array} One `ctx(element)` result per match, in match order.
 */
function ctxa(context, selector) {
  const elements = context.querySelectorAll(selector);

  return Array.from(elements, element => ctx(element));
}
|
||||
|
||||
/**
 * Parse an HTML string with JSDOM and wrap the resulting document with `ctx`.
 *
 * @param {string} html - Markup handed to the JSDOM constructor.
 * @returns {Object} The `ctx` wrapper around the parsed document.
 */
function ex(html) {
  const dom = new JSDOM(html);

  return ctx(dom.window.document);
}
|
||||
|
||||
module.exports = {
|
||||
ex,
|
||||
ctx,
|
||||
ctxa,
|
||||
...funcs,
|
||||
};
|
||||
Reference in New Issue
Block a user