Updated Dorcel scraper, added movie support.
This commit is contained in:
@@ -242,6 +242,42 @@ function urls(context, selector = 'a', attr = 'href', { origin, protocol = 'http
|
||||
return attr ? urlEls.map(urlEl => prefixUrl(urlEl, origin, protocol)) : urlEls;
|
||||
}
|
||||
|
||||
/**
 * Read a srcset-style attribute and return its candidates, best first.
 *
 * The raw value is fetched with `q(context, selector, attr)` and split on
 * commas into candidates. Each candidate URL is passed through `prefixUrl`
 * together with `options.origin` and `options.protocol`. Candidates are
 * ordered by the numeric part of their descriptor (`2x`, `1.5x`, `800w`, ...)
 * from highest to lowest; candidates without a descriptor are labelled
 * 'fallback' and always sort last.
 *
 * @param {*} context - Forwarded unchanged to `q`.
 * @param {string} selector - Forwarded unchanged to `q`.
 * @param {string} attr - Forwarded unchanged to `q`.
 * @param {Object} [options]
 * @param {string} [options.origin] - Forwarded to `prefixUrl`.
 * @param {string} [options.protocol] - Forwarded to `prefixUrl`.
 * @param {boolean} [options.includeDescriptor] - When truthy, return
 *   `{ descriptor, url }` objects instead of bare URLs.
 * @returns {null|string[]|Array<{descriptor: string, url: string}>} `null`
 *   when `q` yields nothing, otherwise the sorted candidates.
 */
function sourceSet(context, selector, attr, options = {}) {
	const srcset = q(context, selector, attr);

	if (!srcset) {
		return null;
	}

	// Descriptors that carry no number ('fallback' included) rank below every
	// numeric descriptor, which keeps the comparator symmetric.
	const rank = (descriptor) => {
		const value = Number.parseFloat(descriptor);

		return Number.isNaN(value) ? -Infinity : value;
	};

	const sources = srcset
		.trim()
		.split(/\s*,\s*/)
		.filter(Boolean) // a trailing comma would otherwise produce an empty candidate
		.map((candidate) => {
			// URL and descriptor may be separated by any run of whitespace
			const [link, descriptor] = candidate.split(/\s+/);

			return {
				descriptor: descriptor || 'fallback',
				url: prefixUrl(link, options.origin, options.protocol),
			};
		})
		.sort((sourceA, sourceB) => {
			const rankA = rank(sourceA.descriptor);
			const rankB = rank(sourceB.descriptor);

			if (rankA > rankB) {
				return -1;
			}

			if (rankA < rankB) {
				return 1;
			}

			return 0;
		});

	if (options.includeDescriptor) {
		return sources;
	}

	return sources.map(source => source.url);
}
|
||||
|
||||
function poster(context, selector = 'video', attr = 'poster', { origin, protocol = 'https' } = {}) {
|
||||
const posterEl = q(context, selector, attr);
|
||||
|
||||
@@ -267,17 +303,17 @@ function duration(context, selector, match, attr = 'textContent') {
|
||||
const durationMatch = durationString.match(match || /(\d+:)?\d+:\d+/);
|
||||
|
||||
if (durationMatch) {
|
||||
const segments = ['00'].concat(durationMatch[0].split(':')).slice(-3);
|
||||
const segments = ['00'].concat(durationMatch[0].split(/[:hm]/)).slice(-3);
|
||||
|
||||
return moment.duration(segments.join(':')).asSeconds();
|
||||
}
|
||||
|
||||
const timestampMatch = durationString.match(/T(\d+H)?(\d+M)?\d+S/);
|
||||
const timestampMatch = durationString.match(/(\d+H)?\s*(\d+M)?\s*\d+S?/i);
|
||||
|
||||
if (timestampMatch) {
|
||||
const hours = timestampMatch[0].match(/(\d+)H/)?.[1] || 0;
|
||||
const minutes = timestampMatch[0].match(/(\d+)M/)?.[1] || 0;
|
||||
const seconds = timestampMatch[0].match(/(\d+)S/)?.[1] || 0;
|
||||
const hours = timestampMatch[0].match(/(\d+)H/i)?.[1] || 0;
|
||||
const minutes = timestampMatch[0].match(/(\d+)M/i)?.[1] || 0;
|
||||
const seconds = timestampMatch[0].match(/(\d+)(S|$)/i)?.[1] || 0;
|
||||
|
||||
return (Number(hours) * 3600) + (Number(minutes) * 60) + Number(seconds);
|
||||
}
|
||||
@@ -345,6 +381,10 @@ const quFuncs = {
|
||||
num: number,
|
||||
poster,
|
||||
q,
|
||||
sourceSet,
|
||||
sources: sourceSet,
|
||||
srcs: sourceSet,
|
||||
srcset: sourceSet,
|
||||
style,
|
||||
styles,
|
||||
text,
|
||||
@@ -415,10 +455,12 @@ function extractAll(htmlValue, selector) {
|
||||
return initAll(window.document, selector, window);
|
||||
}
|
||||
|
||||
async function get(urlValue, selector, headers, options, queryAll = false) {
|
||||
const res = await http.get(urlValue, headers, options);
|
||||
async function request(method = 'get', urlValue, body, selector, headers, options, queryAll = false) {
|
||||
const res = await (method === 'post'
|
||||
? http.post(urlValue, body, headers, options)
|
||||
: http[method](urlValue, headers, options));
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
if (res.ok) {
|
||||
const item = queryAll
|
||||
? extractAll(res.body.toString(), selector)
|
||||
: extract(res.body.toString(), selector);
|
||||
@@ -443,8 +485,20 @@ async function get(urlValue, selector, headers, options, queryAll = false) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Issue a GET through `request` with `queryAll` disabled and no body.
 *
 * @param {string} urlValue - URL forwarded to `request`.
 * @param {string} selector - Selector forwarded to `request`.
 * @param {Object} [headers] - Forwarded to `request`.
 * @param {Object} [options] - Forwarded to `request`.
 * @returns {Promise<*>} Whatever `request` resolves with.
 */
async function get(urlValue, selector, headers, options) {
	return request('get', urlValue, null, selector, headers, options, false);
}
|
||||
|
||||
/**
 * Issue a POST through `request` with `queryAll` disabled.
 *
 * @param {string} urlValue - URL forwarded to `request`.
 * @param {*} body - Request body forwarded to `request`.
 * @param {string} selector - Selector forwarded to `request`.
 * @param {Object} [headers] - Forwarded to `request`.
 * @param {Object} [options] - Forwarded to `request`.
 * @returns {Promise<*>} Whatever `request` resolves with.
 */
async function post(urlValue, body, selector, headers, options) {
	return request('post', urlValue, body, selector, headers, options, false);
}
|
||||
|
||||
/**
 * Issue a GET through `request` with `queryAll` enabled and no body.
 *
 * The argument list mirrors `request(method, urlValue, body, selector,
 * headers, options, queryAll)`: the method must be exactly 'get' and the body
 * slot must be filled (with `null`) so the remaining arguments line up.
 *
 * @param {string} urlValue - URL forwarded to `request`.
 * @param {string} selector - Selector forwarded to `request`.
 * @param {Object} [headers] - Forwarded to `request`.
 * @param {Object} [options] - Forwarded to `request`.
 * @returns {Promise<*>} Whatever `request` resolves with.
 */
async function getAll(urlValue, selector, headers, options) {
	return request('get', urlValue, null, selector, headers, options, true);
}
|
||||
|
||||
/**
 * Issue a POST through `request` with `queryAll` enabled.
 *
 * @param {string} urlValue - URL forwarded to `request`.
 * @param {*} body - Request body forwarded to `request`.
 * @param {string} selector - Selector forwarded to `request`.
 * @param {Object} [headers] - Forwarded to `request`.
 * @param {Object} [options] - Forwarded to `request`.
 * @returns {Promise<*>} Whatever `request` resolves with.
 */
async function postAll(urlValue, body, selector, headers, options) {
	return request('post', urlValue, body, selector, headers, options, true);
}
|
||||
|
||||
module.exports = {
|
||||
@@ -470,6 +524,8 @@ module.exports = {
|
||||
geta: getAll,
|
||||
qu: quFuncs,
|
||||
query: quFuncs,
|
||||
post,
|
||||
postAll,
|
||||
prefixUrl,
|
||||
...legacyFuncs,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user