Restored Mike Adriano scraper.

This commit is contained in:
DebaucheryLibrarian 2020-12-29 02:05:22 +01:00
parent 5476597343
commit bc944c2373
2 changed files with 12 additions and 16 deletions

View File

@@ -37,8 +37,6 @@ async function scrapeScene({ query }, url) {
const pathname = new URL(url).pathname; const pathname = new URL(url).pathname;
release.entryId = pathname.match(/\/view\/(\d+)/)?.[1] || pathname.match(/\/view\/([\w-]+)/)?.[1]; release.entryId = pathname.match(/\/view\/(\d+)/)?.[1] || pathname.match(/\/view\/([\w-]+)/)?.[1];
console.log(release);
release.title = query.cnt('.content-page-info .title'); release.title = query.cnt('.content-page-info .title');
release.description = query.cnt('.content-page-info .desc'); release.description = query.cnt('.content-page-info .desc');
release.date = query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY'); release.date = query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY');
@@ -73,29 +71,23 @@ async function fetchScene(url, channel) {
const cookieJar = http.cookieJar(); const cookieJar = http.cookieJar();
const session = http.session({ cookieJar }); const session = http.session({ cookieJar });
console.log(cookieJar); /* not working
const resA = await http.get(url, { const resA = await http.get(url, {
session, session,
extract: { extract: {
cookieJar, runScripts: 'dangerously',
// runScripts: 'dangerously',
}, },
}); });
console.log(resA.headers, cookieJar.getCookiesSync(url)); cookieJar.setCookieSync(http.toughCookie.Cookie.parse(resA.document.cookie), url);
const cookie = cookieJar.getCookieStringSync(url);
console.log(cookie); console.log(res.req);
*/
const res = await http.get(url, { const res = await http.get(url, {
headers: { session,
cookie,
},
}); });
// console.log(res.req);
if (res.ok) { if (res.ok) {
const item = qu.init(res.document); const item = qu.init(res.document);

View File

@@ -6,7 +6,7 @@ const util = require('util');
const stream = require('stream'); const stream = require('stream');
const tunnel = require('tunnel'); const tunnel = require('tunnel');
const Bottleneck = require('bottleneck'); const Bottleneck = require('bottleneck');
const { JSDOM, CookieJar } = require('jsdom'); const { JSDOM, toughCookie } = require('jsdom');
const logger = require('../logger')(__filename); const logger = require('../logger')(__filename);
const virtualConsole = require('./virtual-console')(__filename); const virtualConsole = require('./virtual-console')(__filename);
@@ -166,10 +166,14 @@ function getSession(options) {
} }
function getCookieJar(store, options) { function getCookieJar(store, options) {
return new CookieJar(store, options); return new toughCookie.CookieJar(store, {
looseMode: true,
...options,
});
} }
module.exports = { module.exports = {
toughCookie,
get, get,
head, head,
post, post,