Restored Mike Adriano scraper.

This commit is contained in:
DebaucheryLibrarian 2020-12-29 02:05:22 +01:00
parent 5476597343
commit bc944c2373
2 changed files with 12 additions and 16 deletions

View File

@ -37,8 +37,6 @@ async function scrapeScene({ query }, url) {
const pathname = new URL(url).pathname;
release.entryId = pathname.match(/\/view\/(\d+)/)?.[1] || pathname.match(/\/view\/([\w-]+)/)?.[1];
console.log(release);
release.title = query.cnt('.content-page-info .title');
release.description = query.cnt('.content-page-info .desc');
release.date = query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY');
@ -73,29 +71,23 @@ async function fetchScene(url, channel) {
const cookieJar = http.cookieJar();
const session = http.session({ cookieJar });
console.log(cookieJar);
/* not working
const resA = await http.get(url, {
session,
extract: {
cookieJar,
// runScripts: 'dangerously',
runScripts: 'dangerously',
},
});
console.log(resA.headers, cookieJar.getCookiesSync(url));
const cookie = cookieJar.getCookieStringSync(url);
cookieJar.setCookieSync(http.toughCookie.Cookie.parse(resA.document.cookie), url);
console.log(cookie);
console.log(res.req);
*/
const res = await http.get(url, {
headers: {
cookie,
},
session,
});
// console.log(res.req);
if (res.ok) {
const item = qu.init(res.document);

View File

@ -6,7 +6,7 @@ const util = require('util');
const stream = require('stream');
const tunnel = require('tunnel');
const Bottleneck = require('bottleneck');
const { JSDOM, CookieJar } = require('jsdom');
const { JSDOM, toughCookie } = require('jsdom');
const logger = require('../logger')(__filename);
const virtualConsole = require('./virtual-console')(__filename);
@ -166,10 +166,14 @@ function getSession(options) {
}
/**
 * Create a cookie jar backed by the tough-cookie build that jsdom re-exports.
 *
 * `looseMode` is switched on by default; because the caller's options are
 * spread after the default, passing `{ looseMode: false }` turns it off again.
 *
 * @param {object} [store] - Cookie store handed straight to the CookieJar constructor.
 * @param {object} [options] - Extra CookieJar options; these override the defaults.
 * @returns {object} A new `toughCookie.CookieJar` instance.
 */
function getCookieJar(store, options) {
	return new toughCookie.CookieJar(store, {
		looseMode: true,
		...options,
	});
}
module.exports = {
toughCookie,
get,
head,
post,