From 9ef5ea8fb6cae200fd8142a1b39f4658bc670fda Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Thu, 23 Jul 2020 00:55:55 +0200
Subject: [PATCH] Added the awkward pagination to PornCZ scraper.

---
Review notes:
 * Line structure of this patch was reconstructed from a whitespace-collapsed
   copy; indentation is assumed to be tabs. If context lines do not match,
   apply with `git apply --ignore-whitespace`.
 * src/utils/http.js: handler() reads `options?.useSession`, matching the
   optional chaining used for `options?.stream` a few lines below, so a call
   without an options object falls back to plain bhttp instead of throwing.
 * src/utils/http.js: session() spreads `options` into the bhttp session
   defaults instead of nesting them under an `options` key.
 * src/scrapers/porncz.js: fetchLatest() replays the request `page - 1` times
   in one session, then slices off the first `(page - 1) * 16` items; 16 is
   presumably the site's page size -- confirm.
 * NOTE(review): for page > 1 every request, including the first one in the
   fresh session, goes to `?do=next`; confirm the site does not need the plain
   `/en/new-videos` request first, otherwise the slice offset is off by a page.

 assets/components/filters/filter-bar.vue |  4 ++--
 config/default.js                        |  1 +
 src/scrapers/porncz.js                   | 22 ++++++++++++++--------
 src/utils/http.js                        | 12 ++++++++++--
 src/utils/qu.js                          |  2 ++
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/assets/components/filters/filter-bar.vue b/assets/components/filters/filter-bar.vue
index 9692b062..c46baaf6 100644
--- a/assets/components/filters/filter-bar.vue
+++ b/assets/components/filters/filter-bar.vue
@@ -253,11 +253,11 @@ export default {
 	}

 	.filters-filter:not(:last-child) .filter {
-		padding: 1rem .5rem;
+		padding: .5rem;
 	}

 	.filters-filter:last-child .filter {
-		padding: 1rem 0 1rem .5rem;
+		padding: .5rem 0 .5rem .5rem;
 	}
 }

diff --git a/config/default.js b/config/default.js
index c343e340..b35b32e2 100644
--- a/config/default.js
+++ b/config/default.js
@@ -170,6 +170,7 @@ module.exports = {
 				'silverstonedvd',
 				'silviasaint',
 			],
+			'porncz',
 			'gangbangcreampie',
 			'gloryholesecrets',
 			'aziani',
diff --git a/src/scrapers/porncz.js b/src/scrapers/porncz.js
index 19025143..747a20d0 100644
--- a/src/scrapers/porncz.js
+++ b/src/scrapers/porncz.js
@@ -17,8 +17,6 @@ function scrapeAll(scenes, channel) {

 		release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

-		console.log(release);
-
 		return release;
 	});
 }
@@ -65,15 +63,23 @@ function scrapeProfile({ query }, entity) {
 	return profile;
 }

-async function fetchLatest(channel, _page = 1) {
-	const url = `${channel.url}/en/new-videos?option=date&do=sort`;
+async function fetchLatest(channel, page = 1) {
+	const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;

-	// TODO: session
-	await http.head(url);
-	const res = await qu.getAll(url, '.product-item');
+	// pagination state is kept in session, and each new 'page' includes all previous pages
+	const session = http.session();
+	const headers = { 'X-Requested-With': 'XMLHttpRequest' };
+
+	for (let i = 0; i < page - 1; i += 1) {
+		await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
+	}
+
+	const res = await http.get(url, headers, { useSession: session });

 	if (res.ok) {
-		return scrapeAll(res.items, channel);
+		const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
+
+		return scrapeAll(items.slice((page - 1) * 16), channel);
 	}

 	return res.status;
diff --git a/src/utils/http.js b/src/utils/http.js
index 71c0ebe9..e59b80fa 100644
--- a/src/utils/http.js
+++ b/src/utils/http.js
@@ -65,8 +65,8 @@ async function handler({
 	}

 	const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
-		? await bhttp[method.toLowerCase()](url, body, reqOptions)
-		: await bhttp[method.toLowerCase()](url, reqOptions);
+		? await (options?.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
+		: await (options?.useSession || bhttp)[method.toLowerCase()](url, reqOptions);

 	if (options?.stream && options?.destination) {
 		await pipeline(res, ...(options?.transforms || []), options?.destination);
@@ -131,8 +131,16 @@ async function post(url, body, headers, options) {
 	});
 }

+function session(headers, options) {
+	return bhttp.session({
+		headers,
+		...options,
+	});
+}
+
 module.exports = {
 	get,
 	post,
 	head,
+	session,
 };
diff --git a/src/utils/qu.js b/src/utils/qu.js
index d10cc652..847d2487 100644
--- a/src/utils/qu.js
+++ b/src/utils/qu.js
@@ -435,6 +435,8 @@ module.exports = {
 	formatDate,
 	get,
 	getAll,
+	fetch: get,
+	fetchAll: getAll,
 	context: init,
 	contextAll: initAll,
 	ed: extractDate,