Added the awkward pagination to PornCZ scraper.

This commit is contained in:
DebaucheryLibrarian 2020-07-23 00:55:55 +02:00
parent 46c6c4dd21
commit 9ef5ea8fb6
5 changed files with 29 additions and 12 deletions

View File

@ -253,11 +253,11 @@ export default {
}
.filters-filter:not(:last-child) .filter {
padding: 1rem .5rem;
padding: .5rem;
}
.filters-filter:last-child .filter {
padding: 1rem 0 1rem .5rem;
padding: .5rem 0 .5rem .5rem;
}
}
</style>

View File

@ -170,6 +170,7 @@ module.exports = {
'silverstonedvd',
'silviasaint',
],
'porncz',
'gangbangcreampie',
'gloryholesecrets',
'aziani',

View File

@ -17,8 +17,6 @@ function scrapeAll(scenes, channel) {
release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });
console.log(release);
return release;
});
}
@ -65,15 +63,23 @@ function scrapeProfile({ query }, entity) {
return profile;
}
async function fetchLatest(channel, _page = 1) {
const url = `${channel.url}/en/new-videos?option=date&do=sort`;
async function fetchLatest(channel, page = 1) {
const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;
// TODO: session
await http.head(url);
const res = await qu.getAll(url, '.product-item');
// pagination state is kept in the session, and each new 'page' includes all previous pages
const session = http.session();
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
for (let i = 0; i < page - 1; i += 1) {
await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
}
const res = await http.get(url, headers, { useSession: session });
if (res.ok) {
return scrapeAll(res.items, channel);
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
return scrapeAll(items.slice((page - 1) * 16), channel);
}
return res.status;

View File

@ -65,8 +65,8 @@ async function handler({
}
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
? await bhttp[method.toLowerCase()](url, body, reqOptions)
: await bhttp[method.toLowerCase()](url, reqOptions);
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
if (options?.stream && options?.destination) {
await pipeline(res, ...(options?.transforms || []), options?.destination);
@ -131,8 +131,16 @@ async function post(url, body, headers, options) {
});
}
/**
 * Create a bhttp session whose state (e.g. cookies) is shared by every
 * request issued through it.
 *
 * @param {Object} [headers] - Default headers for requests made in the session;
 *   merged over any `options.headers`.
 * @param {Object} [options] - Additional default request options for the session.
 * @returns {Object} The value returned by `bhttp.session()`.
 */
function session(headers, options) {
	// bhttp.session() takes a single flat object of default request options.
	// Nesting the caller's options under an `options` key would hide them from
	// bhttp, so they are spread onto the top level instead.
	const sessionDefaults = { ...options };

	if (headers) {
		sessionDefaults.headers = { ...options?.headers, ...headers };
	}

	return bhttp.session(sessionDefaults);
}
// Public surface of this HTTP utility: request helpers plus the session factory.
module.exports = { get, post, head, session };

View File

@ -435,6 +435,8 @@ module.exports = {
formatDate,
get,
getAll,
fetch: get,
fetchAll: getAll,
context: init,
contextAll: initAll,
ed: extractDate,