Added the awkward pagination to the PornCZ scraper.

DebaucheryLibrarian 2020-07-23 00:55:55 +02:00
parent 46c6c4dd21
commit 9ef5ea8fb6
5 changed files with 29 additions and 12 deletions

View File

@@ -253,11 +253,11 @@ export default {
   }

   .filters-filter:not(:last-child) .filter {
-    padding: 1rem .5rem;
+    padding: .5rem;
   }

   .filters-filter:last-child .filter {
-    padding: 1rem 0 1rem .5rem;
+    padding: .5rem 0 .5rem .5rem;
   }
 }
 </style>

View File

@@ -170,6 +170,7 @@ module.exports = {
     'silverstonedvd',
     'silviasaint',
   ],
+  'porncz',
   'gangbangcreampie',
   'gloryholesecrets',
   'aziani',

View File

@@ -17,8 +17,6 @@ function scrapeAll(scenes, channel) {
     release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

-    console.log(release);
-
     return release;
   });
 }
@@ -65,15 +63,23 @@ function scrapeProfile({ query }, entity) {
   return profile;
 }

-async function fetchLatest(channel, _page = 1) {
-  const url = `${channel.url}/en/new-videos?option=date&do=sort`;
-  // TODO: session
-  await http.head(url);
-
-  const res = await qu.getAll(url, '.product-item');
+async function fetchLatest(channel, page = 1) {
+  const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;
+
+  // pagination state is kept in the session, and each new 'page' includes all previous pages
+  const session = http.session();
+  const headers = { 'X-Requested-With': 'XMLHttpRequest' };
+
+  for (let i = 0; i < page - 1; i += 1) {
+    await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
+  }
+
+  const res = await http.get(url, headers, { useSession: session });

   if (res.ok) {
-    return scrapeAll(res.items, channel);
+    const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
+
+    return scrapeAll(items.slice((page - 1) * 16), channel);
   }

   return res.status;
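For context on why this hunk is so roundabout: PornCZ keeps the pagination cursor in the visitor's server-side session, and each AJAX 'next' response returns every item loaded so far rather than a single page. The X-Requested-With header makes the site answer with named HTML snippets instead of a full page, hence the snippet--videoItems lookup above. A minimal sketch of the resulting fetch logic, assuming the http.session()/useSession plumbing added later in this commit and the 16-items-per-page figure implied by the slice; PAGE_SIZE and fetchPornczPage are illustrative names, not part of the diff:

const http = require('../utils/http'); // paths assumed
const qu = require('../utils/qu');

const PAGE_SIZE = 16; // inferred from the (page - 1) * 16 slice in this commit

async function fetchPornczPage(channel, page) {
  // a fresh session starts the server-side cursor at page 1
  const session = http.session();
  const headers = { 'X-Requested-With': 'XMLHttpRequest' };
  const url = page === 1
    ? `${channel.url}/en/new-videos`
    : `${channel.url}/en/new-videos?do=next`;

  // advance the cursor by replaying page - 1 'next' requests in the same session
  for (let i = 0; i < page - 1; i += 1) {
    await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
  }

  const res = await http.get(url, headers, { useSession: session });

  if (!res.ok) {
    return [];
  }

  // the final response contains pages 1..page; keep only the unseen tail
  const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');

  return items.slice((page - 1) * PAGE_SIZE);
}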

View File

@@ -65,8 +65,8 @@ async function handler({
   }

   const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
-    ? await bhttp[method.toLowerCase()](url, body, reqOptions)
-    : await bhttp[method.toLowerCase()](url, reqOptions);
+    ? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
+    : await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);

   if (options?.stream && options?.destination) {
     await pipeline(res, ...(options?.transforms || []), options?.destination);
@@ -131,8 +131,16 @@ async function post(url, body, headers, options) {
   });
 }

+function session(headers, options) {
+  return bhttp.session({
+    headers,
+    options,
+  });
+}
+
 module.exports = {
   get,
   post,
   head,
+  session,
 };
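A usage sketch for the new session support: http.session() wraps bhttp.session(), and any request passed { useSession: session } goes through that session, sharing its cookies, instead of the module-level bhttp client. The URL below is a placeholder; the call shapes follow this diff:

const http = require('./http'); // path assumed

async function demo() {
  // requests through the same session share cookies, so server-side state carries over
  const session = http.session({ 'user-agent': 'traxxx' });

  await http.get('https://example.com/en/new-videos', null, { useSession: session });
  await http.get('https://example.com/en/new-videos?do=next', null, { useSession: session });

  // omit useSession to fall back to the shared bhttp client
  await http.get('https://example.com/en/new-videos', null, {});
}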

View File

@@ -435,6 +435,8 @@ module.exports = {
   formatDate,
   get,
   getAll,
+  fetch: get,
+  fetchAll: getAll,
   context: init,
   contextAll: initAll,
   ed: extractDate,
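The new fetch/fetchAll exports are plain aliases, so scrapers can use either name for the same implementation. A trivial illustration; the URL and selector are placeholders:

const qu = require('./qu'); // path assumed

async function demo() {
  // identical calls: fetchAll is exported as an alias of getAll
  const viaGetAll = await qu.getAll('https://example.com/en/new-videos', '.product-item');
  const viaFetchAll = await qu.fetchAll('https://example.com/en/new-videos', '.product-item');

  return [viaGetAll, viaFetchAll];
}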