Added the awkward pagination to PornCZ scraper.
This commit is contained in:
parent
46c6c4dd21
commit
9ef5ea8fb6
|
@ -253,11 +253,11 @@ export default {
|
||||||
}
|
}
|
||||||
|
|
||||||
.filters-filter:not(:last-child) .filter {
|
.filters-filter:not(:last-child) .filter {
|
||||||
padding: 1rem .5rem;
|
padding: .5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.filters-filter:last-child .filter {
|
.filters-filter:last-child .filter {
|
||||||
padding: 1rem 0 1rem .5rem;
|
padding: .5rem 0 .5rem .5rem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
|
@ -170,6 +170,7 @@ module.exports = {
|
||||||
'silverstonedvd',
|
'silverstonedvd',
|
||||||
'silviasaint',
|
'silviasaint',
|
||||||
],
|
],
|
||||||
|
'porncz',
|
||||||
'gangbangcreampie',
|
'gangbangcreampie',
|
||||||
'gloryholesecrets',
|
'gloryholesecrets',
|
||||||
'aziani',
|
'aziani',
|
||||||
|
|
|
@ -17,8 +17,6 @@ function scrapeAll(scenes, channel) {
|
||||||
|
|
||||||
release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });
|
release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });
|
||||||
|
|
||||||
console.log(release);
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -65,15 +63,23 @@ function scrapeProfile({ query }, entity) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, _page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const url = `${channel.url}/en/new-videos?option=date&do=sort`;
|
const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;
|
||||||
|
|
||||||
// TODO: session
|
// pagination state is kept in the session, and each new 'page' includes all previous pages
|
||||||
await http.head(url);
|
const session = http.session();
|
||||||
const res = await qu.getAll(url, '.product-item');
|
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
|
||||||
|
|
||||||
|
for (let i = 0; i < page - 1; i += 1) {
|
||||||
|
await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
|
||||||
|
}
|
||||||
|
|
||||||
|
const res = await http.get(url, headers, { useSession: session });
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
return scrapeAll(res.items, channel);
|
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
|
||||||
|
|
||||||
|
return scrapeAll(items.slice((page - 1) * 16), channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
|
|
@ -65,8 +65,8 @@ async function handler({
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||||
? await bhttp[method.toLowerCase()](url, body, reqOptions)
|
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
|
||||||
: await bhttp[method.toLowerCase()](url, reqOptions);
|
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
|
||||||
|
|
||||||
if (options?.stream && options?.destination) {
|
if (options?.stream && options?.destination) {
|
||||||
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
||||||
|
@ -131,8 +131,16 @@ async function post(url, body, headers, options) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function session(headers, options) {
|
||||||
|
return bhttp.session({
|
||||||
|
headers,
|
||||||
|
options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
get,
|
get,
|
||||||
post,
|
post,
|
||||||
head,
|
head,
|
||||||
|
session,
|
||||||
};
|
};
|
||||||
|
|
|
@ -435,6 +435,8 @@ module.exports = {
|
||||||
formatDate,
|
formatDate,
|
||||||
get,
|
get,
|
||||||
getAll,
|
getAll,
|
||||||
|
fetch: get,
|
||||||
|
fetchAll: getAll,
|
||||||
context: init,
|
context: init,
|
||||||
contextAll: initAll,
|
contextAll: initAll,
|
||||||
ed: extractDate,
|
ed: extractDate,
|
||||||
|
|
Loading…
Reference in New Issue