Added the awkward pagination to PornCZ scraper.
@@ -17,8 +17,6 @@ function scrapeAll(scenes, channel) {
 
     release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });
 
-    console.log(release);
-
     return release;
   });
 }
@@ -65,15 +63,23 @@ function scrapeProfile({ query }, entity) {
   return profile;
 }
 
-async function fetchLatest(channel, _page = 1) {
-  const url = `${channel.url}/en/new-videos?option=date&do=sort`;
+async function fetchLatest(channel, page = 1) {
+  const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;
 
-  // TODO: session
-  await http.head(url);
-  const res = await qu.getAll(url, '.product-item');
+  // pagination state is kept in session, and each new 'page' includes all previous pages
+  const session = http.session();
+  const headers = { 'X-Requested-With': 'XMLHttpRequest' };
+
+  for (let i = 0; i < page - 1; i += 1) {
+    await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
+  }
+
+  const res = await http.get(url, headers, { useSession: session });
 
   if (res.ok) {
-    return scrapeAll(res.items, channel);
+    const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
+
+    return scrapeAll(items.slice((page - 1) * 16), channel);
   }
 
   return res.status;
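
The dance above exists because PornCZ keeps its pagination cursor server-side: every `?do=next` request advances the session and returns the accumulated list of all pages so far, so landing on page N means priming a fresh session with N - 1 throwaway requests and then skipping the first (page - 1) * 16 items. Below is a minimal standalone sketch of the same approach, assuming Node 18+ global fetch and a hand-rolled single-cookie session instead of the scraper's http helper; the Nette-style snippet payload and the page size of 16 are taken from the diff, while fetchLatestStandalone, its cookie handling, and its return shape are illustrative only.

// Hypothetical standalone sketch; not part of the scraper codebase.
async function fetchLatestStandalone(baseUrl, page = 1, pageSize = 16) {
  const base = { 'X-Requested-With': 'XMLHttpRequest' };
  const url = page === 1 ? `${baseUrl}/en/new-videos` : `${baseUrl}/en/new-videos?do=next`;
  let cookie = '';

  const request = async () => {
    const res = await fetch(url, { headers: cookie ? { ...base, cookie } : base });
    // Keep the name=value part of the first Set-Cookie so the session sticks.
    cookie = res.headers.get('set-cookie')?.split(';')[0] ?? cookie;
    return res;
  };

  // Prime the server-side cursor: each throwaway request advances the session
  // by one page, so reaching page N takes N - 1 of them before the real one.
  for (let i = 0; i < page - 1; i += 1) {
    await (await request()).text(); // eslint-disable-line no-await-in-loop
  }

  const res = await request();

  if (!res.ok) {
    return res.status;
  }

  const text = await res.text();
  let html = text;

  try {
    // AJAX responses are Nette-style snippet payloads; page 1 may be plain HTML.
    html = JSON.parse(text).snippets?.['snippet--videoItems'] ?? text;
  } catch { /* not JSON, keep the raw markup */ }

  // Every response accumulates all pages so far; the first (page - 1) * pageSize
  // '.product-item' elements were already returned by earlier pages.
  return { html, skipItems: (page - 1) * pageSize };
}

Note the cost of this scheme, and why the commit calls it awkward: deep-linking page N takes N round trips, and the final response still carries every earlier item, which is why the slice at (page - 1) * 16 is needed at all.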