Updated Puppeteer; minor refactor of the Kink scraper.

This commit is contained in:
DebaucheryLibrarian
2025-03-12 01:45:58 +01:00
parent 4496f44fb3
commit e20999d438
3 changed files with 373 additions and 323 deletions

View File

@@ -56,12 +56,35 @@ function scrapeAll(scenes, entity) {
release.trailer = `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`;
release.channel = slugify(query.content('.shoot-detail-legend a[href*="/channel"]'), '');
release.rating = query.number('.thumb-up') / 10;
return release;
});
}
/**
 * Loads one page of a channel's shoot search results in a browser tab and scrapes it.
 *
 * The tab comes from `http.getBrowserSession('kink')` and is closed before this
 * function settles, whether it returns normally or throws.
 *
 * @param {object} channel - Channel entity; `parent.url`, `parameters?.slug` and `slug` are read to build the search URL.
 * @param {number} [page=1] - Value placed in the search URL's `page` query parameter.
 * @returns {Promise<*|number>} The result of `scrapeAll` when the response status is 200, otherwise the HTTP status code.
 */
async function fetchLatest(channel, page = 1) {
	const { tab } = await http.getBrowserSession('kink');

	try {
		const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const items = unprint.initAll(html, '.container .card');

		return scrapeAll(items, channel);
	} finally {
		// Close on every exit path: a failed navigation or a scraper error must not leave the tab open.
		await tab.close();
	}
}
function scrapeScene({ query }, url, entity) {
const release = { url };
const data = query.json('div[data-setup]', { attribute: 'data-setup' });
@@ -70,9 +93,10 @@ function scrapeScene({ query }, url, entity) {
release.entryId = data?.id || release.shootId;
release.title = data?.title || query.attribute('#shootPage #favoriteShootButton', 'data-title') || query.content('#shootPage h1');
release.description = query.content('//h4[contains(text(), \'Description\')]/following-sibling::span/p');
release.description = query.content('//*[contains(text(), \'Description\')]/following-sibling::span/p');
release.date = query.date('.shoot-detail-legend', 'MMM D, YYYY');
release.date = query.date('.shoot-detail-legend', 'MM/DD/YY');
release.duration = data?.duration
? data.duration / 1000
: query.duration('#shootPage .clock');
@@ -114,7 +138,7 @@ function scrapeScene({ query }, url, entity) {
`https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`,
];
release.tags = query.contents('#shootPage a[href*="/tag"]').map((tag) => tag.replace(/,\s*/, ''));
release.tags = query.contents('#shootPage a[href*="/tag"]').map((tag) => tag.trim());
release.channel = data?.channelName?.name || slugify(query.url('.shoot-detail-legend a[href*="/channel"]')?.split('/').slice(-1)[0], '');
release.qualities = data?.resolutions
@@ -124,6 +148,28 @@ function scrapeScene({ query }, url, entity) {
return release;
}
/**
 * Loads a scene page in a browser tab and scrapes it.
 *
 * The tab comes from `http.getBrowserSession('kink')` and is closed before this
 * function settles, whether it returns normally or throws.
 *
 * @param {string} url - Scene page URL to navigate to; also passed through to `scrapeScene`.
 * @param {object} channel - Entity passed through to `scrapeScene`.
 * @returns {Promise<*|number>} The result of `scrapeScene` when the response status is 200, otherwise the HTTP status code.
 */
async function fetchScene(url, channel) {
	const { tab } = await http.getBrowserSession('kink');

	try {
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const item = unprint.init(html);

		return scrapeScene(item, url, channel);
	} finally {
		// Close on every exit path: a failed navigation or a scraper error must not leave the tab open.
		await tab.close();
	}
}
async function scrapeProfile({ query }, actorUrl) {
const profile = {};
@@ -152,73 +198,27 @@ async function scrapeProfile({ query }, actorUrl) {
if ((tags.includes('big dick') || tags.includes('foreskin'))
&& (tags.includes('fake boobs') || tags.includes('big tits'))) profile.gender = 'transsexual';
[profile.avatar, ...profile.photos] = query.imgs('.kink-slider-img:not([data-src*="Missing"])', { attribute: 'data-src' });
[profile.avatar, ...profile.photos] = query.imgs('.kink-slider-images img:not([data-src*="missing"])', { attribute: 'data-src' });
profile.social = query.urls('.content-container a[href*="twitter.com"], .content-container a[href*="x.com"]');
return profile;
}
/**
 * Loads one page of a channel's shoot search results in a browser tab and scrapes it.
 *
 * The tab comes from `http.getBrowserSession('kink')` and is closed before this
 * function settles, whether it returns normally or throws.
 *
 * @param {object} channel - Channel entity; `parent.url`, `parameters?.slug` and `slug` are read to build the search URL.
 * @param {number} [page=1] - Value placed in the search URL's `page` query parameter.
 * @returns {Promise<*|number>} The result of `scrapeAll` when the response status is 200, otherwise the HTTP status code.
 */
async function fetchLatest(channel, page = 1) {
	const { tab } = await http.getBrowserSession('kink');

	try {
		const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const items = unprint.initAll(html, '.container .card');

		return scrapeAll(items, channel);
	} finally {
		// Close on every exit path: a failed navigation or a scraper error must not leave the tab open.
		await tab.close();
	}
}
/**
 * Loads a scene page in a browser tab and scrapes it.
 *
 * The tab comes from `http.getBrowserSession('kink')` and is closed before this
 * function settles, whether it returns normally or throws.
 *
 * @param {string} url - Scene page URL to navigate to; also passed through to `scrapeScene`.
 * @param {object} channel - Entity passed through to `scrapeScene`.
 * @returns {Promise<*|number>} The result of `scrapeScene` when the response status is 200, otherwise the HTTP status code.
 */
async function fetchScene(url, channel) {
	const { tab } = await http.getBrowserSession('kink');

	try {
		const res = await tab.goto(url);
		const status = res.status();

		if (status !== 200) {
			return status;
		}

		const html = await tab.content();
		const item = unprint.init(html);

		return scrapeScene(item, url, channel);
	} finally {
		// Close on every exit path: a failed navigation or a scraper error must not leave the tab open.
		await tab.close();
	}
}
async function fetchProfile({ name: actorName }, entity) {
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
const { tab } = await http.getBrowserSession('kink');
const searchRes = await tab.goto(`${networkUrl}/search?type=performers&q=${actorName}`);
const searchStatus = searchRes.status();
// const searchRes = await tab.goto(`${networkUrl}/search?type=performers&q=${actorName}`);
const searchApiRes = await tab.goto(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`);
const searchStatus = searchApiRes.status();
if (searchStatus === 200) {
const searchHtml = await tab.content();
const data = unprint.init(searchHtml).query.json('body pre');
const actorId = data.find((actor) => actor.label === actorName)?.id;
const searchResItems = unprint.initAll(searchHtml, '.ratio-model');
const actorItem = searchResItems.find((item) => item.query.exists(`//span[contains(text(), '${actorName}')]`));
if (actorItem) {
const actorPath = actorItem.query.url(null);
const actorUrl = `${networkUrl}${actorPath}`;
if (actorId) {
const actorUrl = `${networkUrl}/model/${actorId}/${slugify(actorName)}`;
const actorRes = await tab.goto(actorUrl);
const actorStatus = actorRes.status();
@@ -239,7 +239,7 @@ async function fetchProfile({ name: actorName }, entity) {
return null;
}
return searchRes.status;
return searchStatus;
}
module.exports = {