Refactored Kink scraper to use unprint browser. Improved socials handling in actors module.
This commit is contained in:
@@ -2,7 +2,6 @@
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { stripQuery } = require('../utils/url');
|
||||
|
||||
@@ -40,23 +39,24 @@ function scrapeAll(scenes, entity) {
|
||||
}));
|
||||
|
||||
try {
|
||||
release.photos = JSON.parse(query.attribute('.ratio-thumbnail img', 'data-cycle')).map((src) => [
|
||||
stripQuery(src).replace('_thumb', '_full'),
|
||||
stripQuery(src),
|
||||
src,
|
||||
].filter(Boolean).map((source) => ({
|
||||
src: source,
|
||||
expectType: {
|
||||
PNG: 'image/png',
|
||||
},
|
||||
})));
|
||||
release.photos = JSON.parse(query.attribute('.ratio-thumbnail img', 'data-cycle'))
|
||||
.map((src) => Array.from(new Set([
|
||||
stripQuery(src).replace('_thumb', '_full'),
|
||||
stripQuery(src),
|
||||
src,
|
||||
])).filter(Boolean).map((source) => ({
|
||||
src: source,
|
||||
expectType: {
|
||||
PNG: 'image/png',
|
||||
},
|
||||
})));
|
||||
} catch (error) {
|
||||
// no photos
|
||||
}
|
||||
|
||||
release.trailer = `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`;
|
||||
|
||||
release.channel = slugify(query.content('.shoot-detail-legend a[href*="/channel"]'), '');
|
||||
release.channel = slugify(query.content('.shoot-thumbnail-footer a[href*="/channel"]'), '');
|
||||
release.rating = query.number('.thumb-up') / 10;
|
||||
|
||||
return release;
|
||||
@@ -64,25 +64,21 @@ function scrapeAll(scenes, entity) {
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true });
|
||||
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
|
||||
const res = await tab.goto(url);
|
||||
const status = res.status();
|
||||
|
||||
if (status === 200) {
|
||||
const html = await tab.content();
|
||||
const items = unprint.initAll(html, '.container .card');
|
||||
const res = await unprint.browserRequest(url, {
|
||||
selectAll: '.container .card',
|
||||
});
|
||||
|
||||
const scenes = scrapeAll(items, channel);
|
||||
if (res.status === 200) {
|
||||
// const items = unprint.initAll(html, '.container .card');
|
||||
|
||||
await tab.close();
|
||||
const scenes = scrapeAll(res.context, channel);
|
||||
|
||||
return scenes;
|
||||
}
|
||||
|
||||
await tab.close();
|
||||
|
||||
return status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, url, entity) {
|
||||
@@ -149,29 +145,19 @@ function scrapeScene({ query }, url, entity) {
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true });
|
||||
const res = await tab.goto(url);
|
||||
const res = await unprint.browserRequest(url);
|
||||
|
||||
const status = res.status();
|
||||
|
||||
if (status === 200) {
|
||||
const html = await tab.content();
|
||||
const item = unprint.init(html);
|
||||
|
||||
const scene = scrapeScene(item, url, channel);
|
||||
|
||||
await tab.close();
|
||||
if (res.status === 200) {
|
||||
const scene = scrapeScene(res.context, url, channel);
|
||||
|
||||
return scene;
|
||||
}
|
||||
|
||||
await tab.close();
|
||||
|
||||
return status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function scrapeProfile({ query }, actorUrl) {
|
||||
const profile = {};
|
||||
const profile = { url: actorUrl };
|
||||
|
||||
profile.entryId = actorUrl.match(/\/model\/(\d+)\//)?.[1] || query.attribute('h1 + button[data-id]', 'data-id');
|
||||
profile.description = query.content('.content-container #expand-text')?.trim();
|
||||
@@ -204,42 +190,43 @@ async function scrapeProfile({ query }, actorUrl) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, entity) {
|
||||
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
||||
const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true });
|
||||
async function getActorUrl({ name: actorName, url }, networkUrl) {
|
||||
if (url) {
|
||||
return url;
|
||||
}
|
||||
|
||||
// const searchRes = await tab.goto(`${networkUrl}/search?type=performers&q=${actorName}`);
|
||||
const searchApiRes = await tab.goto(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`);
|
||||
const searchStatus = searchApiRes.status();
|
||||
const searchApiRes = await unprint.browserRequest(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`);
|
||||
|
||||
if (searchStatus === 200) {
|
||||
const searchHtml = await tab.content();
|
||||
const data = unprint.init(searchHtml).query.json('body pre');
|
||||
if (searchApiRes.status === 200) {
|
||||
const data = searchApiRes.context.query.json('body pre');
|
||||
const actorId = data.find((actor) => actor.label === actorName)?.id;
|
||||
|
||||
if (actorId) {
|
||||
const actorUrl = `${networkUrl}/model/${actorId}/${slugify(actorName)}`;
|
||||
const actorRes = await tab.goto(actorUrl);
|
||||
const actorStatus = actorRes.status();
|
||||
|
||||
if (actorStatus === 200) {
|
||||
const actorHtml = await tab.content();
|
||||
const item = unprint.init(actorHtml);
|
||||
|
||||
await tab.close();
|
||||
|
||||
return scrapeProfile(item, actorUrl);
|
||||
}
|
||||
|
||||
await tab.close();
|
||||
|
||||
return actorRes.status;
|
||||
return actorUrl;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return searchStatus;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, entity) {
|
||||
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
||||
const actorUrl = await getActorUrl(actor, networkUrl);
|
||||
|
||||
if (actorUrl) {
|
||||
const actorRes = await unprint.browserRequest(actorUrl);
|
||||
|
||||
if (actorRes.status === 200) {
|
||||
return scrapeProfile(actorRes.context, actorUrl);
|
||||
}
|
||||
|
||||
return actorRes.status;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user