Fixed Kink scraper.
This commit is contained in:
parent
b6a402d964
commit
ab46e8558d
|
@ -88,7 +88,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.10.11",
|
"unprint": "^0.10.12",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
@ -18293,9 +18293,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/unprint": {
|
"node_modules/unprint": {
|
||||||
"version": "0.10.11",
|
"version": "0.10.12",
|
||||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.11.tgz",
|
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.12.tgz",
|
||||||
"integrity": "sha512-+OL+8BFF9SYvayp57l8ifq77I6ok2ilPCidBVka7VbMALJgqHxkHqrqkCupw2RKX2tNfPT/TGa+NJsYGboFnRQ==",
|
"integrity": "sha512-EbRGhkoOcmnMmQBaKZA6Tky6gpEwrhy4tDB1KeajSGhqli7zhlNe3WqsTQPtLBNKa/4M2PJZS8l0GOOjvTLndQ==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^0.27.2",
|
"axios": "^0.27.2",
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
|
|
|
@ -147,7 +147,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.10.11",
|
"unprint": "^0.10.12",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
|
@ -4,40 +4,49 @@ const unprint = require('unprint');
|
||||||
|
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const { stripQuery } = require('../utils/url');
|
||||||
|
|
||||||
function scrapeAll(scenes, entity) {
|
function scrapeAll(scenes, entity) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
||||||
|
|
||||||
const href = query.url('.shoot-link');
|
const href = query.url('a[href*="/shoot"]');
|
||||||
|
|
||||||
release.url = `${networkUrl}${href}`;
|
release.url = `${networkUrl}${href}`;
|
||||||
|
|
||||||
release.shootId = href.split('/').slice(-1)[0];
|
release.shootId = href.split('/').slice(-1)[0];
|
||||||
release.entryId = release.shootId;
|
release.entryId = release.shootId;
|
||||||
|
|
||||||
release.title = query.content('.shoot-thumb-title a', true);
|
release.title = query.content('.card-body a[href*="/shoot"]').trim();
|
||||||
release.date = query.date('.date', 'MMM DD, YYYY');
|
release.date = query.date('small > span', 'MMM D, YYYY');
|
||||||
|
|
||||||
release.actors = query.all('.shoot-thumb-models a').map((actorEl) => ({
|
release.actors = query.all('a[href*="/model"]').map((actorEl) => ({
|
||||||
name: unprint.query.content(actorEl),
|
name: unprint.query.content(actorEl),
|
||||||
url: unprint.query.url(actorEl, null, { origin: networkUrl }),
|
url: unprint.query.url(actorEl, null, { origin: networkUrl }),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.rating = query.number('.thumb-ratings') / 10;
|
const poster = query.img('.ratio-thumbnail img');
|
||||||
|
|
||||||
release.poster = query.img('.adimage');
|
release.poster = [
|
||||||
release.photos = query.imgs('.rollover .roll-image', { attribute: 'data-imagesrc' }).map((photo) => [
|
stripQuery(poster).replace('_thumb', '_full'),
|
||||||
photo
|
stripQuery(poster),
|
||||||
.replace('410/', '830/')
|
poster,
|
||||||
.replace('_thumb', '_full'),
|
];
|
||||||
photo,
|
|
||||||
]);
|
try {
|
||||||
|
release.photos = JSON.parse(query.attribute('.ratio-thumbnail img', 'data-cycle')).map((src) => [
|
||||||
|
stripQuery(src).replace('_thumb', '_full'),
|
||||||
|
stripQuery(src),
|
||||||
|
src,
|
||||||
|
]);
|
||||||
|
} catch (error) {
|
||||||
|
// no photos
|
||||||
|
}
|
||||||
|
|
||||||
release.trailer = `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`;
|
release.trailer = `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`;
|
||||||
|
|
||||||
release.duration = query.dur('.video span');
|
release.rating = query.number('.thumb-up') / 10;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
|
@ -45,65 +54,63 @@ function scrapeAll(scenes, entity) {
|
||||||
|
|
||||||
function scrapeScene({ query }, url, entity) {
|
function scrapeScene({ query }, url, entity) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
const data = query.json('div[data-setup]', { attribute: 'data-setup' });
|
||||||
|
|
||||||
release.shootId = new URL(url).pathname.split('/')[2];
|
release.shootId = data?.id || new URL(url).pathname.split('/')[2];
|
||||||
release.entryId = release.shootId;
|
release.entryId = data?.id || release.shootId;
|
||||||
|
|
||||||
release.title = query.attribute('.shoot-title .favorite-button', 'data-title') || query.content('.shoot-title');
|
release.title = data?.title || query.attribute('#shootPage #favoriteShootButton', 'data-title') || query.content('#shootPage h1');
|
||||||
release.description = query.content('.description-text');
|
release.description = query.content('//h4[contains(text(), \'Description\')]/following-sibling::span/p');
|
||||||
|
|
||||||
release.date = query.date('.shoot-date', 'MMMM DD, YYYY');
|
release.date = query.date('.shoot-detail-legend', 'MM/DD/YY');
|
||||||
|
release.duration = data?.duration
|
||||||
|
? data.duration / 1000
|
||||||
|
: query.duration('#shootPage .clock');
|
||||||
|
|
||||||
release.actors = query.elements('.names a').map((actorEl) => ({
|
release.actors = query.elements('#shootPage h1 + span a[href*="/model"]').map((actorEl) => ({
|
||||||
name: unprint.query.content(actorEl).replace(/,\s*/, ''),
|
name: unprint.query.content(actorEl).replace(/,\s*/, ''),
|
||||||
url: unprint.query.url(actorEl, null, { origin: entity.type === 'channel' ? entity.parent.url : entity.url }),
|
url: unprint.query.url(actorEl, null, { origin: entity.type === 'channel' ? entity.parent.url : entity.url }),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.director = query.content('.director-name');
|
release.director = query.content('.director-name')?.trim();
|
||||||
|
|
||||||
release.photos = query.imgs('.gallery .thumb img, #gallerySlider .gallery-img', { attribute: 'data-image-file' });
|
const poster = data?.posterUrl || query.poster();
|
||||||
release.poster = query.poster();
|
|
||||||
release.trailer = query.dataset('.player span[data-type="trailer-src"]', 'url') || `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`;
|
|
||||||
|
|
||||||
release.tags = query.contents('.tag-list a[href*="/tag"]').map((tag) => tag.replace(/,\s*/, ''));
|
release.poster = [
|
||||||
|
stripQuery(poster),
|
||||||
|
poster,
|
||||||
|
];
|
||||||
|
|
||||||
release.channel = slugify(query.url('.shoot-logo a')?.split('/').slice(-1)[0], '');
|
release.photos = query.json('#galleryImagesContainer', { attribute: 'data-images' })?.map((src) => [
|
||||||
|
src.fullPath,
|
||||||
|
src.thumbFullPath,
|
||||||
|
]);
|
||||||
|
|
||||||
|
release.trailer = [
|
||||||
|
...(data?.trailer?.sources?.map((source) => ({
|
||||||
|
src: source.url,
|
||||||
|
quality: source.resolution,
|
||||||
|
})) || []),
|
||||||
|
`https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`,
|
||||||
|
];
|
||||||
|
|
||||||
|
release.tags = query.contents('#shootPage a[href*="/tag"]').map((tag) => tag.replace(/,\s*/, ''));
|
||||||
|
release.channel = data?.channelName?.name || slugify(query.url('.shoot-detail-legend a[href*="/channel"]')?.split('/').slice(-1)[0], '');
|
||||||
|
|
||||||
|
release.qualities = data?.resolutions
|
||||||
|
? Object.entries(data.resolutions).filter(([, enabled]) => enabled).map(([res]) => parseInt(res, 10))
|
||||||
|
: null;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchActorReleases(actorId, entity, page = 1, accReleases = []) {
|
async function scrapeProfile({ query }, actorUrl) {
|
||||||
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
|
||||||
const { tab } = await http.getBrowserSession('kink');
|
|
||||||
const res = await tab.goto(`${networkUrl}/search?type=shoots&performerIds=${actorId}&sort=published&page=${page}`);
|
|
||||||
|
|
||||||
if (res.status() === 200) {
|
|
||||||
const html = await tab.content();
|
|
||||||
const item = unprint.init(html);
|
|
||||||
const releases = scrapeAll(unprint.initAll(html, '.results .shoot-card'), entity);
|
|
||||||
const hasNextPage = item.query.exists('.paginated-nav li:last-child:not(.disabled)');
|
|
||||||
|
|
||||||
await tab.close();
|
|
||||||
|
|
||||||
if (hasNextPage) {
|
|
||||||
return fetchActorReleases(actorId, entity, page + 1, accReleases.concat(releases));
|
|
||||||
}
|
|
||||||
|
|
||||||
return accReleases.concat(releases);
|
|
||||||
}
|
|
||||||
|
|
||||||
await tab.close();
|
|
||||||
|
|
||||||
return accReleases;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scrapeProfile({ query }, actorUrl, entity, include) {
|
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
profile.entryId = actorUrl.match(/\/model\/(\d+)\//)?.[1] || query.attribute('.favorite-button.bio-favorite', 'data-id');
|
profile.entryId = actorUrl.match(/\/model\/(\d+)\//)?.[1] || query.attribute('h1 + button[data-id]', 'data-id');
|
||||||
profile.description = query.content('.bio-outer #expand-text');
|
profile.description = query.content('.content-container #expand-text')?.trim();
|
||||||
|
|
||||||
const tags = query.contents('.bio-tags a').map((tag) => tag.toLowerCase());
|
const tags = query.contents('.content-container a[href*="/tag"]').map((tag) => tag.toLowerCase().trim());
|
||||||
|
|
||||||
if (tags.includes('brunette') || tags.includes('brunet')) profile.hairColor = 'brown';
|
if (tags.includes('brunette') || tags.includes('brunet')) profile.hairColor = 'brown';
|
||||||
if (tags.includes('blonde') || tags.includes('blond')) profile.hairColor = 'blonde';
|
if (tags.includes('blonde') || tags.includes('blond')) profile.hairColor = 'blonde';
|
||||||
|
@ -125,24 +132,21 @@ async function scrapeProfile({ query }, actorUrl, entity, include) {
|
||||||
if ((tags.includes('big dick') || tags.includes('foreskin'))
|
if ((tags.includes('big dick') || tags.includes('foreskin'))
|
||||||
&& (tags.includes('fake boobs') || tags.includes('big tits'))) profile.gender = 'transsexual';
|
&& (tags.includes('fake boobs') || tags.includes('big tits'))) profile.gender = 'transsexual';
|
||||||
|
|
||||||
profile.avatar = query.img('.bio-slider-img, .bio-img:not([src*="Missing"])');
|
[profile.avatar, ...profile.photos] = query.imgs('.kink-slider-img:not([data-src*="Missing"])', { attribute: 'data-src' });
|
||||||
profile.social = query.urls('a.social-link');
|
profile.social = query.urls('.content-container a[href*="twitter.com"], .content-container a[href*="x.com"]');
|
||||||
|
|
||||||
if (include.releases && profile.entryId) {
|
|
||||||
profile.releases = await fetchActorReleases(profile.entryId, entity);
|
|
||||||
}
|
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const { tab } = await http.getBrowserSession('kink');
|
const { tab } = await http.getBrowserSession('kink');
|
||||||
const res = await tab.goto(`${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`);
|
const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`;
|
||||||
|
const res = await tab.goto(url);
|
||||||
const status = res.status();
|
const status = res.status();
|
||||||
|
|
||||||
if (status === 200) {
|
if (status === 200) {
|
||||||
const html = await tab.content();
|
const html = await tab.content();
|
||||||
const items = unprint.initAll(html, '.results .shoot-card');
|
const items = unprint.initAll(html, '.container .card');
|
||||||
|
|
||||||
const scenes = scrapeAll(items, channel);
|
const scenes = scrapeAll(items, channel);
|
||||||
|
|
||||||
|
@ -178,7 +182,7 @@ async function fetchScene(url, channel) {
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, entity, options) {
|
async function fetchProfile({ name: actorName }, entity) {
|
||||||
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url;
|
||||||
const { tab } = await http.getBrowserSession('kink');
|
const { tab } = await http.getBrowserSession('kink');
|
||||||
|
|
||||||
|
@ -188,12 +192,13 @@ async function fetchProfile({ name: actorName }, entity, options) {
|
||||||
if (searchStatus === 200) {
|
if (searchStatus === 200) {
|
||||||
const searchHtml = await tab.content();
|
const searchHtml = await tab.content();
|
||||||
|
|
||||||
const searchResItems = unprint.initAll(searchHtml, '.model');
|
const searchResItems = unprint.initAll(searchHtml, '.ratio-model');
|
||||||
const actorItem = searchResItems.find((item) => item.query.exists(`.model-link img[alt="${actorName}"]`));
|
const actorItem = searchResItems.find((item) => item.query.exists(`//span[contains(text(), '${actorName}')]`));
|
||||||
|
|
||||||
if (actorItem) {
|
if (actorItem) {
|
||||||
const actorPath = actorItem.query.url('.model-link');
|
const actorPath = actorItem.query.url(null);
|
||||||
const actorUrl = `${networkUrl}${actorPath}`;
|
const actorUrl = `${networkUrl}${actorPath}`;
|
||||||
|
|
||||||
const actorRes = await tab.goto(actorUrl);
|
const actorRes = await tab.goto(actorUrl);
|
||||||
const actorStatus = actorRes.status();
|
const actorStatus = actorRes.status();
|
||||||
|
|
||||||
|
@ -203,7 +208,7 @@ async function fetchProfile({ name: actorName }, entity, options) {
|
||||||
|
|
||||||
await tab.close();
|
await tab.close();
|
||||||
|
|
||||||
return scrapeProfile(item, actorUrl, entity, options);
|
return scrapeProfile(item, actorUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
await tab.close();
|
await tab.close();
|
||||||
|
|
Loading…
Reference in New Issue