Updated Karups scraper.
This commit is contained in:
parent
b4877d16da
commit
0511b5a4a4
|
|
@ -6,7 +6,7 @@
|
||||||
},
|
},
|
||||||
"parserOptions": {
|
"parserOptions": {
|
||||||
"parser": "@babel/eslint-parser",
|
"parser": "@babel/eslint-parser",
|
||||||
"ecmaVersion": "latest",
|
"ecmaVersion": 2024,
|
||||||
"sourceType": "script"
|
"sourceType": "script"
|
||||||
},
|
},
|
||||||
"rules": {
|
"rules": {
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.18.13",
|
"unprint": "^0.18.14",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
@ -20380,9 +20380,10 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/unprint": {
|
"node_modules/unprint": {
|
||||||
"version": "0.18.13",
|
"version": "0.18.14",
|
||||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.13.tgz",
|
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.18.14.tgz",
|
||||||
"integrity": "sha512-vjUF7X7/dg2Os/zesJ0+23eVc7NH2oKzspPSyBzcIx6IuEcVm1rdlD9dAxdaRMUNBWEeA5ekyk263CBI3lyaBQ==",
|
"integrity": "sha512-6sHW3/2W2hNTuE/EcxM8CJ7ZX+JWFmWS0G7OuCYz9CAYX2bb6pAQ9Eaz0FvqCqqk1GjaxHEjWBhQjoACfIuiCA==",
|
||||||
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
"cookie": "^1.1.1",
|
"cookie": "^1.1.1",
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@
|
||||||
"tunnel": "0.0.6",
|
"tunnel": "0.0.6",
|
||||||
"ua-parser-js": "^1.0.37",
|
"ua-parser-js": "^1.0.37",
|
||||||
"undici": "^5.28.1",
|
"undici": "^5.28.1",
|
||||||
"unprint": "^0.18.13",
|
"unprint": "^0.18.14",
|
||||||
"url-pattern": "^1.0.3",
|
"url-pattern": "^1.0.3",
|
||||||
"v-tooltip": "^2.1.3",
|
"v-tooltip": "^2.1.3",
|
||||||
"video.js": "^8.6.1",
|
"video.js": "^8.6.1",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
const channelSlugs = {
|
const channelSlugs = {
|
||||||
|
|
@ -16,34 +17,66 @@ function scrapeAll(scenes) {
|
||||||
release.url = query.url('a');
|
release.url = query.url('a');
|
||||||
release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1];
|
release.entryId = new URL(release.url).pathname.match(/(\d+)\.html/)?.[1];
|
||||||
|
|
||||||
release.title = query.cnt('.title');
|
release.title = query.content('.title');
|
||||||
release.date = query.date('.date', 'MMM Do, YYYY');
|
release.date = query.date('.date', 'MMM Do, YYYY');
|
||||||
|
|
||||||
release.channel = channelSlugs[query.cnt('.site')];
|
release.channel = channelSlugs[query.content('.site')];
|
||||||
|
|
||||||
release.poster = query.img('.thumb img');
|
const poster = query.img('.thumb img');
|
||||||
|
|
||||||
|
if (poster) {
|
||||||
|
release.poster = Array.from(new Set([
|
||||||
|
poster.replace('.jpg', '-feat_lg.jpg'),
|
||||||
|
poster,
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query }, url) {
|
async function fetchLatest(channel, page) {
|
||||||
|
const res = await unprint.get(`${channel.url}/videos/page${page}.html`, {
|
||||||
|
selectAll: '.listing-videos .item',
|
||||||
|
cookies: {
|
||||||
|
warningHidden: 'hide',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeAll(res.context, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeScene({ query }, { url }) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];
|
release.entryId = new URL(url).pathname.match(/(\d+)\.html/)?.[1];
|
||||||
|
|
||||||
release.title = query.cnt('.title');
|
release.title = query.content('.title');
|
||||||
|
release.description = query.content('.content-information-description p');
|
||||||
|
|
||||||
release.date = query.date('.date .content', 'MMM Do, YYYY');
|
release.date = query.date('.date .content', 'MMM Do, YYYY');
|
||||||
|
|
||||||
release.actors = query.all('.models .content a').map((modelEl) => ({
|
release.actors = query.all('.models .content a').map((modelEl) => ({
|
||||||
name: query.cnt(modelEl),
|
name: unprint.query.content(modelEl),
|
||||||
url: query.url(modelEl, null),
|
url: unprint.query.url(modelEl, null),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.poster = query.poster();
|
// videos and photos seem to be removed, query educated guess just in case
|
||||||
release.photos = query.imgs('.video-thumbs img').slice(1);
|
const poster = query.poster('.video-player video') || query.img('.video-poster img');
|
||||||
|
|
||||||
release.trailer = query.video();
|
if (poster) {
|
||||||
|
release.poster = Array.from(new Set([
|
||||||
|
poster,
|
||||||
|
poster.replace('-feat_lg', ''),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
release.photos = query.imgs('.video-thumbs img').slice(1);
|
||||||
|
release.trailer = query.video('.video-player source');
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
@ -54,38 +87,47 @@ function scrapeProfile({ query }, entity) {
|
||||||
profile.gender = 'female';
|
profile.gender = 'female';
|
||||||
|
|
||||||
profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
|
profile.avatar = query.img('.model-thumb img[src*=".jpg"]');
|
||||||
profile.scenes = scrapeAll(qu.initAll(query.all('.listing-videos .item')), entity);
|
profile.scenes = scrapeAll(unprint.initAll(query.all('.listing-videos .item')), entity);
|
||||||
|
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(channel, page) {
|
async function getActorUrl(actor) {
|
||||||
const res = await qu.getAll(`${channel.url}/videos/page${page}.html`, '.listing-videos .item');
|
if (actor.url) {
|
||||||
|
return actor.url;
|
||||||
if (res.ok) {
|
|
||||||
return scrapeAll(res.items, channel);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
const res = await unprint.get(`https://www.karups.com/models/search/${actor.slug}/`, {
|
||||||
|
selectAll: '.listing-models .item',
|
||||||
|
cookies: {
|
||||||
|
warningHidden: 'hide',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!res.ok) {
|
||||||
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
const actorUrl = res.context.find((item) => slugify(item.query.content('.title')) === actor.slug)?.query.url('a');
|
||||||
|
|
||||||
|
return actorUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile(baseActor, entity) {
|
async function fetchProfile(actor, entity) {
|
||||||
const searchRes = await qu.getAll(`https://www.karups.com/models/search/${baseActor.slug}/`, '.listing-models .item');
|
const actorUrl = await getActorUrl(actor);
|
||||||
|
|
||||||
if (!searchRes.ok) {
|
|
||||||
return searchRes.status;
|
|
||||||
}
|
|
||||||
|
|
||||||
const actorUrl = searchRes.items.find((item) => slugify(item.query.cnt('.title')) === baseActor.slug)?.query.url('a');
|
|
||||||
|
|
||||||
if (!actorUrl) {
|
if (!actorUrl) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const actorRes = await qu.get(actorUrl);
|
const actorRes = await unprint.get(actorUrl, {
|
||||||
|
cookies: {
|
||||||
|
warningHidden: 'hide',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
if (actorRes.ok) {
|
if (actorRes.ok) {
|
||||||
return scrapeProfile(actorRes.item, entity);
|
return scrapeProfile(actorRes.context, entity);
|
||||||
}
|
}
|
||||||
|
|
||||||
return actorRes.status;
|
return actorRes.status;
|
||||||
|
|
@ -94,6 +136,10 @@ async function fetchProfile(baseActor, entity) {
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
scrapeScene,
|
scrapeScene: {
|
||||||
deprecated: true,
|
scraper: scrapeScene,
|
||||||
|
cookies: {
|
||||||
|
warningHidden: 'hide',
|
||||||
|
},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
"extends": "airbnb-base",
|
"extends": "airbnb-base",
|
||||||
"parserOptions": {
|
"parserOptions": {
|
||||||
"parser": "@babel/eslint-parser",
|
"parser": "@babel/eslint-parser",
|
||||||
|
"ecmaVersion": "latest",
|
||||||
"sourceType": "script"
|
"sourceType": "script"
|
||||||
},
|
},
|
||||||
"rules": {
|
"rules": {
|
||||||
|
|
|
||||||
|
|
@ -227,6 +227,7 @@ const actors = [
|
||||||
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
|
||||||
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
|
||||||
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
|
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
|
||||||
|
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar', 'gender'] },
|
||||||
];
|
];
|
||||||
|
|
||||||
const actorScrapers = scrapers.actors;
|
const actorScrapers = scrapers.actors;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue