Added Kink profile scraper. Fixed --force causing media collisions.

This commit is contained in:
ThePendulum 2020-07-13 00:12:01 +02:00
parent a7707b7b28
commit 6fd2bc2687
6 changed files with 119 additions and 29 deletions

View File

@ -123,6 +123,7 @@ module.exports = {
'julesjordan', 'julesjordan',
'bang', 'bang',
'pervcity', 'pervcity',
'kink',
'peternorth', 'peternorth',
'naughtyamerica', 'naughtyamerica',
'cherrypimps', 'cherrypimps',

View File

@ -64,9 +64,10 @@ const ethnicities = {
black: 'black', black: 'black',
caucasian: 'white', caucasian: 'white',
european: 'white', european: 'white',
hispanic: 'latina', hispanic: 'latin',
indian: 'indian', indian: 'indian',
japanese: 'japanese', japanese: 'japanese',
latin: 'latin',
latina: 'latina', latina: 'latina',
latino: 'latino', latino: 'latino',
white: 'white', white: 'white',

View File

@ -591,13 +591,20 @@ async function storeMedias(baseMedias) {
); );
const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias); const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
const newMedias = argv.force ? uniqueHashMedias.concat(existingHashMedias) : uniqueHashMedias;
const savedMedias = await Promise.map( const savedMedias = await Promise.map(
newMedias, uniqueHashMedias,
async baseMedia => storeFile(baseMedia), async baseMedia => storeFile(baseMedia),
); );
if (argv.force) {
// overwrite files in case image processing was changed
await Promise.map(
existingHashMedias,
async baseMedia => storeFile(baseMedia),
);
}
const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index)); const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry); const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);

View File

@ -1,54 +1,54 @@
'use strict'; 'use strict';
const { get, getAll } = require('../utils/qu'); const qu = require('../utils/qu');
/*
 * NOTE(review): this is a side-by-side diff rendering; each line below shows
 * the pre-commit text followed by the post-commit text. The commit renames
 * scrapeLatest to scrapeAll and the destructured `qu` helper to `query`.
 *
 * Maps a list of scene contexts to release objects. For every scene it reads:
 *   - url / shootId / entryId: from the title link; the ID is the last path
 *     segment of the href, and entryId mirrors shootId
 *   - title, date ('MMM DD, YYYY'), actors, poster, duration
 *   - stars: the `data-rating` attribute divided by 10
 *   - photos: one [candidate, fallback] pair per rollover image, where the
 *     first entry swaps '410/' for '830/' in the path and the second entry is
 *     the untouched source; presumably a larger variant, confirm against site
 */
function scrapeLatest(scenes) { function scrapeAll(scenes) {
return scenes.map(({ qu }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
const href = qu.url('.shoot-thumb-title a'); const href = query.url('.shoot-thumb-title a');
release.url = `https://kink.com${href}`; release.url = `https://kink.com${href}`;
release.shootId = href.split('/').slice(-1)[0]; release.shootId = href.split('/').slice(-1)[0];
release.entryId = release.shootId; release.entryId = release.shootId;
release.title = qu.q('.shoot-thumb-title a', true); release.title = query.q('.shoot-thumb-title a', true);
release.date = qu.date('.date', 'MMM DD, YYYY'); release.date = query.date('.date', 'MMM DD, YYYY');
release.actors = qu.all('.shoot-thumb-models a', true); release.actors = query.all('.shoot-thumb-models a', true);
release.stars = qu.q('.average-rating', 'data-rating') / 10; release.stars = query.q('.average-rating', 'data-rating') / 10;
release.poster = qu.img('.adimage'); release.poster = query.img('.adimage');
release.photos = qu.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [ release.photos = query.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
photo.replace('410/', '830/'), photo.replace('410/', '830/'),
photo, photo,
]); ]);
release.duration = qu.dur('.video span'); release.duration = query.dur('.video span');
return release; return release;
}); });
} }
async function scrapeScene({ qu }, url) { async function scrapeScene({ query }, url) {
const release = { url }; const release = { url };
release.shootId = new URL(url).pathname.split('/')[2]; release.shootId = new URL(url).pathname.split('/')[2];
release.entryId = release.shootId; release.entryId = release.shootId;
release.title = qu.q('.shoot-title span.favorite-button', 'data-title'); release.title = query.q('.shoot-title span.favorite-button', 'data-title');
release.description = qu.q('.description-text', true); release.description = query.q('.description-text', true);
release.date = qu.date('.shoot-date', 'MMMM DD, YYYY'); release.date = query.date('.shoot-date', 'MMMM DD, YYYY');
release.actors = qu.all('.names a', true).map(actor => actor.replace(/,\s*/, '')); release.actors = query.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
release.director = qu.q('.director-name', true); release.director = query.q('.director-name', true);
release.photos = qu.imgs('.gallery .thumb img', 'data-image-file'); release.photos = query.imgs('.gallery .thumb img', 'data-image-file');
release.poster = qu.poster(); release.poster = query.poster();
release.tags = qu.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, '')); release.tags = query.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
const trailer = qu.q('.player span[data-type="trailer-src"]', 'data-url'); const trailer = query.q('.player span[data-type="trailer-src"]', 'data-url');
release.trailer = [ release.trailer = [
{ {
@ -69,23 +69,77 @@ async function scrapeScene({ qu }, url) {
}, },
]; ];
release.channel = qu.url('.shoot-logo a').split('/').slice(-1)[0]; release.channel = query.url('.shoot-logo a').split('/').slice(-1)[0];
return release; return release;
} }
/*
 * NOTE(review): side-by-side diff rendering. The first four lines show the
 * head of the pre-commit fetchLatest on the left and the newly added
 * fetchActorReleases on the right; the remaining lines are additions only.
 *
 * fetchActorReleases(actorUrl, page = 1, accReleases = []):
 * Requests `${actorUrl}?page=${page}`, scrapes every '.shoot-list .shoot'
 * element on the page via scrapeAll, and calls itself with page + 1 for as
 * long as the last pagination item is not marked `.disabled`.
 * Returns the releases accumulated over all pages. If a request is not ok,
 * it returns whatever was accumulated so far rather than a status code.
 */
async function fetchLatest(site, page = 1) { async function fetchActorReleases(actorUrl, page = 1, accReleases = []) {
const res = await getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot'); const res = await qu.get(`${actorUrl}?page=${page}`);
if (res.ok) { if (res.ok) {
return scrapeLatest(res.items, site); const releases = scrapeAll(qu.initAll(res.item.el, '.shoot-list .shoot'));
// the last pagination entry is presumably the "next" link; it not being disabled means more pages
const hasNextPage = res.item.query.exists('.paginated-nav li:last-child:not(.disabled)');
if (hasNextPage) {
return fetchActorReleases(actorUrl, page + 1, accReleases.concat(releases));
}
return accReleases.concat(releases);
}
// failed request: keep the releases gathered from earlier pages
return accReleases;
}
/**
 * Builds an actor profile from a Kink performer page.
 *
 * Physical attributes are derived from the bio tags. Tags are trimmed and
 * lower-cased before matching, so the result does not depend on how the site
 * capitalises an individual tag (the previous exact matching mixed 'Black'
 * with otherwise lower-case tags).
 *
 * @param {{ query: object }} context - page context exposing the query helpers (q, all, img, urls)
 * @param {string} actorUrl - performer page URL, used to fetch the actor's releases
 * @param {{ releases?: boolean }} [include] - optional extras; releases are only fetched when `include.releases` is truthy
 * @returns {Promise<object>} profile with description, tag-derived attributes, avatar, social links and optionally releases
 */
async function scrapeProfile({ query }, actorUrl, include) {
	const profile = {};

	profile.description = query.q('.bio #expand-text', true);

	const tags = (query.all('.bio-tags a', true) || [])
		.filter(Boolean)
		.map(tag => String(tag).trim().toLowerCase());
	const hasTag = tag => tags.includes(tag);

	// later matches intentionally override earlier ones, same precedence as before
	if (hasTag('brunette') || hasTag('brunet')) profile.hairColor = 'brown';
	if (hasTag('blonde') || hasTag('blond')) profile.hairColor = 'blonde';
	if (hasTag('black hair')) profile.hairColor = 'black';
	if (hasTag('redhead')) profile.hairColor = 'red';

	if (hasTag('natural boobs')) profile.naturalBoobs = true;
	if (hasTag('fake boobs')) profile.naturalBoobs = false;

	if (hasTag('white')) profile.ethnicity = 'white';
	if (hasTag('latin')) profile.ethnicity = 'latin';
	if (hasTag('black')) profile.ethnicity = 'black';

	if (hasTag('pierced nipples')) profile.hasPiercings = true;
	if (hasTag('tattoo')) profile.hasTattoos = true;
	if (hasTag('foreskin')) profile.hasForeskin = true;

	// gender is only inferred when both a penis-related and a breast-related tag are present
	const hasPenisTag = hasTag('big dick') || hasTag('foreskin');
	const hasBreastTag = hasTag('fake boobs') || hasTag('big tits');

	if (hasPenisTag && hasBreastTag) profile.gender = 'transsexual';

	// prefer the slider image; the plain bio image is skipped when it is the "Missing" placeholder
	profile.avatar = query.img('.bio-slider-img, .bio-img:not([src*="Missing"])');
	profile.social = query.urls('a.social-link');

	// tolerate callers that omit the include options entirely
	if (include && include.releases) {
		profile.releases = await fetchActorReleases(actorUrl);
	}

	return profile;
}
/*
 * NOTE(review): side-by-side diff rendering. The first four lines are the
 * post-commit text only; the last three show the pre- and post-commit text
 * next to each other.
 *
 * Fetches page `page` of a site's latest-scenes listing at
 * `${site.url}/latest/page/${page}` and scrapes every '.shoot-list .shoot'
 * item. Returns the scraped releases, or the response status when the request
 * is not ok.
 */
async function fetchLatest(site, page = 1) {
const res = await qu.getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
if (res.ok) {
// NOTE(review): scrapeAll, as visible in this diff, declares only `scenes`; `site` looks unused there
return scrapeAll(res.items, site);
} }
return res.status; return res.status;
} }
async function fetchScene(url, site) { async function fetchScene(url, site) {
const res = await get(url); const res = await qu.get(url);
if (res.ok) { if (res.ok) {
return scrapeScene(res.item, url, site); return scrapeScene(res.item, url, site);
@ -94,7 +148,32 @@ async function fetchScene(url, site) {
return res.status; return res.status;
} }
/**
 * Looks up a performer by name on Kink and scrapes their profile page.
 *
 * The performer search is queried first; the result whose thumbnail `alt`
 * text equals the actor name exactly is followed to the profile page.
 *
 * @param {string} actorName - performer name to search for
 * @param {object} entity - network/site entity (unused here, kept for the shared scraper signature)
 * @param {object} include - forwarded to scrapeProfile, e.g. `{ releases: true }`
 * @returns {Promise<object|number|null>} the scraped profile, `null` when no
 *   search result matches the name, or the HTTP status of the failed request
 */
async function fetchProfile(actorName, entity, include) {
	// encode the name so spaces, '&', '#', etc. cannot corrupt the query string
	const searchUrl = `https://kink.com/search?type=performers&q=${encodeURIComponent(actorName)}`;
	const searchRes = await qu.getAll(searchUrl, '.model');

	if (!searchRes.ok) {
		return searchRes.status;
	}

	// escape characters that would terminate or corrupt the quoted attribute value
	const escapedName = String(actorName).replace(/["\\]/g, '\\$&');
	const avatarSelector = `.model-link img[alt="${escapedName}"]`;

	// query each search result's own context; the module-level helper has no element bound to it
	const actorItem = searchRes.items.find(item => item.query.exists(avatarSelector));

	if (!actorItem) {
		return null;
	}

	const actorPath = actorItem.query.url('.model-link');
	const actorUrl = `https://kink.com${actorPath}`;
	const actorRes = await qu.get(actorUrl);

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, include);
	}

	return actorRes.status;
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile,
}; };

View File

@ -180,6 +180,7 @@ module.exports = {
julesjordan, julesjordan,
kellymadison, kellymadison,
killergram, killergram,
kink,
legalporno, legalporno,
men, men,
metrohd, metrohd,

View File

@ -358,6 +358,7 @@ module.exports = {
ctxa: initAll, ctxa: initAll,
geta: getAll, geta: getAll,
qu: quFuncs, qu: quFuncs,
query: quFuncs,
prefixUrl, prefixUrl,
...legacyFuncs, ...legacyFuncs,
}; };