From 6fd2bc2687af51bae2202d0be4295652a4eb3761 Mon Sep 17 00:00:00 2001
From: Niels Simenon
Date: Mon, 13 Jul 2020 00:12:01 +0200
Subject: [PATCH] Added Kink profile scraper. Fixed --force causing media collisions.

---
Review notes (text between the "---" line and the first "diff --git" is not part of the change):
 * fetchProfile now tests every search result through that result's own
   query helper; the find() callback previously ignored its argument.
 * The performer name is URL-encoded before it is placed in the search query.
 * Indentation and blank context lines were restored from a copy whose
   whitespace had been collapsed; confirm them against the tree before applying.
 * The "index" blob ids for src/scrapers/kink.js still describe the revision
   before the two fixes above.

 config/default.js        |   1 +
 src/actors.js            |   3 +-
 src/media.js             |  11 +++-
 src/scrapers/kink.js     | 135 +++++++++++++++++++++++++++++++--------
 src/scrapers/scrapers.js |   1 +
 src/utils/qu.js          |   1 +
 6 files changed, 123 insertions(+), 29 deletions(-)

diff --git a/config/default.js b/config/default.js
index 734752a4..9d92e2cd 100644
--- a/config/default.js
+++ b/config/default.js
@@ -123,6 +123,7 @@ module.exports = {
 		'julesjordan',
 		'bang',
 		'pervcity',
+		'kink',
 		'peternorth',
 		'naughtyamerica',
 		'cherrypimps',
diff --git a/src/actors.js b/src/actors.js
index 6ff262f7..6a1cd64f 100644
--- a/src/actors.js
+++ b/src/actors.js
@@ -64,9 +64,10 @@ const ethnicities = {
 	black: 'black',
 	caucasian: 'white',
 	european: 'white',
-	hispanic: 'latina',
+	hispanic: 'latin',
 	indian: 'indian',
 	japanese: 'japanese',
+	latin: 'latin',
 	latina: 'latina',
 	latino: 'latino',
 	white: 'white',
diff --git a/src/media.js b/src/media.js
index ede2dc72..4a8f8f6e 100644
--- a/src/media.js
+++ b/src/media.js
@@ -591,13 +591,20 @@ async function storeMedias(baseMedias) {
 	);
 
 	const [uniqueHashMedias, existingHashMedias] = await findHashDuplicates(fetchedMedias);
-	const newMedias = argv.force ? uniqueHashMedias.concat(existingHashMedias) : uniqueHashMedias;
 
 	const savedMedias = await Promise.map(
-		newMedias,
+		uniqueHashMedias,
 		async baseMedia => storeFile(baseMedia),
 	);
 
+	if (argv.force) {
+		// overwrite files in case image processing was changed
+		await Promise.map(
+			existingHashMedias,
+			async baseMedia => storeFile(baseMedia),
+		);
+	}
+
 	const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
 	const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
 
diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js
index a63ad7e2..62339375 100644
--- a/src/scrapers/kink.js
+++ b/src/scrapers/kink.js
@@ -1,54 +1,54 @@
 'use strict';
 
-const { get, getAll } = require('../utils/qu');
+const qu = require('../utils/qu');
 
-function scrapeLatest(scenes) {
-	return scenes.map(({ qu }) => {
+function scrapeAll(scenes) {
+	return scenes.map(({ query }) => {
 		const release = {};
 
-		const href = qu.url('.shoot-thumb-title a');
+		const href = query.url('.shoot-thumb-title a');
 		release.url = `https://kink.com${href}`;
 
 		release.shootId = href.split('/').slice(-1)[0];
 		release.entryId = release.shootId;
 
-		release.title = qu.q('.shoot-thumb-title a', true);
-		release.date = qu.date('.date', 'MMM DD, YYYY');
+		release.title = query.q('.shoot-thumb-title a', true);
+		release.date = query.date('.date', 'MMM DD, YYYY');
 
-		release.actors = qu.all('.shoot-thumb-models a', true);
-		release.stars = qu.q('.average-rating', 'data-rating') / 10;
+		release.actors = query.all('.shoot-thumb-models a', true);
+		release.stars = query.q('.average-rating', 'data-rating') / 10;
 
-		release.poster = qu.img('.adimage');
-		release.photos = qu.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
+		release.poster = query.img('.adimage');
+		release.photos = query.imgs('.rollover .roll-image', 'data-imagesrc').map(photo => [
 			photo.replace('410/', '830/'),
 			photo,
 		]);
 
-		release.duration = qu.dur('.video span');
+		release.duration = query.dur('.video span');
 
 		return release;
 	});
 }
 
-async function scrapeScene({ qu }, url) {
+async function scrapeScene({ query }, url) {
 	const release = { url };
 
 	release.shootId = new URL(url).pathname.split('/')[2];
 	release.entryId = release.shootId;
 
-	release.title = qu.q('.shoot-title span.favorite-button', 'data-title');
-	release.description = qu.q('.description-text', true);
+	release.title = query.q('.shoot-title span.favorite-button', 'data-title');
+	release.description = query.q('.description-text', true);
 
-	release.date = qu.date('.shoot-date', 'MMMM DD, YYYY');
-	release.actors = qu.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
-	release.director = qu.q('.director-name', true);
+	release.date = query.date('.shoot-date', 'MMMM DD, YYYY');
+	release.actors = query.all('.names a', true).map(actor => actor.replace(/,\s*/, ''));
+	release.director = query.q('.director-name', true);
 
-	release.photos = qu.imgs('.gallery .thumb img', 'data-image-file');
-	release.poster = qu.poster();
+	release.photos = query.imgs('.gallery .thumb img', 'data-image-file');
+	release.poster = query.poster();
 
-	release.tags = qu.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
+	release.tags = query.all('.tag-list a[href*="/tag"]', true).map(tag => tag.replace(/,\s*/, ''));
 
-	const trailer = qu.q('.player span[data-type="trailer-src"]', 'data-url');
+	const trailer = query.q('.player span[data-type="trailer-src"]', 'data-url');
 
 	release.trailer = [
 		{
@@ -69,23 +69,77 @@ async function scrapeScene({ qu }, url) {
 		},
 	];
 
-	release.channel = qu.url('.shoot-logo a').split('/').slice(-1)[0];
+	release.channel = query.url('.shoot-logo a').split('/').slice(-1)[0];
 
 	return release;
 }
 
-async function fetchLatest(site, page = 1) {
-	const res = await getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
+async function fetchActorReleases(actorUrl, page = 1, accReleases = []) {
+	const res = await qu.get(`${actorUrl}?page=${page}`);
 
 	if (res.ok) {
-		return scrapeLatest(res.items, site);
+		const releases = scrapeAll(qu.initAll(res.item.el, '.shoot-list .shoot'));
+		const hasNextPage = res.item.query.exists('.paginated-nav li:last-child:not(.disabled)');
+
+		if (hasNextPage) {
+			return fetchActorReleases(actorUrl, page + 1, accReleases.concat(releases));
+		}
+
+		return accReleases.concat(releases);
+	}
+
+	return accReleases;
+}
+
+async function scrapeProfile({ query }, actorUrl, include) {
+	const profile = {};
+
+	profile.description = query.q('.bio #expand-text', true);
+
+	const tags = query.all('.bio-tags a', true);
+
+	if (tags.includes('brunette') || tags.includes('brunet')) profile.hairColor = 'brown';
+	if (tags.includes('blonde') || tags.includes('blond')) profile.hairColor = 'blonde';
+	if (tags.includes('black hair')) profile.hairColor = 'black';
+	if (tags.includes('redhead')) profile.hairColor = 'red';
+
+	if (tags.includes('natural boobs')) profile.naturalBoobs = true;
+	if (tags.includes('fake boobs')) profile.naturalBoobs = false;
+
+	if (tags.includes('white')) profile.ethnicity = 'white';
+	if (tags.includes('latin')) profile.ethnicity = 'latin';
+	if (tags.includes('Black')) profile.ethnicity = 'black';
+
+	if (tags.includes('pierced nipples')) profile.hasPiercings = true;
+	if (tags.includes('tattoo')) profile.hasTattoos = true;
+
+	if (tags.includes('foreskin')) profile.hasForeskin = true;
+
+	if ((tags.includes('big dick') || tags.includes('foreskin'))
+		&& (tags.includes('fake boobs') || tags.includes('big tits'))) profile.gender = 'transsexual';
+
+	profile.avatar = query.img('.bio-slider-img, .bio-img:not([src*="Missing"])');
+	profile.social = query.urls('a.social-link');
+
+	if (include.releases) {
+		profile.releases = await fetchActorReleases(actorUrl);
+	}
+
+	return profile;
+}
+
+async function fetchLatest(site, page = 1) {
+	const res = await qu.getAll(`${site.url}/latest/page/${page}`, '.shoot-list .shoot');
+
+	if (res.ok) {
+		return scrapeAll(res.items, site);
 	}
 
 	return res.status;
 }
 
 async function fetchScene(url, site) {
-	const res = await get(url);
+	const res = await qu.get(url);
 
 	if (res.ok) {
 		return scrapeScene(res.item, url, site);
@@ -94,7 +148,36 @@ async function fetchScene(url, site) {
 	return res.status;
 }
 
+// Look up a performer through the Kink search page and scrape the matching profile.
+// Resolves to the scraped profile, to null when no search result carries the exact
+// actor name, or to the response status of whichever request was not ok.
+async function fetchProfile(actorName, entity, include) {
+	const searchRes = await qu.getAll(`https://kink.com/search?type=performers&q=${encodeURIComponent(actorName)}`, '.model');
+
+	if (searchRes.ok) {
+		// each result has to be tested through its own query helper; the callback must not ignore the item
+		const actorItem = searchRes.items.find(item => item.query.exists(`.model-link img[alt="${actorName}"]`));
+
+		if (actorItem) {
+			const actorPath = actorItem.query.url('.model-link');
+			const actorUrl = `https://kink.com${actorPath}`;
+			const actorRes = await qu.get(actorUrl);
+
+			if (actorRes.ok) {
+				return scrapeProfile(actorRes.item, actorUrl, include);
+			}
+
+			return actorRes.status;
+		}
+
+		return null;
+	}
+
+	return searchRes.status;
+}
+
 module.exports = {
 	fetchLatest,
 	fetchScene,
+	fetchProfile,
 };
diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js
index 6bba5aed..08d16e03 100644
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -180,6 +180,7 @@ module.exports = {
 		julesjordan,
 		kellymadison,
 		killergram,
+		kink,
 		legalporno,
 		men,
 		metrohd,
diff --git a/src/utils/qu.js b/src/utils/qu.js
index faad26ec..70adbfbb 100644
--- a/src/utils/qu.js
+++ b/src/utils/qu.js
@@ -358,6 +358,7 @@ module.exports = {
 	ctxa: initAll,
 	geta: getAll,
 	qu: quFuncs,
+	query: quFuncs,
 	prefixUrl,
 	...legacyFuncs,
 };