diff --git a/common b/common index dc00c3d5..4b90a5fe 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit dc00c3d58af2c23530b8b3cb6704f3860fdd7d0f +Subproject commit 4b90a5feeccc0c6325469dcb45a8d7cceabb386a diff --git a/config/default.js b/config/default.js index 2421297b..cba2a2cf 100755 --- a/config/default.js +++ b/config/default.js @@ -403,4 +403,23 @@ module.exports = { flushWindow: 1000, }, titleSlugLength: 50, + socials: { + urls: { + cashapp: 'https://cash.app/${handle}', // eslint-disable-line no-template-curly-in-string + fansly: 'https://fansly.com/{handle}', + instagram: 'https://www.instagram.com/{handle}', + linktree: 'https://linktr.ee/{handle}', + loyalfans: 'https://www.loyalfans.com/{handle}', + manyvids: 'https://{handle}.manyvids.com', + onlyfans: 'https://onlyfans.com/{handle}', + pornhub: 'https://www.pornhub.com/model/{handle}', + reddit: 'https://www.reddit.com/u/{handle}', + twitter: 'https://x.com/{handle}', + }, + prefix: { + default: '@', + cashapp: '$', + reddit: 'u/', + }, + }, }; diff --git a/src/actors.js b/src/actors.js index 96d07fdf..78e83020 100755 --- a/src/actors.js +++ b/src/actors.js @@ -698,7 +698,61 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy return profiles.filter(Boolean); } +function curateSocials(socials, platformsByHostname) { + return socials + .map((social) => { + if (social.url) { + return social.url; + } + + if (social.handle && social.platform) { + return social; + } + + if (typeof social === 'string') { + return { + url: social, + }; + } + + return null; + }) + .filter(Boolean) + .map((social) => { + if (social.handle && social.platform && /[\w-]+/.test(social.handle) && /[a-z]+/i.test(social.platform)) { + return { + platform: social.platform.toLowerCase(), + handle: social.handle, + }; + } + + if (social.url) { + const { hostname, pathname } = new URL(social.url); + const platform = platformsByHostname[hostname]; + + if (platform) { + const handle = pathname.match(new RegExp(platform.pathname.replace('{handle}', '([\\w-]+)')))?.[1]; + + if (handle) { + return { + platform: platform.platform, + handle, + }; + } + } + + return { + url: social.url, + }; + } + + throw new Error('Invalid social'); + }) + .filter(Boolean); +} + async function associateSocials(profiles) { + const { platformsByHostname } = await actorsCommon; const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id])); const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => { @@ -725,11 +779,12 @@ async function associateSocials(profiles) { } await knex('actors_socials') - .insert(profile.social.map((url) => ({ - url, - platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1], + .insert(curateSocials(profile.social, platformsByHostname).map((social) => ({ + platform: social.platform, + handle: social.handle, + url: social.url, actor_id: profile.actorId, - profile_id: profileId, + // profile_id: profileId, }))) .onConflict() .ignore(); diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js index 3a93192b..e3f2e854 100755 --- a/src/scrapers/kink.js +++ b/src/scrapers/kink.js @@ -2,7 +2,6 @@ const unprint = require('unprint'); -const http = require('../utils/http'); const slugify = require('../utils/slugify'); const { stripQuery } = require('../utils/url'); @@ -40,23 +39,24 @@ function scrapeAll(scenes, entity) { })); try { - release.photos = JSON.parse(query.attribute('.ratio-thumbnail img', 'data-cycle')).map((src) => [ - stripQuery(src).replace('_thumb', '_full'), - stripQuery(src), - src, - ].filter(Boolean).map((source) => ({ - src: source, - expectType: { - PNG: 'image/png', - }, - }))); + release.photos = JSON.parse(query.attribute('.ratio-thumbnail img', 'data-cycle')) + .map((src) => Array.from(new Set([ + stripQuery(src).replace('_thumb', '_full'), + stripQuery(src), + src, + ])).filter(Boolean).map((source) => ({ + src: source, + expectType: { + PNG: 'image/png', + }, + }))); } catch (error) { // no photos } release.trailer = `https://cdnp.kink.com/imagedb/${release.entryId}/trailer/${release.entryId}_trailer_high.mp4`; - release.channel = slugify(query.content('.shoot-detail-legend a[href*="/channel"]'), ''); + release.channel = slugify(query.content('.shoot-thumbnail-footer a[href*="/channel"]'), ''); release.rating = query.number('.thumb-up') / 10; return release; @@ -64,25 +64,21 @@ function scrapeAll(scenes, entity) { } async function fetchLatest(channel, page = 1) { - const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true }); const url = `${channel.parent.url}/search?type=shoots&channelIds=${channel.parameters?.slug || channel.slug}&sort=published&page=${page}`; - const res = await tab.goto(url); - const status = res.status(); - if (status === 200) { - const html = await tab.content(); - const items = unprint.initAll(html, '.container .card'); + const res = await unprint.browserRequest(url, { + selectAll: '.container .card', + }); - const scenes = scrapeAll(items, channel); + if (res.status === 200) { + // const items = unprint.initAll(html, '.container .card'); - await tab.close(); + const scenes = scrapeAll(res.context, channel); return scenes; } - await tab.close(); - - return status; + return res.status; } function scrapeScene({ query }, url, entity) { @@ -149,29 +145,19 @@ function scrapeScene({ query }, url, entity) { } async function fetchScene(url, channel) { - const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true }); - const res = await tab.goto(url); + const res = await unprint.browserRequest(url); - const status = res.status(); - - if (status === 200) { - const html = await tab.content(); - const item = unprint.init(html); - - const scene = scrapeScene(item, url, channel); - - await tab.close(); + if (res.status === 200) { + const scene = scrapeScene(res.context, url, channel); return scene; } - await tab.close(); - - return status; + return res.status; } async function scrapeProfile({ query }, actorUrl) { - const profile = {}; + const profile = { url: actorUrl }; profile.entryId = actorUrl.match(/\/model\/(\d+)\//)?.[1] || query.attribute('h1 + button[data-id]', 'data-id'); profile.description = query.content('.content-container #expand-text')?.trim(); @@ -204,42 +190,43 @@ async function scrapeProfile({ query }, actorUrl) { return profile; } -async function fetchProfile({ name: actorName }, entity) { - const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url; - const { tab } = await http.getBrowserSession('kink', { useGlobalBrowser: false, useProxy: true }); +async function getActorUrl({ name: actorName, url }, networkUrl) { + if (url) { + return url; + } // const searchRes = await tab.goto(`${networkUrl}/search?type=performers&q=${actorName}`); - const searchApiRes = await tab.goto(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`); - const searchStatus = searchApiRes.status(); + const searchApiRes = await unprint.browserRequest(`https://www.kink.com/api/v2/search/suggestions/performers?term=${actorName}`); - if (searchStatus === 200) { - const searchHtml = await tab.content(); - const data = unprint.init(searchHtml).query.json('body pre'); + if (searchApiRes.status === 200) { + const data = searchApiRes.context.query.json('body pre'); const actorId = data.find((actor) => actor.label === actorName)?.id; if (actorId) { const actorUrl = `${networkUrl}/model/${actorId}/${slugify(actorName)}`; - const actorRes = await tab.goto(actorUrl); - const actorStatus = actorRes.status(); - if (actorStatus === 200) { - const actorHtml = await tab.content(); - const item = unprint.init(actorHtml); - - await tab.close(); - - return scrapeProfile(item, actorUrl); - } - - await tab.close(); - - return actorRes.status; + return actorUrl; } - - return null; } - return searchStatus; + return null; +} + +async function fetchProfile(actor, entity) { + const networkUrl = entity.type === 'channel' ? entity.parent.url : entity.url; + const actorUrl = await getActorUrl(actor, networkUrl); + + if (actorUrl) { + const actorRes = await unprint.browserRequest(actorUrl); + + if (actorRes.status === 200) { + return scrapeProfile(actorRes.context, actorUrl); + } + + return actorRes.status; + } + + return null; } module.exports = {