Refactored Kink scraper to use unprint browser. Improved socials handling in actors module.

This commit is contained in:
DebaucheryLibrarian
2025-12-28 05:48:24 +01:00
parent f5d6574cc6
commit 5c585d5d45
4 changed files with 129 additions and 68 deletions

View File

@@ -698,7 +698,61 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
return profiles.filter(Boolean);
}
/**
 * Normalise a mixed list of social references into records that carry either
 * a `{ platform, handle }` pair or, when no platform can be derived, a bare `{ url }`.
 *
 * Accepted entry shapes:
 *  - a string, treated as a URL;
 *  - an object with a truthy `url` (takes priority over any handle/platform on the same object);
 *  - an object with truthy `handle` and `platform`.
 * Anything else (including `null`/`undefined`) is dropped.
 *
 * @param {Array<string|{url?: string, handle?: string, platform?: string}>} socials - raw social entries.
 * @param {Object} platformsByHostname - lookup keyed by URL hostname; each value is read for
 *   `platform` (stored name) and `pathname` (a template containing `{handle}`).
 * @returns {Array<{platform: string, handle: string}|{url: string}>} curated socials.
 * @throws {Error} 'Invalid social' when a handle/platform entry fails validation and has no URL
 *   to fall back on (also raised for an empty-string entry).
 * @throws {TypeError} from `new URL()` when a URL cannot be parsed.
 */
function curateSocials(socials, platformsByHostname) {
	return socials
		.map((social) => {
			// Keep the object shape here: the second pass reads `.url` / `.handle` off the value,
			// so returning the bare URL string would make every URL entry fail as 'Invalid social'.
			if (social?.url) {
				return { url: social.url };
			}

			if (social?.handle && social?.platform) {
				return social;
			}

			if (typeof social === 'string') {
				return { url: social };
			}

			return null;
		})
		.filter(Boolean)
		.map((social) => {
			// Both patterns are unanchored: they only require at least one matching character.
			if (social.handle && social.platform && /[\w-]+/.test(social.handle) && /[a-z]+/i.test(social.platform)) {
				return {
					platform: social.platform.toLowerCase(),
					handle: social.handle,
				};
			}

			if (social.url) {
				const { hostname, pathname } = new URL(social.url);
				const platform = platformsByHostname[hostname];

				if (platform) {
					// The platform's pathname template is used as a regular expression source as-is,
					// with `{handle}` swapped for a capture group.
					const handlePattern = new RegExp(platform.pathname.replace('{handle}', '([\\w-]+)'));
					const handle = pathname.match(handlePattern)?.[1];

					if (handle) {
						return {
							platform: platform.platform,
							handle,
						};
					}
				}

				// Unknown hostname, or the path did not match the template: keep the raw URL.
				return {
					url: social.url,
				};
			}

			throw new Error('Invalid social');
		});
}
async function associateSocials(profiles) {
const { platformsByHostname } = await actorsCommon;
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
@@ -725,11 +779,12 @@ async function associateSocials(profiles) {
}
await knex('actors_socials')
.insert(profile.social.map((url) => ({
url,
platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
.insert(curateSocials(profile.social, platformsByHostname).map((social) => ({
platform: social.platform,
handle: social.handle,
url: social.url,
actor_id: profile.actorId,
profile_id: profileId,
// profile_id: profileId,
})))
.onConflict()
.ignore();