Refactored Kink scraper to use unprint browser. Improved socials handling in actors module.
This commit is contained in:
@@ -698,7 +698,61 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
return profiles.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Normalize scraped social references into either `{ platform, handle }` or
 * `{ url }` entries.
 *
 * Accepted input entries:
 * - a string, treated as a URL;
 * - an object with a `url` property;
 * - an object with both `handle` and `platform` properties.
 * Anything else (including null/undefined entries) is dropped.
 *
 * @param {Array<string|Object>} socials - Raw social references; a non-array value yields `[]`.
 * @param {Object} platformsByHostname - Lookup keyed by URL hostname. Each value is read for
 *   `platform` (the name returned) and `pathname` (a pattern containing `{handle}`).
 * @returns {Array<Object>} Entries shaped `{ platform, handle }` when a handle is known or can
 *   be extracted from the URL, otherwise `{ url }`.
 * @throws {Error} 'Invalid social' when an entry passes the initial screening but yields
 *   neither a usable handle/platform pair nor a URL.
 * @throws {TypeError} When a URL cannot be parsed by the `URL` constructor.
 */
function curateSocials(socials, platformsByHostname) {
	if (!Array.isArray(socials)) {
		return [];
	}

	return socials
		.map((social) => {
			// Bare strings are treated as URLs.
			if (typeof social === 'string') {
				return { url: social };
			}

			// Keep the object itself (not just its URL string) so the next step can still
			// read `url`, `handle` and `platform` from it.
			if (social && (social.url || (social.handle && social.platform))) {
				return social;
			}

			return null;
		})
		.filter(Boolean)
		.map((social) => {
			// An explicit handle and platform take precedence over URL parsing.
			if (social.handle && social.platform && /[\w-]+/.test(social.handle) && /[a-z]+/i.test(social.platform)) {
				return {
					platform: social.platform.toLowerCase(),
					handle: social.handle,
				};
			}

			if (social.url) {
				const { hostname, pathname } = new URL(social.url);

				// Own-property check so hostnames such as 'constructor' do not resolve to
				// members inherited from Object.prototype.
				const platform = Object.prototype.hasOwnProperty.call(platformsByHostname, hostname)
					? platformsByHostname[hostname]
					: null;

				if (platform) {
					// Turn the platform's pathname pattern into a regex capturing the handle.
					const handlePattern = new RegExp(platform.pathname.replace('{handle}', '([\\w-]+)'));
					const handle = pathname.match(handlePattern)?.[1];

					if (handle) {
						return {
							platform: platform.platform,
							handle,
						};
					}
				}

				// Unknown platform, or no handle found in the path: keep the plain URL.
				return {
					url: social.url,
				};
			}

			throw new Error('Invalid social');
		});
}
|
||||
|
||||
async function associateSocials(profiles) {
|
||||
const { platformsByHostname } = await actorsCommon;
|
||||
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
|
||||
|
||||
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
|
||||
@@ -725,11 +779,12 @@ async function associateSocials(profiles) {
|
||||
}
|
||||
|
||||
await knex('actors_socials')
|
||||
.insert(profile.social.map((url) => ({
|
||||
url,
|
||||
platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
|
||||
.insert(curateSocials(profile.social, platformsByHostname).map((social) => ({
|
||||
platform: social.platform,
|
||||
handle: social.handle,
|
||||
url: social.url,
|
||||
actor_id: profile.actorId,
|
||||
profile_id: profileId,
|
||||
// profile_id: profileId,
|
||||
})))
|
||||
.onConflict()
|
||||
.ignore();
|
||||
|
||||
Reference in New Issue
Block a user