From e24012f446ba560898dd06bb8ec0f4a49217a19d Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sat, 26 Oct 2024 01:04:28 +0200 Subject: [PATCH] Storing avatars in actors avatars table to allow multiple (historical) photos per profile. --- .eslintrc | 2 + src/actors.js | 35 +++++++++++++---- src/media.js | 13 ++++--- src/scrapers/aylo.js | 92 ++++++++++++++++++++++---------------------- 4 files changed, 83 insertions(+), 59 deletions(-) diff --git a/.eslintrc b/.eslintrc index 4f4ae6ef..161b3210 100755 --- a/.eslintrc +++ b/.eslintrc @@ -15,6 +15,8 @@ "default-param-last": 0, "template-curly-spacing": "off", "max-len": 0, + "func-names": 0, + "space-before-function-paren": 0, "vue/no-v-html": 0, "vue/html-indent": ["error", "tab"], "vue/multiline-html-element-content-newline": 0, diff --git a/src/actors.js b/src/actors.js index 327b6ec6..8c711d34 100755 --- a/src/actors.js +++ b/src/actors.js @@ -291,13 +291,13 @@ function curateActorEntries(baseActors, batchId) { } function curateProfileEntry(profile) { - if (!profile.id) { + if (!profile.actorId) { return null; } const curatedProfileEntry = { ...(profile.update !== false && { id: profile.update }), - actor_id: profile.id, + actor_id: profile.actorId, entity_id: profile.entity?.id || null, date_of_birth: profile.dateOfBirth, date_of_death: profile.dateOfDeath, @@ -383,13 +383,15 @@ async function curateProfile(profile, actor) { try { const curatedProfile = { - id: profile.id, + // id: profile.id, + update: profile.update, + actorId: profile.actorId, + profileId: profile.profileId, name: profile.name, url: profile.url, avatar: profile.avatar, scraper: profile.scraper, entity: profile.entity, - update: profile.update, }; curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null; @@ -550,6 +552,17 @@ async function upsertProfiles(profiles) { logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`); } + + if (profiles.length > 0) { + await knex('actors_avatars') + .insert(profiles.filter((profile) => !!profile.avatarMediaId).map((profile) => ({ + actor_id: profile.actorId, + profile_id: profile.profileId, + media_id: profile.avatarMediaId, + }))) + .onConflict() + .ignore(); + } } async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) { @@ -604,6 +617,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy ...actor, }), context, include); + console.log('PROFILE', profile); + if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`); throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' }); @@ -615,6 +630,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy ...actor, ...profile, entity, + actorId: actor.id, + profileId: existingProfile?.id, update: existingProfile?.id || false, }, actor); } catch (error) { @@ -641,7 +658,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy } async function associateSocials(profiles) { - const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.id, profile.entity.id])); + const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id])); const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => { if (!acc[profileEntry.actor_id]) { @@ -660,7 +677,7 @@ async function associateSocials(profiles) { return; } - const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id]; + const profileId = profileEntriesByActorIdAndEntityId[profile.actorId]?.[profile.entity.id]; if (!profileId) { return; @@ -670,7 +687,7 @@ async function associateSocials(profiles) { .insert(profile.social.map((url) => ({ url, platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1], - actor_id: profile.id, + actor_id: profile.actorId, profile_id: profileId, }))) .onConflict() @@ -698,8 +715,10 @@ async function getActorNames(actorNames) { } async function storeProfiles(profiles) { + console.log('profiles', profiles); + const profilesWithAvatarIds = await associateAvatars(profiles); - const actorIds = Array.from(new Set(profiles.map((profile) => profile.id))); + const actorIds = Array.from(new Set(profiles.map((profile) => profile.actorId))); await associateSocials(profiles); diff --git a/src/media.js b/src/media.js index e242cbdf..26017546 100755 --- a/src/media.js +++ b/src/media.js @@ -838,7 +838,10 @@ function curateMediaEntry(media, index) { async function storeMedias(baseMedias, options) { await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true }); - const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias); + // avatars often have different images at the same URL, skip URL dedupe until more advanced source deduping is implemented + const { existingSourceMediaByUrl, existingExtractMediaByUrl } = options.type === 'avatars' + ? { existingSourceMediaByUrl: {}, existingExtractMediaByUrl: {} } + : await findSourceDuplicates(baseMedias); const fetchedMedias = await Promise.map( baseMedias, @@ -914,7 +917,7 @@ async function associateReleaseMedia(releases, type = 'release') { } try { - const storedMedias = await storeMedias(baseMedias); + const storedMedias = await storeMedias(baseMedias, { type }); const storedMediasById = itemsByKey(storedMedias, 'id'); const associations = Object @@ -967,10 +970,10 @@ async function associateAvatars(profiles) { const baseMedias = profilesWithBaseMedias.map((profile) => profile.avatarBaseMedia).filter(Boolean); - const storedMedias = await storeMedias(baseMedias, { stats: true }); + const storedMedias = await storeMedias(baseMedias, { type: 'avatars', stats: true }); const storedMediasById = itemsByKey(storedMedias, 'id'); - const profilesWithAvatarIds = profilesWithBaseMedias.map((profile) => { + const profilesWithAvatarId = profilesWithBaseMedias.map((profile) => { const media = storedMediasById[profile.avatarBaseMedia?.id]; if (media) { @@ -983,7 +986,7 @@ async function associateAvatars(profiles) { return profile; }); - return profilesWithAvatarIds; + return profilesWithAvatarId; } async function deleteS3Objects(media) { diff --git a/src/scrapers/aylo.js b/src/scrapers/aylo.js index f5021949..4215fc2f 100755 --- a/src/scrapers/aylo.js +++ b/src/scrapers/aylo.js @@ -244,52 +244,6 @@ async function getSession(site, parameters, url) { throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); } -function scrapeProfile(data, networkName, releases = []) { - const profile = { - description: data.bio, - aliases: data.aliases.filter(Boolean), - }; - - profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; - profile.measurements = data.measurements; - - profile.dateOfBirth = qu.parseDate(data.birthday); - profile.birthPlace = data.birthPlace; - profile.height = inchesToCm(data.height); - profile.weight = lbsToKg(data.weight); - - profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name; - profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name; - - if (data.images.card_main_rect?.[0]) { - profile.avatar = data.images.card_main_rect[0].xl?.url - || data.images.card_main_rect[0].lg?.url - || data.images.card_main_rect[0].md?.url - || data.images.card_main_rect[0].sm?.url - || data.images.card_main_rect[0].xs?.url; - } - - if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { - profile.naturalBoobs = true; - } - - if (data.tags.some((tag) => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) { - profile.naturalBoobs = false; - } - - if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) { - profile.hasTattoos = true; - } - - if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) { - profile.hasPiercings = true; - } - - profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName)); - - return profile; -} - async function fetchLatest(site, page = 1, options) { const url = getUrl(site); const { searchParams, pathname } = new URL(url); @@ -380,6 +334,52 @@ async function fetchRelease(url, site, baseScene, options) { return null; } +function scrapeProfile(data, networkName, _releases = []) { + const profile = { + description: data.bio, + aliases: data.aliases.filter(Boolean), + }; + + profile.gender = data.gender === 'other' ? 'transsexual' : data.gender; + profile.measurements = data.measurements; + + profile.dateOfBirth = qu.parseDate(data.birthday); + profile.birthPlace = data.birthPlace; + profile.height = inchesToCm(data.height); + profile.weight = lbsToKg(data.weight); + + profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name; + profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name; + + if (data.images.card_main_rect?.[0]) { + profile.avatar = data.images.card_main_rect[0].xl?.url + || data.images.card_main_rect[0].lg?.url + || data.images.card_main_rect[0].md?.url + || data.images.card_main_rect[0].sm?.url + || data.images.card_main_rect[0].xs?.url; + } + + if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) { + profile.naturalBoobs = true; + } + + if (data.tags.some((tag) => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) { + profile.naturalBoobs = false; + } + + if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) { + profile.hasTattoos = true; + } + + if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) { + profile.hasPiercings = true; + } + + // profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName)); + + return profile; +} + async function fetchProfile({ name: actorName }, { entity, parameters }, include) { // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; const { session, instanceToken } = await getSession(entity, parameters);