Storing avatars in the actors_avatars table to allow multiple (historical) photos per profile.

This commit is contained in:
DebaucheryLibrarian 2024-10-26 01:04:28 +02:00
parent 8d3f1c13cf
commit e24012f446
4 changed files with 83 additions and 59 deletions

View File

@ -15,6 +15,8 @@
"default-param-last": 0, "default-param-last": 0,
"template-curly-spacing": "off", "template-curly-spacing": "off",
"max-len": 0, "max-len": 0,
"func-names": 0,
"space-before-function-paren": 0,
"vue/no-v-html": 0, "vue/no-v-html": 0,
"vue/html-indent": ["error", "tab"], "vue/html-indent": ["error", "tab"],
"vue/multiline-html-element-content-newline": 0, "vue/multiline-html-element-content-newline": 0,

View File

@ -291,13 +291,13 @@ function curateActorEntries(baseActors, batchId) {
} }
function curateProfileEntry(profile) { function curateProfileEntry(profile) {
if (!profile.id) { if (!profile.actorId) {
return null; return null;
} }
const curatedProfileEntry = { const curatedProfileEntry = {
...(profile.update !== false && { id: profile.update }), ...(profile.update !== false && { id: profile.update }),
actor_id: profile.id, actor_id: profile.actorId,
entity_id: profile.entity?.id || null, entity_id: profile.entity?.id || null,
date_of_birth: profile.dateOfBirth, date_of_birth: profile.dateOfBirth,
date_of_death: profile.dateOfDeath, date_of_death: profile.dateOfDeath,
@ -383,13 +383,15 @@ async function curateProfile(profile, actor) {
try { try {
const curatedProfile = { const curatedProfile = {
id: profile.id, // id: profile.id,
update: profile.update,
actorId: profile.actorId,
profileId: profile.profileId,
name: profile.name, name: profile.name,
url: profile.url, url: profile.url,
avatar: profile.avatar, avatar: profile.avatar,
scraper: profile.scraper, scraper: profile.scraper,
entity: profile.entity, entity: profile.entity,
update: profile.update,
}; };
curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null; curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null;
@ -550,6 +552,17 @@ async function upsertProfiles(profiles) {
logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`); logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`);
} }
if (profiles.length > 0) {
await knex('actors_avatars')
.insert(profiles.filter((profile) => !!profile.avatarMediaId).map((profile) => ({
actor_id: profile.actorId,
profile_id: profile.profileId,
media_id: profile.avatarMediaId,
})))
.onConflict()
.ignore();
}
} }
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) { async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
@ -604,6 +617,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
...actor, ...actor,
}), context, include); }), context, include);
console.log('PROFILE', profile);
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`); logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' }); throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' });
@ -615,6 +630,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
...actor, ...actor,
...profile, ...profile,
entity, entity,
actorId: actor.id,
profileId: existingProfile?.id,
update: existingProfile?.id || false, update: existingProfile?.id || false,
}, actor); }, actor);
} catch (error) { } catch (error) {
@ -641,7 +658,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
} }
async function associateSocials(profiles) { async function associateSocials(profiles) {
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.id, profile.entity.id])); const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => { const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
if (!acc[profileEntry.actor_id]) { if (!acc[profileEntry.actor_id]) {
@ -660,7 +677,7 @@ async function associateSocials(profiles) {
return; return;
} }
const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id]; const profileId = profileEntriesByActorIdAndEntityId[profile.actorId]?.[profile.entity.id];
if (!profileId) { if (!profileId) {
return; return;
@ -670,7 +687,7 @@ async function associateSocials(profiles) {
.insert(profile.social.map((url) => ({ .insert(profile.social.map((url) => ({
url, url,
platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1], platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
actor_id: profile.id, actor_id: profile.actorId,
profile_id: profileId, profile_id: profileId,
}))) })))
.onConflict() .onConflict()
@ -698,8 +715,10 @@ async function getActorNames(actorNames) {
} }
async function storeProfiles(profiles) { async function storeProfiles(profiles) {
console.log('profiles', profiles);
const profilesWithAvatarIds = await associateAvatars(profiles); const profilesWithAvatarIds = await associateAvatars(profiles);
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id))); const actorIds = Array.from(new Set(profiles.map((profile) => profile.actorId)));
await associateSocials(profiles); await associateSocials(profiles);

View File

@ -838,7 +838,10 @@ function curateMediaEntry(media, index) {
async function storeMedias(baseMedias, options) { async function storeMedias(baseMedias, options) {
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true }); await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias); // avatars often have different images at the same URL, skip URL dedupe until more advanced source deduping is implemented
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = options.type === 'avatars'
? { existingSourceMediaByUrl: {}, existingExtractMediaByUrl: {} }
: await findSourceDuplicates(baseMedias);
const fetchedMedias = await Promise.map( const fetchedMedias = await Promise.map(
baseMedias, baseMedias,
@ -914,7 +917,7 @@ async function associateReleaseMedia(releases, type = 'release') {
} }
try { try {
const storedMedias = await storeMedias(baseMedias); const storedMedias = await storeMedias(baseMedias, { type });
const storedMediasById = itemsByKey(storedMedias, 'id'); const storedMediasById = itemsByKey(storedMedias, 'id');
const associations = Object const associations = Object
@ -967,10 +970,10 @@ async function associateAvatars(profiles) {
const baseMedias = profilesWithBaseMedias.map((profile) => profile.avatarBaseMedia).filter(Boolean); const baseMedias = profilesWithBaseMedias.map((profile) => profile.avatarBaseMedia).filter(Boolean);
const storedMedias = await storeMedias(baseMedias, { stats: true }); const storedMedias = await storeMedias(baseMedias, { type: 'avatars', stats: true });
const storedMediasById = itemsByKey(storedMedias, 'id'); const storedMediasById = itemsByKey(storedMedias, 'id');
const profilesWithAvatarIds = profilesWithBaseMedias.map((profile) => { const profilesWithAvatarId = profilesWithBaseMedias.map((profile) => {
const media = storedMediasById[profile.avatarBaseMedia?.id]; const media = storedMediasById[profile.avatarBaseMedia?.id];
if (media) { if (media) {
@ -983,7 +986,7 @@ async function associateAvatars(profiles) {
return profile; return profile;
}); });
return profilesWithAvatarIds; return profilesWithAvatarId;
} }
async function deleteS3Objects(media) { async function deleteS3Objects(media) {

View File

@ -244,52 +244,6 @@ async function getSession(site, parameters, url) {
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`); throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
} }
/**
 * Converts a raw actor record into a profile object.
 *
 * @param {Object} data - Raw actor record; `aliases`, `tags` and `images` are dereferenced unconditionally.
 * @param {*} networkName - Passed through to `scrapeRelease` as its fourth argument.
 * @param {Array} [releases=[]] - Raw releases, each mapped through `scrapeRelease`.
 * @returns {Object} Profile with description, aliases, physical attributes, optional avatar and curated releases.
 */
function scrapeProfile(data, networkName, releases = []) {
	const { tags } = data;

	// Name of the first tag whose category matches, if any.
	const tagNameFor = (categoryPattern) => tags.find((tag) => categoryPattern.test(tag.category))?.name;
	// Whether any tag matches both the category and the name pattern.
	const isTagged = (categoryPattern, namePattern) => tags.some((tag) => categoryPattern.test(tag.category) && namePattern.test(tag.name));

	const profile = {
		description: data.bio,
		aliases: data.aliases.filter(Boolean),
		gender: data.gender === 'other' ? 'transsexual' : data.gender,
		measurements: data.measurements,
		dateOfBirth: qu.parseDate(data.birthday),
		birthPlace: data.birthPlace,
		height: inchesToCm(data.height),
		weight: lbsToKg(data.weight),
		hairColor: tagNameFor(/hair color/i),
		ethnicity: tagNameFor(/ethnicity/i),
	};

	const card = data.images.card_main_rect?.[0];

	if (card) {
		// Largest available rendition wins; falls through to the smaller sizes in order.
		profile.avatar = ['xl', 'lg', 'md', 'sm', 'xs'].reduce((url, size) => url || card[size]?.url, undefined);
	}

	if (isTagged(/boob type/i, /natural tits/i)) {
		profile.naturalBoobs = true;
	}

	// Checked second, so an 'enhanced' tag overrides a 'natural tits' tag.
	if (isTagged(/boob type/i, /enhanced/i)) {
		profile.naturalBoobs = false;
	}

	if (isTagged(/body art/i, /tattoo/i)) {
		profile.hasTattoos = true;
	}

	if (isTagged(/body art/i, /piercing/i)) {
		profile.hasPiercings = true;
	}

	profile.releases = releases.map((rawRelease) => scrapeRelease(rawRelease, null, null, networkName));

	return profile;
}
async function fetchLatest(site, page = 1, options) { async function fetchLatest(site, page = 1, options) {
const url = getUrl(site); const url = getUrl(site);
const { searchParams, pathname } = new URL(url); const { searchParams, pathname } = new URL(url);
@ -380,6 +334,52 @@ async function fetchRelease(url, site, baseScene, options) {
return null; return null;
} }
/**
 * Converts a raw actor record into a profile object.
 *
 * Missing `aliases`, `tags` or `images` no longer abort the scrape with a
 * TypeError: the fields derived from them are simply left empty or unset.
 *
 * @param {Object} data - Raw actor record.
 * @param {*} networkName - Unused; retained so existing callers keep working.
 * @param {Array} [_releases=[]] - Unused; releases are no longer curated into the profile here.
 * @returns {Object} Profile with description, aliases, physical attributes and, when present, avatar and body flags.
 */
function scrapeProfile(data, networkName, _releases = []) {
	// Treat absent collections as empty rather than throwing on them.
	const tags = Array.isArray(data.tags) ? data.tags : [];
	const aliases = Array.isArray(data.aliases) ? data.aliases.filter(Boolean) : [];

	// Name of the first tag whose category matches, if any.
	const findTagName = (categoryPattern) => tags.find((tag) => categoryPattern.test(tag.category))?.name;
	// Whether any tag matches both the category and the name pattern.
	const hasTag = (categoryPattern, namePattern) => tags.some((tag) => categoryPattern.test(tag.category) && namePattern.test(tag.name));

	const profile = {
		description: data.bio,
		aliases,
	};

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
	profile.measurements = data.measurements;

	profile.dateOfBirth = qu.parseDate(data.birthday);
	profile.birthPlace = data.birthPlace;

	profile.height = inchesToCm(data.height);
	profile.weight = lbsToKg(data.weight);

	profile.hairColor = findTagName(/hair color/i);
	profile.ethnicity = findTagName(/ethnicity/i);

	const card = data.images?.card_main_rect?.[0];

	if (card) {
		// Prefer the largest available rendition.
		profile.avatar = card.xl?.url
			|| card.lg?.url
			|| card.md?.url
			|| card.sm?.url
			|| card.xs?.url;
	}

	if (hasTag(/boob type/i, /natural tits/i)) {
		profile.naturalBoobs = true;
	}

	// Checked second, so an 'enhanced' tag overrides a 'natural tits' tag.
	if (hasTag(/boob type/i, /enhanced/i)) {
		profile.naturalBoobs = false;
	}

	if (hasTag(/body art/i, /tattoo/i)) {
		profile.hasTattoos = true;
	}

	if (hasTag(/body art/i, /piercing/i)) {
		profile.hasPiercings = true;
	}

	return profile;
}
async function fetchProfile({ name: actorName }, { entity, parameters }, include) { async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; // const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
const { session, instanceToken } = await getSession(entity, parameters); const { session, instanceToken } = await getSession(entity, parameters);