Storing avatars in the actors_avatars table to allow multiple (historical) photos per profile.
This commit is contained in:
parent
8d3f1c13cf
commit
e24012f446
|
@ -15,6 +15,8 @@
|
||||||
"default-param-last": 0,
|
"default-param-last": 0,
|
||||||
"template-curly-spacing": "off",
|
"template-curly-spacing": "off",
|
||||||
"max-len": 0,
|
"max-len": 0,
|
||||||
|
"func-names": 0,
|
||||||
|
"space-before-function-paren": 0,
|
||||||
"vue/no-v-html": 0,
|
"vue/no-v-html": 0,
|
||||||
"vue/html-indent": ["error", "tab"],
|
"vue/html-indent": ["error", "tab"],
|
||||||
"vue/multiline-html-element-content-newline": 0,
|
"vue/multiline-html-element-content-newline": 0,
|
||||||
|
|
|
@ -291,13 +291,13 @@ function curateActorEntries(baseActors, batchId) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function curateProfileEntry(profile) {
|
function curateProfileEntry(profile) {
|
||||||
if (!profile.id) {
|
if (!profile.actorId) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const curatedProfileEntry = {
|
const curatedProfileEntry = {
|
||||||
...(profile.update !== false && { id: profile.update }),
|
...(profile.update !== false && { id: profile.update }),
|
||||||
actor_id: profile.id,
|
actor_id: profile.actorId,
|
||||||
entity_id: profile.entity?.id || null,
|
entity_id: profile.entity?.id || null,
|
||||||
date_of_birth: profile.dateOfBirth,
|
date_of_birth: profile.dateOfBirth,
|
||||||
date_of_death: profile.dateOfDeath,
|
date_of_death: profile.dateOfDeath,
|
||||||
|
@ -383,13 +383,15 @@ async function curateProfile(profile, actor) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const curatedProfile = {
|
const curatedProfile = {
|
||||||
id: profile.id,
|
// id: profile.id,
|
||||||
|
update: profile.update,
|
||||||
|
actorId: profile.actorId,
|
||||||
|
profileId: profile.profileId,
|
||||||
name: profile.name,
|
name: profile.name,
|
||||||
url: profile.url,
|
url: profile.url,
|
||||||
avatar: profile.avatar,
|
avatar: profile.avatar,
|
||||||
scraper: profile.scraper,
|
scraper: profile.scraper,
|
||||||
entity: profile.entity,
|
entity: profile.entity,
|
||||||
update: profile.update,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null;
|
curatedProfile.description = domPurify.sanitize(profile.description?.replace(/\s+/g, ' '), { ALLOWED_TAGS: [] }).trim() || null;
|
||||||
|
@ -550,6 +552,17 @@ async function upsertProfiles(profiles) {
|
||||||
|
|
||||||
logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`);
|
logger.info(`Updated ${updatingProfileEntries.length} new actor profiles`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (profiles.length > 0) {
|
||||||
|
await knex('actors_avatars')
|
||||||
|
.insert(profiles.filter((profile) => !!profile.avatarMediaId).map((profile) => ({
|
||||||
|
actor_id: profile.actorId,
|
||||||
|
profile_id: profile.profileId,
|
||||||
|
media_id: profile.avatarMediaId,
|
||||||
|
})))
|
||||||
|
.onConflict()
|
||||||
|
.ignore();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
|
async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesByActorEntityId) {
|
||||||
|
@ -604,6 +617,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||||
...actor,
|
...actor,
|
||||||
}), context, include);
|
}), context, include);
|
||||||
|
|
||||||
|
console.log('PROFILE', profile);
|
||||||
|
|
||||||
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
if (!profile || typeof profile === 'number') { // scraper returns HTTP code on request failure
|
||||||
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
logger.verbose(`Profile for '${actor.name}' not available on ${label}, scraper returned ${profile}`);
|
||||||
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' });
|
throw Object.assign(new Error(`Profile for '${actor.name}' not available on ${label}`), { code: 'PROFILE_NOT_AVAILABLE' });
|
||||||
|
@ -615,6 +630,8 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||||
...actor,
|
...actor,
|
||||||
...profile,
|
...profile,
|
||||||
entity,
|
entity,
|
||||||
|
actorId: actor.id,
|
||||||
|
profileId: existingProfile?.id,
|
||||||
update: existingProfile?.id || false,
|
update: existingProfile?.id || false,
|
||||||
}, actor);
|
}, actor);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -641,7 +658,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||||
}
|
}
|
||||||
|
|
||||||
async function associateSocials(profiles) {
|
async function associateSocials(profiles) {
|
||||||
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.id, profile.entity.id]));
|
const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.actorId, profile.entity.id]));
|
||||||
|
|
||||||
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
|
const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
|
||||||
if (!acc[profileEntry.actor_id]) {
|
if (!acc[profileEntry.actor_id]) {
|
||||||
|
@ -660,7 +677,7 @@ async function associateSocials(profiles) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id];
|
const profileId = profileEntriesByActorIdAndEntityId[profile.actorId]?.[profile.entity.id];
|
||||||
|
|
||||||
if (!profileId) {
|
if (!profileId) {
|
||||||
return;
|
return;
|
||||||
|
@ -670,7 +687,7 @@ async function associateSocials(profiles) {
|
||||||
.insert(profile.social.map((url) => ({
|
.insert(profile.social.map((url) => ({
|
||||||
url,
|
url,
|
||||||
platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
|
platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
|
||||||
actor_id: profile.id,
|
actor_id: profile.actorId,
|
||||||
profile_id: profileId,
|
profile_id: profileId,
|
||||||
})))
|
})))
|
||||||
.onConflict()
|
.onConflict()
|
||||||
|
@ -698,8 +715,10 @@ async function getActorNames(actorNames) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function storeProfiles(profiles) {
|
async function storeProfiles(profiles) {
|
||||||
|
console.log('profiles', profiles);
|
||||||
|
|
||||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||||
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
|
const actorIds = Array.from(new Set(profiles.map((profile) => profile.actorId)));
|
||||||
|
|
||||||
await associateSocials(profiles);
|
await associateSocials(profiles);
|
||||||
|
|
||||||
|
|
13
src/media.js
13
src/media.js
|
@ -838,7 +838,10 @@ function curateMediaEntry(media, index) {
|
||||||
async function storeMedias(baseMedias, options) {
|
async function storeMedias(baseMedias, options) {
|
||||||
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
|
await fsPromises.mkdir(path.join(config.media.path, 'temp'), { recursive: true });
|
||||||
|
|
||||||
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = await findSourceDuplicates(baseMedias);
|
// avatars often have different images at the same URL, skip URL dedupe until more advanced source deduping is implemented
|
||||||
|
const { existingSourceMediaByUrl, existingExtractMediaByUrl } = options.type === 'avatars'
|
||||||
|
? { existingSourceMediaByUrl: {}, existingExtractMediaByUrl: {} }
|
||||||
|
: await findSourceDuplicates(baseMedias);
|
||||||
|
|
||||||
const fetchedMedias = await Promise.map(
|
const fetchedMedias = await Promise.map(
|
||||||
baseMedias,
|
baseMedias,
|
||||||
|
@ -914,7 +917,7 @@ async function associateReleaseMedia(releases, type = 'release') {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const storedMedias = await storeMedias(baseMedias);
|
const storedMedias = await storeMedias(baseMedias, { type });
|
||||||
const storedMediasById = itemsByKey(storedMedias, 'id');
|
const storedMediasById = itemsByKey(storedMedias, 'id');
|
||||||
|
|
||||||
const associations = Object
|
const associations = Object
|
||||||
|
@ -967,10 +970,10 @@ async function associateAvatars(profiles) {
|
||||||
|
|
||||||
const baseMedias = profilesWithBaseMedias.map((profile) => profile.avatarBaseMedia).filter(Boolean);
|
const baseMedias = profilesWithBaseMedias.map((profile) => profile.avatarBaseMedia).filter(Boolean);
|
||||||
|
|
||||||
const storedMedias = await storeMedias(baseMedias, { stats: true });
|
const storedMedias = await storeMedias(baseMedias, { type: 'avatars', stats: true });
|
||||||
const storedMediasById = itemsByKey(storedMedias, 'id');
|
const storedMediasById = itemsByKey(storedMedias, 'id');
|
||||||
|
|
||||||
const profilesWithAvatarIds = profilesWithBaseMedias.map((profile) => {
|
const profilesWithAvatarId = profilesWithBaseMedias.map((profile) => {
|
||||||
const media = storedMediasById[profile.avatarBaseMedia?.id];
|
const media = storedMediasById[profile.avatarBaseMedia?.id];
|
||||||
|
|
||||||
if (media) {
|
if (media) {
|
||||||
|
@ -983,7 +986,7 @@ async function associateAvatars(profiles) {
|
||||||
return profile;
|
return profile;
|
||||||
});
|
});
|
||||||
|
|
||||||
return profilesWithAvatarIds;
|
return profilesWithAvatarId;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function deleteS3Objects(media) {
|
async function deleteS3Objects(media) {
|
||||||
|
|
|
@ -244,52 +244,6 @@ async function getSession(site, parameters, url) {
|
||||||
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
|
throw new Error(`Failed to acquire MindGeek session (${res.statusCode})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeProfile(data, networkName, releases = []) {
|
|
||||||
const profile = {
|
|
||||||
description: data.bio,
|
|
||||||
aliases: data.aliases.filter(Boolean),
|
|
||||||
};
|
|
||||||
|
|
||||||
profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
|
|
||||||
profile.measurements = data.measurements;
|
|
||||||
|
|
||||||
profile.dateOfBirth = qu.parseDate(data.birthday);
|
|
||||||
profile.birthPlace = data.birthPlace;
|
|
||||||
profile.height = inchesToCm(data.height);
|
|
||||||
profile.weight = lbsToKg(data.weight);
|
|
||||||
|
|
||||||
profile.hairColor = data.tags.find((tag) => /hair color/i.test(tag.category))?.name;
|
|
||||||
profile.ethnicity = data.tags.find((tag) => /ethnicity/i.test(tag.category))?.name;
|
|
||||||
|
|
||||||
if (data.images.card_main_rect?.[0]) {
|
|
||||||
profile.avatar = data.images.card_main_rect[0].xl?.url
|
|
||||||
|| data.images.card_main_rect[0].lg?.url
|
|
||||||
|| data.images.card_main_rect[0].md?.url
|
|
||||||
|| data.images.card_main_rect[0].sm?.url
|
|
||||||
|| data.images.card_main_rect[0].xs?.url;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /natural tits/i.test(tag.name))) {
|
|
||||||
profile.naturalBoobs = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.tags.some((tag) => /boob type/i.test(tag.category) && /enhanced/i.test(tag.name))) {
|
|
||||||
profile.naturalBoobs = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.tags.some((tag) => /body art/i.test(tag.category) && /tattoo/i.test(tag.name))) {
|
|
||||||
profile.hasTattoos = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (data.tags.some((tag) => /body art/i.test(tag.category) && /piercing/i.test(tag.name))) {
|
|
||||||
profile.hasPiercings = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));
|
|
||||||
|
|
||||||
return profile;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1, options) {
|
async function fetchLatest(site, page = 1, options) {
|
||||||
const url = getUrl(site);
|
const url = getUrl(site);
|
||||||
const { searchParams, pathname } = new URL(url);
|
const { searchParams, pathname } = new URL(url);
|
||||||
|
@ -380,6 +334,52 @@ async function fetchRelease(url, site, baseScene, options) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Map a raw actor payload onto the profile shape used by the rest of the scraper.
 *
 * Missing `aliases`, `tags` or `images` collections are treated as empty rather
 * than throwing, so a sparse payload still yields a partial profile.
 *
 * @param {object} data - Raw actor record; reads `bio`, `aliases`, `gender`,
 *   `measurements`, `birthday`, `birthPlace`, `height`, `weight`, `tags` and `images`.
 *   NOTE(review): presumably the actor object returned by the network API — confirm against the caller.
 * @param {string} networkName - Currently unused; kept so existing callers need not change.
 * @param {Array} [_releases=[]] - Currently unused, see the note above the return statement.
 * @returns {object} Profile with description, aliases, gender, measurements, dateOfBirth,
 *   birthPlace, height, weight, hairColor and ethnicity, plus avatar, naturalBoobs,
 *   hasTattoos and hasPiercings when the payload provides them.
 */
function scrapeProfile(data, networkName, _releases = []) {
	const aliases = data.aliases ?? [];
	const tags = data.tags ?? [];
	const avatarSizes = data.images?.card_main_rect?.[0];

	// First tag whose category matches, e.g. the "hair color" tag.
	const findTagByCategory = (categoryPattern) => tags.find((tag) => categoryPattern.test(tag.category));

	// Whether any tag matches both the category and the name pattern.
	const hasTag = (categoryPattern, namePattern) => tags.some((tag) => categoryPattern.test(tag.category) && namePattern.test(tag.name));

	const profile = {
		description: data.bio,
		aliases: aliases.filter(Boolean),
	};

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;
	profile.measurements = data.measurements;

	profile.dateOfBirth = qu.parseDate(data.birthday);
	profile.birthPlace = data.birthPlace;
	profile.height = inchesToCm(data.height);
	profile.weight = lbsToKg(data.weight);

	profile.hairColor = findTagByCategory(/hair color/i)?.name;
	profile.ethnicity = findTagByCategory(/ethnicity/i)?.name;

	if (avatarSizes) {
		// Prefer the largest rendition that has a URL.
		profile.avatar = avatarSizes.xl?.url
			|| avatarSizes.lg?.url
			|| avatarSizes.md?.url
			|| avatarSizes.sm?.url
			|| avatarSizes.xs?.url;
	}

	if (hasTag(/boob type/i, /natural tits/i)) {
		profile.naturalBoobs = true;
	}

	// Checked after the natural tag on purpose: an "enhanced" tag wins when both are present.
	if (hasTag(/boob type/i, /enhanced/i)) {
		profile.naturalBoobs = false;
	}

	if (hasTag(/body art/i, /tattoo/i)) {
		profile.hasTattoos = true;
	}

	if (hasTag(/body art/i, /piercing/i)) {
		profile.hasPiercings = true;
	}

	// NOTE(review): release mapping is disabled, so `profile.releases` is not set; confirm whether it should be restored.
	// profile.releases = releases.map((release) => scrapeRelease(release, null, null, networkName));

	return profile;
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
||||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||||
const { session, instanceToken } = await getSession(entity, parameters);
|
const { session, instanceToken } = await getSession(entity, parameters);
|
||||||
|
|
Loading…
Reference in New Issue