Storing actor socials, improved Hush profile scraper.

This commit is contained in:
DebaucheryLibrarian 2023-07-23 01:02:18 +02:00
parent 48acabac49
commit ca695db3ba
5 changed files with 68 additions and 14 deletions

View File

@ -397,7 +397,6 @@ const releaseFields = `
${releaseBasicActorsFragment}
${releaseTagsFragment}
${releasePosterFragment}
${releaseCoversFragment}
${releasePhotosFragment}
${siteFragment}
studio {

View File

@ -17,7 +17,7 @@ exports.up = async (knex) => {
AS showcased,
releases.effective_date,
releases.created_at,
array_agg(tags.slug) FILTER (WHERE tags.slug IS NOT NULL) AS tags
array_agg(tags.slug ORDER BY tags.priority DESC) FILTER (WHERE tags.slug IS NOT NULL) AS tags
FROM releases
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
LEFT JOIN tags ON tags.id = releases_tags.tag_id

View File

@ -0,0 +1,27 @@
exports.up = async (knex) => {
await knex.schema.alterTable('actors_social', (table) => {
table.integer('profile_id')
.references('id')
.inTable('actors_profiles');
table.dropUnique(['url', 'actor_id']);
table.unique(['url', 'actor_id', 'profile_id']);
});
await knex.raw(`
CREATE UNIQUE INDEX actors_social_url_actor_id_null_unique ON actors_social (url, actor_id) WHERE profile_id IS NULL;
`);
};
exports.down = async (knex) => {
await knex.raw(`
DROP INDEX actors_social_url_actor_id_null_unique;
`);
await knex.schema.alterTable('actors_social', (table) => {
table.dropUnique(['url', 'actor_id', 'profile_id']);
table.unique(['url', 'actor_id']);
table.dropColumn('profile_id');
});
};

View File

@ -775,7 +775,41 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
}
/**
 * Store the social URLs found on scraped profiles in `actors_social`,
 * linked to both the actor and the stored `actors_profiles` row.
 *
 * Profiles without a non-empty `social` array, and profiles for which no
 * `actors_profiles` row is found for their (actor ID, entity ID) pair, are skipped.
 * Values that cannot be parsed as an absolute URL are dropped, so one malformed
 * scraped value does not abort the remaining inserts. Conflicting rows are
 * ignored by the insert.
 *
 * @param {Array<object>} profiles - Profiles carrying `id` (used as the actor ID),
 *   `entity.id` and optionally `social` (array of URL strings).
 * @returns {Promise<void>} Resolves once every insert has completed.
 */
async function associateSocials(profiles) {
	const profileEntries = await knex('actors_profiles').whereIn(['actor_id', 'entity_id'], profiles.map((profile) => [profile.id, profile.entity.id]));

	// actor ID -> entity ID -> stored profile ID
	const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
		if (!acc[profileEntry.actor_id]) {
			acc[profileEntry.actor_id] = {};
		}

		acc[profileEntry.actor_id][profileEntry.entity_id] = profileEntry.id;

		return acc;
	}, {});

	// returns a one-element list with the row to insert, or an empty list for an unparseable URL
	const toSocialEntries = (url, actorId, profileId) => {
		let hostname;

		try {
			({ hostname } = new URL(url));
		} catch (error) {
			// new URL() throws on anything that is not an absolute URL
			return [];
		}

		return [{
			url,
			platform: hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
			actor_id: actorId,
			profile_id: profileId,
		}];
	};

	// the chain must be awaited, otherwise the caller continues before the inserts ran
	// and a failing insert turns into an unhandled rejection
	await profiles.reduce(async (chain, profile) => {
		await chain;

		if (!Array.isArray(profile.social) || profile.social.length === 0) {
			return;
		}

		const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id];

		if (!profileId) {
			return;
		}

		const socialEntries = profile.social.flatMap((url) => toSocialEntries(url, profile.id, profileId));

		if (socialEntries.length === 0) {
			return;
		}

		await knex('actors_social')
			.insert(socialEntries)
			.onConflict()
			.ignore();
	}, Promise.resolve());
}
async function getActorNames(actorNames) {
@ -801,7 +835,7 @@ async function storeProfiles(profiles) {
const profilesWithAvatarIds = await associateAvatars(profiles);
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
// await associateSocials(profiles);
await associateSocials(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actorIds);

View File

@ -269,13 +269,7 @@ async function scrapeProfile({ query, el }, channel, options) {
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
if (bio.measurements) {
const [bust, waist, hip] = bio.measurements.split('-');
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
profile.measurements = bio.measurements;
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
@ -285,14 +279,14 @@ async function scrapeProfile({ query, el }, channel, options) {
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
if (bio.tattoos && /(yes)|(some)|(many)/i.test(bio.tattoos)) profile.hasTattoos = true;
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
if (bio.piercings && /(yes)|(some)|(many)/i.test(bio.piercings)) profile.hasPiercings = true;
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
profile.social = [bio.onlyfans, bio.twitter, bio.instagram].filter(Boolean);
profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
profile.avatar = [
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),