Storing actor socials, improved Hush profile scraper.
This commit is contained in:
parent
48acabac49
commit
ca695db3ba
|
@ -397,7 +397,6 @@ const releaseFields = `
|
|||
${releaseBasicActorsFragment}
|
||||
${releaseTagsFragment}
|
||||
${releasePosterFragment}
|
||||
${releaseCoversFragment}
|
||||
${releasePhotosFragment}
|
||||
${siteFragment}
|
||||
studio {
|
||||
|
|
|
@ -17,7 +17,7 @@ exports.up = async (knex) => {
|
|||
AS showcased,
|
||||
releases.effective_date,
|
||||
releases.created_at,
|
||||
array_agg(tags.slug) FILTER (WHERE tags.slug IS NOT NULL) AS tags
|
||||
array_agg(tags.slug ORDER BY tags.priority DESC) FILTER (WHERE tags.slug IS NOT NULL) AS tags
|
||||
FROM releases
|
||||
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
|
||||
LEFT JOIN tags ON tags.id = releases_tags.tag_id
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
exports.up = async (knex) => {
|
||||
await knex.schema.alterTable('actors_social', (table) => {
|
||||
table.integer('profile_id')
|
||||
.references('id')
|
||||
.inTable('actors_profiles');
|
||||
|
||||
table.dropUnique(['url', 'actor_id']);
|
||||
table.unique(['url', 'actor_id', 'profile_id']);
|
||||
});
|
||||
|
||||
await knex.raw(`
|
||||
CREATE UNIQUE INDEX actors_social_url_actor_id_null_unique ON actors_social (url, actor_id) WHERE profile_id IS NULL;
|
||||
`);
|
||||
};
|
||||
|
||||
exports.down = async (knex) => {
|
||||
await knex.raw(`
|
||||
DROP INDEX actors_social_url_actor_id_null_unique;
|
||||
`);
|
||||
|
||||
await knex.schema.alterTable('actors_social', (table) => {
|
||||
table.dropUnique(['url', 'actor_id', 'profile_id']);
|
||||
table.unique(['url', 'actor_id']);
|
||||
|
||||
table.dropColumn('profile_id');
|
||||
});
|
||||
};
|
|
@ -775,7 +775,41 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
|||
}
|
||||
|
||||
/**
 * Builds a single `actors_social` row for a scraped social URL.
 *
 * @param {string} url - Social URL as scraped.
 * @param {number} actorId - ID of the actor the URL belongs to.
 * @param {number} profileId - ID of the `actors_profiles` entry the URL was scraped for.
 * @returns {object|null} The row to insert, or null when `url` is not a parseable absolute URL.
 */
function curateSocialEntry(url, actorId, profileId) {
	try {
		return {
			url,
			// Second-to-last hostname label, e.g. 'twitter' for www.twitter.com.
			platform: new URL(url).hostname.match(/([\w-]+)?\.(\w+)$/)?.[1],
			actor_id: actorId,
			profile_id: profileId,
		};
	} catch (error) {
		// new URL() throws on malformed input; one bad scraped value should not abort the batch.
		return null;
	}
}

/**
 * Stores the social URLs scraped for each profile in `actors_social`, linking every row
 * to the actor and to the matching `actors_profiles` entry. Rows that conflict with an
 * existing one are ignored.
 *
 * Profiles without socials, and profiles with no matching `actors_profiles` entry, are
 * skipped. URLs that cannot be parsed are dropped.
 *
 * @param {Array<object>} profiles - Scraped profiles; reads `id` (actor ID), `entity.id` and `social` (array of URLs).
 * @returns {Promise<void>} Resolves once all inserts have completed.
 */
async function associateSocials(profiles) {
	const profilesWithSocials = profiles.filter((profile) => Array.isArray(profile.social) && profile.social.length > 0);

	if (profilesWithSocials.length === 0) {
		// Nothing to store, no need to query the profile entries.
		return;
	}

	const profileEntries = await knex('actors_profiles').whereIn(
		['actor_id', 'entity_id'],
		profilesWithSocials.map((profile) => [profile.id, profile.entity.id]),
	);

	// Nested lookup: actor ID -> entity ID -> profile entry ID.
	const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
		if (!acc[profileEntry.actor_id]) {
			acc[profileEntry.actor_id] = {};
		}

		acc[profileEntry.actor_id][profileEntry.entity_id] = profileEntry.id;

		return acc;
	}, {});

	// The chain must be awaited, otherwise this function resolves before any insert has run
	// and insert failures surface as unhandled rejections instead of reaching the caller.
	await profilesWithSocials.reduce(async (chain, profile) => {
		await chain;

		const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id];

		if (!profileId) {
			return;
		}

		const socialEntries = profile.social
			.map((url) => curateSocialEntry(url, profile.id, profileId))
			.filter(Boolean);

		if (socialEntries.length === 0) {
			return;
		}

		await knex('actors_social')
			.insert(socialEntries)
			.onConflict()
			.ignore();
	}, Promise.resolve());
}
|
||||
|
||||
async function getActorNames(actorNames) {
|
||||
|
@ -801,7 +835,7 @@ async function storeProfiles(profiles) {
|
|||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
|
||||
|
||||
// await associateSocials(profiles);
|
||||
await associateSocials(profiles);
|
||||
|
||||
await upsertProfiles(profilesWithAvatarIds);
|
||||
await interpolateProfiles(actorIds);
|
||||
|
|
|
@ -269,13 +269,7 @@ async function scrapeProfile({ query, el }, channel, options) {
|
|||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||
|
@ -285,14 +279,14 @@ async function scrapeProfile({ query, el }, channel, options) {
|
|||
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||
|
||||
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (bio.tattoos && /(yes)|(some)|(many)/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (bio.piercings && /(yes)|(some)|(many)/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
||||
|
||||
profile.social = [bio.onlyfans, bio.twitter, bio.instagram].filter(Boolean);
|
||||
profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
|
||||
|
||||
profile.avatar = [
|
||||
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
||||
|
|
Loading…
Reference in New Issue