Storing actor socials, improved Hush profile scraper.
This commit is contained in:
parent
48acabac49
commit
ca695db3ba
|
@ -397,7 +397,6 @@ const releaseFields = `
|
||||||
${releaseBasicActorsFragment}
|
${releaseBasicActorsFragment}
|
||||||
${releaseTagsFragment}
|
${releaseTagsFragment}
|
||||||
${releasePosterFragment}
|
${releasePosterFragment}
|
||||||
${releaseCoversFragment}
|
|
||||||
${releasePhotosFragment}
|
${releasePhotosFragment}
|
||||||
${siteFragment}
|
${siteFragment}
|
||||||
studio {
|
studio {
|
||||||
|
|
|
@ -17,7 +17,7 @@ exports.up = async (knex) => {
|
||||||
AS showcased,
|
AS showcased,
|
||||||
releases.effective_date,
|
releases.effective_date,
|
||||||
releases.created_at,
|
releases.created_at,
|
||||||
array_agg(tags.slug) FILTER (WHERE tags.slug IS NOT NULL) AS tags
|
array_agg(tags.slug ORDER BY tags.priority DESC) FILTER (WHERE tags.slug IS NOT NULL) AS tags
|
||||||
FROM releases
|
FROM releases
|
||||||
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
|
LEFT JOIN releases_tags ON releases_tags.release_id = releases.id
|
||||||
LEFT JOIN tags ON tags.id = releases_tags.tag_id
|
LEFT JOIN tags ON tags.id = releases_tags.tag_id
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
exports.up = async (knex) => {
|
||||||
|
await knex.schema.alterTable('actors_social', (table) => {
|
||||||
|
table.integer('profile_id')
|
||||||
|
.references('id')
|
||||||
|
.inTable('actors_profiles');
|
||||||
|
|
||||||
|
table.dropUnique(['url', 'actor_id']);
|
||||||
|
table.unique(['url', 'actor_id', 'profile_id']);
|
||||||
|
});
|
||||||
|
|
||||||
|
await knex.raw(`
|
||||||
|
CREATE UNIQUE INDEX actors_social_url_actor_id_null_unique ON actors_social (url, actor_id) WHERE profile_id IS NULL;
|
||||||
|
`);
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.down = async (knex) => {
|
||||||
|
await knex.raw(`
|
||||||
|
DROP INDEX actors_social_url_actor_id_null_unique;
|
||||||
|
`);
|
||||||
|
|
||||||
|
await knex.schema.alterTable('actors_social', (table) => {
|
||||||
|
table.dropUnique(['url', 'actor_id', 'profile_id']);
|
||||||
|
table.unique(['url', 'actor_id']);
|
||||||
|
|
||||||
|
table.dropColumn('profile_id');
|
||||||
|
});
|
||||||
|
};
|
|
@ -775,7 +775,41 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Parse a scraped social link and derive its platform from the hostname.
 *
 * @param {string} url - Scraped social link.
 * @returns {{ platform: (string|undefined) }|null} The platform (the hostname label
 *   directly before the final one, e.g. `twitter` for `www.twitter.com`), or `null`
 *   when the value cannot be parsed as a URL.
 */
function parseSocialUrl(url) {
	try {
		const { hostname } = new URL(url);

		return { platform: hostname.match(/([\w-]+)?\.(\w+)$/)?.[1] };
	} catch (error) {
		// new URL() throws a TypeError on unparseable input; report it as invalid
		// so that one malformed link does not abort storing the others
		return null;
	}
}

/**
 * Store the social links of scraped profiles in `actors_social`, linked to both the
 * actor and the matching `actors_profiles` row.
 *
 * Profiles without socials, and profiles for which no `actors_profiles` row is found
 * for their (actor id, entity id) pair, are skipped. Links that cannot be parsed as a
 * URL are dropped. Rows that conflict with existing ones are ignored.
 *
 * @param {Array<object>} profiles - Scraped profiles; reads `id` (used as actor ID),
 *   `entity.id` and `social` (array of URLs) from each.
 * @returns {Promise<void>} Resolves once every insert has finished; rejects if a
 *   query fails.
 */
async function associateSocials(profiles) {
	if (profiles.length === 0) {
		return;
	}

	const profileEntries = await knex('actors_profiles').whereIn(
		['actor_id', 'entity_id'],
		profiles.map((profile) => [profile.id, profile.entity.id]),
	);

	// nested lookup: actor ID -> entity ID -> profile row ID
	const profileEntriesByActorIdAndEntityId = profileEntries.reduce((acc, profileEntry) => {
		if (!acc[profileEntry.actor_id]) {
			acc[profileEntry.actor_id] = {};
		}

		acc[profileEntry.actor_id][profileEntry.entity_id] = profileEntry.id;

		return acc;
	}, {});

	// the chain must be awaited, otherwise this function resolves before any insert
	// has run and a failing insert surfaces as an unhandled rejection
	await profiles.reduce(async (chain, profile) => {
		await chain;

		if (!Array.isArray(profile.social) || profile.social.length === 0) {
			return;
		}

		const profileId = profileEntriesByActorIdAndEntityId[profile.id]?.[profile.entity.id];

		if (!profileId) {
			return;
		}

		const socialEntries = profile.social
			.map((url) => {
				const parsedUrl = parseSocialUrl(url);

				if (!parsedUrl) {
					return null;
				}

				return {
					url,
					platform: parsedUrl.platform,
					actor_id: profile.id,
					profile_id: profileId,
				};
			})
			.filter(Boolean);

		if (socialEntries.length === 0) {
			return;
		}

		await knex('actors_social')
			.insert(socialEntries)
			.onConflict()
			.ignore();
	}, Promise.resolve());
}
|
||||||
|
|
||||||
async function getActorNames(actorNames) {
|
async function getActorNames(actorNames) {
|
||||||
|
@ -801,7 +835,7 @@ async function storeProfiles(profiles) {
|
||||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||||
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
|
const actorIds = Array.from(new Set(profiles.map((profile) => profile.id)));
|
||||||
|
|
||||||
// await associateSocials(profiles);
|
await associateSocials(profiles);
|
||||||
|
|
||||||
await upsertProfiles(profilesWithAvatarIds);
|
await upsertProfiles(profilesWithAvatarIds);
|
||||||
await interpolateProfiles(actorIds);
|
await interpolateProfiles(actorIds);
|
||||||
|
|
|
@ -269,13 +269,7 @@ async function scrapeProfile({ query, el }, channel, options) {
|
||||||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||||
|
|
||||||
if (bio.measurements) {
|
profile.measurements = bio.measurements;
|
||||||
const [bust, waist, hip] = bio.measurements.split('-');
|
|
||||||
|
|
||||||
if (bust) profile.bust = bust;
|
|
||||||
if (waist) profile.waist = Number(waist);
|
|
||||||
if (hip) profile.hip = Number(hip);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||||
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
|
||||||
|
@ -285,14 +279,14 @@ async function scrapeProfile({ query, el }, channel, options) {
|
||||||
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||||
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||||
|
|
||||||
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
if (bio.tattoos && /(yes)|(some)|(many)/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||||
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||||
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
if (bio.piercings && /(yes)|(some)|(many)/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||||
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||||
|
|
||||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());
|
||||||
|
|
||||||
profile.social = [bio.onlyfans, bio.twitter, bio.instagram].filter(Boolean);
|
profile.social = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean);
|
||||||
|
|
||||||
profile.avatar = [
|
profile.avatar = [
|
||||||
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
|
||||||
|
|
Loading…
Reference in New Issue