From 1bfdf4b232dbf03e9bf1d15c3616283167d52844 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 30 Aug 2020 04:18:47 +0200 Subject: [PATCH] Storing actor profiles from scene pages. --- config/default.js | 4 ++-- src/actors.js | 48 +++++++++++++++++++++++++++++++--------- src/scrapers/mindgeek.js | 10 ++++----- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/config/default.js b/config/default.js index 80978805..68068c99 100644 --- a/config/default.js +++ b/config/default.js @@ -75,7 +75,8 @@ module.exports = { 'blowpass', ], [ - // MindGeek; Brazzers and Mile High Media have their own assets + // MindGeek; Mile High Media has its own assets + 'brazzers', 'realitykings', 'mofos', 'digitalplayground', @@ -90,7 +91,6 @@ module.exports = { ], 'wicked', 'burningangel', - 'brazzers', 'milehighmedia', [ 'vixen', diff --git a/src/actors.js b/src/actors.js index b8c4e56c..c73ed5ef 100644 --- a/src/actors.js +++ b/src/actors.js @@ -123,8 +123,9 @@ function toBaseActors(actorsOrNames, release) { const baseActor = { name, slug, - entryId: entryId || null, - entity: release?.site?.network || release?.entity?.parent || release?.entity || null, + entryId: entryId || actorOrName.entryId || null, + entity: release?.entity?.parent || release?.entity || null, + hasProfile: !!actorOrName.name, // actor contains profile information }; if (actorOrName.name) { @@ -227,6 +228,10 @@ function curateActorEntries(baseActors, batchId) { } function curateProfileEntry(profile) { + if (!profile.id) { + return null; + } + const curatedProfileEntry = { ...(profile.update !== false && { id: profile.update }), actor_id: profile.id, @@ -371,10 +376,10 @@ async function curateProfile(profile) { } } -async function interpolateProfiles(actors) { +async function interpolateProfiles(actorIds) { const profiles = await knex('actors_profiles') .select(['actors_profiles.*', 'media.width as avatar_width', 'media.height as avatar_height', 'media.size as avatar_size']) - .whereIn('actor_id', actors.map(actor => actor.id)) + .whereIn('actor_id', actorIds) .leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id'); const profilesByActorId = profiles.reduce((acc, profile) => ({ @@ -482,8 +487,8 @@ async function interpolateProfiles(actors) { } async function upsertProfiles(profiles) { - const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)); - const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)); + const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean); + const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean); if (newProfileEntries.length > 0) { await bulkInsert('actors_profiles', newProfileEntries); @@ -602,6 +607,14 @@ async function getActorNames(actorNames) { return actorsWithoutProfiles.rows.map(actor => actor.name); } +async function storeProfiles(profiles) { + const profilesWithAvatarIds = await associateAvatars(profiles); + const actorIds = Array.from(new Set(profiles.map(profile => profile.id))); + + await upsertProfiles(profilesWithAvatarIds); + await interpolateProfiles(actorIds); +} + async function scrapeActors(argNames) { const actorNames = await getActorNames(argNames); const baseActors = toBaseActors(actorNames); @@ -668,10 +681,7 @@ async function scrapeActors(argNames) { } if (argv.save) { - const profilesWithAvatarIds = await associateAvatars(profiles); - - await upsertProfiles(profilesWithAvatarIds); - await interpolateProfiles(actors); + await storeProfiles(profiles); } return profiles; @@ -698,6 +708,24 @@ async function getOrCreateActors(baseActors, batchId) { const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId); const newActors = await bulkInsert('actors', curatedActorEntries); + const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({ + ...acc, + [actor.entity_id]: { + ...acc[actor.entity_id], + [actor.slug]: actor.id, + }, + }), {}); + + const newActorProfiles = baseActors + .filter(actor => actor.hasProfile) + .map(actor => ({ + ...actor, + id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug], + })) + .filter(actor => !!actor.id); + + await storeProfiles(newActorProfiles); + if (Array.isArray(newActors)) { return newActors.concat(existingActors); } diff --git a/src/scrapers/mindgeek.js b/src/scrapers/mindgeek.js index c0b845b6..ae8aad67 100644 --- a/src/scrapers/mindgeek.js +++ b/src/scrapers/mindgeek.js @@ -267,8 +267,8 @@ async function fetchScene(url, site, baseScene) { return null; } -async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model') { - const url = `https://www.${networkSlug}.com`; +async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath = 'model') { + const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`; const { session, instanceToken } = await getSession(url); const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, { @@ -281,7 +281,7 @@ async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase()); if (actorData) { - const actorUrl = `https://www.${networkSlug}.com/${actorPath}/${actorData.id}/`; + const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${actorPath}/${actorData.id}/`; const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`; const [actorRes, actorReleasesRes] = await Promise.all([ @@ -294,11 +294,11 @@ async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model ]); if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) { - return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkSlug); + return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkOrNetworkSlug.slug || networkOrNetworkSlug); } if (actorRes.statusCode === 200) { - return scrapeProfile(actorData, actorRes.body.toString(), null, networkSlug); + return scrapeProfile(actorData, actorRes.body.toString(), null, networkOrNetworkSlug.slug || networkOrNetworkSlug); } } }