Storing actor profiles from scene pages.

This commit is contained in:
DebaucheryLibrarian 2020-08-30 04:18:47 +02:00
parent 53b0101a12
commit 1bfdf4b232
3 changed files with 45 additions and 17 deletions

View File

@ -75,7 +75,8 @@ module.exports = {
'blowpass',
],
[
// MindGeek; Brazzers and Mile High Media have their own assets
// MindGeek; Mile High Media has its own assets
'brazzers',
'realitykings',
'mofos',
'digitalplayground',
@ -90,7 +91,6 @@ module.exports = {
],
'wicked',
'burningangel',
'brazzers',
'milehighmedia',
[
'vixen',

View File

@ -123,8 +123,9 @@ function toBaseActors(actorsOrNames, release) {
const baseActor = {
name,
slug,
entryId: entryId || null,
entity: release?.site?.network || release?.entity?.parent || release?.entity || null,
entryId: entryId || actorOrName.entryId || null,
entity: release?.entity?.parent || release?.entity || null,
hasProfile: !!actorOrName.name, // actor contains profile information
};
if (actorOrName.name) {
@ -227,6 +228,10 @@ function curateActorEntries(baseActors, batchId) {
}
function curateProfileEntry(profile) {
if (!profile.id) {
return null;
}
const curatedProfileEntry = {
...(profile.update !== false && { id: profile.update }),
actor_id: profile.id,
@ -371,10 +376,10 @@ async function curateProfile(profile) {
}
}
async function interpolateProfiles(actors) {
async function interpolateProfiles(actorIds) {
const profiles = await knex('actors_profiles')
.select(['actors_profiles.*', 'media.width as avatar_width', 'media.height as avatar_height', 'media.size as avatar_size'])
.whereIn('actor_id', actors.map(actor => actor.id))
.whereIn('actor_id', actorIds)
.leftJoin('media', 'actors_profiles.avatar_media_id', 'media.id');
const profilesByActorId = profiles.reduce((acc, profile) => ({
@ -482,8 +487,8 @@ async function interpolateProfiles(actors) {
}
async function upsertProfiles(profiles) {
const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile));
const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile));
const newProfileEntries = profiles.filter(profile => !profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
const updatingProfileEntries = profiles.filter(profile => profile.update).map(profile => curateProfileEntry(profile)).filter(Boolean);
if (newProfileEntries.length > 0) {
await bulkInsert('actors_profiles', newProfileEntries);
@ -602,6 +607,14 @@ async function getActorNames(actorNames) {
return actorsWithoutProfiles.rows.map(actor => actor.name);
}
/**
 * Persist a batch of actor profiles.
 *
 * Passes the profiles through `associateAvatars`, upserts the result, and then
 * re-runs `interpolateProfiles` once for every distinct actor id in the batch.
 *
 * @param {Array<{ id: * }>} profiles - profiles to store; each entry's `id` is used as the actor id
 * @returns {Promise<void>}
 */
async function storeProfiles(profiles) {
	// Avatars are associated first, before the ids are collected, matching the original call order.
	const avatarLinkedProfiles = await associateAvatars(profiles);
	// Deduplicate so every actor is interpolated once, even with several profiles in the batch.
	const uniqueActorIds = [...new Set(profiles.map(({ id }) => id))];

	await upsertProfiles(avatarLinkedProfiles);
	await interpolateProfiles(uniqueActorIds);
}
async function scrapeActors(argNames) {
const actorNames = await getActorNames(argNames);
const baseActors = toBaseActors(actorNames);
@ -668,10 +681,7 @@ async function scrapeActors(argNames) {
}
if (argv.save) {
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
await storeProfiles(profiles);
}
return profiles;
@ -698,6 +708,24 @@ async function getOrCreateActors(baseActors, batchId) {
const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId);
const newActors = await bulkInsert('actors', curatedActorEntries);
const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({
...acc,
[actor.entity_id]: {
...acc[actor.entity_id],
[actor.slug]: actor.id,
},
}), {});
const newActorProfiles = baseActors
.filter(actor => actor.hasProfile)
.map(actor => ({
...actor,
id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug],
}))
.filter(actor => !!actor.id);
await storeProfiles(newActorProfiles);
if (Array.isArray(newActors)) {
return newActors.concat(existingActors);
}

View File

@ -267,8 +267,8 @@ async function fetchScene(url, site, baseScene) {
return null;
}
async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model') {
const url = `https://www.${networkSlug}.com`;
async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath = 'model') {
const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
const { session, instanceToken } = await getSession(url);
const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
@ -281,7 +281,7 @@ async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model
const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());
if (actorData) {
const actorUrl = `https://www.${networkSlug}.com/${actorPath}/${actorData.id}/`;
const actorUrl = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com/${actorPath}/${actorData.id}/`;
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
const [actorRes, actorReleasesRes] = await Promise.all([
@ -294,11 +294,11 @@ async function fetchProfile({ name: actorName }, networkSlug, actorPath = 'model
]);
if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkSlug);
return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkOrNetworkSlug.slug || networkOrNetworkSlug);
}
if (actorRes.statusCode === 200) {
return scrapeProfile(actorData, actorRes.body.toString(), null, networkSlug);
return scrapeProfile(actorData, actorRes.body.toString(), null, networkOrNetworkSlug.slug || networkOrNetworkSlug);
}
}
}