Improved actor profile flow. Added images.

This commit is contained in:
2020-01-23 03:52:12 +01:00
parent c9b8a18db5
commit 9db0e97512
23 changed files with 104 additions and 54 deletions

View File

@@ -287,6 +287,7 @@ async function mergeProfiles(profiles, actor) {
birthdate: Number.isNaN(Number(prevProfile.birthdate)) ? profile.birthdate : prevProfile.birthdate,
birthPlace: prevProfile.birthPlace || profile.birthPlace,
residencePlace: prevProfile.residencePlace || profile.residencePlace,
nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
ethnicity: prevProfile.ethnicity || profile.ethnicity,
bust: prevProfile.bust || profile.bust,
waist: prevProfile.waist || profile.waist,
@@ -316,6 +317,17 @@ async function mergeProfiles(profiles, actor) {
mergedProfile.birthPlace = birthPlace;
mergedProfile.residencePlace = residencePlace;
if (!mergedProfile.birthPlace && mergedProfile.nationality) {
const country = await knex('countries')
.where('nationality', 'ilike', `%${mergedProfile.nationality}%`)
.orderBy('priority', 'desc')
.first();
mergedProfile.birthPlace = {
country: country.alpha2,
};
}
return mergedProfile;
}

View File

@@ -182,7 +182,8 @@ async function storePhotos(photos, {
concurrency: 10,
}).filter(photo => photo);
const [hashDuplicates, hashOriginals] = await findDuplicates(metaFiles, 'hash', 'hash', label);
const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);
const savedPhotos = await savePhotos(hashOriginals, {
domain,

View File

@@ -119,17 +119,12 @@ async function scrapeProfile(html, _url, actorName) {
const descriptionEl = document.querySelector('.description-box');
const avatarEl = document.querySelector('.pornstar-details .card-img-top');
const country = await knex('countries')
.where('nationality', 'ilike', `%${bio.Nationality}%`)
.orderBy('priority', 'desc')
.first();
const profile = {
name: actorName,
};
profile.birthdate = moment.utc(bio.Birthday, 'MMMM DD, YYYY').toDate();
if (country) profile.birthPlace = country.name;
if (bio.Nationality) profile.nationality = bio.Nationality;
if (bio['Bra size']) [profile.bust] = bio['Bra size'].match(/\d+\w+/);
if (bio.Waist) profile.waist = Number(bio.Waist.match(/\d+/)[0]);

View File

@@ -78,6 +78,11 @@ function scrapeProfile(html, url, actorName, siteSlug) {
const avatarEl = document.querySelector('img.actorPicture');
const descriptionEl = document.querySelector('.actorBio p:not(.bioTitle)');
const hairEl = document.querySelector('.actorProfile .attribute_hair_color');
const heightEl = document.querySelector('.actorProfile .attribute_height');
const weightEl = document.querySelector('.actorProfile .attribute_weight');
const aliasEl = document.querySelector('.actorProfile .attribute_alternate_names');
const nationalityEl = document.querySelector('.actorProfile .attribute_home');
const profile = {
name: actorName,
@@ -96,6 +101,11 @@ function scrapeProfile(html, url, actorName, siteSlug) {
}
if (descriptionEl) profile.description = descriptionEl.textContent.trim();
if (hairEl) profile.hair = hairEl.textContent.split(':')[1].trim();
if (heightEl) profile.height = Number(heightEl.textContent.match(/\d+/)[0]);
if (weightEl) profile.weight = Number(weightEl.textContent.match(/\d+/)[0]);
if (aliasEl) profile.aliases = aliasEl.textContent.split(':')[1].trim().split(', ');
if (nationalityEl) profile.nationality = nationalityEl.textContent.split(':')[1].trim();
profile.releases = Array.from(document.querySelectorAll('.sceneList .scene a.imgLink'), el => `https://${siteSlug}.com${el.href}`);