Added fixed actor age. Added male profiles to Little Caprice Dreams scraper. Added various tag photos.

This commit is contained in:
DebaucheryLibrarian
2020-11-29 03:59:47 +01:00
parent 71c884fe48
commit 9a61d2305c
71 changed files with 123 additions and 33 deletions

View File

@@ -105,6 +105,10 @@ function getMostFrequentDate(dates) {
return moment({ year, month, date }).toDate();
}
/**
 * Return the highest value in a list.
 *
 * Nullish entries (null/undefined) are skipped, so a list that mixes known
 * and missing values still yields the highest known value.
 *
 * @param {Array<number|null|undefined>} items - Values to compare with `>`.
 * @returns {number|null} The highest non-nullish item, or null when the list
 *   is empty or contains only nullish entries.
 */
function getHighest(items) {
	return items.reduce((highest, item) => {
		if (item === null || item === undefined) {
			return highest;
		}

		// Seeding with null instead of 0 keeps "no data" distinguishable from a real 0.
		return highest === null || item > highest ? item : highest;
	}, null);
}
/**
 * Return the item with the greatest `length` from a list.
 *
 * The input array is left untouched: sorting happens on a shallow copy,
 * because Array.prototype.sort reorders the array it is called on.
 *
 * @param {Array<string|Array>} items - Items exposing a numeric `length`.
 * @returns {string|Array|null} The longest item (the earliest one on ties),
 *   or null when the list is empty or the longest item is falsy (e.g. '').
 */
function getLongest(items) {
	const [longest] = [...items].sort((itemA, itemB) => itemB.length - itemA.length);

	return longest || null;
}
@@ -161,6 +165,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
entityId: actor.entity_id,
aliasFor: actor.alias_for,
dateOfBirth: actor.date_of_birth,
age: actor.age,
birthCountry: actor.birth_country_alpha2,
...(withDetails && {
alias: actor.alias && {
@@ -250,6 +255,7 @@ function curateProfileEntry(profile) {
entity_id: profile.entity?.id || null,
date_of_birth: profile.dateOfBirth,
date_of_death: profile.dateOfDeath,
age: profile.age,
url: profile.url,
gender: profile.gender,
ethnicity: profile.ethnicity,
@@ -328,6 +334,7 @@ async function curateProfile(profile) {
|| null;
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
curatedProfile.age = Number(profile.age) || null;
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
@@ -396,7 +403,7 @@ async function curateProfile(profile) {
}).filter(Boolean)
: [];
curatedProfile.releases = toBaseReleases(profile.releases, profile.entity);
curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity);
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.entity.name}' scraper: ${profile.ethnicity}`);
if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.entity.name}' scraper: ${profile.hairColor || profile.hair}`);
@@ -483,6 +490,7 @@ async function interpolateProfiles(actorIds) {
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
profile.age = getHighest(valuesByProperty.age);
// ensure most frequent country, city and state match up
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
@@ -666,10 +674,13 @@ async function scrapeActors(argNames) {
const [entities, existingActorEntries] = await Promise.all([
knex('entities')
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
.select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
.whereIn('entities.slug', entitySlugs)
.whereIn('entities.type', ['network', 'channel'])
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.orderBy('entities.type'),
.leftJoin('entities as children', 'children.parent_id', 'entities.id')
.orderBy('entities.type')
.groupBy('entities.id', 'parents.id'),
knex('actors')
.select(['id', 'name', 'slug', 'entry_id'])
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))