Added fixed actor age. Added male profiles to Little Caprice Dreams scraper. Added various tag photos.

This commit is contained in:
DebaucheryLibrarian
2020-11-29 03:59:47 +01:00
parent 71c884fe48
commit 9a61d2305c
71 changed files with 123 additions and 33 deletions

View File

@@ -105,6 +105,10 @@ function getMostFrequentDate(dates) {
return moment({ year, month, date }).toDate();
}
/**
 * Pick the highest value from a list.
 *
 * @param {Array<number|null|undefined>} items - Candidate values; entries that
 *   are null or undefined never win the comparison.
 * @returns {number|null} The highest value greater than zero, or null when the
 *   list is empty or holds no such value.
 */
function getHighest(items) {
	// Seed with null rather than 0 so that "no usable value" is reported as
	// null instead of a misleading 0. In the relational comparison null is
	// converted to 0, so the outcome for positive values is unchanged.
	return items.reduce((highest, item) => (item > highest ? item : highest), null);
}
/**
 * Pick the item with the greatest `length` from a list.
 *
 * @param {Array<string|Array>} items - Values exposing a `length` property.
 * @returns {string|Array|null} The longest item (the earliest one on a tie),
 *   or null when the list is empty or the longest item is falsy (e.g. '').
 */
function getLongest(items) {
	// sort() reorders in place; work on a copy so the caller's array is left untouched.
	const byLengthDescending = [...items].sort((itemA, itemB) => itemB.length - itemA.length);

	return byLengthDescending[0] || null;
}
@@ -161,6 +165,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
entityId: actor.entity_id,
aliasFor: actor.alias_for,
dateOfBirth: actor.date_of_birth,
age: actor.age,
birthCountry: actor.birth_country_alpha2,
...(withDetails && {
alias: actor.alias && {
@@ -250,6 +255,7 @@ function curateProfileEntry(profile) {
entity_id: profile.entity?.id || null,
date_of_birth: profile.dateOfBirth,
date_of_death: profile.dateOfDeath,
age: profile.age,
url: profile.url,
gender: profile.gender,
ethnicity: profile.ethnicity,
@@ -328,6 +334,7 @@ async function curateProfile(profile) {
|| null;
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
curatedProfile.age = Number(profile.age) || null;
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
@@ -396,7 +403,7 @@ async function curateProfile(profile) {
}).filter(Boolean)
: [];
curatedProfile.releases = toBaseReleases(profile.releases, profile.entity);
curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity);
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.entity.name}' scraper: ${profile.ethnicity}`);
if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.entity.name}' scraper: ${profile.hairColor || profile.hair}`);
@@ -483,6 +490,7 @@ async function interpolateProfiles(actorIds) {
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
profile.age = getHighest(valuesByProperty.age);
// ensure most frequent country, city and state match up
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
@@ -666,10 +674,13 @@ async function scrapeActors(argNames) {
const [entities, existingActorEntries] = await Promise.all([
knex('entities')
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
.select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
.whereIn('entities.slug', entitySlugs)
.whereIn('entities.type', ['network', 'channel'])
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.orderBy('entities.type'),
.leftJoin('entities as children', 'children.parent_id', 'entities.id')
.orderBy('entities.type')
.groupBy('entities.id', 'parents.id'),
knex('actors')
.select(['id', 'name', 'slug', 'entry_id'])
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))

View File

@@ -49,7 +49,7 @@ async function init() {
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
const actors = (argv.actors || argv.actorsUpdate || argv.actorsFile) && await scrapeActors(actorNames);
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.scenes).flat().filter(Boolean);
const updateBaseScenes = (argv.latest || argv.upcoming || argv.channels || argv.networks || argv.movies) && await fetchUpdates();

View File

@@ -4,7 +4,11 @@ const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function matchChannel(release, channel) {
const series = channel.children || channel.parent.children;
const series = channel.children || channel.parent?.children;
if (!series) {
return null;
}
const serieNames = series.reduce((acc, serie) => ({
...acc,
@@ -26,7 +30,7 @@ function matchChannel(release, channel) {
if (serie) {
return {
slug: serie.slug,
channel: serie.slug,
title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-:/]+\\s*`, 'ig'), ''),
};
}
@@ -109,31 +113,34 @@ async function scrapeScene({ query }, url, channel, include) {
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
}
release.trailer = {
src: query.video(),
type: query.video('source', 'type'),
quality: query.video('source', 'data-res'),
referer: url,
};
return {
...release,
...matchChannel(release, channel),
};
}
function scrapeProfile({ query }, url) {
const profile = {};
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
const profile = { url, gender };
const bio = query.cnts('div p').reduce((acc, item) => {
const [key, value] = item.split(/\s*:\s*/);
profile.age = query.number('div:nth-child(2) > p');
profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1];
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
profile.description = query.cnt('div:nth-child(4) > p');
profile.avatar = {
src: query.img('.model-page'),
referer: url,
};
console.log(bio);
console.log(profile);
profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity);
return profile;
}
@@ -160,12 +167,16 @@ async function fetchScene(url, channel, baseRelease, include) {
return res.status;
}
async function getActorUrl(baseActor) {
async function getActorUrl(baseActor, gender = 'female') {
if (baseActor.url) {
return baseActor.url;
}
const overviewRes = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');
const overviewUrl = gender === 'female'
? 'https://www.littlecaprice-dreams.com/pornstars/'
: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';
const overviewRes = await qu.getAll(overviewUrl, '.models');
if (!overviewRes.ok) {
return overviewRes.status;
@@ -174,23 +185,36 @@ async function getActorUrl(baseActor) {
const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);
if (!actorItem) {
if (gender === 'female') {
return getActorUrl(baseActor, 'male');
}
return null;
}
return actorItem.query.url('a');
const actorUrl = actorItem.query.url('a');
if (actorUrl) {
return {
url: actorUrl,
gender,
};
}
return null;
}
async function fetchProfile(baseActor, entity) {
async function fetchProfile(baseActor, { entity }) {
const actorUrl = await getActorUrl(baseActor);
if (!actorUrl) {
return null;
}
const actorRes = await qu.get(actorUrl, '#main-content');
const actorRes = await qu.get(actorUrl.url, '#main-content');
if (actorRes.ok) {
return scrapeProfile(actorRes.item, actorUrl, entity);
return scrapeProfile(actorRes.item, actorUrl, baseActor, entity);
}
return actorRes.status;

View File

@@ -1,5 +1,6 @@
'use strict';
const knex = require('../knex');
const logger = require('../logger')(__filename);
const http = require('./http');
@@ -8,6 +9,20 @@ async function resolvePlace(query) {
return null;
}
// query is a nationality, lookup would get weird results (British resolves to British, Northern Ireland)
const country = await knex('countries')
.where('nationality', 'ilike', `%${query}%`)
.orWhere('alpha3', 'ilike', `%${query}%`)
.orWhere('alpha2', 'ilike', `%${query}%`)
.orderBy('priority', 'desc')
.first();
if (country) {
return {
country: country.alpha2,
};
}
try {
// https://operations.osmfoundation.org/policies/nominatim/
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {

View File

@@ -12,7 +12,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
}
extend type Actor {
age: Int @requires(columns: ["dateOfBirth"])
ageFromBirth: Int @requires(columns: ["dateOfBirth"])
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
height(units:Units): String @requires(columns: ["height"])
weight(units:Units): String @requires(columns: ["weight"])
@@ -22,7 +22,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
`,
resolvers: {
Actor: {
age(parent, _args, _context, _info) {
ageFromBirth(parent, _args, _context, _info) {
if (!parent.dateOfBirth) return null;
return moment().diff(parent.dateOfBirth, 'years');