forked from DebaucheryLibrarian/traxxx
Added fixed actor age. Added male profiles to Little Caprice Dreams scraper. Added various tag photos.
This commit is contained in:
@@ -105,6 +105,10 @@ function getMostFrequentDate(dates) {
|
||||
return moment({ year, month, date }).toDate();
|
||||
}
|
||||
|
||||
/**
 * Pick the highest value from a list, ignoring missing entries.
 *
 * Entries that are null, undefined or NaN are skipped. When no usable
 * entry remains the result is null rather than 0, so callers storing the
 * result (e.g. an interpolated age) do not record a fabricated value.
 *
 * @param {Array} items - values compared with the `>` operator
 * @returns {*} the highest usable item, or null if there is none
 */
function getHighest(items) {
	return items.reduce((highest, item) => {
		if (item === null || item === undefined || Number.isNaN(item)) {
			return highest;
		}

		// the first usable item always wins over the null seed
		return (highest === null || item > highest) ? item : highest;
	}, null);
}
|
||||
|
||||
/**
 * Return the item with the greatest `length`.
 *
 * The input is copied before sorting, so the caller's array keeps its
 * original order. Among items of equal length the one that appears first
 * in the input is returned (Array.prototype.sort is stable).
 *
 * @param {Array<string|Array>} items - values exposing a `length` property
 * @returns {*} the longest item, or null when the list is empty or the
 *   longest item is falsy (e.g. an empty string)
 */
function getLongest(items) {
	const byLengthDesc = [...items].sort((itemA, itemB) => itemB.length - itemA.length);

	return byLengthDesc[0] || null;
}
|
||||
@@ -161,6 +165,7 @@ function curateActor(actor, withDetails = false, isProfile = false) {
|
||||
entityId: actor.entity_id,
|
||||
aliasFor: actor.alias_for,
|
||||
dateOfBirth: actor.date_of_birth,
|
||||
age: actor.age,
|
||||
birthCountry: actor.birth_country_alpha2,
|
||||
...(withDetails && {
|
||||
alias: actor.alias && {
|
||||
@@ -250,6 +255,7 @@ function curateProfileEntry(profile) {
|
||||
entity_id: profile.entity?.id || null,
|
||||
date_of_birth: profile.dateOfBirth,
|
||||
date_of_death: profile.dateOfDeath,
|
||||
age: profile.age,
|
||||
url: profile.url,
|
||||
gender: profile.gender,
|
||||
ethnicity: profile.ethnicity,
|
||||
@@ -328,6 +334,7 @@ async function curateProfile(profile) {
|
||||
|| null;
|
||||
|
||||
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
|
||||
curatedProfile.age = Number(profile.age) || null;
|
||||
|
||||
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
|
||||
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
|
||||
@@ -396,7 +403,7 @@ async function curateProfile(profile) {
|
||||
}).filter(Boolean)
|
||||
: [];
|
||||
|
||||
curatedProfile.releases = toBaseReleases(profile.releases, profile.entity);
|
||||
curatedProfile.scenes = toBaseReleases(profile.scenes || profile.releases, profile.entity);
|
||||
|
||||
if (profile.ethnicity && !curatedProfile.ethnicity) logger.warn(`Unrecognized ethnicity returned by '${profile.entity.name}' scraper: ${profile.ethnicity}`);
|
||||
if ((profile.hairColor || profile.hair) && !curatedProfile.hairColor) logger.warn(`Unrecognized hair color returned by '${profile.entity.name}' scraper: ${profile.hairColor || profile.hair}`);
|
||||
@@ -483,6 +490,7 @@ async function interpolateProfiles(actorIds) {
|
||||
|
||||
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
||||
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
||||
profile.age = getHighest(valuesByProperty.age);
|
||||
|
||||
// ensure most frequent country, city and state match up
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.origin.map(location => location.country));
|
||||
@@ -666,10 +674,13 @@ async function scrapeActors(argNames) {
|
||||
|
||||
const [entities, existingActorEntries] = await Promise.all([
|
||||
knex('entities')
|
||||
.select(knex.raw('entities.*, row_to_json(parents) as parent'))
|
||||
.select(knex.raw('entities.*, row_to_json(parents) as parent, json_agg(children) as children'))
|
||||
.whereIn('entities.slug', entitySlugs)
|
||||
.whereIn('entities.type', ['network', 'channel'])
|
||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.orderBy('entities.type'),
|
||||
.leftJoin('entities as children', 'children.parent_id', 'entities.id')
|
||||
.orderBy('entities.type')
|
||||
.groupBy('entities.id', 'parents.id'),
|
||||
knex('actors')
|
||||
.select(['id', 'name', 'slug', 'entry_id'])
|
||||
.whereIn('slug', baseActors.map(baseActor => baseActor.slug))
|
||||
|
||||
@@ -49,7 +49,7 @@ async function init() {
|
||||
const actorNames = (argv.actors || []).concat(actorsFromFile || []);
|
||||
|
||||
const actors = (argv.actors || argv.actorsUpdate || argv.actorsFile) && await scrapeActors(actorNames);
|
||||
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
|
||||
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.scenes).flat().filter(Boolean);
|
||||
|
||||
const updateBaseScenes = (argv.latest || argv.upcoming || argv.channels || argv.networks || argv.movies) && await fetchUpdates();
|
||||
|
||||
|
||||
@@ -4,7 +4,11 @@ const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function matchChannel(release, channel) {
|
||||
const series = channel.children || channel.parent.children;
|
||||
const series = channel.children || channel.parent?.children;
|
||||
|
||||
if (!series) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const serieNames = series.reduce((acc, serie) => ({
|
||||
...acc,
|
||||
@@ -26,7 +30,7 @@ function matchChannel(release, channel) {
|
||||
|
||||
if (serie) {
|
||||
return {
|
||||
slug: serie.slug,
|
||||
channel: serie.slug,
|
||||
title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''),
|
||||
};
|
||||
}
|
||||
@@ -109,31 +113,34 @@ async function scrapeScene({ query }, url, channel, include) {
|
||||
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
|
||||
}
|
||||
|
||||
release.trailer = {
|
||||
src: query.video(),
|
||||
type: query.video('source', 'type'),
|
||||
quality: query.video('source', 'data-res'),
|
||||
referer: url,
|
||||
};
|
||||
|
||||
return {
|
||||
...release,
|
||||
...matchChannel(release, channel),
|
||||
};
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, url) {
|
||||
const profile = {};
|
||||
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
|
||||
const profile = { url, gender };
|
||||
|
||||
const bio = query.cnts('div p').reduce((acc, item) => {
|
||||
const [key, value] = item.split(/\s*:\s*/);
|
||||
profile.age = query.number('div:nth-child(2) > p');
|
||||
profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1];
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
profile.description = query.cnt('div:nth-child(4) > p');
|
||||
|
||||
profile.avatar = {
|
||||
src: query.img('.model-page'),
|
||||
referer: url,
|
||||
};
|
||||
|
||||
console.log(bio);
|
||||
console.log(profile);
|
||||
profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -160,12 +167,16 @@ async function fetchScene(url, channel, baseRelease, include) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function getActorUrl(baseActor) {
|
||||
async function getActorUrl(baseActor, gender = 'female') {
|
||||
if (baseActor.url) {
|
||||
return baseActor.url;
|
||||
}
|
||||
|
||||
const overviewRes = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');
|
||||
const overviewUrl = gender === 'female'
|
||||
? 'https://www.littlecaprice-dreams.com/pornstars/'
|
||||
: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';
|
||||
|
||||
const overviewRes = await qu.getAll(overviewUrl, '.models');
|
||||
|
||||
if (!overviewRes.ok) {
|
||||
return overviewRes.status;
|
||||
@@ -174,23 +185,36 @@ async function getActorUrl(baseActor) {
|
||||
const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);
|
||||
|
||||
if (!actorItem) {
|
||||
if (gender === 'female') {
|
||||
return getActorUrl(baseActor, 'male');
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return actorItem.query.url('a');
|
||||
const actorUrl = actorItem.query.url('a');
|
||||
|
||||
if (actorUrl) {
|
||||
return {
|
||||
url: actorUrl,
|
||||
gender,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile(baseActor, entity) {
|
||||
async function fetchProfile(baseActor, { entity }) {
|
||||
const actorUrl = await getActorUrl(baseActor);
|
||||
|
||||
if (!actorUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const actorRes = await qu.get(actorUrl, '#main-content');
|
||||
const actorRes = await qu.get(actorUrl.url, '#main-content');
|
||||
|
||||
if (actorRes.ok) {
|
||||
return scrapeProfile(actorRes.item, actorUrl, entity);
|
||||
return scrapeProfile(actorRes.item, actorUrl, baseActor, entity);
|
||||
}
|
||||
|
||||
return actorRes.status;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
const knex = require('../knex');
|
||||
const logger = require('../logger')(__filename);
|
||||
const http = require('./http');
|
||||
|
||||
@@ -8,6 +9,20 @@ async function resolvePlace(query) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// query is a nationality, lookup would get weird results (British resolves to British, Northern Ireland)
|
||||
const country = await knex('countries')
|
||||
.where('nationality', 'ilike', `%${query}%`)
|
||||
.orWhere('alpha3', 'ilike', `%${query}%`)
|
||||
.orWhere('alpha2', 'ilike', `%${query}%`)
|
||||
.orderBy('priority', 'desc')
|
||||
.first();
|
||||
|
||||
if (country) {
|
||||
return {
|
||||
country: country.alpha2,
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
// https://operations.osmfoundation.org/policies/nominatim/
|
||||
const res = await http.get(`https://nominatim.openstreetmap.org/search/${encodeURI(query)}?format=json&accept-language=en&addressdetails=1`, {
|
||||
|
||||
@@ -12,7 +12,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||
}
|
||||
|
||||
extend type Actor {
|
||||
age: Int @requires(columns: ["dateOfBirth"])
|
||||
ageFromBirth: Int @requires(columns: ["dateOfBirth"])
|
||||
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
|
||||
height(units:Units): String @requires(columns: ["height"])
|
||||
weight(units:Units): String @requires(columns: ["weight"])
|
||||
@@ -22,7 +22,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||
`,
|
||||
resolvers: {
|
||||
Actor: {
|
||||
age(parent, _args, _context, _info) {
|
||||
ageFromBirth(parent, _args, _context, _info) {
|
||||
if (!parent.dateOfBirth) return null;
|
||||
|
||||
return moment().diff(parent.dateOfBirth, 'years');
|
||||
|
||||
Reference in New Issue
Block a user