diff --git a/src/actors.js b/src/actors.js index 3d00093a..45b9618e 100755 --- a/src/actors.js +++ b/src/actors.js @@ -416,7 +416,7 @@ async function curateProfile(profile, actor) { curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null; // combined measurement value - const measurements = profile.measurements?.match(/(\d+)(\w+)[-x](\d+)[-x](\d+)/); // ExCoGi uses x + const measurements = profile.measurements?.match(/(\d+)(\w+)\s*[-x]\s*(\d+)\s*[-x]\s*(\d+)/); // ExCoGi uses x, Jules Jordan has spaces between the dashes if (measurements) { curatedProfile.bust = Number(measurements[1]); @@ -610,6 +610,14 @@ async function interpolateProfiles(actorIdsOrNames) { .filter((avatar) => avatar && (avatar.entropy === null || avatar.entropy > 5.5)) .sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null; + if (!profile.avatar_media_id) { + // try to settle for low quality avatar + profile.avatar_media_id = actorProfiles + .map((actorProfile) => actorProfile.avatar) + .filter((avatar) => avatar) + .sort((avatarA, avatarB) => avatarB.height - avatarA.height)[0]?.id || null; + } + return profile; }); diff --git a/src/media.js b/src/media.js index c72a5537..b3164ad5 100755 --- a/src/media.js +++ b/src/media.js @@ -353,8 +353,6 @@ async function extractSource(baseSource, { existingExtractMediaByUrl }) { if (typeof baseSource.defer === 'function') { const src = await baseSource.defer(); - console.log(baseSource, src); - return { ...baseSource, ...toBaseSource(src), diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index ffe19dbb..b9c1e01f 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -3,7 +3,6 @@ const util = require('util'); const Promise = require('bluebird'); const cheerio = require('cheerio'); -const { JSDOM } = require('jsdom'); const moment = require('moment'); const unprint = require('unprint'); @@ -272,9 +271,8 @@ function scrapeMovie({ el, query }, url, site) { }; } +/* function scrapeProfile(html, url, actorName, entity) { - const { document } = new JSDOM(html).window; - const bio = document.querySelector('.model_bio').textContent; const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb'); @@ -320,6 +318,36 @@ function scrapeProfile(html, url, actorName, entity) { return profile; } +*/ + +function scrapeProfile({ query }, url, name, entity) { + const profile = { url }; + + profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span'); // the spaces are important to avoid selecting a similar comment + + profile.height = heightToCm(query.content('//span[contains(text(), "Height")]/following-sibling::span')); + profile.measurements = query.content('//span[contains(text(), "Measurements")]/following-sibling::span'); + + const age = query.content('//span[contains(text(), "Age")]/following-sibling::span')?.trim(); + + if (age && /\w+ \d+, \d{4}/.test(age)) { + profile.dateOfBirth = unprint.extractDate(age, 'MMMM D, YYYY'); + } else { + profile.age = Number(age) || null; + } + + profile.avatar = [ + query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_3x' }), + query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_2x' }), + query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_1x' }), + query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0' }), + query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src' }), + ].filter(Boolean); + + profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true); + + return profile; +} async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) { const url = site.parameters?.latest @@ -376,10 +404,10 @@ async function fetchProfile({ name: actorName, url }, entity) { return null; } - const res = await http.get(profileUrl); + const res = await unprint.get(profileUrl); - if (res.statusCode === 200) { - return scrapeProfile(res.body.toString(), profileUrl, actorName, entity); + if (res.ok) { + return scrapeProfile(res.context, profileUrl, actorName, entity); } return null;