forked from DebaucheryLibrarian/traxxx
Fixed release sites for profile scraping.
This commit is contained in:
@@ -8,7 +8,7 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const { get, geta, ctxa } = require('../utils/q');
|
||||
const { get, geta, ctxa, parseDate } = require('../utils/q');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
@@ -304,27 +304,37 @@ function scrapeProfile(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const bio = document.querySelector('.model_bio').textContent;
|
||||
const avatarEl = document.querySelector('.model_bio_pic');
|
||||
const avatarEl = document.querySelector('.model_bio_pic img');
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
const heightString = bio.match(/\d+ feet \d+ inches/);
|
||||
const ageString = bio.match(/Age:\s*\d{2}/);
|
||||
const ageString = bio.match(/Age:\s*(\d{2})/);
|
||||
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
|
||||
const measurementsString = bio.match(/\w+-\d+-\d+/);
|
||||
|
||||
if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
|
||||
if (ageString) profile.age = Number(ageString[1]);
|
||||
|
||||
if (heightString) profile.height = heightToCm(heightString[0]);
|
||||
if (ageString) profile.age = Number(ageString[0].match(/\d{2}/)[0]);
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
|
||||
|
||||
if (measurementsString) {
|
||||
const [bust, waist, hip] = measurementsString[0].split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (avatarEl) {
|
||||
const avatarSources = [
|
||||
avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6).trim(),
|
||||
avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6).trim(),
|
||||
avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6).trim(),
|
||||
avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7).trim(),
|
||||
avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7).trim(),
|
||||
avatarEl.getAttribute('src0_3x'),
|
||||
avatarEl.getAttribute('src0_2x'),
|
||||
avatarEl.getAttribute('src0_1x'),
|
||||
avatarEl.getAttribute('src0'),
|
||||
avatarEl.getAttribute('src'),
|
||||
].filter(Boolean);
|
||||
|
||||
if (avatarSources.length) profile.avatar = avatarSources;
|
||||
@@ -332,6 +342,8 @@ function scrapeProfile(html, url, actorName) {
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
|
||||
|
||||
console.log(profile);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user