Add babepedia scraper #34

Open
opened 2021-02-11 00:18:26 +00:00 by Ghost · 0 comments

Requires #32 (the actor must pass its gender to the scraper), as no males are listed on the site.

'use strict';

const { JSDOM } = require('jsdom');
const moment = require('moment');
const logger = require('../logger')(__filename);

const http = require('../utils/http');

// Bio labels (lower-cased) that are stored under a camelCase key.
const BIO_KEY_ALIASES = new Map([
	['birthplace', 'birthPlace'],
	['eye color', 'eyeColor'],
	['hair color', 'hairColor'],
]);

// Whitespace between the number and the unit is tolerated ("167cm" and "167 cm").
const HEIGHT_CM_PATTERN = /(\d+)\s*cm/;
const WEIGHT_KG_PATTERN = /(\d+)\s*kg/;

/**
 * Parse a "bust-waist-hip" string such as "34C-24-35".
 *
 * @param {string} str - Raw measurements text.
 * @returns {{bust: number, cup: ?string, waist: number, hip: number}|null}
 *   The parsed parts, or null when any of the three dash-separated parts is missing.
 *   Numeric fields may be NaN when the corresponding part is not numeric.
 */
function parseMeasurements(str) {
	const [bra, waist, hip] = str.split('-').map((part) => part.trim());

	if (!bra || !waist || !hip) {
		return null;
	}

	const bust = Number.parseInt(bra, 10);

	return {
		bust,
		// The cup is whatever remains of the bra size once the numeric band is removed.
		cup: bust ? bra.replace(String(bust), '').trim() : null,
		waist: Number(waist),
		hip: Number(hip),
	};
}

/**
 * Extract the integer captured by the first group of `pattern` from `text`.
 *
 * @param {string} text - Raw bio value.
 * @param {RegExp} pattern - Pattern whose first capture group holds the digits.
 * @returns {?number} The parsed integer, or null when the pattern does not match.
 */
function parseUnitValue(text, pattern) {
	const match = text.match(pattern);

	return match ? Number.parseInt(match[1], 10) : null;
}

/**
 * Turn the "Label: value" list items of the bio list into a key/value object.
 *
 * Keys are the trimmed, lower-cased labels (with a few camelCase aliases).
 * A "measurements" entry additionally populates bust, cup, waist and hip;
 * "height" and "weight" are reduced to their cm / kg integer, or null when absent.
 *
 * @param {Array<{textContent: string}>} items - The list item elements.
 * @returns {Object} The collected bio entries.
 */
function parseBioList(items) {
	return items.reduce((acc, item) => {
		const text = item.textContent || '';

		// Split on the first colon only, so values that contain a colon are kept whole.
		const separatorIndex = text.indexOf(':');
		const hasValue = separatorIndex !== -1;
		const label = (hasValue ? text.slice(0, separatorIndex) : text).trim().toLowerCase();
		const key = BIO_KEY_ALIASES.get(label) ?? label;
		let value = hasValue ? text.slice(separatorIndex + 1).trim() : undefined;

		if (key === 'measurements' && value) {
			const measurements = parseMeasurements(value);

			if (measurements) {
				['bust', 'cup', 'waist', 'hip'].forEach((field) => {
					if (measurements[field]) acc[field] = measurements[field];
				});
			}
		}

		if (key === 'height' && value) {
			value = parseUnitValue(value, HEIGHT_CM_PATTERN);
		}

		if (key === 'weight' && value) {
			value = parseUnitValue(value, WEIGHT_KG_PATTERN);
		}

		acc[key] = value;

		return acc;
	}, {});
}

/**
 * Build an actor profile from the HTML of a profile page.
 *
 * @param {string} html - Page markup.
 * @param {string} actorName - Name the profile was requested for; always used as `profile.name`.
 * @returns {Object} The profile, or an empty object when the page has no `#babename` element.
 *   When the name on the page differs from `actorName`, it is reported as `aliasFor`.
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const nameEl = document.querySelector('#babename');

	// Without a name element this is not treated as a profile page.
	if (!nameEl) {
		return {};
	}

	const profile = { name: actorName };
	const name = (nameEl.textContent || '').trim();

	if (name && actorName !== name) {
		profile.aliasFor = name;
	}

	const bio = parseBioList(Array.from(document.querySelectorAll('#biolist li')));

	if (bio.born) {
		const birthdate = moment.utc(bio.born.replace(' of ', ' '), 'dddd Do MMMM YYYY');

		// Skip unparseable dates rather than storing an Invalid Date.
		if (birthdate.isValid()) profile.birthdate = birthdate.toDate();
	}

	if (bio.birthPlace) profile.birthPlace = bio.birthPlace;

	if (bio.eyeColor) profile.eyes = bio.eyeColor;
	if (bio.hairColor) profile.hair = bio.hairColor;
	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

	if (bio.bust) profile.bust = bio.bust;
	if (bio.cup) profile.cup = bio.cup;
	if (bio.waist) profile.waist = Number(bio.waist);
	if (bio.hip) profile.hip = Number(bio.hip);

	if (bio.height) profile.height = Number(bio.height);
	if (bio.weight) profile.weight = Number(bio.weight);

	if (bio.boobs) {
		const boobs = bio.boobs.toLowerCase();

		// "real" is checked last, so it wins when both words appear.
		if (boobs.includes('fake')) profile.naturalBoobs = false;
		if (boobs.includes('real')) profile.naturalBoobs = true;
	}

	if (bio.tattoos) {
		profile.hasTattoos = bio.tattoos.toLowerCase() !== 'none';
		if (profile.hasTattoos) profile.tattoos = bio.tattoos;
	}

	if (bio.piercings) {
		profile.hasPiercings = bio.piercings.toLowerCase() !== 'none';
		if (profile.hasPiercings) profile.piercings = bio.piercings;
	}

	const aka = document.querySelector('#aka');

	if (aka) {
		profile.aliases = (aka.textContent || '')
			.replace('aka ', '')
			.split('/')
			.map((alias) => alias.trim())
			.filter(Boolean);
	}

	const avatar = document.querySelector('#profimg a')?.href;
	if (avatar) profile.avatar = { src: `https://www.babepedia.com${avatar}`, credit: 'Babepedia' };

	// The site is assumed to list only women (per the issue description).
	profile.gender = 'female';

	return profile;
}

/**
 * Find the link to the actor's page in the HTML of a search results page.
 *
 * Only the first `.results .thumbshot a` link is considered, and it is accepted
 * only when its text contains `actorName` (case-sensitive).
 *
 * @param {string} html - Search results markup.
 * @param {string} actorName - Name that must appear in the link text.
 * @returns {?string} The link's href, or null when there is no matching link.
 */
function scrapeSearch(html, actorName) {
	const { document } = new JSDOM(html).window;
	const link = document.querySelector('.results .thumbshot a');

	// textContent + includes(): strings have no contains() method, and jsdom is not
	// expected to provide innerText, so the previous check could never succeed.
	if (!link?.textContent?.includes(actorName)) {
		return null;
	}

	return link.href || null;
}

/**
 * Look up an actor on babepedia.com and scrape their profile.
 *
 * The search response is first scraped as if it were a profile page; if that
 * yields nothing, the first matching search result is followed instead.
 * NOTE(review): presumably the search URL serves the profile directly on an exact
 * match — confirm against the site.
 *
 * @param {{name: string, gender: ?string}} actor - Actor to look up.
 * @returns {Promise<?Object>} The scraped profile, or null when the actor is male
 *   or no matching profile is found.
 */
async function fetchProfile(actor) {
	// Checked first so male actors never cause any name handling or request.
	if (actor.gender === 'male') {
		return null;
	}

	// Global pattern: replace() with a string pattern removes only the first apostrophe.
	const actorName = actor.name.replace(/'/g, '');

	const searchRes = await http.get(`https://www.babepedia.com/search/${actorName}`);
	const searchHtml = searchRes.body.toString();

	const directProfile = scrapeProfile(searchHtml, actorName);

	if (directProfile.name === actorName) {
		logger.verbose(directProfile);
		return directProfile;
	}

	const actorPath = scrapeSearch(searchHtml, actorName);

	// Only follow links that contain "babe" somewhere after their first character.
	if (!actorPath || actorPath.indexOf('babe') <= 0) {
		return null;
	}

	const actorRes = await http.get(`https://www.babepedia.com${actorPath}`);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	const profile = scrapeProfile(actorRes.body.toString(), actorName);

	if (profile.name !== actorName) {
		return null;
	}

	logger.info(profile);

	return profile;
}

// Only fetchProfile is exported; the scrape* functions stay internal to this module.
module.exports = {
	fetchProfile,
};

required #32 actor to pass gender to scrapper, as no males listed on site. ``` 'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const logger = require('../logger')(__filename); const http = require('../utils/http'); function scrapeProfile(html, actorName) { const { document } = new JSDOM(html).window; const profile = { name: actorName }; if (document.querySelectorAll('#babename').length === 0){ return {}; } const name = document.querySelector('#babename')?.textContent; if (actorName !== name) { profile.aliasFor = name; } function measurementsFromString(str){ const [bra, waist, hip] = str.split("-"); if (bra && waist && hip) { const measurements = {}; measurements.bust = parseInt(bra); measurements.cup = measurements.bust ? bra.replace(measurements.bust, "") : null; measurements.waist = Number(waist); measurements.hip = Number(hip); return measurements; } return null; } const bio = Array.from(document.querySelectorAll('#biolist li')).reduce((acc, item) => { const keyMatch = item.textContent.split(':'); if (keyMatch) { let key = keyMatch[0].toLowerCase(); let value = keyMatch[1]?.trim(); if (key === 'birthplace') key = 'birthPlace'; if (key === 'eye color') key = 'eyeColor'; if (key === 'hair color') key = 'hairColor'; if (key == 'measurements' && value) { const measurements = measurementsFromString(value); if (measurements) { if (measurements.bust) acc.bust = measurements.bust; if (measurements.cup) acc.cup = measurements.cup; if (measurements.waist) acc.waist = measurements.waist; if (measurements.hip) acc.hip = measurements.hip; } } if (key == 'height' && value) { const rawHeightMatch = value.match(/\d+cm/); const cm = rawHeightMatch ? rawHeightMatch[0] : null; value = cm ? parseInt(cm.replace("cm", "")) : null; } if (key == 'weight' && value) { const rawWeightMatch = value.match(/\d+kg/); const kg = rawWeightMatch ? rawWeightMatch[0] : null; value = kg ? 
parseInt(kg.replace("kg", "")) : null; } acc[key] = value; } return acc; }, {}); if (bio.born) profile.birthdate = moment.utc(bio.born.replace(' of ', ' '), 'dddd Do MMMM YYYY')?.toDate(); if (bio.birthPlace) profile.birthPlace = bio.birthPlace; if (bio.eyeColor) profile.eyes = bio.eyeColor; if (bio.hairColor) profile.hair = bio.hairColor; if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.bust) profile.bust = bio.bust; if (bio.cup) profile.cup = bio.cup; if (bio.waist) profile.waist = Number(bio.waist); if (bio.hip) profile.hip = Number(bio.hip); if (bio.height) profile.height = Number(bio.height); if (bio.weight) profile.weight = Number(bio.weight); if (bio.boobs) { if (bio.boobs?.toLowerCase().indexOf('fake') !== -1) { profile.naturalBoobs = false; } if (bio.boobs.toLowerCase().indexOf('real') !== -1) { profile.naturalBoobs = true; } } if (bio.tattoos) { if (bio.tattoos.toLowerCase() === 'none') { profile.hasTattoos = false; } else { profile.hasTattoos = true; profile.tattoos = bio.tattoos; } } if (bio.piercings) { if (bio.piercings.toLowerCase() === 'none') { profile.hasPiercings = false; } else { profile.hasPiercings = true; profile.piercings = bio.piercings; } } const aka = document.querySelector('#aka'); if (aka) profile.aliases = aka.textContent?.replace('aka ','')?.split('/').map(alias => alias.trim()); const avatar = document.querySelector('#profimg a')?.href; if (avatar) profile.avatar = { src: `https://www.babepedia.com${avatar}`, credit: 'Babepedia' }; profile.gender = 'female'; return profile; } function scrapeSearch(html, actorName) { const { document } = new JSDOM(html).window; const link = document.querySelector('.results .thumbshot a'); if (link && link.innerText?.contains(actorName)) return document.querySelector('.results .thumbshot a')?.href || null; else { return null; } } async function fetchProfile(actor) { const actorName = actor.name.replace('\'', ''); if (actor.gender === 'male') { return null; } const searchRes = await 
http.get(`https://www.babepedia.com/search/${actorName}`); let result = scrapeProfile(searchRes.body.toString(), actorName); if (result.name === actorName) { logger.verbose(result); return result; } const actorPath = scrapeSearch(searchRes.body.toString(), actorName); if (actorPath?.indexOf('babe') > 0) { const actorRes = await http.get(`https://www.babepedia.com${actorPath}`); if (actorRes.statusCode === 200) { result = scrapeProfile(actorRes.body.toString(), actorName); if (result.name === actorName) { logger.info(result); return result; } } return null; } return null; } module.exports = { fetchProfile, }; ```
Ghost changed title from Add babepedia scrapper to Add babepedia scraper 2021-02-11 23:05:41 +00:00
Sign in to join this conversation.
No Milestone
No Assignees
1 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: DebaucheryLibrarian/traxxx#34
No description provided.