Add iafd as scraper #35

Open
opened 2021-02-11 00:20:19 +00:00 by Ghost · 0 comments

Requires #32 to extract the correct actor by gender, because the search results are split by gender.

'use strict';

const { JSDOM } = require('jsdom');
const moment = require('moment');
const logger = require('../logger')(__filename);

const http = require('../utils/http');

/**
 * Build a performer profile from the HTML of a profile page.
 *
 * The page is read as a sequence of `.bioheading` / `.biodata` elements: each
 * heading names the field that the following data element(s) fill in.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} actorName - Name the profile was requested for; copied to `profile.name`.
 * @param {string} [actorGender] - Stored as `profile.gender`; falls back to 'female' when falsy.
 * @returns {object} Profile holding only the fields that could be extracted.
 */
function scrapeProfile(html, actorName, actorGender) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// Lowercased page headings that are stored under a different key.
	const keyAliases = new Map([
		['birthplace', 'birthPlace'],
		['eye color', 'eyeColor'],
		['hair colors', 'hairColor'],
		['birthday', 'born'],
		['performer aka', 'aka'],
	]);

	// Tolerate optional whitespace between the number and the unit ("168 cm" and "168cm").
	const heightPattern = /(\d+)\s*cm/;
	const weightPattern = /(\d+)\s*kg/;

	// Split a "34C-24-36" style string; returns null unless all three parts are present.
	function measurementsFromString(str) {
		const [bra, waist, hip] = str.split('-').map((part) => part.trim());

		if (!bra || !waist || !hip) {
			return null;
		}

		return {
			bust: Number.parseInt(bra, 10),
			// Whatever follows the leading band size, e.g. "C" in "34C".
			cup: bra.replace(/^\d+/, ''),
			waist: Number(waist),
			hip: Number(hip),
		};
	}

	// Return the captured number as an integer, or null when the pattern does not match.
	function extractNumber(str, pattern) {
		const match = str.match(pattern);

		return match ? Number.parseInt(match[1], 10) : null;
	}

	const bio = {};
	let key = '';

	for (const item of document.querySelectorAll('.bioheading, .biodata')) {
		if (item.classList.contains('bioheading')) {
			const heading = (item.textContent || '').trim().toLowerCase();

			key = keyAliases.get(heading) ?? heading;
			continue;
		}

		const text = (item.textContent || '').trim();

		// Skip placeholders for missing data, and data that precedes any heading.
		if (!key || text === 'No data' || text === 'No known aliases') {
			continue;
		}

		let value = text;

		if (key === 'measurements') {
			// A partial value such as "34C" yields null; previously this threw a TypeError.
			const measurements = measurementsFromString(text);

			if (measurements) {
				if (measurements.bust) bio.bust = measurements.bust;
				if (measurements.cup) bio.cup = measurements.cup;
				if (measurements.waist) bio.waist = measurements.waist;
				if (measurements.hip) bio.hip = measurements.hip;
			}
		} else if (key === 'height') {
			value = extractNumber(text, heightPattern);
		} else if (key === 'weight') {
			value = extractNumber(text, weightPattern);
		}

		bio[key] = value;
	}

	if (bio.born) {
		const born = moment.utc(bio.born.replace(',', ''), 'MMMM DD YYYY');

		// Do not store an Invalid Date when the birthday cannot be parsed.
		if (born.isValid()) profile.birthdate = born.toDate();
	}

	if (bio.birthPlace) profile.birthPlace = bio.birthPlace;

	if (bio.eyeColor) profile.eyes = bio.eyeColor;
	if (bio.hairColor) profile.hair = bio.hairColor;
	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

	if (bio.bust) profile.bust = bio.bust;
	if (bio.cup) profile.cup = bio.cup;
	if (bio.waist) profile.waist = Number(bio.waist);
	if (bio.hip) profile.hip = Number(bio.hip);

	if (bio.height) profile.height = Number(bio.height);
	if (bio.weight) profile.weight = Number(bio.weight);

	if (bio.boobs) {
		const boobs = bio.boobs.toLowerCase();

		// 'real' is checked last so it wins when both words appear.
		if (boobs.includes('fake')) profile.naturalBoobs = false;
		if (boobs.includes('real')) profile.naturalBoobs = true;
	}

	if (bio.tattoos) {
		if (bio.tattoos.toLowerCase() === 'none') {
			profile.hasTattoos = false;
		} else {
			profile.hasTattoos = true;
			profile.tattoos = bio.tattoos;
		}
	}

	if (bio.piercings) {
		if (bio.piercings.toLowerCase() === 'none') {
			profile.hasPiercings = false;
		} else {
			profile.hasPiercings = true;
			profile.piercings = bio.piercings;
		}
	}

	if (bio.aka) profile.aliases = bio.aka.split(/,\s*/g);

	// The page title or headshot may be absent; skip them rather than throw.
	const name = document.querySelector('.container h1')?.textContent?.trim();
	if (name && actorName !== name) {
		profile.aliasFor = name;
	}

	const avatar = document.querySelector('#headshot img')?.src;
	if (avatar) {
		profile.avatar = { src: `${avatar}`, credit: 'Iafd' };
	}

	profile.gender = actorGender || 'female';

	return profile;
}

/**
 * Find the profile link of the first search result that wraps an image.
 *
 * Results are looked up inside `#tblMal` or `#tblFem` depending on the
 * requested gender (presumably the male and female result tables). For any
 * other gender value `#tblFem` is tried first, then `#tblMal`.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} [actorGender] - 'male', 'female', or anything else for "either".
 * @returns {string|null} The `href` of the matching anchor, or null when none is found.
 */
function scrapeSearch(html, actorGender) {
	const dom = new JSDOM(html);
	const page = dom.window.document;

	// href of the element wrapping the first image matched by the selector, or null.
	const linkFor = (selector) => {
		const thumbnail = page.querySelector(selector);

		return thumbnail?.parentElement?.href || null;
	};

	switch (actorGender) {
		case 'male':
			return linkFor('#tblMal a>img');
		case 'female':
			return linkFor('#tblFem a>img');
		default:
			return linkFor('#tblFem a>img') || linkFor('#tblMal a>img');
	}
}

/**
 * Search IAFD for an actor and scrape the first matching profile page.
 *
 * @param {{ name: string, gender?: string }} actor - Actor to look up; `gender`
 *   narrows which search results are considered.
 * @returns {Promise<object|null>} The scraped profile, or null when the search
 *   or profile request does not return status 200, or no result link is found.
 */
async function fetchProfile(actor) {
	const actorName = actor.name;

	// Encode the name so spaces, '&', '#' etc. cannot break or alter the query string.
	const searchQuery = encodeURIComponent(actorName);
	const searchRes = await http.get(`https://www.iafd.com/results.asp?searchtype=comprehensive&searchstring=${searchQuery}`);

	if (searchRes.statusCode !== 200) {
		return null;
	}

	const actorPath = scrapeSearch(searchRes.body.toString(), actor.gender);

	if (!actorPath) {
		return null;
	}

	// A profile path containing 'gender=m' overrides whatever gender was passed in.
	const actorGender = actorPath.includes('gender=m') ? 'male' : actor.gender;

	const actorRes = await http.get(`https://www.iafd.com${actorPath}`);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	const result = scrapeProfile(actorRes.body.toString(), actorName, actorGender);

	if (result.name !== actorName) {
		return null;
	}

	logger.verbose(result);

	return result;
}

// Public interface of this scraper: only fetchProfile is exported; the
// scrapeSearch and scrapeProfile helpers stay module-private.
module.exports = {
	fetchProfile,
};

requires #32 to extract the correct actor by gender, as split results ``` 'use strict'; const { JSDOM } = require('jsdom'); const moment = require('moment'); const logger = require('../logger')(__filename); const http = require('../utils/http'); function scrapeProfile(html, actorName, actorGender) { const { document } = new JSDOM(html).window; const profile = { name: actorName }; function measurementsFromString(str){ const [bra, waist, hip] = str.split("-"); if (bra && waist && hip) { const measurements = {}; measurements.bust = parseInt(bra); measurements.cup = bra.replace(measurements.bust, ""); measurements.waist = Number(waist); measurements.hip = Number(hip); return measurements; } return null; } let key = ''; let value = ''; const bio = Array.from(document.querySelectorAll('.bioheading, .biodata')).reduce((acc, item) => { if (item.classList.contains("bioheading")) { key = item.textContent?.toLowerCase(); return acc; } if (item.classList.contains("biodata")) { value = item.textContent.trim(); if (value === 'No data' || value === 'No known aliases') return acc; if (key === 'birthplace') key = 'birthPlace'; if (key === 'eye color') key = 'eyeColor'; if (key === 'hair colors') key = 'hairColor'; if (key == 'measurements' && value) { const measurements = measurementsFromString(value); if (measurements.bust) acc.bust = measurements.bust; if (measurements.cup) acc.cup = measurements.cup; if (measurements.waist) acc.waist = measurements.waist; if (measurements.hip) acc.hip = measurements.hip; } if (key == 'height' && value) { const rawHeightMatch = value.match(/\d+cm/); const cm = rawHeightMatch ? rawHeightMatch[0] : null; value = cm ? parseInt(cm.replace("cm", "")) : null; } if (key == 'weight' && value) { const rawWeightMatch = value.match(/\d+kg/); const kg = rawWeightMatch ? rawWeightMatch[0] : null; value = kg ? 
parseInt(kg.replace("kg", "")) : null; } if (key === 'birthday') { key = 'born'; //value = item.querySelector('a').innerText; } if (key === 'performer aka') key = 'aka'; acc[key] = value; } return acc; }, {}); if (bio.born) profile.birthdate = moment.utc(bio.born.replace(',', ''), 'MMMM DD YYYY')?.toDate(); if (bio.birthPlace) profile.birthPlace = bio.birthPlace; if (bio.eyeColor) profile.eyes = bio.eyeColor; if (bio.hairColor) profile.hair = bio.hairColor; if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.bust) profile.bust = bio?.bust || ''; if (bio.cup) profile.cup = bio.cup; if (bio.waist) profile.waist = Number(bio.waist); if (bio.hip) profile.hip = Number(bio.hip); if (bio.height) profile.height = Number(bio.height); if (bio.weight) profile.weight = Number(bio.weight); if (bio.boobs) { if (bio.boobs?.toLowerCase().indexOf('fake') !== -1) { profile.naturalBoobs = false; } if (bio.boobs.toLowerCase().indexOf('real') !== -1) { profile.naturalBoobs = true; } } if (bio.tattoos) { if (bio.tattoos.toLowerCase() === 'none') { profile.hasTattoos = false; } else { profile.hasTattoos = true; profile.tattoos = bio.tattoos; } } if (bio.piercings) { if (bio.piercings.toLowerCase() === 'none') { profile.hasPiercings = false; } else { profile.hasPiercings = true; profile.piercings = bio.piercings; } } if (bio.aka) profile.aliases = bio.aka.split(/,\s*/g); const name = document.querySelector('.container h1').textContent.trim(); if (actorName !== name) { profile.aliasFor = name; } const avatar = document.querySelector('#headshot img').src; profile.avatar = { src: `${avatar}`, credit: 'Iafd' }; profile.gender = actorGender || 'female'; return profile; } function scrapeSearch(html, actorGender) { const { document } = new JSDOM(html).window; if (actorGender === 'male') { return document.querySelector('#tblMal a>img')?.parentElement?.href || null; } if (actorGender === 'female') { return document.querySelector('#tblFem a>img')?.parentElement?.href || null; } return 
document.querySelector('#tblFem a>img')?.parentElement?.href || document.querySelector('#tblMal a>img')?.parentElement?.href ||null; } async function fetchProfile(actor) { const actorName = actor.name; let actorGender = actor.gender; const searchRes = await http.get(`https://www.iafd.com/results.asp?searchtype=comprehensive&searchstring=${actorName}`); if (searchRes.statusCode === 200) { const actorPath = scrapeSearch(searchRes.body.toString(), actorGender); if (actorPath) { if (actorPath.indexOf('gender=m') !== -1) { actorGender = 'male'; } const actorRes = await http.get(`https://www.iafd.com${actorPath}`); if (actorRes.statusCode === 200) { const result = scrapeProfile(actorRes.body.toString(), actorName, actorGender); if (result.name === actorName) { logger.verbose(result); return result; } } } } return null; } module.exports = { fetchProfile, }; ```
Ghost changed title from Add iafd as scrapper to Add iafd as scraper 2021-02-11 23:05:51 +00:00
Sign in to join this conversation.
No Milestone
No Assignees
1 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: DebaucheryLibrarian/traxxx#35
No description provided.