Added Kelly Madison profile scraper.

This commit is contained in:
2019-12-10 22:35:00 +01:00
parent 8802bb4317
commit b9bac6d8f9
11 changed files with 133 additions and 25 deletions

View File

@@ -2,6 +2,7 @@
const Promise = require('bluebird');
const UrlPattern = require('url-pattern');
const moment = require('moment');
const knex = require('./knex');
const argv = require('./argv');
@@ -49,6 +50,10 @@ async function curateActor(actor) {
scrapedAt: actor.scraped_at,
};
if (curatedActor.birthdate) {
curatedActor.age = moment().diff(curatedActor.birthdate, 'years');
}
if (actor.birth_city) curatedActor.origin.city = actor.birth_city;
if (actor.birth_state) curatedActor.origin.state = actor.birth_state;

View File

@@ -1,6 +1,8 @@
'use strict';
const Promise = require('bluebird');
const moment = require('moment');
const knex = require('./knex');
const argv = require('./argv');
const whereOr = require('./utils/where-or');
@@ -17,9 +19,14 @@ const { fetchSites, findSiteByUrl } = require('./sites');
async function curateRelease(release) {
const [actors, tags, media] = await Promise.all([
knex('actors_associated')
.select('actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'media.thumbnail as avatar')
.select(
'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
'media.thumbnail as avatar',
)
.where({ release_id: release.id })
.leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
.leftJoin('media', (builder) => {
builder
.on('media.target_id', 'actors.id')
@@ -52,7 +59,21 @@ async function curateRelease(release) {
url: release.url,
shootId: release.shoot_id,
entryId: release.entry_id,
actors,
actors: actors.map(actor => ({
id: actor.id,
slug: actor.slug,
name: actor.name,
gender: actor.gender,
birthdate: actor.birthdate,
age: moment().diff(actor.birthdate, 'years'),
avatar: actor.avatar,
origin: {
country: {
name: actor.birth_country_alias,
alpha2: actor.birth_country_alpha2,
},
},
})),
director: release.director,
tags,
duration: release.duration,

View File

@@ -4,6 +4,8 @@ const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const { feetInchesToCm } = require('../utils/convert');
const siteMapByKey = {
PF: 'pornfidelity',
TF: 'teenfidelity',
@@ -112,6 +114,31 @@ function scrapeScene(html, url, site, shallowRelease) {
return release;
}
/**
 * Build an actor profile from the HTML of a model page.
 *
 * The bio is read from `table.table`: the first cell of each row is used as
 * the label and the second cell as its value. Only the labels Measurements,
 * Birthplace, Height and Ethnicity are mapped onto the profile.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} actorName - Name copied onto the profile unchanged.
 * @returns {Object} Profile containing `name` and, when found on the page,
 *   `bust`, `waist`, `hip`, `birthPlace`, `height`, `ethnicity` and `avatar`.
 */
function scrapeProfile(html, actorName) {
  const { document } = new JSDOM(html).window;
  const profile = { name: actorName };

  // Labels presumably end in a colon ("Height:"); drop it together with any
  // surrounding whitespace so the label can be used as a lookup key.
  const bioKeys = Array.from(
    document.querySelectorAll('table.table td:nth-child(1)'),
    el => el.textContent.trim().replace(/:$/, ''),
  );
  const bioValues = Array.from(
    document.querySelectorAll('table.table td:nth-child(2)'),
    el => el.textContent.trim(),
  );

  // Pair every label with the value found at the same position.
  const bio = {};
  bioKeys.forEach((key, index) => {
    bio[key] = bioValues[index];
  });

  if (bio.Measurements) {
    [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
  }

  if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

  if (bio.Height) {
    // match() yields null when the cell holds no digits (e.g. "N/A"); skip
    // the height instead of throwing while destructuring null.
    const heightParts = bio.Height.match(/\d+/g);

    if (heightParts) {
      const [feet, inches] = heightParts;
      profile.height = feetInchesToCm(feet, inches);
    }
  }

  if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

  // The avatar is taken to be the first image whose source mentions "model".
  const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.includes('model'));

  if (avatarEl) profile.avatar = avatarEl.src;

  return profile;
}
async function fetchLatest(site, page = 1) {
const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
const res = await bhttp.get(url, {
@@ -139,7 +166,23 @@ async function fetchScene(url, site, shallowRelease) {
return scrapeScene(res.body.toString(), url, site, shallowRelease);
}
/**
 * Request the Kelly Madison model page for an actor and scrape it.
 *
 * @param {string} actorName - Actor name; lowercased and hyphenated to form
 *   the page slug, and passed unchanged to the scraper.
 * @returns {Promise<Object|null>} The scraped profile, or null when the
 *   response status is anything other than 200.
 */
async function fetchProfile(actorName) {
  // Runs of whitespace in the lowercased name become single hyphens.
  const slug = actorName.toLowerCase().replace(/\s+/g, '-');
  const profileUrl = `https://www.kellymadison.com/models/${slug}`;
  const requestOptions = {
    headers: {
      'X-Requested-With': 'XMLHttpRequest',
    },
  };

  const response = await bhttp.get(profileUrl, requestOptions);

  if (response.statusCode !== 200) {
    return null;
  }

  return scrapeProfile(response.body.toString(), actorName);
}
// Public scraper interface: latest-release listing, actor profile and single-scene fetchers.
module.exports = { fetchLatest, fetchProfile, fetchScene };

View File

@@ -6,7 +6,6 @@ const bangbros = require('./bangbros');
const blowpass = require('./blowpass');
const dogfart = require('./dogfart');
const evilangel = require('./evilangel');
const kellymadison = require('./kellymadison');
const kink = require('./kink');
const mikeadriano = require('./mikeadriano');
const mofos = require('./mofos');
@@ -20,6 +19,7 @@ const vixen = require('./vixen');
const ddfnetwork = require('./ddfnetwork');
const brazzers = require('./brazzers');
const julesjordan = require('./julesjordan');
const kellymadison = require('./kellymadison');
const legalporno = require('./legalporno');
const xempire = require('./xempire');
@@ -55,6 +55,7 @@ module.exports = {
xempire,
brazzers,
freeones,
kellymadison,
julesjordan,
legalporno,
pornhub,