Added Kelly Madison profile scraper.
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
const Promise = require('bluebird');
|
||||
const UrlPattern = require('url-pattern');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('./knex');
|
||||
const argv = require('./argv');
|
||||
@@ -49,6 +50,10 @@ async function curateActor(actor) {
|
||||
scrapedAt: actor.scraped_at,
|
||||
};
|
||||
|
||||
if (curatedActor.birthdate) {
|
||||
curatedActor.age = moment().diff(curatedActor.birthdate, 'years');
|
||||
}
|
||||
|
||||
if (actor.birth_city) curatedActor.origin.city = actor.birth_city;
|
||||
if (actor.birth_state) curatedActor.origin.state = actor.birth_state;
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('./knex');
|
||||
const argv = require('./argv');
|
||||
const whereOr = require('./utils/where-or');
|
||||
@@ -17,9 +19,14 @@ const { fetchSites, findSiteByUrl } = require('./sites');
|
||||
async function curateRelease(release) {
|
||||
const [actors, tags, media] = await Promise.all([
|
||||
knex('actors_associated')
|
||||
.select('actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'media.thumbnail as avatar')
|
||||
.select(
|
||||
'actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'actors.birthdate',
|
||||
'birth_countries.alpha2 as birth_country_alpha2', 'birth_countries.name as birth_country_name', 'birth_countries.alias as birth_country_alias',
|
||||
'media.thumbnail as avatar',
|
||||
)
|
||||
.where({ release_id: release.id })
|
||||
.leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
|
||||
.leftJoin('countries as birth_countries', 'actors.birth_country_alpha2', 'birth_countries.alpha2')
|
||||
.leftJoin('media', (builder) => {
|
||||
builder
|
||||
.on('media.target_id', 'actors.id')
|
||||
@@ -52,7 +59,21 @@ async function curateRelease(release) {
|
||||
url: release.url,
|
||||
shootId: release.shoot_id,
|
||||
entryId: release.entry_id,
|
||||
actors,
|
||||
actors: actors.map(actor => ({
|
||||
id: actor.id,
|
||||
slug: actor.slug,
|
||||
name: actor.name,
|
||||
gender: actor.gender,
|
||||
birthdate: actor.birthdate,
|
||||
age: moment().diff(actor.birthdate, 'years'),
|
||||
avatar: actor.avatar,
|
||||
origin: {
|
||||
country: {
|
||||
name: actor.birth_country_alias,
|
||||
alpha2: actor.birth_country_alpha2,
|
||||
},
|
||||
},
|
||||
})),
|
||||
director: release.director,
|
||||
tags,
|
||||
duration: release.duration,
|
||||
|
||||
@@ -4,6 +4,8 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
const siteMapByKey = {
|
||||
PF: 'pornfidelity',
|
||||
TF: 'teenfidelity',
|
||||
@@ -112,6 +114,31 @@ function scrapeScene(html, url, site, shallowRelease) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from the markup of a Kelly Madison model page.
 *
 * Bio data is read from `table.table`: first-column cells are used as labels
 * (with their final character dropped — presumably a trailing colon, confirm
 * against the live markup) and second-column cells as the matching values.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} actorName - Name copied onto the profile unchanged.
 * @returns {Object} Profile containing `name` and, when found on the page,
 *   `bust`, `waist`, `hip`, `birthPlace`, `height`, `ethnicity` and `avatar`.
 */
function scrapeProfile(html, actorName) {
  const { document } = new JSDOM(html).window;
  const profile = { name: actorName };

  // Labels and values are collected separately and paired up by position.
  const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
  const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
  const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});

  if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
  if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

  if (bio.Height) {
    // match() yields null when the cell holds no digits; skip the height in
    // that case instead of throwing while destructuring null.
    const heightParts = bio.Height.match(/\d+/g);

    if (heightParts) {
      const [feet, inches] = heightParts;

      profile.height = feetInchesToCm(feet, inches);
    }
  }

  if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

  // The first image whose source contains "model" is taken as the avatar.
  const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.match('model'));

  if (avatarEl) profile.avatar = avatarEl.src;

  return profile;
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||
const res = await bhttp.get(url, {
|
||||
@@ -139,7 +166,23 @@ async function fetchScene(url, site, shallowRelease) {
|
||||
return scrapeScene(res.body.toString(), url, site, shallowRelease);
|
||||
}
|
||||
|
||||
/**
 * Request an actor's model page from kellymadison.com and scrape it.
 *
 * The page slug is the lowercased actor name with every run of whitespace
 * replaced by a single hyphen.
 *
 * @param {string} actorName - Actor name used for the slug and the profile.
 * @returns {Promise<Object|null>} The scraped profile, or null when the
 *   response status is anything other than 200.
 */
async function fetchProfile(actorName) {
  const slug = actorName.toLowerCase().replace(/\s+/g, '-');
  const profileUrl = `https://www.kellymadison.com/models/${slug}`;
  const requestOptions = {
    headers: {
      'X-Requested-With': 'XMLHttpRequest',
    },
  };

  const response = await bhttp.get(profileUrl, requestOptions);

  if (response.statusCode !== 200) {
    return null;
  }

  const html = response.body.toString();

  return scrapeProfile(html, actorName);
}
|
||||
|
||||
// Public entry points of the Kelly Madison scraper.
module.exports = { fetchLatest, fetchProfile, fetchScene };
|
||||
|
||||
@@ -6,7 +6,6 @@ const bangbros = require('./bangbros');
|
||||
const blowpass = require('./blowpass');
|
||||
const dogfart = require('./dogfart');
|
||||
const evilangel = require('./evilangel');
|
||||
const kellymadison = require('./kellymadison');
|
||||
const kink = require('./kink');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
const mofos = require('./mofos');
|
||||
@@ -20,6 +19,7 @@ const vixen = require('./vixen');
|
||||
const ddfnetwork = require('./ddfnetwork');
|
||||
const brazzers = require('./brazzers');
|
||||
const julesjordan = require('./julesjordan');
|
||||
const kellymadison = require('./kellymadison');
|
||||
const legalporno = require('./legalporno');
|
||||
const xempire = require('./xempire');
|
||||
|
||||
@@ -55,6 +55,7 @@ module.exports = {
|
||||
xempire,
|
||||
brazzers,
|
||||
freeones,
|
||||
kellymadison,
|
||||
julesjordan,
|
||||
legalporno,
|
||||
pornhub,
|
||||
|
||||
Reference in New Issue
Block a user