Added sparse date mode. Fixed profile fetch error catching. Updated Kelly Madison scraper to use site IDs, and fixed its profile scraper.
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
const { feetInchesToCm, femaleFeetUsToEu } = require('../utils/convert');
|
||||
|
||||
const siteMapByKey = {
|
||||
PF: 'pornfidelity',
|
||||
@@ -16,14 +18,11 @@ const siteMapByKey = {
|
||||
const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {});
|
||||
|
||||
function scrapeLatest(scenes, site) {
|
||||
return scenes.reduce((acc, { query }) => {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.shootId = query.q('.card-meta .text-right, .row .text-right, .card-footer-item:last-child', true);
|
||||
|
||||
const siteId = release.shootId.match(/\d?\w{2}/)[0];
|
||||
const siteSlug = siteMapByKey[siteId];
|
||||
|
||||
const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a'));
|
||||
[release.entryId] = pathname.match(/\d+$/);
|
||||
release.url = `${site.url}${pathname}`;
|
||||
@@ -47,15 +46,19 @@ function scrapeLatest(scenes, site) {
|
||||
};
|
||||
}
|
||||
|
||||
/* using site ID, filter no longer needed
|
||||
const siteId = release.shootId.match(/\d?\w{2}/)[0];
|
||||
const siteSlug = siteMapByKey[siteId];
|
||||
|
||||
if (site.slug !== siteSlug) {
|
||||
// using generic network overview, scene is not from the site we want
|
||||
return { ...acc, unextracted: [...acc.unextracted, release] };
|
||||
}
|
||||
|
||||
return { ...acc, scenes: [...acc.scenes, release] };
|
||||
}, {
|
||||
scenes: [],
|
||||
unextracted: [],
|
||||
*/
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -114,34 +117,47 @@ async function scrapeScene({ query, html }, url, baseRelease, channel, session)
|
||||
}));
|
||||
}
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bioKeys = query.all('table.table td:nth-child(1)', true);
|
||||
const bioValues = query.all('table.table td:nth-child(2)', true);
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key.slice(0, -1)]: bioValues[index] }), {});
|
||||
const bioKeys = query.contents('table.table td:nth-child(1), table.table th');
|
||||
const bioValues = query.contents('table.table td:nth-child(2)');
|
||||
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
|
||||
if (bio.Birthplace) profile.birthPlace = bio.Birthplace;
|
||||
const bio = bioKeys.reduce((acc, key, index) => ({
|
||||
...acc,
|
||||
[slugify(key, '_')]: bioValues[index],
|
||||
}), {});
|
||||
|
||||
if (bio.Height) {
|
||||
const [feet, inches] = bio.Height.match(/\d+/g);
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
if (bio.measurements) profile.measurements = bio.measurements;
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
if (bio.shoe_size) profile.foot = femaleFeetUsToEu(bio.shoe_size);
|
||||
|
||||
if (bio.height) {
|
||||
const [feet, inches] = bio.height.match(/\d+/g);
|
||||
profile.height = feetInchesToCm(feet, inches);
|
||||
}
|
||||
|
||||
profile.avatar = query.img('img[src*="model"]');
|
||||
if (bio.birthday) {
|
||||
const [month, day] = bio.birthday.split('/');
|
||||
const birthday = new Date(Date.UTC(0, Number(month) - 1, Number(day)));
|
||||
|
||||
birthday.setUTCFullYear(0); // indicate birth year is unknown
|
||||
|
||||
profile.dateOfBirth = new Date(birthday);
|
||||
}
|
||||
|
||||
profile.avatar = query.img('img[src*="model"][src*="headshot"]');
|
||||
profile.photos = query.imgs('img[src*="model"][src*="thumb_image"], img[src*="model"][src*="bg_image"]');
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||
const url = `${channel.url}/episodes/search?page=${page}&site=${channel.parameters.siteId || ''}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||
const res = await http.get(url, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
@@ -165,16 +181,17 @@ async function fetchScene(url, channel, baseRelease) {
|
||||
return res.ok ? scrapeScene(res.item, url, baseRelease, channel, session) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
async function fetchProfile({ name: actorName }, { entity }) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
|
||||
|
||||
const res = await unprint.get(`${entity.url}/models/${actorSlug}`, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item);
|
||||
return scrapeProfile(res.context);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
@@ -265,6 +265,7 @@ const scrapers = {
|
||||
julesjordan,
|
||||
karups,
|
||||
kellymadison,
|
||||
'5kporn': kellymadison,
|
||||
killergram,
|
||||
kink,
|
||||
kinkmen: kink,
|
||||
|
||||
Reference in New Issue
Block a user