Added Kelly Madison Media (Teen Fidelity) release scraper.
This commit is contained in:
parent
691124fa1c
commit
85e55eebaf
Binary file not shown.
After Width: | Height: | Size: 8.6 KiB |
Binary file not shown.
After Width: | Height: | Size: 8.6 KiB |
Binary file not shown.
After Width: | Height: | Size: 6.7 KiB |
Binary file not shown.
After Width: | Height: | Size: 6.4 KiB |
|
@ -50,6 +50,12 @@ const networks = [
|
||||||
name: 'Jules Jordan',
|
name: 'Jules Jordan',
|
||||||
url: 'https://www.julesjordan.com',
|
url: 'https://www.julesjordan.com',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
slug: 'kellymadison',
|
||||||
|
name: 'Kelly Madison Media',
|
||||||
|
url: 'https://www.kellymadison.com',
|
||||||
|
description: 'Home of Kelly Madison and Ryan Madison',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
slug: 'kink',
|
slug: 'kink',
|
||||||
name: 'Kink',
|
name: 'Kink',
|
||||||
|
|
|
@ -1048,6 +1048,28 @@ function getSites(networksMap) {
|
||||||
parameters: JSON.stringify({ independent: true }),
|
parameters: JSON.stringify({ independent: true }),
|
||||||
network_id: networksMap.julesjordan,
|
network_id: networksMap.julesjordan,
|
||||||
},
|
},
|
||||||
|
// KELLY MADISON MEDIA
|
||||||
|
{
|
||||||
|
slug: 'teenfidelity',
|
||||||
|
name: 'Teen Fidelity',
|
||||||
|
url: 'https://www.teenfidelity.com',
|
||||||
|
description: 'Home of Kelly Madison and Ryan Madison',
|
||||||
|
network_id: networksMap.kellymadison,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'pornfidelity',
|
||||||
|
name: 'Porn Fidelity',
|
||||||
|
url: 'https://www.pornfidelity.com',
|
||||||
|
description: 'Home of Kelly Madison and Ryan Madison',
|
||||||
|
network_id: networksMap.kellymadison,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
slug: 'kellymadison',
|
||||||
|
name: 'Kelly Madison',
|
||||||
|
url: 'https://www.pornfidelity.com',
|
||||||
|
description: 'Home of Kelly Madison and Ryan Madison',
|
||||||
|
network_id: networksMap.kellymadison,
|
||||||
|
},
|
||||||
// KINK
|
// KINK
|
||||||
{
|
{
|
||||||
slug: 'thirtyminutesoftorment',
|
slug: 'thirtyminutesoftorment',
|
||||||
|
|
|
@ -179,8 +179,6 @@ async function storePhotos(release, releaseId) {
|
||||||
|
|
||||||
const pluckedPhotos = pluckPhotos(release.photos, release);
|
const pluckedPhotos = pluckPhotos(release.photos, release);
|
||||||
|
|
||||||
console.log(release.photos, pluckedPhotos);
|
|
||||||
|
|
||||||
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
|
|
||||||
if (newPhotos.length === 0) return;
|
if (newPhotos.length === 0) return;
|
||||||
|
@ -201,7 +199,9 @@ async function storePhotos(release, releaseId) {
|
||||||
|
|
||||||
async function storeTrailer(release, releaseId) {
|
async function storeTrailer(release, releaseId) {
|
||||||
// support scrapers supplying multiple qualities
|
// support scrapers supplying multiple qualities
|
||||||
const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer;
|
// pick the first 1080p or 720p entry when an array of qualities is supplied, fall back to the first entry otherwise;
// the fallback must sit outside the find() callback, inside it every entry matches and the quality check is ignored
const trailer = Array.isArray(release.trailer)
    ? (release.trailer.find(trailerX => [1080, 720].includes(trailerX.quality)) || release.trailer[0])
    : release.trailer;
|
||||||
|
|
||||||
if (!trailer || !trailer.src) {
|
if (!trailer || !trailer.src) {
|
||||||
console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
|
console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
|
||||||
|
|
|
@ -45,7 +45,7 @@ async function scrapeRelease(url, release, deep = false) {
|
||||||
throw new Error(`The '${site.name}'-scraper cannot fetch individual releases`);
|
throw new Error(`The '${site.name}'-scraper cannot fetch individual releases`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const scene = await scraper.fetchScene(url, site);
|
const scene = await scraper.fetchScene(url, site, release);
|
||||||
|
|
||||||
if (!deep && argv.save) {
|
if (!deep && argv.save) {
|
||||||
// don't store release when called by site scraper
|
// don't store release when called by site scraper
|
||||||
|
|
|
@ -78,6 +78,8 @@ async function deepFetchReleases(baseReleases) {
|
||||||
deep: true,
|
deep: true,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
console.error(error);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...release,
|
...release,
|
||||||
deep: false,
|
deep: false,
|
||||||
|
|
|
@ -0,0 +1,147 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const bhttp = require('bhttp');
|
||||||
|
const { JSDOM } = require('jsdom');
|
||||||
|
const moment = require('moment');
|
||||||
|
|
||||||
|
// two-letter shoot ID prefix => slug of the site the scene belongs to
const siteMapByKey = {
    PF: 'pornfidelity',
    TF: 'teenfidelity',
    KM: 'kellymadison',
};

// reverse lookup, site slug => two-letter shoot ID prefix
const siteMapBySlug = {};

Object.keys(siteMapByKey).forEach((siteKey) => {
    siteMapBySlug[siteMapByKey[siteKey]] = siteKey;
});
|
||||||
|
|
||||||
|
/**
 * Join the trimmed text of every text node in a node collection, ignoring all other node types.
 *
 * @param {ArrayLike<Node>} parentEl - node collection; callers in this file pass an element's childNodes
 * @returns {string} concatenated text, or an empty string when the collection holds no text nodes
 */
function extractTextNode(parentEl) {
    return Array.from(parentEl)
        .filter(node => node.nodeType === 3) // 3 is the DOM node type for text nodes
        .map(node => node.textContent.trim())
        .join('');
}
|
||||||
|
|
||||||
|
/**
 * Scrape the network-wide episode overview and keep the scenes that belong to the requested site.
 *
 * @param {string} html - overview markup containing `.episode` cards
 * @param {Object} site - site being scraped; `slug` selects the scenes, `url` prefixes the release URLs
 * @returns {Array<Object>} one release per matching card
 */
function scrapeLatest(html, site) {
    const { document } = new JSDOM(html).window;
    const cards = Array.from(document.querySelectorAll('.episode'));

    const releases = cards.map((card) => {
        const shootId = card.querySelector('.card-meta .text-right').textContent.trim();
        const [siteKey] = shootId.match(/\w{2}/);

        if (siteMapByKey[siteKey] !== site.slug) {
            // using generic network overview, scene is not from the site we want
            return null;
        }

        // this link carries both the entry ID (trailing digits of its href) and the duration (its text)
        const linkEl = card.querySelector('.content a');
        const [entryId] = linkEl.href.match(/\d+$/);

        const title = card.querySelector('h5 a').textContent.trim();
        const dateText = extractTextNode(card.querySelector('.card-meta .text-left').childNodes);

        const release = {
            site,
            shootId,
            entryId,
            url: `${site.url}/episodes/${entryId}`,
            title,
            date: moment.utc(dateText, ['MMM D', 'MMM D, YYYY']).toDate(),
            actors: Array.from(card.querySelectorAll('.models a'), actorEl => actorEl.textContent),
        };

        const minutesMatch = linkEl.textContent.match(/\d+ min/);

        if (minutesMatch) {
            // overview lists whole minutes, releases store seconds
            release.duration = Number(minutesMatch[0].match(/\d+/)[0]) * 60;
        }

        release.poster = card.querySelector('.card-img-top').src;
        release.trailer = {
            src: card.querySelector('video').src,
        };

        return release;
    });

    return releases.filter(Boolean);
}
|
||||||
|
|
||||||
|
/**
 * Extract the trailer sources handed to `player.updateSrc(...)` in the page markup.
 *
 * @param {string} html - serialized page body
 * @returns {Array<{src: string, quality: number}>} one entry per .mp4 source, paired by position with the
 *   `res: '...'` values (quality is undefined when there is no matching value); empty when the player setup is absent
 */
function extractTrailers(html) {
    const trailerStart = html.indexOf('player.updateSrc');

    if (trailerStart === -1) {
        return [];
    }

    const trailerEnd = html.indexOf(');', trailerStart);
    const trailerString = html.slice(trailerStart, trailerEnd === -1 ? html.length : trailerEnd);

    // stop at quotes and whitespace and escape the dot, so a match can never run on into a neighbouring value such as 'video/mp4'
    const sources = trailerString.match(/https:\/\/[^'"\s]+\.mp4/g) || [];

    const resolutions = (trailerString.match(/res: '\d+'/g) || []).map((res) => {
        const resolution = Number(res.match(/\d+/)[0]);

        return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
    });

    return sources.map((src, index) => ({
        src,
        quality: resolutions[index],
    }));
}

/**
 * Extract the poster URL from the `poster: ...` player option in the page markup.
 *
 * @param {string} html - serialized page body
 * @returns {string|null} poster URL, or null when the page has no .jpg poster option
 */
function extractPoster(html) {
    const posterOption = html.match(/poster: .*\.jpg/);
    const posterUrl = posterOption && posterOption[0].match(/https:\/\/.*\.jpg/);

    return posterUrl ? posterUrl[0] : null;
}

/**
 * Scrape a single episode page into a release.
 *
 * @param {string} html - episode page markup
 * @param {string} url - URL of the episode, stored on the release and used for the entry ID
 * @param {Object} site - site the release is attributed to
 * @param {Object} [shallowRelease] - release scraped earlier from the overview, when available
 * @returns {Object} release with title, channel, shoot ID, description, date, actors and duration, plus
 *   trailer, poster and photos when the page and shallow release provide them
 * @throws {TypeError} when the title, description, date or duration markup is missing
 */
function scrapeScene(html, url, site, shallowRelease) {
    const { document } = new JSDOM(html).window;
    const release = { url, site };

    const titleEl = document.querySelector('.card-header.row h4').childNodes;
    const titleString = extractTextNode(titleEl);

    // NOTE(review): presumably the caller keeps the entry ID of the shallow release - confirm against scrapeRelease
    if (!shallowRelease) [release.entryId] = url.match(/\d+/);

    // the heading reads "Trailer: <title> - <channel> #<episode>"
    release.title = titleString
        .replace('Trailer: ', '')
        .replace(/- \w+ #\d+$/, '')
        .trim();

    release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();

    const episode = titleString.match(/#\d+$/)[0];
    const siteKey = siteMapBySlug[release.channel];

    release.shootId = `${siteKey} ${episode}`;
    release.description = document.querySelector('p.card-text').textContent.trim();

    const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
    const dateString = extractTextNode(dateEl);

    release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
    release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

    const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
    const durationString = durationRaw.match(/\d+:\d+/)[0];

    // the page shows minutes:seconds, prefix the hours so moment does not read it as hours:minutes
    release.duration = moment.duration(`00:${durationString}`).asSeconds();

    // serialize the body once, both the trailer and the poster are read from the inline player script
    const bodyHtml = document.body.innerHTML;

    // trailer and poster are optional extras, a page without them should not discard the rest of the release
    const trailers = extractTrailers(bodyHtml);
    if (trailers.length > 0) release.trailer = trailers;

    const poster = extractPoster(bodyHtml);
    if (poster) release.poster = poster;

    // deep poster different from shallow poster, use shallow poster as additional photo
    if (shallowRelease && shallowRelease.poster) release.photos = [shallowRelease.poster];

    return release;
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the network-wide episode overview and scrape the releases of the given site.
 *
 * @param {Object} site - site whose releases should be returned
 * @param {number} [page=1] - overview page to request
 * @returns {Promise<Array<Object>|null>} scraped releases, or null when the response is not a 200 with status 'success'
 */
async function fetchLatest(site, page = 1) {
    // TLS issues with teenfidelity.com, same overview on all sites
    const overviewUrl = `https://kellymadison.com/episodes/search?page=${page}`;
    const headers = { 'X-Requested-With': 'XMLHttpRequest' };

    const { statusCode, body } = await bhttp.get(overviewUrl, { headers });

    if (statusCode !== 200 || body.status !== 'success') {
        return null;
    }

    return scrapeLatest(body.html, site);
}
|
||||||
|
|
||||||
|
/**
 * Fetch an episode page and scrape it into a release.
 *
 * Only the path of the given URL is used; the page is always requested from www.kellymadison.com.
 *
 * @param {string} url - episode URL on any of the network's sites
 * @param {Object} site - site the release is attributed to
 * @param {Object} [shallowRelease] - release scraped earlier from the overview, handed on to the scene scraper
 * @returns {Promise<Object>} scraped release
 */
async function fetchScene(url, site, shallowRelease) {
    const scenePath = new URL(url).pathname;
    const requestOptions = {
        headers: { 'X-Requested-With': 'XMLHttpRequest' },
    };

    const res = await bhttp.get(`https://www.kellymadison.com${scenePath}`, requestOptions);
    const html = res.body.toString();

    return scrapeScene(html, url, site, shallowRelease);
}
|
||||||
|
|
||||||
|
// scraper interface: overview scraping and single scene scraping
module.exports = { fetchLatest, fetchScene };
|
|
@ -6,6 +6,7 @@ const bangbros = require('./bangbros');
|
||||||
const blowpass = require('./blowpass');
|
const blowpass = require('./blowpass');
|
||||||
const dogfart = require('./dogfart');
|
const dogfart = require('./dogfart');
|
||||||
const evilangel = require('./evilangel');
|
const evilangel = require('./evilangel');
|
||||||
|
const kellymadison = require('./kellymadison');
|
||||||
const kink = require('./kink');
|
const kink = require('./kink');
|
||||||
const mikeadriano = require('./mikeadriano');
|
const mikeadriano = require('./mikeadriano');
|
||||||
const mofos = require('./mofos');
|
const mofos = require('./mofos');
|
||||||
|
@ -37,6 +38,7 @@ module.exports = {
|
||||||
dogfartnetwork: dogfart,
|
dogfartnetwork: dogfart,
|
||||||
evilangel,
|
evilangel,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
|
kellymadison,
|
||||||
kink,
|
kink,
|
||||||
legalporno,
|
legalporno,
|
||||||
mikeadriano,
|
mikeadriano,
|
||||||
|
|
|
@ -58,7 +58,7 @@ async function matchTags(rawTags) {
|
||||||
|
|
||||||
async function associateTags(release, releaseId) {
|
async function associateTags(release, releaseId) {
|
||||||
if (!release.tags || release.tags.length === 0) {
|
if (!release.tags || release.tags.length === 0) {
|
||||||
console.warn(`No tags available for (${release.site.name}, ${releaseId}}) "${release.title}"`);
|
console.warn(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,14 +5,17 @@ const config = require('config');
|
||||||
/**
 * Pick at most {limit} photos, evenly distributed over the set and always including the first and last photo.
 *
 * @param {Array} photos - full set of photos, returned as-is when it already fits within the limit
 * @param {Object} release - not used by the selection, kept so existing callers keep working
 * @param {number} [specifiedLimit] - maximum number of photos, falls back to config.media.limit
 * @returns {Array} selected photos in their original order
 */
function pluckPhotos(photos, release, specifiedLimit) {
    const limit = specifiedLimit || config.media.limit;

    if (photos.length <= limit) {
        return photos;
    }

    // 1-based positions: the first photo, followed by limit - 1 evenly spaced positions of which the last is the final photo;
    // generating {limit} extra positions on top of the first would return limit + 1 photos
    const plucked = [1]
        .concat(
            Array.from({ length: limit - 1 }, (value, index) => Math.round(((index + 1) * photos.length) / (limit - 1))),
        );

    // remove duplicates, may happen when photo total and photo limit are close
    return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]);
}
|
||||||
|
|
||||||
module.exports = pluckPhotos;
|
module.exports = pluckPhotos;
|
||||||
|
|
Loading…
Reference in New Issue