diff --git a/public/img/logos/kellymadison/kellymadison.png b/public/img/logos/kellymadison/kellymadison.png new file mode 100644 index 00000000..ff73dde3 Binary files /dev/null and b/public/img/logos/kellymadison/kellymadison.png differ diff --git a/public/img/logos/kellymadison/network.png b/public/img/logos/kellymadison/network.png new file mode 100644 index 00000000..74217ad5 Binary files /dev/null and b/public/img/logos/kellymadison/network.png differ diff --git a/public/img/logos/kellymadison/pornfidelity.png b/public/img/logos/kellymadison/pornfidelity.png new file mode 100644 index 00000000..cddb25fa Binary files /dev/null and b/public/img/logos/kellymadison/pornfidelity.png differ diff --git a/public/img/logos/kellymadison/teenfidelity.png b/public/img/logos/kellymadison/teenfidelity.png new file mode 100644 index 00000000..44f8b53b Binary files /dev/null and b/public/img/logos/kellymadison/teenfidelity.png differ diff --git a/seeds/00_networks.js b/seeds/00_networks.js index 135cc90b..e737f7e7 100644 --- a/seeds/00_networks.js +++ b/seeds/00_networks.js @@ -50,6 +50,12 @@ const networks = [ name: 'Jules Jordan', url: 'https://www.julesjordan.com', }, + { + slug: 'kellymadison', + name: 'Kelly Madison Media', + url: 'https://www.kellymadison.com', + description: 'Home of Kelly Madison and Ryan Madison', + }, { slug: 'kink', name: 'Kink', diff --git a/seeds/01_sites.js b/seeds/01_sites.js index da501ca9..705df1a4 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -1048,6 +1048,28 @@ function getSites(networksMap) { parameters: JSON.stringify({ independent: true }), network_id: networksMap.julesjordan, }, + // KELLY MADISON MEDIA + { + slug: 'teenfidelity', + name: 'Teen Fidelity', + url: 'https://www.teenfidelity.com', + description: 'Home of Kelly Madison and Ryan Madison', + network_id: networksMap.kellymadison, + }, + { + slug: 'pornfidelity', + name: 'Porn Fidelity', + url: 'https://www.pornfidelity.com', + description: 'Home of Kelly Madison and 
Ryan Madison', + network_id: networksMap.kellymadison, + }, + { + slug: 'kellymadison', + name: 'Kelly Madison', + url: 'https://www.kellymadison.com', + description: 'Home of Kelly Madison and Ryan Madison', + network_id: networksMap.kellymadison, + }, // KINK { slug: 'thirtyminutesoftorment', diff --git a/src/media.js b/src/media.js index 6365ebc2..010e1a27 100644 --- a/src/media.js +++ b/src/media.js @@ -179,8 +179,6 @@ async function storePhotos(release, releaseId) { const pluckedPhotos = pluckPhotos(release.photos, release); - console.log(release.photos, pluckedPhotos); - const newPhotos = await filterSourceDuplicates(pluckedPhotos, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`); if (newPhotos.length === 0) return; @@ -201,7 +199,9 @@ async function storeTrailer(release, releaseId) { async function storeTrailer(release, releaseId) { // support scrapers supplying multiple qualities - const trailer = Array.isArray(release.trailer) ? release.trailer[0] : release.trailer; + const trailer = Array.isArray(release.trailer) + ? 
(release.trailer.find(trailerX => [1080, 720].includes(trailerX.quality)) || release.trailer[0]) + : release.trailer; if (!trailer || !trailer.src) { console.warn(`No trailer available for (${release.site.name}, ${releaseId}}) "${release.title}"`); diff --git a/src/scrape-release.js b/src/scrape-release.js index 31d5fe71..c39641a0 100644 --- a/src/scrape-release.js +++ b/src/scrape-release.js @@ -45,7 +45,7 @@ async function scrapeRelease(url, release, deep = false) { throw new Error(`The '${site.name}'-scraper cannot fetch individual releases`); } - const scene = await scraper.fetchScene(url, site); + const scene = await scraper.fetchScene(url, site, release); if (!deep && argv.save) { // don't store release when called by site scraper diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 9df808c3..f03dfbb2 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -78,6 +78,8 @@ async function deepFetchReleases(baseReleases) { deep: true, }; } catch (error) { + console.error(error); + return { ...release, deep: false, diff --git a/src/scrapers/kellymadison.js b/src/scrapers/kellymadison.js new file mode 100644 index 00000000..4fa64fbf --- /dev/null +++ b/src/scrapers/kellymadison.js @@ -0,0 +1,147 @@ +'use strict'; + +const bhttp = require('bhttp'); +const { JSDOM } = require('jsdom'); +const moment = require('moment'); + +const siteMapByKey = { + PF: 'pornfidelity', + TF: 'teenfidelity', + KM: 'kellymadison', +}; + +const siteMapBySlug = Object.entries(siteMapByKey).reduce((acc, [key, value]) => ({ ...acc, [value]: key }), {}); + +function extractTextNode(parentEl) { + return Array.from(parentEl).reduce((acc, el) => (el.nodeType === 3 ? 
`${acc}${el.textContent.trim()}` : acc), ''); +} + +function scrapeLatest(html, site) { + const { document } = new JSDOM(html).window; + + return Array.from(document.querySelectorAll('.episode'), (scene) => { + const release = { site }; + + release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim(); + + const siteId = release.shootId.match(/\w{2}/)[0]; + const siteSlug = siteMapByKey[siteId]; + + if (site.slug !== siteSlug) { + // using generic network overview, scene is not from the site we want + return null; + } + + const durationEl = scene.querySelector('.content a'); + + [release.entryId] = durationEl.href.match(/\d+$/); + release.url = `${site.url}/episodes/${release.entryId}`; + + release.title = scene.querySelector('h5 a').textContent.trim(); + + const dateEl = scene.querySelector('.card-meta .text-left').childNodes; + const dateString = extractTextNode(dateEl); + + release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate(); + release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent); + + const durationString = durationEl.textContent.match(/\d+ min/); + if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60; + + release.poster = scene.querySelector('.card-img-top').src; + release.trailer = { + src: scene.querySelector('video').src, + }; + + return release; + }).filter(scene => scene); +} + +function scrapeScene(html, url, site, shallowRelease) { + const { document } = new JSDOM(html).window; + const release = { url, site }; + + const titleEl = document.querySelector('.card-header.row h4').childNodes; + const titleString = extractTextNode(titleEl); + + if (!shallowRelease) [release.entryId] = url.match(/\d+/); + + release.title = titleString + .replace('Trailer: ', '') + .replace(/- \w+ #\d+$/, '') + .trim(); + + release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase(); + + const episode = titleString.match(/#\d+$/)[0]; + const siteKey = 
siteMapBySlug[release.channel]; + + release.shootId = `${siteKey} ${episode}`; + release.description = document.querySelector('p.card-text').textContent.trim(); + + const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes; + const dateString = extractTextNode(dateEl); + + release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate(); + release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent); + + const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent; + const durationString = durationRaw.match(/\d+:\d+/)[0]; + + release.duration = moment.duration(`00:${durationString}`).asSeconds(); + + const trailerStart = document.body.innerHTML.indexOf('player.updateSrc'); + const trailerString = document.body.innerHTML.slice(trailerStart, document.body.innerHTML.indexOf(');', trailerStart)); + + const trailers = trailerString.match(/https:\/\/.*.mp4/g); + const resolutions = trailerString.match(/res: '\d+'/g).map((res) => { + const resolution = Number(res.match(/\d+/)[0]); + + return resolution === 4000 ? 
2160 : resolution; // 4k is not 4000 pixels high + }); + + release.trailer = trailers.map((trailer, index) => ({ + src: trailer, + quality: resolutions[index], + })); + + [release.poster] = document.body.innerHTML + .match(/poster: .*\.jpg/)[0] + .match(/https:\/\/.*\.jpg/); + + if (shallowRelease) release.photos = [shallowRelease.poster]; // deep poster different from shallow poster, use shallow poster as additional photo + + return release; +} + +async function fetchLatest(site, page = 1) { + const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites + const res = await bhttp.get(url, { + headers: { + 'X-Requested-With': 'XMLHttpRequest', + }, + }); + + if (res.statusCode === 200 && res.body.status === 'success') { + return scrapeLatest(res.body.html, site); + } + + return null; +} + +async function fetchScene(url, site, shallowRelease) { + const { pathname } = new URL(url); + + const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, { + headers: { + 'X-Requested-With': 'XMLHttpRequest', + }, + }); + + return scrapeScene(res.body.toString(), url, site, shallowRelease); +} + +module.exports = { + fetchLatest, + fetchScene, +}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 77d0872b..6b26678c 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -6,6 +6,7 @@ const bangbros = require('./bangbros'); const blowpass = require('./blowpass'); const dogfart = require('./dogfart'); const evilangel = require('./evilangel'); +const kellymadison = require('./kellymadison'); const kink = require('./kink'); const mikeadriano = require('./mikeadriano'); const mofos = require('./mofos'); @@ -37,6 +38,7 @@ module.exports = { dogfartnetwork: dogfart, evilangel, julesjordan, + kellymadison, kink, legalporno, mikeadriano, diff --git a/src/tags.js b/src/tags.js index a4c95e85..6499d15d 100644 --- a/src/tags.js +++ b/src/tags.js @@ -58,7 +58,7 @@ async 
function matchTags(rawTags) { async function associateTags(release, releaseId) { if (!release.tags || release.tags.length === 0) { - console.warn(`No tags available for (${release.site.name}, ${releaseId}}) "${release.title}"`); + console.warn(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`); return; } diff --git a/src/utils/pluck-photos.js b/src/utils/pluck-photos.js index fcdbdedf..e6cb1a1c 100644 --- a/src/utils/pluck-photos.js +++ b/src/utils/pluck-photos.js @@ -5,14 +5,17 @@ const config = require('config'); // pick {photoLimit} photos evenly distributed photos from a set with {photoTotal} photos, return array of indexes starting at 1 function pluckPhotos(photos, release, specifiedLimit) { const limit = specifiedLimit || config.media.limit; - console.log(limit); + + if (photos.length <= limit) { + return photos; + } const plucked = [1] .concat( - Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit)))), + Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (photos.length / (limit - 1)))), ); - return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex]); // remove duplicates, may happen when photo total and photo limit are close + return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close } module.exports = pluckPhotos;