diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 42bf306d..22528e50 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -5284,6 +5284,8 @@ const sites = [ parent: 'kellymadison', parameters: { siteId: 3, + // older scene pages are only available on PF, even though they are categorized on TS or KM + archive: 'https://www.pornfidelity.com', }, }, { @@ -5304,6 +5306,7 @@ const sites = [ parent: 'kellymadison', parameters: { siteId: 1, + archive: 'https://www.pornfidelity.com', }, }, { diff --git a/src/scrapers/kellymadison.js b/src/scrapers/kellymadison.js index 2f52bc46..f9434b03 100755 --- a/src/scrapers/kellymadison.js +++ b/src/scrapers/kellymadison.js @@ -25,13 +25,18 @@ function scrapeLatest(scenes, site) { const { pathname } = new URL(query.url('h5 a, .ep-title a, .title a')); [release.entryId] = pathname.match(/\d+$/); - release.url = `${site.url}${pathname}`; release.title = query.cnt('h5 a, .ep-title a, .title a'); release.date = query.date('.card-meta .text-left, .row .col-4:first-child, .card-footer-item:first-child', ['MMM D', 'MMM D, YYYY'], /\w+ \d+(, \w+)?/); release.actors = query.cnts('.models a, .ep-models a, a[href*="models/"]'); + // older scenes do not have a working scene page on their native site, only on Porn Fidelity + // scenes older than a year do not show a date; this is not when the URLs stop working, but it's a rough guideline + release.url = site.parameters.archive && !release.date + ? 
`${site.parameters.archive}${pathname}` + : `${site.url}${pathname}`; + release.duration = query.dur('.content a'); const duration = query.cnt('.content a, .ep-runtime strong, .subtitle:last-child a')?.match(/(\d+) min/)?.[1]; @@ -103,18 +108,21 @@ async function scrapeScene({ query, html }, url, baseRelease, channel, session) // const token = query.meta('name=_token'); // const trailerInfoUrl = `${channel.url}/episodes/trailer/sources/${release.entryId}?type=trailer`; const trailerInfoUrl = html.match(/'(http.*\/trailer\/sources.*)'/)?.[1]; - const trailerInfoRes = await http.post(trailerInfoUrl, null, { session }); - if (trailerInfoRes.ok && trailerInfoRes.body.sources?.length > 0) { - release.trailer = trailerInfoRes.body.sources.map((trailer) => ({ - src: trailer.src, - type: trailer.type, - /* unreliable, sometimes actual video is 720p - quality: trailer.res - .replace(4000, 2160) - .replace(5000, 2880), - */ - })); + if (trailerInfoUrl) { + const trailerInfoRes = await http.post(trailerInfoUrl, null, { session }); + + if (trailerInfoRes.ok && trailerInfoRes.body.sources?.length > 0) { + release.trailer = trailerInfoRes.body.sources.map((trailer) => ({ + src: trailer.src, + type: trailer.type, + /* unreliable, sometimes actual video is 720p + quality: trailer.res + .replace(4000, 2160) + .replace(5000, 2880), + */ + })); + } } return release; @@ -176,9 +184,14 @@ async function fetchScene(url, channel, baseRelease) { const res = await qu.get(url, null, { 'X-Requested-With': 'XMLHttpRequest', - }, { session }); + }, { + session, + followRedirects: false, // redirects to sign-up page if scene not found + }); - return res.ok ? scrapeScene(res.item, url, baseRelease, channel, session) : res.status; + return res.ok + ? 
scrapeScene(res.item, url, baseRelease, channel, session) + : res.status; } async function fetchProfile({ name: actorName }, { entity }) { diff --git a/src/utils/qu.js b/src/utils/qu.js index 52974914..af9d46c4 100755 --- a/src/utils/qu.js +++ b/src/utils/qu.js @@ -142,6 +142,10 @@ function all(context, selectors, attrArg, applyTrim = true) { const attr = attrArg === true ? 'textContent' : attrArg; const elements = [].concat(selectors).reduce((acc, selector) => acc || getElements(context, selector), null); + if (!Array.isArray(elements)) { + return []; + } + if (attr) { return elements.map((el) => q(el, null, attr, applyTrim)); }