From bf4beafb96d741da84eb87c71725354d89d62c2b Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 1 Feb 2026 04:20:16 +0100 Subject: [PATCH] Updated Mike Adriano for JS-only sites. --- src/scrapers/mikeadriano.js | 156 +++++++++++------------------------- 1 file changed, 46 insertions(+), 110 deletions(-) diff --git a/src/scrapers/mikeadriano.js b/src/scrapers/mikeadriano.js index 45610bf6..d00c29f3 100755 --- a/src/scrapers/mikeadriano.js +++ b/src/scrapers/mikeadriano.js @@ -2,150 +2,86 @@ const unprint = require('unprint'); -const http = require('../utils/http'); const { convert } = require('../utils/convert'); -function scrapeAll(scenes, channel) { - return scenes.map(({ query }) => { - const release = {}; - - release.title = query.content('h3.title a, .content-title-wrap a'); - release.url = query.url('h3.title a, h1.title a, .content-title-wrap a', { origin: channel.url }); - - const pathname = new URL(release.url).pathname; - - release.entryId = pathname.match(/\/scenes\/([\w-]+)/)?.[1]; - - release.description = query.content('.desc, .content-description'); - release.date = query.date('.date, time, .hide', 'Do MMM YYYY', { match: null }); - - release.actors = query.contents('h4.models a, .content-models a'); - release.duration = query.duration('//span[contains(@class, "total-time") and text()[contains(., ":")]]'); // total-time is also used for photo counts on True Anal - - const [poster, ...primaryPhotos] = query.imgs('a img'); - const secondaryPhotos = query.styles('.thumb-top, .thumb-bottom, .thumb-mouseover', { styleAttribute: 'background-image' }).map((style) => style.match(/url\((.*)\)/)?.[1]); - - release.poster = [ - poster.replace(/-c\d+x\d+/, ''), - poster, - ]; - - release.photos = primaryPhotos.concat(secondaryPhotos); - - return release; - }); -} - -async function scrapeScene({ query }, url, channel) { +function scrapeScene(data, channel) { const release = {}; - const pathname = new URL(url).pathname; - const data = query.json('#__NEXT_DATA__')?.props?.pageProps?.content; + release.entryId = data.slug; + release.url = `${channel.origin}/scenes/${data.slug}`; - release.entryId = data?.slug || pathname.match(/\/scenes\/([\w-]+)/)?.[1]; + release.title = data.title; + release.description = data.description; - release.title = data?.title || query.content('.content-page-info .title'); - release.description = data?.description || query.content('.content-page-info .desc'); - release.date = data?.formatted_date - ? unprint.extractDate(data.formatted_date, 'Do MMM YYYY', { match: null }) - : query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY', { match: null }); + release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss'); + release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration); - release.actors = data?.models_thumbs?.map((actor) => ({ - name: actor.name, - url: actor.slug && `${channel.url}/models/${actor.slug}`, - avatar: actor.thumb, - })) - || query.elements('.content-page-info .models a').map((actorEl) => ({ - name: unprint.query(actorEl), - url: unprint.url(actorEl, null), - })); + release.actors = (data.models_thumbs || data.models_slugs)?.map((model) => ({ + name: model.name, + url: model.slug && `${channel.origin}/models/${model.slug}`, + avatar: model.thumb, + })) || data.models; - release.duration = data?.seconds_duration || query.duration('.content-page-info .total-time:last-child'); + release.tags = data.tags; + release.qualities = data.videos && Object.values(data.videos).map((video) => video.height); - release.poster = [data?.trailer_screencap, data?.thumb, data?.extra_thumbails?.[0]].filter(Boolean); - release.photos = data?.extra_thumbnails?.slice(1); // first photo is poster + release.poster = [ + data.trailer_screencap, + data.thumb, + data.extra_thumbnails?.[0], + ].filter(Boolean); - release.trailer = data?.trailer_url || null; - release.caps = data?.thumbs; + release.photos = data.extra_thumbnails?.slice(1); // first photo is poster + release.caps = data.thumbs; - release.tags = data?.tags; + release.trailer = data.trailer_url || null; // empty string if missing - release.qualities = data?.videos && Object.values(data.videos).map((video) => video.height); + // photo count / photos duration isn't reliable, exactly 1000 for most All Anal scenes return release; } -async function fetchLatestContent(url, parameters) { - if (parameters.useBrowser) { - const res = await http.get(url, { - bypassBrowser: 'shared', - bypass: { - evaluate: async () => { - // images lazy loaded by JS, gradually scroll through page - return Array.from(this.document.querySelectorAll('.content-item ')).reduce(async (chain, el) => { - await chain; - - return new Promise((resolve) => { - el.scrollIntoView(); - setTimeout(resolve, 20); - }); - }, Promise.resolve()); - }, - }, - }); - - if (res.statusCode !== 200) { - return { - ok: false, - status: res.statusCode, - }; - } - - const context = unprint.init(res.body); - - return { - ok: true, - status: res.statusCode, - context, - }; - } - - const res = await unprint.get(url); - - return res; -} - -async function fetchLatest(channel, page = 1, { parameters }) { +async function fetchLatest(channel, page = 1) { const url = `${channel.url}/scenes?page=${page}`; - const res = await fetchLatestContent(url, parameters); + const res = await unprint.get(url); if (res.ok) { if (res.context.query.exists('a[href*="stackpath.com"]')) { throw new Error('URL blocked by StackPath'); } - return scrapeAll(unprint.initAll(res.context.query.all('.content-item-large, .content-item, .content-border')), channel); + const scenes = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.contents.data; + + if (scenes) { + return scenes.map((scene) => scrapeScene(scene, channel)); + } + + return null; } return res.status; } -async function fetchScene(url, channel) { - const cookieJar = http.cookieJar(); - const session = http.session({ cookieJar }); +async function fetchScene(url, channel, baseRelease) { + if (baseRelease.entryId) { + // deep data identical to base data + return baseRelease; + } - const res = await http.get(url, { - session, - }); + const res = await unprint.get(url); if (res.ok) { - const context = unprint.init(res.body); - - if (context.query.exists('a[href*="stackpath.com"]')) { + if (res.context.query.exists('a[href*="stackpath.com"]')) { throw new Error('URL blocked by StackPath'); } - return scrapeScene(context, url, channel); + const scene = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.content; + + if (scene) { + return scrapeScene(scene, channel); + } + + return null; } return res.status;