Updated Mike Adriano for JS-only sites.
This commit is contained in:
parent
3f8714003c
commit
bf4beafb96
|
|
@ -2,150 +2,86 @@
|
||||||
|
|
||||||
const unprint = require('unprint');
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const http = require('../utils/http');
|
|
||||||
const { convert } = require('../utils/convert');
|
const { convert } = require('../utils/convert');
|
||||||
|
|
||||||
function scrapeAll(scenes, channel) {
|
function scrapeScene(data, channel) {
|
||||||
return scenes.map(({ query }) => {
|
|
||||||
const release = {};
|
|
||||||
|
|
||||||
release.title = query.content('h3.title a, .content-title-wrap a');
|
|
||||||
release.url = query.url('h3.title a, h1.title a, .content-title-wrap a', { origin: channel.url });
|
|
||||||
|
|
||||||
const pathname = new URL(release.url).pathname;
|
|
||||||
|
|
||||||
release.entryId = pathname.match(/\/scenes\/([\w-]+)/)?.[1];
|
|
||||||
|
|
||||||
release.description = query.content('.desc, .content-description');
|
|
||||||
release.date = query.date('.date, time, .hide', 'Do MMM YYYY', { match: null });
|
|
||||||
|
|
||||||
release.actors = query.contents('h4.models a, .content-models a');
|
|
||||||
release.duration = query.duration('//span[contains(@class, "total-time") and text()[contains(., ":")]]'); // total-time is also used for photo counts on True Anal
|
|
||||||
|
|
||||||
const [poster, ...primaryPhotos] = query.imgs('a img');
|
|
||||||
const secondaryPhotos = query.styles('.thumb-top, .thumb-bottom, .thumb-mouseover', { styleAttribute: 'background-image' }).map((style) => style.match(/url\((.*)\)/)?.[1]);
|
|
||||||
|
|
||||||
release.poster = [
|
|
||||||
poster.replace(/-c\d+x\d+/, ''),
|
|
||||||
poster,
|
|
||||||
];
|
|
||||||
|
|
||||||
release.photos = primaryPhotos.concat(secondaryPhotos);
|
|
||||||
|
|
||||||
return release;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scrapeScene({ query }, url, channel) {
|
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const pathname = new URL(url).pathname;
|
release.entryId = data.slug;
|
||||||
const data = query.json('#__NEXT_DATA__')?.props?.pageProps?.content;
|
release.url = `${channel.origin}/scenes/${data.slug}`;
|
||||||
|
|
||||||
release.entryId = data?.slug || pathname.match(/\/scenes\/([\w-]+)/)?.[1];
|
release.title = data.title;
|
||||||
|
release.description = data.description;
|
||||||
|
|
||||||
release.title = data?.title || query.content('.content-page-info .title');
|
release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss');
|
||||||
release.description = data?.description || query.content('.content-page-info .desc');
|
release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration);
|
||||||
release.date = data?.formatted_date
|
|
||||||
? unprint.extractDate(data.formatted_date, 'Do MMM YYYY', { match: null })
|
|
||||||
: query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY', { match: null });
|
|
||||||
|
|
||||||
release.actors = data?.models_thumbs?.map((actor) => ({
|
release.actors = (data.models_thumbs || data.models_slugs)?.map((model) => ({
|
||||||
name: actor.name,
|
name: model.name,
|
||||||
url: actor.slug && `${channel.url}/models/${actor.slug}`,
|
url: model.slug && `${channel.origin}/models/${model.slug}`,
|
||||||
avatar: actor.thumb,
|
avatar: model.thumb,
|
||||||
}))
|
})) || data.models;
|
||||||
|| query.elements('.content-page-info .models a').map((actorEl) => ({
|
|
||||||
name: unprint.query(actorEl),
|
|
||||||
url: unprint.url(actorEl, null),
|
|
||||||
}));
|
|
||||||
|
|
||||||
release.duration = data?.seconds_duration || query.duration('.content-page-info .total-time:last-child');
|
release.tags = data.tags;
|
||||||
|
release.qualities = data.videos && Object.values(data.videos).map((video) => video.height);
|
||||||
|
|
||||||
release.poster = [data?.trailer_screencap, data?.thumb, data?.extra_thumbails?.[0]].filter(Boolean);
|
release.poster = [
|
||||||
release.photos = data?.extra_thumbnails?.slice(1); // first photo is poster
|
data.trailer_screencap,
|
||||||
|
data.thumb,
|
||||||
|
data.extra_thumbnails?.[0],
|
||||||
|
].filter(Boolean);
|
||||||
|
|
||||||
release.trailer = data?.trailer_url || null;
|
release.photos = data.extra_thumbnails?.slice(1); // first photo is poster
|
||||||
release.caps = data?.thumbs;
|
release.caps = data.thumbs;
|
||||||
|
|
||||||
release.tags = data?.tags;
|
release.trailer = data.trailer_url || null; // empty string if missing
|
||||||
|
|
||||||
release.qualities = data?.videos && Object.values(data.videos).map((video) => video.height);
|
// photo count / photos duration isn't reliable, exactly 1000 for most All Anal scenes
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatestContent(url, parameters) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
if (parameters.useBrowser) {
|
|
||||||
const res = await http.get(url, {
|
|
||||||
bypassBrowser: 'shared',
|
|
||||||
bypass: {
|
|
||||||
evaluate: async () => {
|
|
||||||
// images lazy loaded by JS, gradually scroll through page
|
|
||||||
return Array.from(this.document.querySelectorAll('.content-item ')).reduce(async (chain, el) => {
|
|
||||||
await chain;
|
|
||||||
|
|
||||||
return new Promise((resolve) => {
|
|
||||||
el.scrollIntoView();
|
|
||||||
setTimeout(resolve, 20);
|
|
||||||
});
|
|
||||||
}, Promise.resolve());
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res.statusCode !== 200) {
|
|
||||||
return {
|
|
||||||
ok: false,
|
|
||||||
status: res.statusCode,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const context = unprint.init(res.body);
|
|
||||||
|
|
||||||
return {
|
|
||||||
ok: true,
|
|
||||||
status: res.statusCode,
|
|
||||||
context,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const res = await unprint.get(url);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchLatest(channel, page = 1, { parameters }) {
|
|
||||||
const url = `${channel.url}/scenes?page=${page}`;
|
const url = `${channel.url}/scenes?page=${page}`;
|
||||||
const res = await fetchLatestContent(url, parameters);
|
const res = await unprint.get(url);
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (res.context.query.exists('a[href*="stackpath.com"]')) {
|
if (res.context.query.exists('a[href*="stackpath.com"]')) {
|
||||||
throw new Error('URL blocked by StackPath');
|
throw new Error('URL blocked by StackPath');
|
||||||
}
|
}
|
||||||
|
|
||||||
return scrapeAll(unprint.initAll(res.context.query.all('.content-item-large, .content-item, .content-border')), channel);
|
const scenes = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.contents.data;
|
||||||
|
|
||||||
|
if (scenes) {
|
||||||
|
return scenes.map((scene) => scrapeScene(scene, channel));
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, channel) {
|
async function fetchScene(url, channel, baseRelease) {
|
||||||
const cookieJar = http.cookieJar();
|
if (baseRelease.entryId) {
|
||||||
const session = http.session({ cookieJar });
|
// deep data identical to base data
|
||||||
|
return baseRelease;
|
||||||
|
}
|
||||||
|
|
||||||
const res = await http.get(url, {
|
const res = await unprint.get(url);
|
||||||
session,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const context = unprint.init(res.body);
|
if (res.context.query.exists('a[href*="stackpath.com"]')) {
|
||||||
|
|
||||||
if (context.query.exists('a[href*="stackpath.com"]')) {
|
|
||||||
throw new Error('URL blocked by StackPath');
|
throw new Error('URL blocked by StackPath');
|
||||||
}
|
}
|
||||||
|
|
||||||
return scrapeScene(context, url, channel);
|
const scene = res.context.query.json('#__NEXT_DATA__')?.props.pageProps.content;
|
||||||
|
|
||||||
|
if (scene) {
|
||||||
|
return scrapeScene(scene, channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue