Moved PurgatoryX to Radical API scraper.
This commit is contained in:
@@ -4,62 +4,28 @@ const unprint = require('unprint');
|
||||
const mime = require('mime');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
const teaserOrder = ['large', 'small', 'mobile', 'mp4', 'jpg'];
|
||||
|
||||
function scrapeSceneMetadata(data, channel) {
|
||||
/**
 * Pick the URL path segment under which a scene is addressed.
 *
 * @param {object} data - Raw scene record; only `is_published` is read.
 * @param {object} [parameters] - Channel parameters; `upcoming` and `videos` are read when present.
 * @returns {string} `parameters.upcoming` when the scene has `is_published === 0` and an
 *   upcoming path is configured, otherwise `parameters.videos` when set, otherwise `'videos'`.
 */
function getVideoPath(data, parameters) {
  // Optional chaining keeps this helper usable when a caller has no channel parameters at all.
  if (data.is_published === 0 && parameters?.upcoming) {
    return parameters.upcoming;
  }

  // `||` (not `??`) on purpose: an empty-string path falls back to the default, as before.
  return parameters?.videos || 'videos';
}
|
||||
|
||||
function scrapeScene(data, channel, parameters) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = data.id;
|
||||
release.url = `${channel.url}/tour/videos/${data.id}/${slugify(data.title, '-', { removePunctuation: true })}`;
|
||||
|
||||
release.title = data.title;
|
||||
release.description = data.description;
|
||||
|
||||
release.date = new Date(data.release_date);
|
||||
release.duration = data.seconds_duration || qu.durationToSeconds(data.videos_duration);
|
||||
|
||||
release.actors = data.models.map((model) => ({
|
||||
entryId: model.id,
|
||||
name: model.name,
|
||||
gender: model.gender,
|
||||
avatar: model.thumb,
|
||||
url: `${channel.url}/tour/models/${model.id}/${slugify(model.name, '-', { removePunctuation: true })}`,
|
||||
}));
|
||||
|
||||
release.poster = data.trailer?.poster || [data.thumb?.replace('mobile.jpg', '.jpg'), data.thumb];
|
||||
release.photos = [
|
||||
data.extra_thumbs?.find((url) => /portrait1.jpg/.test(url)),
|
||||
data.extra_thumbs?.find((url) => /scene.jpg/.test(url)),
|
||||
data.extra_thumbs?.find((url) => /portrait2.jpg/.test(url)),
|
||||
]; // ordered by chronology: portrait1.jpg and scene.jpg are usually pre-shoot poses, portrait2.jpg is the cumshot aftermath
|
||||
|
||||
release.trailer = data.trailer && {
|
||||
src: data.trailer.src,
|
||||
type: data.trailer.type,
|
||||
};
|
||||
|
||||
release.teaser = data.special_thumbs;
|
||||
|
||||
release.tags = [].concat(data.tags?.map((tag) => tag.name));
|
||||
release.qualities = data.downloads && Object.values(data.downloads)?.map((download) => download.meta_data.height);
|
||||
release.stars = data.rating;
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Run `scrapeSceneMetadata` over a list of raw scene records.
 *
 * @param {Array<object>} scenes - Raw scene records.
 * @param {object} channel - Channel passed through to `scrapeSceneMetadata`.
 * @returns {Array<object>} One scraped release per input record, in input order.
 */
function scrapeAllMetadata(scenes, channel) {
  const toRelease = (sceneData) => scrapeSceneMetadata(sceneData, channel);

  return scenes.map(toRelease);
}
|
||||
|
||||
function scrapeSceneApi(data, channel, parameters) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = data.id;
|
||||
release.url = `${channel.url}/${parameters.videos || 'videos'}/${data.slug}`;
|
||||
release.url = `${channel.url}/${getVideoPath(data, parameters)}/${data.slug}`;
|
||||
|
||||
release.title = data.title;
|
||||
release.description = data.description;
|
||||
@@ -75,6 +41,7 @@ function scrapeSceneApi(data, channel, parameters) {
|
||||
return {
|
||||
name: actor.name,
|
||||
avatar: actor.thumb,
|
||||
url: actor.slug && `${channel.url}/models/${actor.slug}`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -119,64 +86,8 @@ function scrapeSceneApi(data, channel, parameters) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Run `scrapeSceneApi` over a list of raw scene records.
 *
 * @param {Array<object>} scenes - Raw scene records.
 * @param {object} channel - Channel passed through to `scrapeSceneApi`.
 * @param {object} parameters - Channel parameters passed through to `scrapeSceneApi`.
 * @returns {Array<object>} One scraped release per input record, in input order.
 */
function scrapeAllApi(scenes, channel, parameters) {
  const toRelease = (sceneData) => scrapeSceneApi(sceneData, channel, parameters);

  return scenes.map(toRelease);
}
|
||||
|
||||
/**
 * Build a profile object from a model record.
 *
 * Most fields are read from `data.attributes.<name>.value`; a missing attribute
 * yields `undefined` for that field before any helper is applied.
 *
 * @param {object} data - Model record; `id`, `name`, `gender`, `thumb`, `publish_date` and `attributes` are read.
 * @param {object} channel - Channel; only `url` is read, to build the profile URL.
 * @returns {object} Profile with entryId, url, description, dateOfBirth, gender, age, birthPlace,
 *   measurements, height, weight, eyes, hairColor, avatar and date.
 */
function scrapeProfileMetadata(data, channel) {
  const { attributes } = data;

  // Properties are listed in the same order the values were originally assigned.
  return {
    entryId: data.id,
    url: `${channel.url}/tour/models/${data.id}/${slugify(data.name, '-', { removePunctuation: true })}`,

    description: attributes.bio?.value,
    dateOfBirth: qu.parseDate(attributes.birthdate?.value, 'YYYY-MM-DD'),
    gender: data.gender,
    age: attributes.age?.value,
    birthPlace: attributes.born?.value,

    measurements: attributes.measurements?.value,
    // Raw strings are handed to the project's conversion helpers as-is.
    height: feetInchesToCm(attributes.height?.value),
    weight: lbsToKg(attributes.weight?.value),

    eyes: attributes.eyes?.value,
    hairColor: attributes.hair?.value,

    avatar: data.thumb,
    date: new Date(data.publish_date),
  };
}
|
||||
|
||||
function scrapeProfileApi(data, channel, scenes, parameters) {
|
||||
const profile = {};
|
||||
const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); // keys are mixed upper and lowercase
|
||||
|
||||
profile.entryId = bio.id;
|
||||
|
||||
profile.description = bio.bio;
|
||||
|
||||
profile.gender = bio.gender;
|
||||
|
||||
profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
|
||||
profile.birthPlace = bio.born;
|
||||
profile.age = bio.age;
|
||||
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
profile.height = feetInchesToCm(bio.height);
|
||||
profile.weight = lbsToKg(bio.weight);
|
||||
|
||||
profile.eyes = bio.eyes;
|
||||
profile.hairColor = bio.hair;
|
||||
|
||||
profile.avatar = data.thumb;
|
||||
|
||||
if (scenes) {
|
||||
profile.scenes = scrapeAllApi(scenes, channel, parameters);
|
||||
}
|
||||
|
||||
return profile;
|
||||
/**
 * Run `scrapeScene` over a list of raw scene records.
 *
 * @param {Array<object>} scenes - Raw scene records.
 * @param {object} channel - Channel passed through to `scrapeScene`.
 * @param {object} parameters - Channel parameters passed through to `scrapeScene`.
 * @returns {Array<object>} One scraped release per input record, in input order.
 */
function scrapeAll(scenes, channel, parameters) {
  const toRelease = (sceneData) => scrapeScene(sceneData, channel, parameters);

  return scenes.map(toRelease);
}
|
||||
|
||||
async function fetchEndpoint(channel, parameters) {
|
||||
@@ -194,7 +105,7 @@ async function fetchEndpoint(channel, parameters) {
|
||||
return parameters.endpoint;
|
||||
}
|
||||
|
||||
async function fetchLatestApi(channel, page, { parameters }) {
|
||||
async function fetchLatest(channel, page, { parameters }) {
|
||||
const endpoint = await fetchEndpoint(channel, parameters);
|
||||
|
||||
if (!endpoint) {
|
||||
@@ -208,86 +119,25 @@ async function fetchLatestApi(channel, page, { parameters }) {
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.ok && res.body.pageProps?.contents?.data) {
|
||||
return scrapeAllApi(res.body.pageProps.contents.data, channel, parameters);
|
||||
return scrapeAll(res.body.pageProps.contents.data, channel, parameters);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one scene through the site's `_next/data` JSON route and scrape it.
 *
 * @param {string} url - Scene page URL; its last path segment is used as the slug.
 * @param {object} channel - Channel; `url` is read to build the request URL.
 * @param {object} baseScene - Not used by this function.
 * @param {object} options
 * @param {object} options.parameters - Channel parameters; `videos` selects the path segment
 *   (default `'videos'`), and the object is forwarded to `fetchEndpoint`.
 * @returns {Promise<object|number>} The scraped release when the response is OK and carries
 *   `pageProps.content`, otherwise the response status.
 */
async function fetchSceneApi(url, channel, baseScene, { parameters }) {
  const slug = new URL(url).pathname.split('/').at(-1);
  // fetchEndpoint is declared as (channel, parameters) and reads parameters.endpoint;
  // omitting the second argument would make that read dereference undefined.
  const endpoint = await fetchEndpoint(channel, parameters);
  const videoPath = parameters.videos || 'videos';

  const res = await http.get(`${channel.url}/_next/data/${endpoint}/${videoPath}/${slug}.json?slug=${slug}`);

  if (res.ok && res.body.pageProps?.content) {
    return scrapeSceneApi(res.body.pageProps.content, channel, parameters);
  }

  return res.status;
}
|
||||
|
||||
/**
 * Fetch a model profile through the site's `_next/data` JSON route and scrape it.
 *
 * @param {object} actor - Actor; `slug` is read to build the request URL.
 * @param {object} context
 * @param {object} context.channel - Channel; `url` is read to build the request URL.
 * @param {object} context.parameters - Channel parameters, forwarded to `fetchEndpoint`
 *   and `scrapeProfileApi`.
 * @returns {Promise<object|number>} The scraped profile when the response is OK and carries
 *   `pageProps.model`, otherwise the response status.
 */
async function fetchProfileApi(actor, { channel, parameters }) {
  // fetchEndpoint is declared as (channel, parameters) and reads parameters.endpoint;
  // omitting the second argument would make that read dereference undefined.
  const endpoint = await fetchEndpoint(channel, parameters);
  const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`);

  if (res.ok && res.body.pageProps?.model) {
    const { model, model_contents: modelScenes } = res.body.pageProps;

    return scrapeProfileApi(model, channel, modelScenes, parameters);
  }

  return res.status;
}
|
||||
|
||||
async function fetchLatestMetadata(channel, page = 1) {
|
||||
const url = `${channel.url}/tour/videos?page=${page}`;
|
||||
const res = await http.get(url, {
|
||||
parse: true,
|
||||
extract: {
|
||||
async function fetchUpcoming(channel, _page, { parameters }) {
|
||||
const res = await unprint.get(channel.url, {
|
||||
parser: {
|
||||
runScripts: 'dangerously',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok && res.window.__DATA__) {
|
||||
return scrapeAllMetadata(res.window.__DATA__.videos.items, channel);
|
||||
}
|
||||
|
||||
if (res.ok) {
|
||||
return res.window.__DATA__?.error || null;
|
||||
}
|
||||
const data = res.context.query.json('#__NEXT_DATA__');
|
||||
const scene = data?.props.pageProps.upcoming_scene;
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape the `__DATA__` object found on its window.
 *
 * @param {string} url - Scene page URL.
 * @param {object} channel - Channel passed through to `scrapeSceneMetadata`.
 * @returns {Promise<object|string|number|null>} The scraped release when `__DATA__.video` exists;
 *   otherwise `__DATA__.error` (or `null`) for an OK response; otherwise the response status.
 */
async function fetchSceneMetadata(url, channel) {
  const requestOptions = {
    parse: true,
    extract: {
      // NOTE(review): presumably lets the parser execute the remote page's scripts so that
      // window.__DATA__ gets populated; confirm against the http util and treat the page as untrusted.
      runScripts: 'dangerously',
    },
  };

  const res = await http.get(url, requestOptions);

  if (!res.ok) {
    return res.status;
  }

  const pageData = res.window.__DATA__;

  if (pageData?.video) {
    return scrapeSceneMetadata(pageData.video, channel);
  }

  return pageData?.error || null;
}
|
||||
|
||||
async function fetchProfileMetadata(actor, channel) {
|
||||
const res = await http.get(`${channel.url}/tour/search-preview/${actor.name}`, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
const model = res.body.models?.items.find((modelX) => slugify(modelX.name) === actor.slug);
|
||||
|
||||
if (model) {
|
||||
return scrapeProfileMetadata(model, channel);
|
||||
if (scene) {
|
||||
return scrapeScene(scene, channel, parameters);
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -296,16 +146,63 @@ async function fetchProfileMetadata(actor, channel) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one scene through the site's `_next/data` JSON route and scrape it.
 *
 * @param {string} url - Scene page URL; its last path segment is used as the slug.
 * @param {object} channel - Channel; `url` is read to build the request URL.
 * @param {object} _baseScene - Not used by this function.
 * @param {object} options
 * @param {object} options.parameters - Channel parameters; `videos` selects the path segment
 *   (default `'videos'`), and the object is forwarded to `fetchEndpoint`.
 * @returns {Promise<object|number>} The scraped release when the response is OK and carries
 *   `pageProps.content`, otherwise the response status.
 */
async function fetchScene(url, channel, _baseScene, { parameters }) {
  const slug = new URL(url).pathname.split('/').at(-1);
  // fetchEndpoint is declared as (channel, parameters) and reads parameters.endpoint;
  // omitting the second argument would make that read dereference undefined.
  const endpoint = await fetchEndpoint(channel, parameters);
  const videoPath = parameters.videos || 'videos';

  const res = await http.get(`${channel.url}/_next/data/${endpoint}/${videoPath}/${slug}.json?slug=${slug}`);

  if (res.ok && res.body.pageProps?.content) {
    return scrapeScene(res.body.pageProps.content, channel, parameters);
  }

  return res.status;
}
|
||||
|
||||
/**
 * Build a profile object from a model record, optionally attaching the model's scenes.
 *
 * @param {object} data - Model record; its keys are normalized with `slugify(key, '_')` before lookup.
 * @param {object} channel - Channel passed through to `scrapeAll`.
 * @param {Array<object>} [scenes] - Raw scene records; when truthy they are scraped into `profile.scenes`.
 * @param {object} parameters - Channel parameters passed through to `scrapeAll`.
 * @returns {object} Profile with entryId, description, gender, dateOfBirth, birthPlace, age,
 *   measurements, height, weight, eyes, hairColor, avatar and, when scenes were given, scenes.
 */
function scrapeProfile(data, channel, scenes, parameters) {
  // keys are mixed upper and lowercase
  const normalizedEntries = Object.entries(data).map(([key, value]) => [slugify(key, '_'), value]);
  const bio = Object.fromEntries(normalizedEntries);

  // Properties are listed in the same order the values were originally assigned.
  const profile = {
    entryId: bio.id,

    description: bio.bio,

    gender: bio.gender,

    dateOfBirth: unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'),
    // Some fields are looked up under two alternative key names.
    birthPlace: bio.born || bio.birthplace,
    age: bio.age,

    measurements: bio.measurements,

    height: convert(bio.height, 'cm'),
    weight: convert(bio.weight, 'lb', 'kg'),

    eyes: bio.eyes || bio.eye_color,
    hairColor: bio.hair || bio.hair_color,

    // Read from the raw record, not from the key-normalized copy.
    avatar: data.thumb,
  };

  if (scenes) {
    profile.scenes = scrapeAll(scenes, channel, parameters);
  }

  return profile;
}
|
||||
|
||||
/**
 * Fetch a model profile through the site's `_next/data` JSON route and scrape it.
 *
 * @param {object} actor - Actor; `slug` is read to build the request URL.
 * @param {object} context
 * @param {object} context.channel - Channel; `url` is read to build the request URL.
 * @param {object} context.parameters - Channel parameters, forwarded to `fetchEndpoint`
 *   and `scrapeProfile`.
 * @returns {Promise<object|number>} The scraped profile when the response is OK and carries
 *   `pageProps.model`, otherwise the response status.
 */
async function fetchProfile(actor, { channel, parameters }) {
  // fetchEndpoint is declared as (channel, parameters) and reads parameters.endpoint;
  // omitting the second argument would make that read dereference undefined.
  const endpoint = await fetchEndpoint(channel, parameters);
  const res = await http.get(`${channel.url}/_next/data/${endpoint}/models/${actor.slug}.json?slug=${actor.slug}`);

  if (res.ok && res.body.pageProps?.model) {
    const { model, model_contents: modelScenes } = res.body.pageProps;

    return scrapeProfile(model, channel, modelScenes, parameters);
  }

  return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
metadata: {
|
||||
// probably deprecated
|
||||
fetchLatest: fetchLatestMetadata,
|
||||
fetchScene: fetchSceneMetadata,
|
||||
fetchProfile: fetchProfileMetadata,
|
||||
},
|
||||
api: {
|
||||
fetchLatest: fetchLatestApi,
|
||||
fetchScene: fetchSceneApi,
|
||||
fetchProfile: fetchProfileApi,
|
||||
},
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user