Updated Radical scraper. Added town property to location resolve tool.
This commit is contained in:
parent
ccac1f96dd
commit
1ed47c3173
|
@ -1,10 +1,14 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const unprint = require('unprint');
|
||||||
|
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
|
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
|
||||||
|
|
||||||
|
const teaserOrder = ['large', 'small', 'mobile'];
|
||||||
|
|
||||||
function scrapeSceneMetadata(data, channel) {
|
function scrapeSceneMetadata(data, channel) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
|
@ -50,20 +54,49 @@ function scrapeAllMetadata(scenes, channel) {
|
||||||
return scenes.map((data) => scrapeSceneMetadata(data, channel));
|
return scenes.map((data) => scrapeSceneMetadata(data, channel));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Rank a teaser URL by the first `teaserOrder` label it contains.
 * URLs matching no label rank after every known label, so they never
 * displace a preferred teaser.
 *
 * @param {string} teaser - Teaser URL or path.
 * @returns {number} Index into `teaserOrder`, or `teaserOrder.length` when nothing matches.
 */
function getTeaserRank(teaser) {
	const rank = teaserOrder.findIndex((label) => teaser.includes(label));

	return rank === -1 ? teaserOrder.length : rank;
}

/**
 * Map a single scene object from the API response onto a release.
 *
 * @param {Object} data - Raw scene entry from the API response.
 * @param {Object} channel - Channel being scraped; `channel.url` is the base of the scene URL.
 * @returns {Object} Release populated from `data`.
 */
function scrapeSceneApi(data, channel) {
	const release = {};

	release.entryId = data.id;
	release.url = `${channel.url}/videos/${data.slug}`;

	release.title = data.title;
	release.description = data.description;

	// fall back to the formatted date when the full publish timestamp cannot be extracted
	release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY');
	release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration);

	// prefer the entries that carry an avatar, otherwise pass the models field through as-is
	release.actors = data.models_thumbs?.map((actor) => ({
		name: actor.name,
		avatar: actor.thumb,
	})) || data.models;

	release.poster = data.trailer_screencap;

	release.photos = [
		...(data.previews?.full || []),
		...(data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set
			&& !(thumbnail.includes('_scene') && release.poster?.includes('_scene')) // likely the same photo, filename may differ so cannot compare full path
			&& !(thumbnail.includes('_player') && release.poster?.includes('_player'))) || []),
	];

	release.caps = data.thumbs;

	release.trailer = data.trailer_url;

	// sort a copy: Array.prototype.sort works in place and would reorder the caller's data;
	// a missing special_thumbnails field yields an empty list instead of throwing
	release.teaser = [...(data.special_thumbnails || [])]
		.sort((teaserA, teaserB) => getTeaserRank(teaserA) - getTeaserRank(teaserB));

	release.tags = data.tags;

	release.channel = slugify(data.site, '');
	release.qualities = Object.values(data.videos || []).map((video) => video.height);

	release.photoCount = Number(data.photos_duration) || null;

	return release;
}

/**
 * Map every scene object in an API listing onto a release.
 *
 * @param {Object[]} scenes - Raw scene entries from the API response.
 * @param {Object} channel - Channel being scraped.
 * @returns {Object[]} One release per entry, in the same order.
 */
function scrapeAllApi(scenes, channel) {
	return scenes.map((data) => scrapeSceneApi(data, channel));
}
|
||||||
|
|
||||||
function scrapeProfileMetadata(data, channel) {
|
function scrapeProfileMetadata(data, channel) {
|
||||||
|
@ -91,6 +124,68 @@ function scrapeProfileMetadata(data, channel) {
|
||||||
return profile;
|
return profile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Map a model object from the API response onto a profile.
 *
 * @param {Object} data - Raw model entry from the API response.
 * @param {Object} channel - Channel being scraped, forwarded to the scene scraper.
 * @param {Object[]} [scenes] - Raw scene entries for this model; when given they are scraped into `profile.scenes`.
 * @returns {Object} Profile populated from `data`.
 */
function scrapeProfileApi(data, channel, scenes) {
	const profile = {};
	const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); // keys are mixed upper and lowercase

	profile.entryId = bio.id;

	profile.description = bio.bio;

	profile.gender = bio.gender;

	profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
	profile.birthPlace = bio.born;
	profile.age = bio.age;

	profile.measurements = bio.measurements;

	profile.height = feetInchesToCm(bio.height);
	profile.weight = lbsToKg(bio.weight);

	profile.eyes = bio.eyes;
	profile.hairColor = bio.hair;

	// keep the exact-case key first, but fall back to the case-normalized lookup
	// so a differently cased key (e.g. `Thumb`) is not silently dropped
	profile.avatar = data.thumb || bio.thumb;

	if (scenes) {
		profile.scenes = scrapeAllApi(scenes, channel);
	}

	return profile;
}
|
||||||
|
|
||||||
|
/**
 * Fetch one page of the newest scenes from the site's data endpoint.
 *
 * @param {Object} channel - Channel being scraped; `channel.url` is the request base.
 * @param {number} page - Page number to request.
 * @param {Object} context - Scraper context; `context.parameters.endpoint` is inserted into the data path.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status when no scene list is available.
 */
async function fetchLatestApi(channel, page, { parameters }) {
	const listingUrl = `${channel.url}/_next/data/${parameters.endpoint}/videos.json?order_by=publish_date&sort_by=desc&per_page=8&page=${page}`;
	const res = await http.get(listingUrl);

	// only look into the body of a successful response
	const scenes = res.ok ? res.body.pageProps?.contents?.data : null;

	if (!scenes) {
		return res.status;
	}

	return scrapeAllApi(scenes, channel);
}
|
||||||
|
|
||||||
|
/**
 * Fetch a single scene from the site's data endpoint, using the last path
 * segment of the scene URL as the slug.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {Object} channel - Channel being scraped; `channel.url` is the request base.
 * @param {Object} baseScene - Unused here; kept so the signature matches what the caller passes.
 * @param {Object} context - Scraper context; `context.parameters.endpoint` is inserted into the data path.
 * @returns {Promise<Object|number>} Scraped release, or the response status when no scene content is available.
 */
async function fetchSceneApi(url, channel, baseScene, { parameters }) {
	// drop empty segments so a trailing slash does not produce an empty slug
	const slug = new URL(url).pathname.split('/').filter(Boolean).at(-1) || '';
	const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/videos/${slug}.json?slug=${slug}`);

	if (res.ok && res.body.pageProps?.content) {
		return scrapeSceneApi(res.body.pageProps.content, channel);
	}

	return res.status;
}
|
||||||
|
|
||||||
|
/**
 * Fetch a model's profile, and the scenes listed with it, from the site's data endpoint.
 *
 * @param {Object} actor - Actor to look up; `actor.slug` identifies the model in the request.
 * @param {Object} context - Scraper context providing `channel` and `parameters.endpoint`.
 * @returns {Promise<Object|number>} Scraped profile, or the response status when no model is available.
 */
async function fetchProfileApi(actor, { channel, parameters }) {
	const { slug } = actor;
	const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/models/${slug}.json?slug=${slug}`);

	// only look into the body of a successful response
	const pageProps = res.ok ? res.body.pageProps : null;

	if (!pageProps?.model) {
		return res.status;
	}

	return scrapeProfileApi(pageProps.model, channel, pageProps.model_contents);
}
|
||||||
|
|
||||||
async function fetchLatestMetadata(channel, page = 1) {
|
async function fetchLatestMetadata(channel, page = 1) {
|
||||||
const url = `${channel.url}/tour/videos?page=${page}`;
|
const url = `${channel.url}/tour/videos?page=${page}`;
|
||||||
const res = await http.get(url, {
|
const res = await http.get(url, {
|
||||||
|
@ -111,16 +206,6 @@ async function fetchLatestMetadata(channel, page = 1) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Fetch one page of the newest scenes from the site's data endpoint.
 *
 * @param {Object} channel - Channel being scraped; `channel.url` is the request base.
 * @param {number} page - Page number to request.
 * @param {Object} context - Scraper context; `context.parameters.endpoint` is inserted into the data path.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status when no scene list is available.
 */
async function fetchLatestApi(channel, page, { parameters }) {
	const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/videos.json?order_by=publish_date&sort_by=desc&per_page=30&page=${page}`);

	// a successful response may still lack the scene list; return the status instead of throwing
	if (res.ok && res.body.pageProps?.contents?.data) {
		return scrapeAllApi(res.body.pageProps.contents.data, channel);
	}

	return res.status;
}
|
|
||||||
|
|
||||||
async function fetchSceneMetadata(url, channel) {
|
async function fetchSceneMetadata(url, channel) {
|
||||||
const res = await http.get(url, {
|
const res = await http.get(url, {
|
||||||
parse: true,
|
parse: true,
|
||||||
|
@ -169,5 +254,7 @@ module.exports = {
|
||||||
},
|
},
|
||||||
api: {
|
api: {
|
||||||
fetchLatest: fetchLatestApi,
|
fetchLatest: fetchLatestApi,
|
||||||
|
fetchScene: fetchSceneApi,
|
||||||
|
fetchProfile: fetchProfileApi,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -190,6 +190,7 @@ const scrapers = {
|
||||||
bamvisions,
|
bamvisions,
|
||||||
bang,
|
bang,
|
||||||
bangbros,
|
bangbros,
|
||||||
|
bjraw: radical,
|
||||||
blacked: vixen,
|
blacked: vixen,
|
||||||
blackedraw: vixen,
|
blackedraw: vixen,
|
||||||
blackambush: elevatedx,
|
blackambush: elevatedx,
|
||||||
|
|
|
@ -56,7 +56,7 @@ async function resolvePlace(query) {
|
||||||
const place = {};
|
const place = {};
|
||||||
|
|
||||||
if (item.class === 'place' || item.class === 'boundary') {
|
if (item.class === 'place' || item.class === 'boundary') {
|
||||||
const location = rawPlace[item.type] || rawPlace.city || rawPlace.place;
|
const location = rawPlace[item.type] || rawPlace.city || rawPlace.place || rawPlace.town;
|
||||||
|
|
||||||
if (location) {
|
if (location) {
|
||||||
place.place = location;
|
place.place = location;
|
||||||
|
|
Loading…
Reference in New Issue