From 1ed47c31732542ff42a615ef98990c4812fce6c5 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Mon, 14 Aug 2023 01:54:17 +0200 Subject: [PATCH] Updated Radical scraper. Added town property to location resolve tool. --- src/scrapers/radical.js | 133 ++++++++++++++++++++++++++++++------- src/scrapers/scrapers.js | 1 + src/utils/resolve-place.js | 2 +- 3 files changed, 112 insertions(+), 24 deletions(-) diff --git a/src/scrapers/radical.js b/src/scrapers/radical.js index 448c1a4f..d5a5b883 100755 --- a/src/scrapers/radical.js +++ b/src/scrapers/radical.js @@ -1,10 +1,14 @@ 'use strict'; +const unprint = require('unprint'); + const http = require('../utils/http'); const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); const { lbsToKg, feetInchesToCm } = require('../utils/convert'); +const teaserOrder = ['large', 'small', 'mobile']; + function scrapeSceneMetadata(data, channel) { const release = {}; @@ -50,20 +54,49 @@ function scrapeAllMetadata(scenes, channel) { return scenes.map((data) => scrapeSceneMetadata(data, channel)); } +function scrapeSceneApi(data, channel) { + const release = {}; + + release.entryId = data.id; + release.url = `${channel.url}/videos/${data.slug}`; + + release.title = data.title; + release.description = data.description; + + release.date = unprint.extractDate(data.publish_date, 'YYYY/MM/DD HH:mm:ss') || unprint.extractDate(data.formatted_date, 'Do MMM YYYY'); + release.duration = data.seconds_duration || unprint.extractDuration(data.videos_duration); + + release.actors = data.models_thumbs?.map((actor) => ({ + name: actor.name, + avatar: actor.thumb, + })) || data.models; + + release.poster = data.trailer_screencap; + + release.photos = [ + ...data.previews?.full || [], + ...data.extra_thumbnails?.filter((thumbnail) => !thumbnail.includes('mobile') // mobile is the cropped photo of a photo already in the set + && !(thumbnail.includes('_scene') && release.poster?.includes('_scene')) // likely the same photo, filename may differ so cannot compare full path + && !(thumbnail.includes('_player') && release.poster?.includes('_player'))) || [], + ]; + + release.caps = data.thumbs; + + release.trailer = data.trailer_url; + release.teaser = data.special_thumbnails.sort((teaserA, teaserB) => teaserOrder.findIndex((label) => teaserA.includes(label)) - teaserOrder.findIndex((label) => teaserB.includes(label))); + + release.tags = data.tags; + + release.channel = slugify(data.site, ''); + release.qualities = Object.values(data.videos || []).map((video) => video.height); + + release.photoCount = Number(data.photos_duration) || null; + + return release; +} + function scrapeAllApi(scenes, channel) { - return scenes.map((data) => { - const release = {}; - - release.entryId = data.id; - - release.title = data.title; - release.description = data.description; - - console.log(data); - console.log(release); - - return release; - }); + return scenes.map((data) => scrapeSceneApi(data, channel)); } function scrapeProfileMetadata(data, channel) { @@ -91,6 +124,68 @@ function scrapeProfileMetadata(data, channel) { return profile; } +function scrapeProfileApi(data, channel, scenes) { + const profile = {}; + const bio = Object.fromEntries(Object.entries(data).map(([key, value]) => [key.toLowerCase(), value])); // keys are mixed upper and lowercase + + profile.entryId = bio.id; + + profile.description = bio.bio; + + profile.gender = bio.gender; + + profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); + profile.birthPlace = bio.born; + profile.age = bio.age; + + profile.measurements = bio.measurements; + + profile.height = feetInchesToCm(bio.height); + profile.weight = lbsToKg(bio.weight); + + profile.eyes = bio.eyes; + profile.hairColor = bio.hair; + + profile.avatar = data.thumb; + + if (scenes) { + profile.scenes = scrapeAllApi(scenes, channel); + } + + return profile; +} + +async function fetchLatestApi(channel, page, { parameters }) { + const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/videos.json?order_by=publish_date&sort_by=desc&per_page=8&page=${page}`); + + if (res.ok && res.body.pageProps?.contents?.data) { + return scrapeAllApi(res.body.pageProps.contents.data, channel); + } + + return res.status; +} + +async function fetchSceneApi(url, channel, baseScene, { parameters }) { + const slug = new URL(url).pathname.split('/').at(-1); + const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/videos/${slug}.json?slug=${slug}`); + + if (res.ok && res.body.pageProps?.content) { + return scrapeSceneApi(res.body.pageProps.content, channel); + } + + return res.status; +} + +async function fetchProfileApi(actor, { channel, parameters }) { + const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/models/${actor.slug}.json?slug=${actor.slug}`); + + if (res.ok && res.body.pageProps?.model) { + return scrapeProfileApi(res.body.pageProps.model, channel, res.body.pageProps.model_contents); + } + + return res.status; +} + async function fetchLatestMetadata(channel, page = 1) { const url = `${channel.url}/tour/videos?page=${page}`; const res = await http.get(url, { @@ -111,16 +206,6 @@ async function fetchLatestMetadata(channel, page = 1) { return res.status; } -async function fetchLatestApi(channel, page, { parameters }) { - const res = await http.get(`${channel.url}/_next/data/${parameters.endpoint}/videos.json?order_by=publish_date&sort_by=desc&per_page=30&page=${page}`); - - if (res.ok) { - return scrapeAllApi(res.body.pageProps.contents.data, channel); - } - - return res.status; -} - async function fetchSceneMetadata(url, channel) { const res = await http.get(url, { parse: true, @@ -169,5 +254,7 @@ module.exports = { }, api: { fetchLatest: fetchLatestApi, + fetchScene: fetchSceneApi, + fetchProfile: fetchProfileApi, }, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index f3314661..624d6175 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -190,6 +190,7 @@ const scrapers = { bamvisions, bang, bangbros, + bjraw: radical, blacked: vixen, blackedraw: vixen, blackambush: elevatedx, diff --git a/src/utils/resolve-place.js b/src/utils/resolve-place.js index 7aa23b64..8bb25915 100755 --- a/src/utils/resolve-place.js +++ b/src/utils/resolve-place.js @@ -56,7 +56,7 @@ async function resolvePlace(query) { const place = {}; if (item.class === 'place' || item.class === 'boundary') { - const location = rawPlace[item.type] || rawPlace.city || rawPlace.place; + const location = rawPlace[item.type] || rawPlace.city || rawPlace.place || rawPlace.town; if (location) { place.place = location;