traxxx/src/scrapers/loveherfilms.js

218 lines
5.6 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
// const { getImageWithFallbacks } = require('./elevatedx');
/**
 * Turn an image item from the API into the value used as an image source.
 *
 * @param {Object} [item] - image entry that may carry a `sources` list or a `previewImage`
 * @returns {*} null when no item is given; the `path` of every source ordered by
 *   descending `resolution` when `sources` is present; otherwise `item.previewImage`
 */
function curateSources(item) {
  if (!item) {
    return null;
  }

  const { sources, previewImage } = item;

  if (!sources) {
    return previewImage;
  }

  // sort a copy so the list on the incoming item keeps its order
  const byResolutionDesc = sources
    .slice()
    .sort((sourceA, sourceB) => sourceB.resolution - sourceA.resolution);

  return byResolutionDesc.map(({ path }) => path);
}
/**
 * Map a content-set item from the API onto a release object.
 *
 * @param {Object} data - content-set item; reads `slug`, `title`, `description`,
 *   `releaseDateVideo`, `models`, `categories`, `type`, `thumb`, `photos`,
 *   `thumbHover` and `trailer`
 * @param {Object} channel - entity whose `origin` is used to build scene and model URLs
 * @returns {Object} release with `entryId`, `url`, `title`, `description`, `date`,
 *   `actors`, `tags`, `poster`, `photos` and either `teaser` or `trailer`
 */
function scrapeScene(data, channel) {
  const release = {};

  release.entryId = data.slug?.toLowerCase(); // _id can't be used for API lookup
  release.url = data.slug && `${channel.origin}/tour/trailers/${data.slug}.html`;

  release.title = data.title;
  release.description = data.description;
  release.date = new Date(data.releaseDateVideo);

  release.actors = data.models?.map((actor) => ({
    name: actor.modelName,
    url: actor.slug && `${channel.origin}/tour/models/${actor.slug}.html`,
    entryId: actor._id,
  }));

  release.tags = data.categories?.map((category) => category.title);

  if (data.type?.toLowerCase() === 'bts') {
    // categories can be absent, in which case there is no tag list to extend yet
    release.tags = [...(release.tags || []), 'bts'];
  }

  release.poster = curateSources(data.thumb);
  release.photos = (data.photos?.map((photo) => curateSources(photo)) || []).filter(Boolean);

  if (data.thumbHover && data.thumbHover.baseName !== '849') { // placeholder image
    // wrapped in an array so a list of source paths is appended as one photo entry, not flattened
    release.photos = release.photos.concat([curateSources(data.thumbHover)]);
  }

  // preview trailers are stored as teaser, anything else as trailer
  const trailerType = data.trailer?.type === 'previewTrailer'
    ? 'teaser'
    : 'trailer';

  release[trailerType] = data.trailer?.sources?.map((source) => ({
    src: source.path,
    quality: source.quality || source.height || null, // only available on teaser
    expectType: {
      'application/octet-stream': 'video/mp4',
    },
  }));

  return release;
}
/**
 * Fetch one page of the latest content sets from the API and scrape each item.
 *
 * @param {Object} channel - entity whose `origin` is rewritten from `www.` to `api.`
 * @param {number} [page=1] - 1-based page number, 27 items per page
 * @param {Object} context - must carry `parameters.xSiteId` for the `X-Site-Id` header
 * @returns {Promise<Array|*>} scraped scenes, or the response status when the
 *   request failed or returned no items
 */
async function fetchLatest(channel, page = 1, { parameters }) {
  const apiOrigin = channel.origin.replace('www.', 'api.');
  const offset = (page - 1) * 27;

  // bonus-type scenes are third-party, don't include
  const url = `${apiOrigin}/v1/content-sets?types[]=Content&types[]=Tease&types[]=BTS&sort=latest&limit=27&offset=${offset}`;

  const requestOptions = {
    interface: 'request',
    headers: {
      'X-Site-Id': parameters.xSiteId,
    },
  };

  const res = await unprint.get(url, requestOptions);

  if (!res.ok || !res.data?.items) {
    return res.status;
  }

  return res.data.items.map((item) => scrapeScene(item, channel));
}
/* no entry ID, enable when two entry IDs are supported
async function fetchUpcoming(channel, _page, { parameters }) {
// unsure if site ID parameter is necessary when the header is present, but the site uses it
const url = `${channel.origin.replace('www.', 'api.')}/v1/content-sets/upcoming?siteId=${parameters.xSiteId}`;
const res = await unprint.get(url, {
interface: 'request',
headers: {
'X-Site-Id': parameters.xSiteId,
},
});
if (res.ok && res.data) {
return res.data.map((item) => scrapeScene(item, channel));
}
return res.status;
}
*/
/**
 * Fetch a single scene from the API by the slug found in its page URL.
 *
 * @param {string} sceneUrl - scene page URL of the form `.../trailers/<slug>.html`
 * @param {Object} entity - entity whose `origin` is rewritten from `www.` to `api.`
 * @param {Object} _baseRelease - unused
 * @param {Object} context - must carry `parameters.xSiteId` for the `X-Site-Id` header
 * @returns {Promise<Object|*|null>} scraped scene, the response status when the
 *   request failed or returned no data, or null when the URL holds no scene slug
 */
async function fetchScene(sceneUrl, entity, _baseRelease, { parameters }) {
  // deep data not identical, contains photoset and full trailer
  const slug = new URL(sceneUrl).pathname.match(/\/trailers\/(.*?)\.html/)?.[1];

  if (!slug) {
    // not a scene page URL, so there is nothing to look up in the API
    return null;
  }

  const url = `${entity.origin.replace('www.', 'api.')}/v1/content-sets/${slug}`;

  const res = await unprint.get(url, {
    interface: 'request',
    headers: {
      'X-Site-Id': parameters.xSiteId,
    },
  });

  if (res.ok && res.data) {
    return scrapeScene(res.data, entity);
  }

  return res.status;
}
/**
 * Build an actor profile from a model page.
 *
 * The bio list items are read as label/value pairs (first and last `<p>` of each
 * item) and keyed by the label slugified with `_`.
 *
 * @param {Object} context - page context; only its `query` helper is used
 * @param {string} url - profile page URL, stored on the profile as `url`
 * @returns {Object} profile; `naturalBoobs`, tattoo and piercing fields are only
 *   set when the bio provides a matching value
 */
function scrapeProfile({ query }, url) {
  const toBioEntry = (bioEl) => {
    const label = unprint.query.content(bioEl, 'p:first-child');
    const value = unprint.query.content(bioEl, 'p:last-child');

    return [slugify(label, '_'), value];
  };

  const bio = Object.fromEntries(query.all('ul[class*="HeroModel_list"] li').map(toBioEntry));

  const profile = {
    url,
    description: query.content('p[class*="HeroModel_text"]'),
    birthPlace: bio.place_of_birth,
    dateOfBirth: unprint.extractDate(bio.date_of_birth, ['MMMM DD, YYYY', 'MM/DD/YYYY']),
    ethnicity: bio.ethnicity,
    measurements: bio.measurements,
    height: convert(bio.height, 'cm'),
    weight: convert(bio.weight, 'lb', 'kg'),
    hairColor: bio.hair_color,
    eyes: bio.eye_color,
    foot: unprint.extractNumber(bio.feet_size, { match: /\((\d+(.\d+)?) eur\)/i, matchIndex: 1 }),
  };

  const boobType = bio.boob_type || bio.tits_type;

  // an enhanced/fake match takes precedence over a natural/real match
  if (/(enhanced)|(fake)/i.test(boobType)) {
    profile.naturalBoobs = false;
  } else if (/(natural)|(real)/i.test(boobType)) {
    profile.naturalBoobs = true;
  }

  if (bio.tattoos) {
    const hasTattoos = !/none/i.test(bio.tattoos);

    Object.assign(profile, { hasTattoos, tattoos: hasTattoos ? bio.tattoos : null });
  }

  if (bio.piercings) {
    const hasPiercings = !/none/i.test(bio.piercings);

    Object.assign(profile, { hasPiercings, piercings: hasPiercings ? bio.piercings : null });
  }

  profile.avatar = query.sourceSet('picture[class*="modelImage"] img');

  return profile;
}
/**
 * Resolve the profile page URL for an actor.
 *
 * Uses `actor.url` when present; otherwise queries the models API by name and
 * picks the item whose slugified `modelName` equals `actor.slug`.
 *
 * @param {Object} actor - reads `url`, `name` and `slug`
 * @param {Object} context - must carry `entity.origin` and `parameters.xSiteId`
 * @returns {Promise<string|null>} profile URL, or null when the request failed or
 *   no matching model was returned
 */
async function getActorUrl(actor, { entity, parameters }) {
  if (actor.url) {
    return actor.url;
  }

  const url = `${entity.origin.replace('www.', 'api.')}/v1/models?limit=12&offset=0&query=&sort=latest&modelsNames[]=${slugify(actor.name, '+')}`;

  const res = await unprint.get(url, {
    interface: 'request',
    headers: {
      'X-Site-Id': parameters.xSiteId,
    },
  });

  if (!res.ok) {
    return null;
  }

  // an OK response can still come without data, treat that as no match instead of throwing
  const actorSlug = res.data?.items?.find((item) => slugify(item.modelName) === actor.slug)?.slug;

  if (actorSlug) {
    return `${entity.origin}/tour/models/${actorSlug}.html`;
  }

  return null;
}
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {Object} actor - actor to look up, passed on to getActorUrl
 * @param {Object} context - scraper context, passed on to getActorUrl and scrapeProfile
 * @returns {Promise<Object|*|null>} scraped profile, the response status when the
 *   page request failed, or null when no profile URL could be resolved
 */
async function fetchProfile(actor, context) {
  const actorUrl = await getActorUrl(actor, context);

  if (!actorUrl) {
    return null;
  }

  const res = await unprint.get(actorUrl);

  return res.ok
    ? scrapeProfile(res.context, actorUrl, context)
    : res.status;
}
// Functions exposed by this scraper module. fetchUpcoming is left out of the
// exports because its implementation in this file is commented out.
module.exports = {
fetchLatest,
// fetchUpcoming,
fetchProfile,
fetchScene,
};