traxxx/src/scrapers/badoink.js

145 lines
4.8 KiB
JavaScript

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
/**
 * Build the ordered list of candidate image URLs for a poster or avatar.
 *
 * The first source is additionally offered with its query string and hash
 * removed (presumably to reach the unscaled original — TODO confirm), ahead of
 * the sources exactly as they were scraped.
 *
 * @param {Array<string|null|undefined>|null|undefined} posterSources - Candidate URLs, preferred one first.
 * @returns {Array|null} Unique candidates in order of preference, or null when there is no first source.
 */
function getPoster(posterSources) {
  const firstSource = posterSources?.[0];

  if (!firstSource) {
    return null;
  }

  const candidates = [...posterSources];

  try {
    const { origin, pathname } = new URL(firstSource);

    // opaque origins (data:, blob:, ...) serialize to the string "null", which would yield a bogus URL
    if (origin !== 'null') {
      candidates.unshift(`${origin}${pathname}`);
    }
  } catch (error) {
    // new URL() raises a TypeError for anything that is not an absolute URL;
    // there is nothing to strip then, so the sources are used as scraped
    if (!(error instanceof TypeError)) {
      throw error;
    }
  }

  // a first source without query string equals its stripped variant; don't list it twice
  return [...new Set(candidates)];
}
/**
 * Convert the video cards of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - One scoped query helper per video card.
 * @param {object} channel - Channel being scraped; `url` serves as the origin for links and
 *   `parameters.teasers === false` turns off the guessed teaser URL.
 * @returns {object[]} One release per card.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a', 'href', { origin: channel.url });

    // IDs embedded in the card take precedence; the trailing number of the scene URL is the last resort
    release.entryId = query.q(null, 'data-video-card-scene-id')
      || query.q('video', 'data-video-preview-scene-id')
      || new URL(release.url).pathname.match(/(\d+)\/?$/)?.[1];

    release.title = query.cnt('.video-card-title span') || query.q('.video-card-title', 'title');
    release.description = query.cnt('.video-card-description');

    release.date = query.date('.video-card-upload-date', 'YYYY-MM-DD HH:mm:ss', null, 'content')
      || query.date('.video-card-upload-date', 'MMMM DD, YYYY');

    release.duration = query.duration('.video-card-duration', null, 'content');

    if (!release.duration) {
      // fall back on the number in the element text, which is scaled by 60 (i.e. read as minutes);
      // a missing number must result in null, multiplying it blindly gives 0 or NaN
      const minutes = query.number('.video-card-duration');
      const seconds = minutes * 60;

      release.duration = minutes != null && Number.isFinite(seconds) ? seconds : null;
    }

    release.actors = query.all('.video-card-details--cast a').map((el) => ({
      name: qu.query.cnt(el),
      url: qu.query.url(el, null, 'href', { origin: channel.url }),
    }));

    release.tags = query.cnts('.video-card-tags a');

    const posterSources = query.srcset('picture source', 'data-srcset') || [query.img('.video-card-image', 'data-src')];
    release.poster = getPoster(posterSources);

    // the teaser location can only be guessed when the scene ID is known,
    // otherwise the template would point at ".../scene_files/undefined/..."
    const guessedTeaser = channel.parameters?.teasers !== false && release.entryId
      ? `https://p.badoinkvr.com/d/scene_files/${release.entryId}/preview/preview.mp4`
      : null;

    release.teaser = query.video('video') || guessedTeaser;

    return release;
  });
}
/**
 * Extract a release from a scene page.
 *
 * @param {{ query: object }} context - Scoped query helper for the scene page.
 * @param {string} url - Absolute URL of the scene page; its trailing number becomes the entry ID.
 * @param {object} channel - Channel being scraped; `url` serves as the origin for actor links and
 *   `parameters.teasers === false` turns off the guessed teaser URL.
 * @returns {object} The scraped release.
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1];

  // NOTE(review): the second argument is handed to query.cnt as-is; if it is meant as a
  // fallback selector, confirm that cnt actually supports that
  release.title = query.cnt('.video-title', '.breadcrumbs__item:last-child [itemprop=name]');
  release.description = query.cnt('.video-description');

  release.date = query.date('.video-upload-date', 'YYYY-MM-DD HH:mm:ss', null, 'content')
    || query.date('.video-upload-date', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  release.duration = query.duration('.video-duration', null, 'content');

  if (!release.duration) {
    // fall back on the number in the element text, which is scaled by 60 (i.e. read as minutes);
    // a missing number must result in null, multiplying it blindly gives 0 or NaN
    const minutes = query.number('.video-duration');
    const seconds = minutes * 60;

    release.duration = minutes != null && Number.isFinite(seconds) ? seconds : null;
  }

  release.actors = query.all('.video-actors a').map((el) => ({
    name: qu.query.cnt(el),
    url: qu.query.url(el, null, 'href', { origin: channel.url }),
  }));

  release.tags = query.cnts('.video-tags a');

  const posterSources = query.srcset('.video-image-container picture source', 'srcset') || [query.img('.video-image')];
  release.poster = getPoster(posterSources);

  const photos = query.imgs('.gallery .gallery-item', 'data-big-image');
  // number taken from the last gallery item, used as the amount of photos beyond the listed ones
  const extraPhotoCount = query.number('.gallery .gallery-item:last-child', /\d+/, 'data-desktop-text');
  const lastPhoto = photos.slice(-1)[0];
  const photosHaveIndex = /_\d+\./.test(lastPhoto);

  // when the filenames carry an _<index>. part, the unlisted photos are derived
  // by continuing the numbering after the listed ones
  release.photos = [
    ...photos,
    ...Array.from(photosHaveIndex ? { length: extraPhotoCount } : [], (value, index) => lastPhoto.replace(/_\d+\./, `_${index + photos.length + 1}.`)),
  ];

  // the teaser location can only be guessed when the scene ID is known,
  // otherwise the template would point at ".../scene_files/undefined/..."
  release.teaser = channel.parameters?.teasers !== false && release.entryId
    ? `https://p.badoinkvr.com/d/scene_files/${release.entryId}/preview/preview.mp4`
    : null;

  return release;
}
/**
 * Extract an actor profile, including the scenes listed on the profile page.
 *
 * @param {{ query: object }} context - Scoped query helper for the profile page.
 * @param {string} url - URL the profile was fetched from, stored on the profile.
 * @param {object} entity - Entity the profile belongs to, passed on to scrapeAll for the scene cards.
 * @returns {object} The scraped profile.
 */
function scrapeProfile({ query }, url, entity) {
  // every stats item becomes a [slugified label, value] pair
  const statPairs = query.all('.girl-details-stats-item').map((statEl) => [
    slugify(qu.query.cnt(statEl, '.girl-details-stat')),
    qu.query.cnt(statEl, '.girl-details-stat-value'),
  ]);

  const bio = Object.fromEntries(statPairs);

  return {
    url,
    description: query.cnt('.girl-details-bio'),
    age: bio.age,
    birthPlace: bio.country,
    ethnicity: bio.ethnicity,
    height: convert(bio.height, 'cm'),
    weight: convert(bio.weight, 'lb', 'kg'),
    measurements: bio.measurements,
    hairColor: bio.hair,
    eyes: bio.eyes,
    avatar: getPoster(
      query.srcset('.girl-details-photo-content picture source', 'srcset') || [query.img('.girl-details-photo')],
    ),
    social: query.urls('.girl-details-social-media-list a'),
    scenes: scrapeAll(qu.initAll(query.all('.video-card')), entity),
  };
}
/**
 * Fetch one page of a channel's latest scenes.
 *
 * @param {object} channel - Channel to fetch; `parameters.latest` overrides the default 'vrpornvideos' path segment.
 * @param {number} page - Page number appended to the listing URL.
 * @returns {Promise<object[]|*>} The scraped releases, or the response status when the request was not OK.
 */
async function fetchLatest(channel, page) {
  const listingUrl = `${channel.url}/${channel.parameters?.latest || 'vrpornvideos'}/${page}`;

  // the affsubid cookie is required for the teaser video to show, its exact number doesn't seem to matter
  const cookieHeader = { Cookie: 'affsubid=12345-;' };

  const res = await qu.getAll(listingUrl, '.video-card', cookieHeader);

  return res.ok
    ? scrapeAll(res.items, channel)
    : res.status;
}
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {object} baseActor - Actor to look up; `name` is slugified with an empty delimiter to form the URL.
 * @param {{ entity: object }} context - `entity.url` is the site root, `entity.parameters.actor` overrides the default 'pornstar' path segment.
 * @returns {Promise<object|*>} The scraped profile, or the response status when the request was not OK.
 */
async function fetchProfile(baseActor, { entity }) {
  const actorSection = entity.parameters?.actor || 'pornstar';
  const actorSlug = slugify(baseActor.name, '');
  const url = `${entity.url}/${actorSection}/${actorSlug}/`;

  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeProfile(res.item, url, entity);
}
// Only the entry points are exported; scrapeAll and scrapeProfile stay internal
// and are reached through fetchLatest and fetchProfile respectively.
module.exports = {
fetchLatest,
fetchProfile,
scrapeScene,
};