Files
traxxx/src/scrapers/littlecapricedreams.js

252 lines
6.2 KiB
JavaScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { stripQuery } = require('../utils/url');
const { convert } = require('../utils/convert');
// Aliases: slugified names as they appear in scene titles and collection tags,
// mapped onto the channel slug they should resolve to.
const channelMap = {
	// VR series
	vr: 'littlecapricevr',
	vrporn: 'littlecapricevr',
	// Superprivat, with and without trailing 'e'
	superprivat: 'superprivatex',
	superprivate: 'superprivatex',
	// shortened or singular spellings
	nasst: 'nassty',
	sexlesson: 'sexlessons',
};
/**
 * Determine which serie a release belongs to by looking for a known serie
 * name or slug inside the release title.
 *
 * @param {object} release - Release being scraped; only `title` is read.
 * @param {object} channel - Channel entity; series are taken from
 *   `channel.children`, falling back to `channel.parent.children`.
 * @returns {string|null} Slug of the matched serie, or null when there are no
 *   series, no title, or no serie can be resolved from the title.
 */
function matchChannel(release, channel) {
	const series = channel.children || channel.parent?.children;

	if (!series || !release.title) {
		return null;
	}

	// index every serie under both its name and its slug, skipping missing values
	// so that no literal 'undefined' key ends up in the pattern
	const seriesByKey = new Map();

	series.forEach((serie) => {
		[serie.name, serie.slug].forEach((key) => {
			if (typeof key === 'string' && key.length > 0) {
				seriesByKey.set(key, serie);
			}
		});
	});

	if (seriesByKey.size === 0) {
		return null;
	}

	// ensure longest key matches first; keys are escaped so that names containing
	// regex metacharacters are matched literally instead of breaking the pattern
	const pattern = [...seriesByKey.keys()]
		.sort((keyA, keyB) => keyB.length - keyA.length)
		.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
		.join('|');

	const serieName = release.title.match(new RegExp(pattern, 'i'))?.[0];

	if (!serieName) {
		return null;
	}

	// the title may use an alias, resolve it through channelMap before the lookup
	const serieSlug = slugify(serieName, '');
	const serie = seriesByKey.get(channelMap[serieSlug] || serieSlug);

	return serie ? serie.slug : null;
}
/**
 * Turn the scene tiles of an overview page into base releases.
 *
 * @param {Array<{ query: object }>} scenes - Initialized tile contexts.
 * @param {object} channel - Channel the tiles were found on; its URL is used
 *   as referer for the posters and it is passed on to matchChannel.
 * @returns {object[]} One release per tile.
 */
function scrapeAll(scenes, channel) {
	const withReferer = (src) => ({ src, referer: channel.url });

	return scenes.map((scene) => {
		const { query } = scene;

		const release = {
			url: query.url(null),
			// the numeric ID is embedded in the tile's class list
			entryId: query.attribute(null, 'class').match(/project-(\d{3,})/)?.[1],
			// drop the trailing ellipsis of titles
			title: query.content('h2')?.trim().replace(/\.\.\.$/, ''),
		};

		const posterUrl = query.img('img');

		if (posterUrl) {
			// try the URL without query string first, the original URL second
			release.poster = [stripQuery(posterUrl), posterUrl].map((src) => withReferer(src));
		}

		release.channel = matchChannel(release, channel);

		return release;
	});
}
/**
 * Fetch the video tiles listed on a channel page.
 *
 * @param {object} channel - Channel entity; `channel.url` is requested.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status
 *   when the request was not OK.
 */
async function fetchLatest(channel) {
	// There is no apparent pagination: all updates are on a single page.
	// Channel pages are used in part because the main overview mixes in photo albums
	// that cannot be told apart; note that some serie pages still list videos from other series.
	const response = await unprint.get(channel.url, { selectAll: '.project-type-video' });

	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
/**
 * Request the project actions for a scene and extract the album URL from them.
 *
 * @param {string} sceneUrl - URL of the scene page.
 * @returns {Promise<string|null>} The first double-quoted https URL found in
 *   the `js` field of the response data, or null when the request failed or
 *   no such URL is present.
 */
async function fetchAlbumUrl(sceneUrl) {
	const timestamp = Math.floor(Date.now() / 1000);

	// Upjax-Action query is redundant, but imitates original request
	const res = await unprint.get(`${sceneUrl}?endpoint_request_timestamp=${timestamp}&Upjax-Action=lcd.project.actions`, {
		headers: {
			Referer: sceneUrl,
			'Upjax-Action': 'lcd.project.actions',
			'Upjax-Method': 'GET',
		},
	});

	if (!res.ok) {
		return null;
	}

	// the response may come without parsed data or without a script string,
	// treat that as 'no album' rather than throwing
	const script = res.data?.js;

	if (typeof script !== 'string') {
		return null;
	}

	return script.match(/"(https.*?)"/)?.[1] || null;
}
/**
 * Look up the photo album belonging to a scene and, when it can be fetched,
 * store its photos and photo count on the release.
 *
 * @param {string} sceneUrl - URL of the scene page, also used as photo referer.
 * @param {object} release - Release to extend; `photos` and `photoCount` are
 *   assigned in place.
 * @returns {Promise<null>} Always null, the result is written to `release`.
 */
async function attachPhotos(sceneUrl, release) {
	const albumUrl = await fetchAlbumUrl(sceneUrl);

	if (!albumUrl) {
		return null;
	}

	const albumRes = await unprint.get(albumUrl);

	if (!albumRes.ok) {
		return null;
	}

	const { query } = albumRes.context;
	const photoUrls = query.imgs('.gallery img');

	release.photos = photoUrls.map((src) => ({ src, referer: sceneUrl })); // eslint-disable-line no-param-reassign
	release.photoCount = query.number('.image-amount'); // eslint-disable-line no-param-reassign

	return null;
}
/**
 * Scrape a scene page into a release.
 *
 * @param {{ query: object }} context - Initialized page context.
 * @param {{ url: string, include?: object }} options - `url` is the scene URL,
 *   used as referer; photos are fetched when `include.photos` is truthy.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene({ query }, { url, include }) {
	const release = {};

	// guarded with ?. so a page without the expected container class yields no ID instead of throwing
	release.entryId = query.attribute('#main-project-content', 'class')?.match(/project-(\d{3,})/)?.[1];

	release.title = query.content('.project-header h1');
	release.description = query.content('.desc-text');

	release.date = query.date('.relese-date', 'D. MMM YYYY', { match: /\d{1,2}\. \w{3} \d{4}/ }); // sic
	release.duration = query.duration('.video-duration');

	release.actors = query.all('.project-models .list a').map((actorEl) => ({
		name: unprint.query.content(actorEl),
		url: unprint.query.url(actorEl, null),
	}));

	release.tags = query.contents('.project-tags a[href*="videos/#"]');

	const poster = query.attribute('meta[property="og:image"]', 'content')
		|| query.attribute('meta[name="twitter:image"]', 'content');

	// only set a poster when a source was found, same as scrapeAll
	if (poster) {
		release.poster = {
			src: poster,
			referer: url,
		};
	}

	if (include?.photos) {
		await attachPhotos(url, release);
	}

	const trailerFrame = query.url('.video iframe', { attribute: 'src' });
	const trailerId = trailerFrame?.match(/\/embed\/\d+\/([a-z0-9-]+)/)?.[1];

	if (trailerId) {
		// the stream URL is built from the ID in the embed URL
		release.trailer = {
			stream: `https://trailer.littlecaprice-dreams.com/${trailerId}/playlist.m3u8`,
			quality: 1080,
			referer: url,
		};
	}

	// the collection tag names the serie, resolve known aliases through channelMap
	const channelSlug = slugify(query.content('.project-tags a[href*="collection/"]'), '');

	release.channel = channelMap[channelSlug] || channelSlug;

	return release;
}
/**
 * Scrape a model page into a profile.
 *
 * @param {{ query: object }} context - Initialized model page context.
 * @param {{ url: string, avatar?: string }} actor - Model page URL and the
 *   avatar found on the overview.
 * @param {object} entity - Entity passed on to scrapeAll for the model's scenes.
 * @returns {object} The scraped profile.
 */
function scrapeProfile({ query }, { url, avatar }, entity) {
	const profile = { url };

	// all bio fields are parsed from the same header; read it once and fall back to
	// an empty string so a missing header leaves the fields empty instead of throwing
	const info = query.content('.info h2') || '';

	profile.nationality = info.match(/nationality: (\w+)/i)?.[1];
	profile.cup = info.match(/cu[pb]-size: (\w{1,2})/i)?.[1]; // sic
	profile.measurements = info.match(/\d{2}-\d{2}-\d{2}/i)?.[0]; // sic
	profile.height = convert(info.match(/\d \d{1,2}″/)?.[0], 'cm');

	const description = query.content('.info div:last-child');

	// skip short 'coming soon' placeholders, keep longer texts that merely mention it
	if (!/coming soon/i.test(description) || description.length > 50) {
		profile.description = description;
	}

	if (avatar) {
		// try the URL without query string first, the original URL second
		profile.avatar = [
			stripQuery(avatar),
			avatar,
		].map((src) => ({
			src,
			referer: url,
		}));
	}

	profile.photos = query.imgs('.img-poster');
	profile.scenes = scrapeAll(unprint.initAll(query.all('.project-type-video')), entity);

	return profile;
}
/**
 * Find a model on the models overview by slug.
 *
 * @param {{ slug: string }} baseActor - Actor to look up.
 * @returns {Promise<{ url: string, avatar: * }|number|null>} The model page URL
 *   and overview avatar, the response status when the overview request was
 *   not OK, or null when the model is not listed or has no URL.
 */
async function getActorUrl(baseActor) {
	// male performers are listed, but hidden
	const overviewRes = await unprint.get('https://www.littlecaprice-dreams.com/models/', { selectAll: '.model-preview' });

	if (!overviewRes.ok) {
		return overviewRes.status;
	}

	const match = overviewRes.context.find((item) => slugify(item.query.text('h2')) === baseActor.slug);

	if (!match) {
		return null;
	}

	const url = match.query.url(null);
	const avatar = match.query.img();

	return url ? { url, avatar } : null;
}
/**
 * Fetch and scrape the profile of an actor.
 *
 * @param {{ slug: string }} baseActor - Actor to look up.
 * @param {{ entity: object }} context - `entity` is passed on to scrapeProfile.
 * @returns {Promise<object|number|null>} The scraped profile, the response
 *   status when the overview or the model page request was not OK, or null
 *   when the actor could not be found.
 */
async function fetchProfile(baseActor, { entity }) {
	// using search for avatar, not on model page
	const actorResult = await getActorUrl(baseActor);

	if (!actorResult) {
		return null;
	}

	// getActorUrl returns the response status when the overview request failed;
	// pass it on instead of requesting an undefined model URL
	if (typeof actorResult === 'number') {
		return actorResult;
	}

	const actorRes = await unprint.get(actorResult.url, { select: '.model-page' });

	if (actorRes.ok) {
		return scrapeProfile(actorRes.context, actorResult, entity);
	}

	return actorRes.status;
}
// Functions exposed by this scraper module; every other function in this file is internal.
module.exports = {
fetchLatest,
fetchProfile,
scrapeScene,
};