traxxx/src/scrapers/littlecapricedreams.js

228 lines
5.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Escape regular expression metacharacters so the text is matched literally.
 *
 * @param {*} text - value to embed in a pattern; coerced to a string
 * @returns {string} the text with every metacharacter backslash-escaped
 */
function escapeRegExp(text) {
	return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Detect the serie a release belongs to from a serie name mentioned in its title.
 *
 * The candidate series are the children of the channel, or of its parent when the
 * channel has no children of its own. Every serie can be referenced by its name,
 * its slug, or one of a few hard-coded abbreviations.
 *
 * @param {{ title: ?string }} release - release whose title is inspected
 * @param {object} channel - entity with `children`, or with a `parent` that has `children`
 * @returns {?{ channel: string, title: string }} the matched serie slug and the title
 *   with the "<serie> - " style prefix removed, or null when nothing can be matched
 */
function matchChannel(release, channel) {
	const series = channel.children || channel.parent?.children;

	// the title may be missing when the page did not contain the expected element
	if (!series || typeof release.title !== 'string') {
		return null;
	}

	const serieNames = new Map();

	series.forEach((serie) => {
		[serie.name, serie.slug].forEach((key) => {
			// skip missing names/slugs, they would otherwise register as the key "undefined"
			if (key) {
				serieNames.set(String(key), serie);
			}
		});
	});

	// abbreviations seen in titles, mapped to the slug of the serie they refer to
	const aliases = {
		vr: 'littlecapricevr',
		superprivat: 'superprivatex',
		superprivate: 'superprivatex',
		nasst: 'nassty',
		sexlesson: 'sexlessons',
	};

	Object.entries(aliases).forEach(([alias, target]) => {
		// only register an alias when its target exists, so it can neither shadow
		// a real serie with a dead entry nor add an unmatchable alternative
		if (serieNames.has(target)) {
			serieNames.set(alias, serieNames.get(target));
		}
	});

	if (serieNames.size === 0) {
		return null;
	}

	// ensure longest key matches first
	const serieKeys = [...serieNames.keys()].sort((nameA, nameB) => nameB.length - nameA.length);
	const serieName = release.title.match(new RegExp(serieKeys.map(escapeRegExp).join('|'), 'i'))?.[0];

	// the match is case-insensitive, the lookup goes through slugify with an empty delimiter
	const serie = serieName && serieNames.get(slugify(serieName, ''));

	if (!serie) {
		return null;
	}

	const prefixes = [...new Set([serieName, serie.name, serie.slug].filter(Boolean))]
		.map(escapeRegExp)
		.join('|');

	return {
		channel: serie.slug,
		title: release.title.replace(new RegExp(`(${prefixes})\\s*[-:/]+\\s*`, 'ig'), ''),
	};
}
/**
 * Turn the scene tiles of an overview page into release objects.
 *
 * @param {Array<{ query: object, el: object }>} scenes - one entry per tile, each with
 *   its query helpers and root element
 * @param {object} channel - entity being scraped; its `url` is used as poster referer
 * @returns {object[]} one release per tile; when matchChannel recognises a serie in
 *   the title, its `channel` and cleaned `title` are merged over the scraped values
 */
function scrapeAll(scenes, channel) {
	const scrapeTile = ({ query, el }) => {
		const release = {
			url: query.url('a'),
			// the numeric part of the element id, e.g. "post-123" yields "123"
			entryId: query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1],
			title: query.cnt('.meta h3'),
			date: query.date('.meta .post-meta', 'MMMM D, YYYY'),
			poster: {
				src: query.img('img'),
				referer: channel.url,
			},
		};

		// Object.assign skips a null match result, leaving the release untouched
		return Object.assign({}, release, matchChannel(release, channel));
	};

	return scenes.map(scrapeTile);
}
/**
 * Fetch the photo gallery linked from a scene page.
 *
 * @param {?string} url - gallery page URL; nothing is requested when it is empty
 * @returns {Promise<?Array<{ src: string, referer: string }>>} one entry per link
 *   found inside `.et_post_gallery`, or null when there is no URL or the request failed
 */
async function fetchPhotos(url) {
	if (!url) {
		return null;
	}

	const res = await qu.get(url, '.et_post_gallery');

	if (!res.ok) {
		return null;
	}

	const photoUrls = res.item.query.urls('a');

	// the gallery page itself is sent along as referer for every photo
	return photoUrls.map((imgUrl) => ({ src: imgUrl, referer: url }));
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} page - query helpers bound to the scene page
 * @param {string} url - URL of the scene page, used as referer for its media
 * @param {object} channel - entity being scraped, handed to matchChannel
 * @param {?{ photos: boolean }} include - optional extras; the gallery is only
 *   fetched when `photos` is truthy
 * @returns {Promise<object>} the release; when matchChannel recognises a serie in
 *   the title, its `channel` and cleaned `title` are merged over the scraped values
 */
async function scrapeScene({ query }, url, channel, include) {
	const release = {};

	const script = query.cnt('script.yoast-schema-graph');
	let data = null;

	if (script) {
		try {
			data = JSON.parse(script);
		} catch (error) {
			// the schema is only used to find a poster, so malformed JSON must not
			// abort the scene; the meta tag fallbacks below take over
			data = null;
		}
	}

	release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];
	release.title = query.cnt('.vid_title');
	release.description = query.cnt('.vid_desc p');

	release.date = query.date('.vid_date', 'MMMM D, YYYY');
	release.duration = query.dur('.vid_length');

	release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null),
	}));

	release.tags = query.cnts('.vid_infos a[rel="tag"]');

	// the schema script may be absent, or may lack a usable @graph list
	const graph = Array.isArray(data?.['@graph']) ? data['@graph'] : [];
	const posterData = graph.find((item) => item?.['@type'] === 'ImageObject');

	const poster = posterData?.url
		|| query.q('meta[property="og:image"]', 'content')
		|| query.q('meta[name="twitter:image"]', 'content');

	release.poster = {
		src: poster,
		referer: url,
	};

	release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?

	if (include?.photos) {
		release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
	}

	release.trailer = {
		src: query.video(),
		type: query.video('source', 'type'),
		quality: query.video('source', 'data-res'),
		referer: url,
	};

	return {
		...release,
		...matchChannel(release, channel),
	};
}
/**
 * Build an actor profile from a fetched profile page.
 *
 * @param {{ query: object, el: object }} page - query helpers and root element of the page
 * @param {{ url: string, gender: string }} actorUrl - profile URL and gender, copied to the profile
 * @param {object} baseActor - not used by this scraper
 * @param {object} entity - handed to scrapeAll as the channel of the listed scenes
 * @returns {object} profile with age, birth place, description, avatar and scenes
 */
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
	const age = query.number('div:nth-child(2) > p');

	// the third block is expected to read like "Nationality: <word>"
	const nationality = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i);
	const description = query.cnt('div:nth-child(4) > p');

	const avatar = {
		src: query.img('.model-page'),
		referer: url,
	};

	// the scenes listed on the profile are scraped like overview tiles
	const sceneItems = qu.initAll(el, '.project_category-videos');

	return {
		url,
		gender,
		age,
		birthPlace: nationality?.[1],
		description,
		avatar,
		scenes: scrapeAll(sceneItems, entity),
	};
}
/**
 * Fetch the releases listed on a channel page.
 *
 * @param {object} channel - entity whose `url` is requested
 * @returns {Promise<object[]|*>} the scraped releases, or the response status when
 *   the request failed
 */
async function fetchLatest(channel) {
	// The site shows no pagination: every update sits on a single page.
	// Channel pages are scraped rather than the main overview, in part because the
	// overview mixes in photo albums that cannot be told apart from videos.
	// Note that some serie pages still contain videos from other series.
	const res = await qu.getAll(channel.url, '.project');

	return res.ok
		? scrapeAll(res.items, channel)
		: res.status;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene page URL
 * @param {object} channel - entity being scraped
 * @param {object} baseRelease - not used by this scraper
 * @param {object} include - optional extras, passed on to scrapeScene
 * @returns {Promise<object|*>} the scraped release, or the response status when the
 *   request failed
 */
async function fetchScene(url, channel, baseRelease, include) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, channel, include);
}
/**
 * Find the profile page of an actor.
 *
 * An actor that already carries a URL is resolved to that URL as-is. Otherwise the
 * overview page for the given gender is searched for an image whose slugified title
 * equals the actor slug; when the female overview has no match, the male overview
 * is searched next.
 *
 * @param {{ url: ?string, slug: string }} baseActor - actor to look up
 * @param {string} [gender='female'] - overview to search; anything but 'female'
 *   selects the male overview
 * @returns {Promise<string|{ url: string, gender: string }|*|null>} `baseActor.url`
 *   when set, `{ url, gender }` for an overview match, the response status when the
 *   overview request failed, or null when the actor or their link was not found
 */
async function getActorUrl(baseActor, gender = 'female') {
	if (baseActor.url) {
		return baseActor.url;
	}

	let overviewUrl = 'https://www.littlecaprice-dreams.com/male-models-pornstars/';

	if (gender === 'female') {
		overviewUrl = 'https://www.littlecaprice-dreams.com/pornstars/';
	}

	const overviewRes = await qu.getAll(overviewUrl, '.models');

	if (!overviewRes.ok) {
		return overviewRes.status;
	}

	const isActor = (item) => slugify(item.query.q('img', 'title')) === baseActor.slug;
	const actorItem = overviewRes.items.find(isActor);

	if (actorItem) {
		const actorUrl = actorItem.query.url('a');

		return actorUrl ? { url: actorUrl, gender } : null;
	}

	// not listed among the women, try the men before giving up
	return gender === 'female'
		? getActorUrl(baseActor, 'male')
		: null;
}
/**
 * Look up, fetch and scrape the profile of an actor.
 *
 * @param {{ url: ?string, slug: string }} baseActor - actor to look up
 * @param {{ entity: object }} context - `entity` is handed to scrapeProfile
 * @returns {Promise<object|*|null>} the scraped profile, the response status of a
 *   failed request, or null when no profile page could be found
 */
async function fetchProfile(baseActor, { entity }) {
	const lookup = await getActorUrl(baseActor);

	if (!lookup) {
		return null;
	}

	// getActorUrl hands back the status of a failed overview request (presumably a
	// number); report it like a failed profile request rather than reading `.url` off it
	if (typeof lookup === 'number') {
		return lookup;
	}

	// an actor with a known URL resolves to the bare URL string, normalise it to
	// the { url, gender } shape used for overview matches
	const actorUrl = typeof lookup === 'string' ? { url: lookup } : lookup;

	if (!actorUrl.url) {
		return null;
	}

	const actorRes = await qu.get(actorUrl.url, '#main-content');

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, baseActor, entity);
	}

	return actorRes.status;
}
// Entry points exposed by this scraper module: latest releases of a channel,
// a single scene, and an actor profile.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};