// Forked from DebaucheryLibrarian/traxxx
// 228 lines, 5.3 KiB, JavaScript
'use strict';
|
||
|
||
const qu = require('../utils/qu');
|
||
const slugify = require('../utils/slugify');
|
||
|
||
/**
 * Work out which series a release belongs to by looking for a series name in its title.
 *
 * Candidate series are the channel's own `children`, or otherwise its parent's `children`.
 * Every series can be matched by its name or slug, and a handful of abbreviated spellings
 * are mapped onto the slug of the series they stand for.
 *
 * @param {{ title: ?string }} release - Release carrying the scraped title.
 * @param {object} channel - Entity with `children` or `parent.children`; each child is read for `name` and `slug`.
 * @returns {?{ channel: string, title: string }} Slug of the matched series and the title with the
 *   series prefix removed, or null when no series can be determined.
 */
function matchChannel(release, channel) {
	const series = channel.children || channel.parent?.children;

	// a tile without a title used to throw on .match(), failing every release in the same batch
	if (!series || !release.title) {
		return null;
	}

	// names and slugs are data, not patterns: escape them before they go into a RegExp
	const escapeRegExp = (string) => String(string).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

	// keys are stored lower-cased because the title is matched case-insensitively
	const serieByKey = new Map();

	const register = (key, serie) => {
		// skip missing names and unresolved abbreviations, they would only pollute the pattern
		if (key && serie) {
			serieByKey.set(String(key).toLowerCase(), serie);
		}
	};

	series.forEach((serie) => {
		register(serie.name, serie);
		register(serie.slug, serie);
	});

	// abbreviated spellings, each pointing at the slug of the series it refers to
	const abbreviations = {
		vr: 'littlecapricevr',
		superprivat: 'superprivatex',
		superprivate: 'superprivatex',
		nasst: 'nassty',
		sexlesson: 'sexlessons',
	};

	Object.entries(abbreviations).forEach(([abbreviation, slug]) => {
		register(abbreviation, serieByKey.get(slug));
	});

	// ensure longest key matches first
	const serieKeys = [...serieByKey.keys()].sort((keyA, keyB) => keyB.length - keyA.length);

	if (serieKeys.length === 0) {
		return null;
	}

	const serieName = release.title.match(new RegExp(serieKeys.map(escapeRegExp).join('|'), 'i'))?.[0];

	if (!serieName) {
		return null;
	}

	// prefer the slugified form as before, fall back to the matched text itself
	const serie = serieByKey.get(slugify(serieName, '')) || serieByKey.get(serieName.toLowerCase());

	if (!serie) {
		return null;
	}

	const prefixes = [serieName, serie.name, serie.slug]
		.filter(Boolean)
		.map(escapeRegExp)
		.join('|');

	return {
		channel: serie.slug,
		// drop the series name together with the separator that follows it
		title: release.title.replace(new RegExp(`(${prefixes})\\s*[-–:/]+\\s*`, 'ig'), ''),
	};
}
|
||
|
||
/**
 * Convert the scene tiles of a listing into base releases.
 *
 * @param {Array<{ query: object, el: object }>} scenes - Initialized query contexts, one per tile.
 * @param {object} channel - Entity being scraped; its `url` is used as the poster referer.
 * @returns {object[]} One release per tile; `channel` and `title` are overridden when
 *   `matchChannel` recognizes a series in the title.
 */
function scrapeAll(scenes, channel) {
	return scenes.map((scene) => {
		const { query, el } = scene;

		const release = {
			url: query.url('a'),
			// the entry ID is taken from the tile's own id attribute, e.g. "post-123"
			entryId: query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1],
			title: query.cnt('.meta h3'),
			date: query.date('.meta .post-meta', 'MMMM D, YYYY'),
			poster: {
				src: query.img('img'),
				referer: channel.url,
			},
		};

		const matched = matchChannel(release, channel);

		return { ...release, ...matched };
	});
}
|
||
|
||
/**
 * Fetch a photo gallery page and list its images.
 *
 * @param {?string} url - Gallery page URL; nothing is requested when it is empty.
 * @returns {Promise<?Array<{ src: string, referer: string }>>} One entry per link inside
 *   `.et_post_gallery`, or null when there is no URL or the request did not succeed.
 */
async function fetchPhotos(url) {
	if (!url) {
		return null;
	}

	const res = await qu.get(url, '.et_post_gallery');

	if (!res.ok) {
		return null;
	}

	const links = res.item.query.urls('a');

	return links.map((src) => ({ src, referer: url }));
}
|
||
|
||
/**
 * Scrape a scene page into a release.
 *
 * @param {{ query: object }} item - Initialized query context for the scene page.
 * @param {string} url - Scene URL, used as referer for poster and trailer.
 * @param {object} channel - Entity being scraped, forwarded to `matchChannel`.
 * @param {?{ photos: ?boolean }} include - Optional extras; photos are fetched only when `photos` is truthy.
 * @returns {Promise<object>} The release; `channel` and `title` are overridden when
 *   `matchChannel` recognizes a series in the title.
 */
async function scrapeScene({ query }, url, channel, include) {
	const release = {};

	// The Yoast schema is only one of three poster sources, so a missing or malformed
	// script must not abort the scene. Previously a missing script threw on data['@graph'].
	const script = query.cnt('script.yoast-schema-graph');
	let data = null;

	if (script) {
		try {
			data = JSON.parse(script);
		} catch (error) {
			// deliberately best-effort: fall through to the meta tag posters below
			data = null;
		}
	}

	release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];

	release.title = query.cnt('.vid_title');
	release.description = query.cnt('.vid_desc p');

	release.date = query.date('.vid_date', 'MMMM D, YYYY');
	release.duration = query.dur('.vid_length');

	release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
		name: query.cnt(actorEl),
		url: query.url(actorEl, null),
	}));

	release.tags = query.cnts('.vid_infos a[rel="tag"]');

	const graph = data?.['@graph'];
	const posterData = Array.isArray(graph)
		? graph.find((item) => item?.['@type'] === 'ImageObject')
		: null;

	const poster = posterData?.url
		|| query.q('meta[property="og:image"]', 'content')
		|| query.q('meta[name="twitter:image"]', 'content');

	release.poster = {
		src: poster,
		referer: url,
	};

	release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07?

	if (include?.photos) {
		release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
	}

	release.trailer = {
		src: query.video(),
		type: query.video('source', 'type'),
		quality: query.video('source', 'data-res'),
		referer: url,
	};

	return {
		...release,
		...matchChannel(release, channel),
	};
}
|
||
|
||
/**
 * Scrape a model page into a profile.
 *
 * @param {{ query: object, el: object }} item - Initialized query context of the profile page.
 * @param {{ url: string, gender: string }} actorUrl - Profile URL and gender, copied onto the profile.
 * @param {object} baseActor - Actor being looked up (not read here).
 * @param {object} entity - Entity forwarded to `scrapeAll` for the model's scenes.
 * @returns {object} Profile with age, birth place, description, avatar and scenes.
 */
function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) {
	return {
		url,
		gender,
		age: query.number('div:nth-child(2) > p'),
		// only the single word following "nationality" is kept
		birthPlace: query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1],
		description: query.cnt('div:nth-child(4) > p'),
		avatar: {
			src: query.img('.model-page'),
			referer: url,
		},
		scenes: scrapeAll(qu.initAll(el, '.project_category-videos'), entity),
	};
}
|
||
|
||
/**
 * Fetch the releases listed on a channel page.
 *
 * There is no apparent pagination, all updates sit on one page. Channel pages are used
 * in part because the main overview mixes in indistinguishable photo albums; note that
 * some series pages still contain videos from other series.
 *
 * @param {{ url: string }} channel - Channel whose page is requested.
 * @returns {Promise<object[]|*>} Scraped releases, or the response `status` when the request failed.
 */
async function fetchLatest(channel) {
	const res = await qu.getAll(channel.url, '.project');

	return res.ok
		? scrapeAll(res.items, channel)
		: res.status;
}
|
||
|
||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene page URL.
 * @param {object} channel - Entity the scene is scraped for.
 * @param {object} baseRelease - Release known so far (not read here).
 * @param {object} include - Optional extras, forwarded to `scrapeScene`.
 * @returns {Promise<object|*>} Scraped release, or the response `status` when the request failed.
 */
async function fetchScene(url, channel, baseRelease, include) {
	const res = await qu.get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, channel, include);
}
|
||
|
||
/**
 * Resolve the profile page of an actor.
 *
 * When the actor has no URL yet, the female overview page is searched for an image whose
 * slugified title equals the actor's slug; if that yields nothing the male overview is tried.
 *
 * @param {{ url: ?string, slug: ?string, gender: ?string }} baseActor - Actor to look up.
 * @param {string} [gender='female'] - Overview to search; anything but 'female' selects the male overview.
 * @returns {Promise<?{ url: string, gender: ?string }|*>} URL and gender of the profile, null when
 *   the actor is not listed, or the response `status` when an overview request failed.
 */
async function getActorUrl(baseActor, gender = 'female') {
	if (baseActor.url) {
		// Same shape as the overview lookup below: the caller reads `.url` from the result,
		// which was undefined when a bare string was returned here.
		// NOTE(review): gender is copied from baseActor and may be undefined — confirm upstream.
		return {
			url: baseActor.url,
			gender: baseActor.gender,
		};
	}

	const overviewUrl = gender === 'female'
		? 'https://www.littlecaprice-dreams.com/pornstars/'
		: 'https://www.littlecaprice-dreams.com/male-models-pornstars/';

	const overviewRes = await qu.getAll(overviewUrl, '.models');

	if (!overviewRes.ok) {
		return overviewRes.status;
	}

	const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug);

	if (!actorItem) {
		// not among the female models, retry once with the male overview
		return gender === 'female'
			? getActorUrl(baseActor, 'male')
			: null;
	}

	const actorUrl = actorItem.query.url('a');

	if (!actorUrl) {
		return null;
	}

	return {
		url: actorUrl,
		gender,
	};
}
|
||
|
||
/**
 * Look up an actor's profile page and scrape it.
 *
 * @param {object} baseActor - Actor to look up, forwarded to `getActorUrl` and `scrapeProfile`.
 * @param {{ entity: object }} context - Only `entity` is read; it is forwarded to `scrapeProfile`.
 * @returns {Promise<?object|*>} Scraped profile, null when the actor is not found, or the
 *   `status` of whichever request failed.
 */
async function fetchProfile(baseActor, { entity }) {
	const actorUrl = await getActorUrl(baseActor);

	if (!actorUrl) {
		return null;
	}

	// a number is the status of a failed overview request: pass it on instead of
	// reading `.url` from it and requesting an undefined URL
	if (typeof actorUrl === 'number') {
		return actorUrl;
	}

	// a bare string is an already known profile URL, normalize it to the object shape
	// NOTE(review): gender is then taken from baseActor and may be undefined — confirm upstream.
	const actor = typeof actorUrl === 'string'
		? { url: actorUrl, gender: baseActor.gender }
		: actorUrl;

	const actorRes = await qu.get(actor.url, '#main-content');

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actor, baseActor, entity);
	}

	return actorRes.status;
}
|
||
|
||
// public scraper interface: latest releases, single scene, actor profile
module.exports = { fetchLatest, fetchScene, fetchProfile };
|