// traxxx/src/scrapers/littlecapricedreams.js

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Work out which series a release belongs to from its title, and strip the
 * series prefix from that title.
 *
 * A lookup of series is built from each child's `name` and `slug`, plus a few
 * hard-coded shorthand aliases. The leftmost key found in the title (case
 * insensitive) selects the series; any "<series> -", "<series>:" or
 * "<series> /" style prefix is then removed from the title.
 *
 * @param {object} release - release carrying a `title` string (may be missing)
 * @param {object} channel - entity with `children`, or with a `parent` that has `children`
 * @returns {{ slug: string, title: string } | null} series slug and cleaned title, or null when no series matches
 */
function matchChannel(release, channel) {
  // nothing to match against, and calling .match() on a missing title would throw
  if (!release.title) {
    return null;
  }

  // series names are data, not patterns: escape them before they end up in a RegExp
  const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const series = channel.children || channel.parent?.children || [];
  const serieNames = {};

  for (const serie of series) {
    if (serie.name) {
      serieNames[serie.name] = serie;
    }

    if (serie.slug) {
      serieNames[serie.slug] = serie;
    }
  }

  // shorthand spellings mapped to the slug of the series they refer to
  const aliases = {
    vr: 'littlecapricevr',
    superprivat: 'superprivatex',
    superprivate: 'superprivatex',
    nasst: 'nassty',
    sexlesson: 'sexlessons',
  };

  for (const [alias, slug] of Object.entries(aliases)) {
    // only register an alias whose target exists; a dangling alias would still
    // match the title and mask a real series name further along
    if (serieNames[slug]) {
      serieNames[alias] = serieNames[slug];
    }
  }

  // longest keys first, so at any position 'superprivatex' is tried before 'superprivat'
  const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);

  if (serieKeys.length === 0) {
    return null;
  }

  const serieName = release.title.match(new RegExp(serieKeys.map(escapeRegExp).join('|'), 'i'))?.[0];
  const serie = serieName && serieNames[slugify(serieName, '')];

  if (!serie) {
    return null;
  }

  const prefixes = [serieName, serie.name, serie.slug]
    .filter(Boolean)
    .map(escapeRegExp)
    .join('|');

  return {
    slug: serie.slug,
    title: release.title.replace(new RegExp(`(${prefixes})\\s*[-:/]+\\s*`, 'ig'), ''),
  };
}
/**
 * Turn the tiles of an overview page into base releases.
 *
 * @param {Array<{ query: object, el: object }>} scenes - one query context per tile
 * @param {object} channel - entity being scraped; its `url` is used as poster referer
 * @returns {object[]} releases with url, entryId, title, date and poster, overlaid
 *   with whatever matchChannel returns for the release (nothing when it returns null)
 */
function scrapeAll(scenes, channel) {
  const toRelease = ({ query, el }) => {
    const url = query.url('a');

    // the numeric part of a "post-<digits>" id serves as entry ID
    const postId = query.q(el, null, 'id');
    const entryId = postId?.match(/post-(\d+)/)?.[1];

    const title = query.cnt('.meta h3');
    const date = query.date('.meta .post-meta', 'MMMM D, YYYY');

    const poster = {
      src: query.img('img'),
      referer: channel.url,
    };

    const release = {
      url,
      entryId,
      title,
      date,
      poster,
    };

    // series slug and cleaned title, when present, take precedence over the scraped values
    return Object.assign({}, release, matchChannel(release, channel));
  };

  return scenes.map((scene) => toRelease(scene));
}
/**
 * Fetch the photo gallery behind a project URL.
 *
 * @param {string} [url] - gallery page URL; when falsy no request is made
 * @returns {Promise<Array<{ src: string, referer: string }> | null>} one entry per
 *   link URL found in the `.et_post_gallery` container, or null when there is no
 *   URL or the request did not succeed
 */
async function fetchPhotos(url) {
  if (!url) {
    return null;
  }

  const res = await qu.get(url, '.et_post_gallery');

  if (!res.ok) {
    return null;
  }

  const photoUrls = res.item.query.urls('a');

  // the gallery page itself is passed along as referer for every photo
  return photoUrls.map((photoUrl) => ({ src: photoUrl, referer: url }));
}
/**
 * Scrape a single scene page.
 *
 * @param {{ query: object }} context - query context of the scene page
 * @param {string} url - scene URL, used as referer for the poster
 * @param {object} channel - entity being scraped, handed to matchChannel
 * @param {object} [include] - scrape options; photos are fetched when `include.photos` is truthy
 * @returns {Promise<object>} release with entryId, title, description, date, duration,
 *   actors, tags, poster, stars and optionally photos, overlaid with the result of
 *   matchChannel when it finds a series
 */
async function scrapeScene({ query }, url, channel, include) {
  const release = {};

  // The schema graph is only one of several poster sources. A missing or
  // malformed script must not abort the scrape, so parse failures deliberately
  // fall through to the meta tag fallbacks below.
  const script = query.cnt('script.yoast-schema-graph');
  let data = null;

  if (script) {
    try {
      data = JSON.parse(script);
    } catch (error) {
      data = null;
    }
  }

  release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1];

  release.title = query.cnt('.vid_title');
  release.description = query.cnt('.vid_desc p');

  release.date = query.date('.vid_date', 'MMMM D, YYYY');
  release.duration = query.dur('.vid_length');

  release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null),
  }));

  release.tags = query.cnts('.vid_infos a[rel="tag"]');

  const graph = data?.['@graph'];
  const posterData = Array.isArray(graph)
    ? graph.find((item) => item?.['@type'] === 'ImageObject')
    : null;

  const poster = posterData?.url
    || query.q('meta[property="og:image"]', 'content')
    || query.q('meta[name="twitter:image"]', 'content');

  release.poster = {
    src: poster,
    referer: url,
  };

  // rating out of 5, yet sometimes 5.07? clamp to 5; no rating (NaN) or 0 becomes null
  release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null;

  if (include?.photos) {
    release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
  }

  return {
    ...release,
    ...matchChannel(release, channel),
  };
}
/**
 * Scrape an actor profile page.
 *
 * Paragraphs of the form "Key: value" are collected into a bio object keyed by
 * the slugified key. Only the avatar is put on the returned profile.
 *
 * @param {{ query: object }} context - query context of the profile page
 * @param {string} url - profile URL, used as referer for the avatar
 * @returns {{ avatar: { src: string, referer: string } }} scraped profile
 */
function scrapeProfile({ query }, url) {
  const profile = {};
  const bio = {};

  for (const item of query.cnts('div p')) {
    // split on the first colon only, so values that contain colons (URLs, times) stay intact
    const separatorIndex = item.indexOf(':');

    // paragraphs without a colon are ordinary text, not bio entries
    if (separatorIndex !== -1) {
      const key = slugify(item.slice(0, separatorIndex), '_');

      if (key) {
        bio[key] = item.slice(separatorIndex + 1).trim();
      }
    }
  }

  profile.avatar = {
    src: query.img('.model-page'),
    referer: url,
  };

  // NOTE(review): the bio is parsed but not yet mapped onto the profile; this
  // looks like debug output kept until that mapping is written — confirm before removing
  console.log(bio);
  console.log(profile);

  return profile;
}
/**
 * Fetch the releases listed on a channel page.
 *
 * The site shows no apparent pagination; all updates sit on one page. Channel
 * pages are used in part because the main overview mixes in photo albums that
 * cannot be told apart from scenes. Some series pages do contain videos from
 * other series.
 *
 * @param {object} channel - entity whose `url` is fetched
 * @returns {Promise<object[] | number>} scraped releases, or the response status on failure
 */
async function fetchLatest(channel) {
  const res = await qu.getAll(channel.url, '.project');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, channel);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {object} channel - entity being scraped
 * @param {object} baseRelease - not used by this scraper
 * @param {object} include - scrape options forwarded to scrapeScene
 * @returns {Promise<object | number>} scraped release, or the response status on failure
 */
async function fetchScene(url, channel, baseRelease, include) {
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, url, channel, include);
}
/**
 * Resolve the profile URL of an actor.
 *
 * A URL already present on the actor is returned as-is. Otherwise the
 * pornstars overview is fetched and searched for an entry whose slugified
 * image title equals the actor slug.
 *
 * @param {object} baseActor - actor with an optional `url` and a `slug`
 * @returns {Promise<string | number | null>} profile URL, the response status
 *   when the overview request fails, or null when the actor is not listed
 */
async function getActorUrl(baseActor) {
  if (baseActor.url) {
    return baseActor.url;
  }

  const overviewRes = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');

  if (!overviewRes.ok) {
    return overviewRes.status;
  }

  const isActor = ({ query }) => slugify(query.q('img', 'title')) === baseActor.slug;
  const actorItem = overviewRes.items.find((item) => isActor(item));

  return actorItem ? actorItem.query.url('a') : null;
}
/**
 * Fetch and scrape the profile of an actor.
 *
 * @param {object} baseActor - actor with an optional `url` and a `slug`
 * @param {object} entity - entity the profile is scraped for, forwarded to scrapeProfile
 * @returns {Promise<object | number | null>} scraped profile, a response status
 *   when a request fails, or null when no profile URL can be found
 */
async function fetchProfile(baseActor, entity) {
  const actorUrl = await getActorUrl(baseActor);

  if (!actorUrl) {
    return null;
  }

  // getActorUrl returns the response status when the overview request fails;
  // that is a truthy number, so report it as a failure rather than fetching it as a URL
  if (typeof actorUrl === 'number') {
    return actorUrl;
  }

  const actorRes = await qu.get(actorUrl, '#main-content');

  if (actorRes.ok) {
    return scrapeProfile(actorRes.item, actorUrl, entity);
  }

  return actorRes.status;
}
// Entry points exposed by this scraper module: fetchLatest, fetchScene and
// fetchProfile. The remaining functions in this file are not exported.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};