'use strict'; const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); function matchChannel(release, channel) { const series = channel.children || channel.parent?.children; if (!series) { return null; } const serieNames = series.reduce((acc, serie) => ({ ...acc, [serie.name]: serie, [serie.slug]: serie, }), {}); serieNames.vr = serieNames.littlecapricevr; serieNames.superprivat = serieNames.superprivatex; serieNames.superprivate = serieNames.superprivatex; serieNames.nasst = serieNames.nassty; serieNames.sexlesson = serieNames.sexlessons; // ensure longest key matches first const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length); const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0]; const serie = serieName && serieNames[slugify(serieName, '')]; if (serie) { return { channel: serie.slug, title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''), }; } return null; } function scrapeAll(scenes, channel) { return scenes.map(({ query, el }) => { const release = {}; release.url = query.url('a'); release.entryId = query.q(el, null, 'id')?.match(/post-(\d+)/)?.[1]; release.title = query.cnt('.meta h3'); release.date = query.date('.meta .post-meta', 'MMMM D, YYYY'); release.poster = { src: query.img('img'), referer: channel.url, }; return { ...release, ...matchChannel(release, channel), }; }); } async function fetchPhotos(url) { if (url) { const res = await qu.get(url, '.et_post_gallery'); if (res.ok) { return res.item.query.urls('a').map((imgUrl) => ({ src: imgUrl, referer: url, })); } } return null; } async function scrapeScene({ query }, url, channel, include) { const release = {}; const script = query.cnt('script.yoast-schema-graph'); const data = script && JSON.parse(script); release.entryId = query.q('article.project', 'id')?.match(/post-(\d+)/)?.[1]; release.title = query.cnt('.vid_title'); release.description = query.cnt('.vid_desc p'); release.date = query.date('.vid_date', 'MMMM D, YYYY'); release.duration = query.dur('.vid_length'); release.actors = query.all('.vid_infos a[href*="author/"]').map((actorEl) => ({ name: query.cnt(actorEl), url: query.url(actorEl, null), })); release.tags = query.cnts('.vid_infos a[rel="tag"]'); const posterData = data['@graph']?.find((item) => item['@type'] === 'ImageObject'); const poster = posterData?.url || query.q('meta[property="og:image"]', 'content') || query.q('meta[name="twitter:image"]', 'content'); release.poster = { src: poster, referer: url, }; release.stars = Math.min(Number(query.q('.post-ratings-image', 'title')?.match(/average:\s*(\d\.\d+)/)?.[1]), 5) || null; // rating out of 5, yet sometimes 5.07? if (include.photos) { release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]')); } release.trailer = { src: query.video(), type: query.video('source', 'type'), quality: query.video('source', 'data-res'), referer: url, }; return { ...release, ...matchChannel(release, channel), }; } function scrapeProfile({ query, el }, { url, gender }, baseActor, entity) { const profile = { url, gender }; profile.age = query.number('div:nth-child(2) > p'); profile.birthPlace = query.cnt('div:nth-child(3) > p')?.match(/nationality[\s:]+(\w+)/i)?.[1]; profile.description = query.cnt('div:nth-child(4) > p'); profile.avatar = { src: query.img('.model-page'), referer: url, }; profile.scenes = scrapeAll(qu.initAll(el, '.project_category-videos'), entity); return profile; } async function fetchLatest(channel) { // no apparent pagination, all updates on one page // using channels in part because main overview contains indistinguishable photo albums // however, some serie pages contain videos from other series const res = await qu.getAll(channel.url, '.project'); if (res.ok) { return scrapeAll(res.items, channel); } return res.status; } async function fetchScene(url, channel, baseRelease, include) { const res = await qu.get(url); if (res.ok) { return scrapeScene(res.item, url, channel, include); } return res.status; } async function getActorUrl(baseActor, gender = 'female') { if (baseActor.url) { return baseActor.url; } const overviewUrl = gender === 'female' ? 'https://www.littlecaprice-dreams.com/pornstars/' : 'https://www.littlecaprice-dreams.com/male-models-pornstars/'; const overviewRes = await qu.getAll(overviewUrl, '.models'); if (!overviewRes.ok) { return overviewRes.status; } const actorItem = overviewRes.items.find(({ query }) => slugify(query.q('img', 'title')) === baseActor.slug); if (!actorItem) { if (gender === 'female') { return getActorUrl(baseActor, 'male'); } return null; } const actorUrl = actorItem.query.url('a'); if (actorUrl) { return { url: actorUrl, gender, }; } return null; } async function fetchProfile(baseActor, { entity }) { const actorUrl = await getActorUrl(baseActor); if (!actorUrl) { return null; } const actorRes = await qu.get(actorUrl.url, '#main-content'); if (actorRes.ok) { return scrapeProfile(actorRes.item, actorUrl, baseActor, entity); } return actorRes.status; } module.exports = { fetchLatest, fetchScene, fetchProfile, };