'use strict'; const unprint = require('unprint'); const format = require('template-format'); const slugify = require('../utils/slugify'); const tryUrls = require('../utils/try-urls'); const { convert } = require('../utils/convert'); function deriveEntryId(release) { if (release.date && release.url) { const slug = new URL(release.url).pathname.match(/\/trailers\/(.*).html/)[1]; return `${slugify(unprint.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`; } if (release.date && release.title) { return `${slugify(unprint.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; } return null; } function extractPoster(posterPath, site, baseRelease) { if (posterPath && !/400.jpg/.test(posterPath)) { const poster = unprint.prefixUrl(posterPath, site.parameters?.media || site.url); const posterSources = [ poster, // upscaled poster.replace('-1x', '-2x'), poster.replace('-1x', '-3x'), ]; if (baseRelease?.poster) { return [posterSources, [baseRelease.poster]]; } return [posterSources, []]; } return [baseRelease?.poster || null, []]; } function getImageWithFallbacks(query, selector, site, el) { const sources = el ? [ unprint.query.attribute(el, selector, 'src0_3x'), unprint.query.attribute(el, selector, 'src0_2x'), unprint.query.attribute(el, selector, 'src0_1x'), ] : [ query.attribute(selector, 'src0_3x'), query.attribute(selector, 'src0_2x'), query.attribute(selector, 'src0_1x'), ]; return sources.filter(Boolean).map((src) => unprint.prefixUrl(src, site.parameters?.media || site.url)); } function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.title = query.content('h4 a'); release.url = query.url('a'); release.date = query.date('.date', 'YYYY-MM-DD'); release.duration = query.duration('.time'); const count = query.number('a img', { attribute: 'cnt' }); [release.poster, ...release.photos] = Array.from({ length: count }, (_value, index) => [ query.img('a img', { attribute: `src${index}_3x`, origin: channel.url }), query.img('a img', { attribute: `src${index}_2x`, origin: channel.url }), query.img('a img', { attribute: `src${index}_1x`, origin: channel.url }), ]); release.stars = query.count('img[src*="star_full"]') + (query.count('img[src*="star_half"]') * 0.5); release.entryId = deriveEntryId(release); return release; }); } function scrapeAllT1(scenes, site, accNetworkReleases) { return scenes.map(({ query }) => { const release = {}; release.title = query.attribute('h4 a', 'title') || query.content('h4 a'); release.url = query.url('h4 a'); release.date = query.date('.more-info-div', 'MMM D, YYYY'); release.duration = query.duration('.more-info-div'); if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes']; const posterPath = query.attribute('.img-div img', 'src0_1x') || query.img('img.video_placeholder'); if (posterPath) { const poster = unprint.prefixUrl(posterPath, site.parameters?.media || site.url); release.poster = [ poster.replace('-1x', '-3x'), poster.replace('-1x', '-2x'), poster, ]; } // release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1]; release.entryId = deriveEntryId(release); if (site.parameters?.accFilter && accNetworkReleases?.map((accRelease) => accRelease.entryId).includes(release.entryId)) { // filter out releases that were already scraped from a categorized site, requeryires sequeryential site scraping return null; } return release; }).filter(Boolean); } async function fetchLatest(site, page = 1, _include, { uniqueReleases = [], duplicateReleases = [] }) { const url = (site.parameters?.latest && format(site.parameters.latest, { page })) || (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`) || `${site.url}/categories/movies_${page}_d.html`; const res = await unprint.get(url, { selectAll: '.modelfeature, .item-video, .updateItem' }); if (!res.ok) { return res.status; } if (site.parameters?.t1) { return scrapeAllT1(res.context, site, [...uniqueReleases, ...duplicateReleases]); } return scrapeAll(res.context, site, uniqueReleases); } function scrapeScene({ html, query }, channel, url) { const release = { url }; // url used for entry ID release.title = query.content('.videoDetails h3'); release.description = query.content('.videoDetails p'); release.date = query.date('.videoInfo p', ['MM/DD/YYYY', 'YYYY-MM-DD']); release.duration = Number(query.content('.videoInfo p:nth-of-type(2)')?.match(/(\d+) min/i)?.[1]) * 60; release.actors = query.contents('.update_models a'); const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1]; const poster = unprint.prefixUrl(posterPath, channel.url) || query.img('.update_thumb', 'src0_1x', { origin: channel.url }); // latter used when trailer requires signup [release.poster, ...release.photos] = [poster, ...query.imgs('.item-thumb img', 'src0_1x', { origin: channel.url })] .map((src) => src && [ src.replace('-1x', '-3x'), src.replace('-1x', '-2x'), src, ]); const trailerPath = html.match(/\/trailers?\/.*.mp4/); if (trailerPath) { release.trailer = unprint.prefixUrl(trailerPath, channel.parameters?.media || channel.url); } release.tags = query.contents('.featuring a[href*="categories/"]'); release.stars = query.count('.stars img[src*="star_full"]') + (query.count('.stars img[src*="star_half"]') * 0.5); release.entryId = deriveEntryId(release); return release; } function scrapeSceneT1({ html, query }, site, url, baseRelease) { const release = { url }; release.title = query.content('.trailer-section-head .section-title'); release.description = query.text('.row .update-info-block'); release.date = query.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); release.duration = query.duration('.update-info-row:nth-child(2)'); release.actors = query.all('.models-list-thumbs a').map((el) => ({ name: unprint.query.content(el, 'span'), avatar: getImageWithFallbacks(query, 'img', site, el), })); release.tags = query.contents('.tags a'); // const posterPath = html.match(/poster="(.*\.jpg)/)?.[1]; const posterPath = query.img('.player-thumb img', { attribute: 'src0_1x' }); const trailer = html.match(/ channel.parameters?.match || channel.name).join('|'), 'i'); const channel = release.tags.find((tag) => channelRegExp.test(tag)); if (channel) { release.channel = slugify(channel, ''); } } release.entryId = deriveEntryId(release); return release; } async function fetchScene(url, site, baseRelease) { const res = await unprint.get(url); if (!res.ok) { return res.status; } if (site.parameters?.t1) { return scrapeSceneT1(res.context, site, url, baseRelease); } return scrapeScene(res.context, site, url, baseRelease); } async function fetchActorScenes({ query }, channel, accScenes = []) { const scenes = scrapeAll(unprint.initAll(query.all('.item-video')), channel); const nextPage = query.url('.next a'); if (nextPage) { const res = await unprint.get(nextPage); if (res.ok) { return fetchActorScenes(res.context, channel, scenes.concat(accScenes)); } } return accScenes.concat(scenes); } async function scrapeProfile({ query }, url, channel, options) { const profile = { url }; const bio = query.all('.stats li').reduce((acc, bioEl) => { const key = unprint.query.content(bioEl, 'strong'); const value = unprint.query.url(bioEl, null) || unprint.query.text(bioEl); return { ...acc, [slugify(key, '_')]: value, }; }, {}); if (bio.date_of_birth) profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY'); if (bio.birthplace) profile.birthPlace = bio.birthplace; if (bio.fun_fact) profile.description = bio.fun_fact; if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]); if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]); if (bio.shoe_size) profile.foot = Number(bio.shoe_size); profile.measurements = bio.measurements; if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || convert(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1], 'cm')) || null; if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || convert(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1], 'cm')) || null; if (bio.circumcised && /yes/i.test(bio.circumcised)) profile.isCircumcised = true; if (bio.circumcised && /no/i.test(bio.circumcised)) profile.isCircumcised = false; if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true; if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false; if (bio.tattoos && /(yes)|(some)|(many)/i.test(bio.tattoos)) profile.hasTattoos = true; if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false; if (bio.piercings && /(yes)|(some)|(many)/i.test(bio.piercings)) profile.hasPiercings = true; if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false; if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim()); profile.socials = [bio.onlyfans, bio.twitter, bio.instagram, bio.domain].filter(Boolean); profile.avatar = [ query.img('.profile-pic img', { attribute: 'src0_3x', origin: channel.url }), query.img('.profile-pic img', { attribute: 'src0_2x', origin: channel.url }), query.img('.profile-pic img', { attribute: 'src0_1x', origin: channel.url }), ]; if (options.includeActorScenes) { profile.releases = await fetchActorScenes({ query }, channel); } return profile; } function scrapeProfileT1({ query }, url, site) { const profile = { url }; const bio = query.contents('.detail-div + .detail-div p, .detail-div p').reduce((acc, info) => { const [key, value] = info.split(':'); if (!value) return acc; return { ...acc, [slugify(key, '_')]: value.trim(), }; }, {}); profile.measurements = bio.measurements; if (bio.fun_fact) profile.description = bio.fun_fact; if (bio.age) profile.age = Number(bio.age); const heightMetric = bio.height?.match(/(\d{3})(\b|c)/); const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g); if (heightMetric) { profile.height = Number(heightMetric[1]); } if (heightImperial) { profile.height = convert(`${heightImperial[0]}' ${heightImperial[1]}"`, 'cm'); } profile.avatar = getImageWithFallbacks(query, '.img-div img', site); const qReleases = unprint.initAll(query.all('.item-video')); profile.releases = scrapeAllT1(qReleases, site); return profile; } async function fetchProfile({ name: actorName }, { channel }, options) { const actorSlugA = slugify(actorName, '', { lower: false }); const actorSlugB = slugify(actorName); const t1 = channel.parameters?.t1 ? 't1/' : ''; const { res, url } = channel.parameters?.profile ? await tryUrls([ format(channel.parameters.profile, { actor: actorSlugA }), format(channel.parameters.profile, { actor: actorSlugB }), ], { followRedirects: false }) : await tryUrls([ `${channel.url}/${t1}models/${actorSlugA}.html`, `${channel.url}/${t1}models/${actorSlugB}.html`, ], { followRedirects: false }); if (!res.ok) { return res.status; } if (channel.parameters?.t1) { return scrapeProfileT1(res.context, url, channel); } return scrapeProfile(res.context, url, channel, options); } module.exports = { fetchLatest, fetchScene, fetchProfile, scrapeAllT1, };