'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); const { stripQuery } = require('../utils/url'); function scrapeAllA(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a.thumb-img, a.thumb', { origin: channel.url, protocol: 'http' }); release.entryId = new URL(release.url).pathname.match(/(\d+)\/?$/)?.[1]; release.title = query.text('.thumb-title, .title'); release.date = query.date('.thumb-added, .date', ['MMM D, YYYY', 'MMMM DD, YYYY'], /\w+ \d{1,2}, \d{4}/); release.duration = query.duration('.thumb-duration'); release.actors = query.all('.thumb-models a, .models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null, { origin: channel.url, protocol: 'http' }), })); const [, photoUrl, photoCount] = query.attribute('.thumb-img img', 'onmouseover')?.match(/'(.*)', (\d+)\)/) || []; if (photoUrl && photoCount) { [release.poster, ...release.photos] = Array.from({ length: 5 }, (_value, index) => unprint.prefixUrl(`${photoUrl}${index + 1}.jpg`, channel.origin, { protocol: 'http' })); } else { release.poster = query.img('.thumb-img img, .thumb img', { origin: channel.url, protocol: 'http' }); } release.tags = query.contents('.tags a'); release.rating = query.number('.thumb-rating'); return release; }); } async function fetchLatestA(channel, page) { const url = channel.parameters?.latest ? `${channel.parameters.latest}/${page}` : `${channel.url}/latest-updates/${page}/`; const res = await unprint.get(url, { selectAll: '.list-thumbs ul > li, .main-thumbs > li' }); if (res.ok) { return scrapeAllA(res.context, channel); } return res.status; } function scrapeAllB(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.title = query.content('.title, h2'); release.duration = query.duration('.time'); const description = query.content('.description, p textarea'); if (!/there is no description/i.test(description)) { release.description = description; } release.poster = query.poster('#player, #example_video_1', { origin: channel.origin, protocol: 'http' }); release.trailer = query.video('#player source, #example_video_1 source', { origin: channel.origin, protocol: 'http' }); release.photos = query.imgs('img[src*="sets/"], img[src*="thumbnails/"]', { origin: channel.origin, protocol: 'http' }); release.entryId = release.poster?.match(/\/sets\/(.*)\//)?.[1] || slugify(release.title); return release; }); } async function fetchLatestB(channel, page) { const url = channel.parameters?.paginated ? `${channel.url}/page/${page}` : channel.url; const res = await unprint.get(url, { selectAll: '#container, article:not(.sortby)', parser: { runScripts: 'dangerously', }, }); if (res.ok) { return scrapeAllB(res.context, channel); } return res.status; } function scrapeSceneA({ query }, url, channel) { const release = {}; release.entryId = new URL(url).pathname.match(/(\d+)\/?$/)?.[1]; release.title = query.content('.title, .scene-title h3').replace(/:$/, ''); const description = query.content('.text-desc p, .info-description p'); if (!/there is no description/i.test(description)) { release.description = description; } release.duration = query.duration('.media-body li span, .duration'); release.actors = query.all('.media-body a[href*="models/"], .models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null, { origin: channel.url, protocol: 'http' }), })); release.tags = query.contents('.media-body a[href*="tags/"], .tags a'); release.poster = Array.from(new Set([ query.img('.player-preview', { protocol: 'http' }), unprint.prefixUrl(`/contents/videos_screenshots/0/${release.entryId}/preview_trailer.mp4.jpg`, channel.url, { protocol: 'http' }), unprint.prefixUrl(query.attribute('param[name="flashvars"]', 'value')?.match(/poster=(.*\.jpg)/)?.[1], channel.url, { protocol: 'http' }), unprint.prefixUrl(`/contents/scenes/${release.entryId}/thumbnails/920x518.jpg`, channel.url, { protocol: 'http' }), ].filter(Boolean))); release.photos = query.urls('.thumb-album a:not([href="#"]), .thumbs-photo a:not([href*="signup"])', { origin: channel.url, protocol: 'http' }) .concat(query.imgs('.thumb-album a[href="#"] img, .thumbs-photo a[href*="signup"] img', { origin: channel.url, protocol: 'http' })); release.trailer = stripQuery(query.url('a[href*="get_file/"], .download a')); return release; } async function fetchSceneA(url, channel) { const res = await unprint.get(url, { select: '.main, .main-content' }); if (res.ok) { return scrapeSceneA(res.context, url, channel); } return res.status; } function scrapeProfileA({ query }, entity) { const profile = {}; const bio = query.all('.list-model-info li, .profile-info li').reduce((acc, bioEl) => ({ ...acc, [slugify(unprint.query.content(bioEl, '.title, span'), '_')]: unprint.query.content(bioEl, ':nth-child(2)') || unprint.query.attribute(bioEl, ':nth-child(2)', 'title') || unprint.query.text(bioEl), }), {}); profile.dateOfBirth = unprint.extractDate(bio.birth_date || bio.date_of_birth, 'DD MMMM, YYYY', { match: null }); profile.birthPlace = bio.nationality || bio.place_of_birth || null; profile.weight = unprint.extractNumber(bio.weight); profile.height = unprint.extractNumber(bio.height); profile.eyes = bio.eye_color; profile.hairColor = bio.hair || bio.hair_color; profile.aliases = query.text('.sub-title')?.replace(/:\s*/, '').split(/,\s*/); profile.measurements = bio.measurements || bio.body_shape_dimensions; const description = query.content('.model-biography p'); const avatar = query.img('.model-box img, .profile-model-photo', { origin: entity.url, protocol: 'http' }); if (!/there is no description/i.test(description)) { profile.description = description; } if (avatar) { profile.avatar = Array.from(new Set([ avatar, avatar.replace('s2_', 's1_'), avatar.replace('s1_', 's2_'), ])); } profile.scenes = scrapeAllA(unprint.initAll(query.all('.list-thumbs .thumb, .main-thumbs > li')), entity); return profile; } async function getActorUrl(actor, entity) { if (actor.url) { return actor.url; } // Double View Casting seems to be case sensitive... const res = await unprint.get(`${entity.origin}/models/search/?q=${actor.name}`, { selectAll: '.thumb-modal, .big-thumb' }); if (!res.ok) { return res.status; } const actorItem = res.context.find(({ query }) => slugify(query.content('.thumb-title a, .title')) === actor.slug); if (!actorItem) { return null; } const actorUrl = actorItem.query.url('a', { origin: entity.url, protocol: 'http' }); if (actorUrl) { return actorUrl; } return null; } async function fetchProfileA(actor, { entity }) { const actorUrl = await getActorUrl(actor, entity); if (actorUrl) { const actorRes = await unprint.get(actorUrl); if (actorRes.ok) { return scrapeProfileA(actorRes.context, entity); } } return null; } module.exports = { a: { fetchLatest: fetchLatestA, fetchScene: fetchSceneA, fetchProfile: fetchProfileA, }, b: { fetchLatest: fetchLatestB, }, };