'use strict'; const format = require('template-format'); const qu = require('../utils/q'); const slugify = require('../utils/slugify'); const { convert } = require('../utils/convert'); function deriveEntryId(release) { if (release.date && release.url) { const slug = new URL(release.url).pathname.match(/\/trailers\/(.*).html/)[1]; return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`; } if (release.date && release.title) { return `${slugify(qu.formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; } return null; } function extractPoster(posterPath, channel, baseRelease) { if (posterPath && !/400.jpg/.test(posterPath)) { const poster = qu.prefixUrl(posterPath, channel.parameters?.media || channel.url); const posterSources = [ poster, // upscaled poster.replace('-1x', '-2x'), poster.replace('-1x', '-3x'), ]; if (baseRelease?.poster) { return [posterSources, [baseRelease.poster]]; } return [posterSources, []]; } return [baseRelease?.poster || null, []]; } function getImageWithFallbacks(q, selector, site, el) { const sources = el ? [ q(el, selector, 'src0_3x'), q(el, selector, 'src0_2x'), q(el, selector, 'src0_1x'), ] : [ q(selector, 'src0_3x'), q(selector, 'src0_2x'), q(selector, 'src0_1x'), ]; return sources.filter(Boolean).map((src) => `${site.parameters?.media || site.url}${src}`); } function scrapeAllClassic(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])'); release.entryId = query.url('.updateThumb img', 'alt'); release.title = query.cnt('.updateInfo h5 a'); release.actors = query.cnts('.tour_update_models a'); release.date = query.date('.availdate, .updateInfo p span:last-child', 'MM/DD/YYYY'); release.poster = query.img('.updateThumb img'); const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1]; if (trailer) { release.trailer = `${channel.url}${trailer}`; } return release; }); } function scrapeAllTubular(scenes, channel, accNetworkReleases) { return scenes.map(({ query }) => { const release = {}; release.title = query.q('h4 a', 'title') || query.q('h4 a', true); release.url = query.url('h4 a'); release.date = query.date('.more-info-div', 'MMM D, YYYY'); release.duration = query.dur('.more-info-div'); const posterPath = query.q('.img-div img', 'src0_1x') || query.img('img.video_placeholder'); if (posterPath) { const poster = /^http/.test(posterPath) ? posterPath : `${channel.parameters?.media || channel.url}${posterPath}`; release.poster = [ poster.replace('-1x', '-3x'), poster.replace('-1x', '-2x'), poster, ]; } release.teaser = query.video(); // release.entryId = q('.img-div img', 'id')?.match(/set-target-(\d+)/)[1]; release.entryId = deriveEntryId(release); if (channel.parameters?.accFilter && accNetworkReleases?.map((accRelease) => accRelease.entryId).includes(release.entryId)) { // filter out releases that were already scraped from a categorized site, requeryires sequeryential site scraping return null; } return release; }); } function scrapeSceneClassic({ query, html }, url, channel) { const release = {}; release.title = query.q('.updatesBlock h2', true); release.poster = query.meta('property="og:image"'); release.entryId = release.poster.match(/\/content\/(.*)\//)?.[1]; const trailer = html.match(/src="(.+\.mp4)"/)?.[1]; if (trailer) { release.trailer = { src: `${channel.url}${trailer}`, }; } return release; } function scrapeSceneTubular({ query, html }, entity, url, baseRelease) { const release = {}; release.title = query.q('.trailer-section-head .section-title, .title-block .section-title', true); release.description = query.text('.row .update-info-block'); release.date = query.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); release.duration = query.dur('.update-info-row:nth-child(2)'); release.actors = query.all('.models-list-thumbs a').map((el) => ({ name: query.cnt(el, 'span'), avatar: getImageWithFallbacks(query.q, 'img', entity, el), url: query.url(el, null), })); release.tags = query.all('.tags a', true); const posterPath = query.q('.player-thumb img', 'src0_1x'); const trailer = html.match(/ channel.parameters?.match || channel.name).join('|'), 'i'); const channel = release.tags.find((tag) => channelRegExp.test(tag)); if (channel) { release.channel = slugify(channel, ''); } } release.entryId = deriveEntryId(release); return release; } async function scrapeProfile({ query }, entity, parameters) { const profile = {}; const bio = query.cnt('.model_bio, .detail-div'); const avatarEl = query.q('.model_bio_pic img, .model_bio_thumb'); profile.age = Number(bio?.match(/Age:\s*(\d{2})/)?.[1]) || null; profile.dateOfBirth = qu.parseDate(bio?.match(/Age:\s*(\w+ \d{1,2}, \d{4})/)?.[0], 'MMMM D, YYYY'); profile.height = convert(bio?.match(/\d+\s*(feet|')\s*\d+\s*(inches|"|$)/)?.[0], 'cm'); profile.measurements = bio?.match(/\w+[-x]\d+[-x]\d+/)?.[0] || null; profile.aliases = bio?.match(/also known as:\s*([\w\s]+(,\s*)?)+/i)?.[1].split(/,\s*/) || []; if (avatarEl) { const avatarSources = [ avatarEl.getAttribute('src0_3x'), avatarEl.getAttribute('src0_2x'), avatarEl.getAttribute('src0_1x'), avatarEl.getAttribute('src0'), avatarEl.getAttribute('src'), ] .filter((avatar) => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images .map((avatar) => qu.prefixUrl(avatar, entity.url)); if (avatarSources.length) profile.avatar = avatarSources; } if (parameters?.layout === 'classic') { profile.scenes = scrapeAllClassic(qu.initAll(query.all('.bodyArea .updateItem')), entity); } if (parameters?.layout === 'tubular') { profile.scenes = scrapeAllTubular(qu.initAll(query.all('.modelfeature, .item-video')), entity); } return profile; } async function fetchLatest(site, page = 1, options, preData, allScraper) { const url = (site.parameters?.latest && format(site.parameters.latest, { page })) || `${site.url}/categories/movies_${page}_d.html`; const res = await qu.getAll(url, '.modelfeature, .item-video, .bodyArea .updateItem'); if (!res.ok) { return res.status; } return allScraper(res.items, site, preData?.uniqueReleases); } async function fetchUpcomingClassic(channel) { const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem'); if (res.ok) { return scrapeAllClassic(res.items, channel); } return res.status; } async function fetchLatestClassic(channel, page, options, preData) { return fetchLatest(channel, page, options, preData, scrapeAllClassic); } async function fetchLatestTubular(channel, page, options, preData) { return fetchLatest(channel, page, options, preData, scrapeAllTubular); } async function fetchProfile({ name: actorName, url }, { entity, parameters }) { const actorSlugA = slugify(actorName, ''); const actorSlugB = slugify(actorName, '-'); if (!url && !parameters?.profile && !entity.url) { return null; } const urls = Array.from(new Set([ url, entity.parameters?.profile ? format(entity.parameters.profile, { actorSlug: actorSlugA }) : `${entity.url}/models/${actorSlugA}.html`, entity.parameters?.profile ? format(entity.parameters.profile, { actorSlug: actorSlugB }) : `${entity.url}/models/${actorSlugB}.html`, ])); return urls.reduce(async (chain, profileUrl) => { const profile = await chain; if (profile) { return profile; } if (!profileUrl) { return null; } const res = await qu.get(profileUrl); if (res.statusCode === 200) { return scrapeProfile(res.item, entity, parameters); } return null; }, Promise.resolve()); } module.exports = { classic: { fetchLatest: fetchLatestClassic, fetchUpcoming: fetchUpcomingClassic, fetchProfile, scrapeAll: scrapeAllClassic, scrapeScene: scrapeSceneClassic, }, tubular: { fetchLatest: fetchLatestTubular, fetchProfile, scrapeAll: scrapeAllTubular, scrapeScene: scrapeSceneTubular, }, };