'use strict'; const blake2 = require('blake2'); const knex = require('../knex'); const { ex, ctxa } = require('../utils/q'); const http = require('../utils/http'); async function getSiteSlugs() { return knex('sites') .pluck('sites.slug') .join('networks', 'networks.id', 'sites.network_id') .where('networks.slug', 'perfectgonzo'); } function getHash(identifier) { const hash = blake2.createHash('blake2b', { digestLength: 8 }); hash.update(Buffer.from(identifier)); return hash.digest('hex'); } function extractMaleModelsFromTags(tagContainer) { if (!tagContainer) { return []; } const tagEls = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() })).filter(node => node.text.length > 0); const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models'); if (modelLabelIndex > -1) { const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3); const maleModels = tagEls.slice(modelLabelIndex + 1, nextLabelIndex); return maleModels.map(model => model.text); } return []; } async function extractChannelFromPhoto(photo, metaSiteSlugs) { const siteSlugs = metaSiteSlugs || await getSiteSlugs(); const channelMatch = photo.match(new RegExp(siteSlugs.join('|'))); if (channelMatch) { return channelMatch[0]; } return null; } async function scrapeLatest(html, site) { const siteSlugs = await getSiteSlugs(); const { element } = ex(html); return ctxa(element, '#content-main .itemm').map(({ q, qa, qlength, qdate, qimages, }) => { const release = { site, meta: { siteSlugs, }, }; const sceneLink = q('a'); release.title = sceneLink.title; release.url = `${site.url}${sceneLink.href}`; release.date = qdate('.nm-date', 'MM/DD/YYYY'); const slug = new URL(release.url).pathname.split('/')[2]; release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`); release.actors = release.title.split('&').map(actor => actor.trim()); [release.poster, ...release.photos] = qimages('.bloc-link img'); release.tags = qa('.dropdown ul a', true).slice(1); release.duration = qlength('.dropdown p:first-child'); return release; }); } async function scrapeScene(html, site, url, metaSiteSlugs) { const { q, qa, qlength, qdate, qposter, qtrailer, } = ex(html); const release = { url, site }; release.title = q('#movie-header h2', true); release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); release.description = q('.container .mg-md', true); release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)'); release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container'))); release.tags = qa('.tag-container a', true); const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true); if (/4K/.test(uhd)) release.tags = release.tags.concat('4k'); release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src); release.poster = qposter(); const trailer = qtrailer(); if (trailer) release.trailer = { src: trailer }; if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs); if (release.channel) { const { pathname } = new URL(url); release.url = `https://${release.channel}.com${pathname}`; const slug = pathname.split('/')[2]; release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`); } return release; } async function fetchLatest(site, page = 1) { const url = `${site.url}/movies/page-${page}`; const res = await http.get(url); if (res.statusCode === 200) { return scrapeLatest(res.body.toString(), site); } return []; } async function fetchScene(url, site, release) { const res = await http.get(url); if (res.statusCode === 200) { return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs); } return []; } module.exports = { fetchLatest, fetchScene, };