'use strict'; const { get, geta, ctxa } = require('../utils/q'); function extractActors(actorString) { return actorString ?.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '') // remove Patient:, (date) and other nonsense .split(/\band\b|\bvs\b|\/|,|&/ig) .map(actor => actor.trim()) .filter(actor => !!actor && !/\banal\b|\bschool\b|\bgamer\b|\breturn\b|\bfor\b|\bare\b|\bpart\b|realdoll|bimbo|p\d+/ig.test(actor)) || []; } function matchActors(actorString, models) { if (!actorString) { return []; } return models.filter(model => new RegExp(model.name, 'i').test(actorString)); } function scrapeLatest(scenes, site, models) { return scenes.map(({ qu }) => { const release = {}; const pathname = qu.url('a.itemimg').slice(1); [release.entryId] = pathname.split('/').slice(-1); release.url = `${site.url}${pathname}`; release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true); release.description = qu.q('.mas_longdescription', true); release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/); const actorString = qu.q('.mas_description', true); const actors = matchActors(actorString, models); if (actors.length > 0) release.actors = actors; else release.actors = extractActors(actorString); const posterPath = qu.img('.itemimg img'); release.poster = `${site.url}/${posterPath}`; return release; }); } function scrapeScene({ html, qu }, url, site, include, models) { const release = { url }; [release.entryId] = url.split('/').slice(-1); release.title = qu.q('.mas_title', true); release.description = qu.q('.mas_longdescription', true); release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, ''); const actors = matchActors(actorString, models); if (actors.length > 0) release.actors = actors; else release.actors = extractActors(actorString); release.tags = qu.all('.tags a', true); release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`); const posterIndex = 'splash:'; const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4); if (poster) release.poster = `${site.url}/${poster}`; const trailerIndex = html.indexOf('video/mp4'); const trailer = html.slice(html.indexOf('/content', trailerIndex), html.indexOf('.mp4', trailerIndex) + 4); if (trailer) release.trailer = { src: `${site.url}${trailer}` }; return release; } function extractModels({ el }, site) { const models = ctxa(el, '.item'); return models.map(({ qu }) => { const actor = { gender: 'female' }; const avatar = qu.q('.itemimg img'); actor.avatar = `${site.url}/${avatar.src}`; actor.name = avatar.alt .split(':').slice(-1)[0] .replace(/xtreme girl|nurse/ig, '') .trim(); const actorPath = qu.url('.itemimg'); actor.url = `${site.url}${actorPath.slice(1)}`; return actor; }); } async function fetchModels(site, page = 1, accModels = []) { const url = `${site.url}/?models/${page}`; const res = await get(url); if (res.ok) { const models = extractModels(res.item, site); const nextPage = res.item.qa('.pagenumbers', true) .map(pageX => Number(pageX)) .filter(Boolean) // remove << and >> .includes(page + 1); if (nextPage) { return fetchModels(site, page + 1, accModels.concat(models)); } return accModels.concat(models, { name: 'Dr. Gray' }); } return []; } async function fetchLatest(site, page = 1, models) { const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`; const res = await geta(url, '.item'); return res.ok ? scrapeLatest(res.items, site, models) : res.status; } async function fetchScene(url, site, release, beforeFetchLatest) { const models = beforeFetchLatest || await fetchModels(site); const res = await get(url); return res.ok ? scrapeScene(res.item, url, site, models) : res.status; } module.exports = { fetchLatest, fetchScene, beforeFetchLatest: fetchModels, };