'use strict'; const qu = require('../utils/qu'); // TODO: profile scraping function scrapeLatestBlog(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a.more:not([href*="/join.php"])', 'href', { origin: channel.url }); if (release.url) { release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-'); } else { release.entryId = query.img('.bigthumb').match(/\/scenes\/(\w+)/)?.[1]; } release.title = query.q('h5 strong', true)?.match(/. - (.+)$/)[1] || query.text('.videos h3'); release.description = query.text('p'); release.date = query.date('h5 strong, .videos h3', 'MMM. DD, YYYY', /\w+. \d{2}, \d{4}/); // remove common patterns so only the name is left const curatedTitle = release.title.replace(/\b(part \d|\banal|bts)\b/gi, '').trim(); if (!/\band\b/.test(curatedTitle) && new RegExp(curatedTitle).test(release.description)) { // scene title is probably the actor name release.actors = [release.title]; } release.poster = query.img('.bigthumb', null, { origin: channel.url }); release.photos = query.imgs('.smallthumb', null, { origin: channel.url }); release.tags = query.all('a[href*="/keywords"]', true); return release; }); } function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('.updateInfo h5 a:not([href*="content/"]):not([href*="#coming"])'); release.entryId = query.url('.updateThumb img', 'alt'); release.title = query.q('.updateInfo h5 a', true); release.actors = query.all('.tour_update_models a', true); release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY'); release.poster = query.img('.updateThumb img'); const trailer = query.q('.updateInfo h5 a', 'onclick')?.match(/'(.+)'/)?.[1]; if (trailer) { release.trailer = { src: `${channel.url}${trailer}`, }; } return release; }); } function scrapeSceneBlog({ query }, url, channel) { const release = {}; release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)\/(\d+)/).slice(1, 3).join('-'); release.title = query.text('h4 strong, .videos h3'); release.description = query.q('#about p, .videos p', true); const actors = query.urls('a[href*="/girl/"]').map(actorUrl => actorUrl.match(/video-([\w\s]+)/)?.[1]).filter(Boolean); if (actors.length > 0) { release.actors = actors; } else { // release.actors = [query.q('.previewmed h5 strong', true)?.match(/^([\w\s]+),/)?.[0] || query.q('.videos h3', true)].filter(Boolean); release.actors = [release.title]; } release.tags = query.all('.info a[href*="/keywords"], .buttons a[href*="/keywords"]', true); release.poster = query.img('#info .main-preview, .bigthumb', null, { origin: channel.url }); release.photos = [query.img('.previewmed img', null, { origin: channel.url })].concat(query.imgs('.hd-clip img, .smallthumb', null, { origin: channel.url })).filter(photo => photo); return release; } function scrapeScene({ query, html }, url, channel) { const release = {}; release.title = query.q('.updatesBlock h2', true); release.poster = query.meta('property="og:image"'); release.entryId = release.poster.match(/\/content\/(.*)\//)?.[1]; const trailer = html.match(/src="(.+\.mp4)"/)?.[1]; if (trailer) { release.trailer = { src: `${channel.url}${trailer}`, }; } return release; } async function fetchLatestBlog(channel, page) { const url = `${channel.url}/free/updates/videos/${(page - 1) * 10}`; const res = await qu.getAll(url, '.videos'); return res.ok ? scrapeLatestBlog(res.items, channel) : res.status; } async function fetchLatest(channel, page = 1) { if (channel.parameters?.blog) { return fetchLatestBlog(channel, page); } const url = `${channel.url}/categories/Movies_${page}_d.html`; const res = await qu.getAll(url, '.bodyArea .updateItem'); return res.ok ? scrapeAll(res.items, channel) : res.status; } async function fetchUpcoming(channel) { if (channel.parameters?.blog) { return []; } const res = await qu.getAll(channel.url, '#owl-upcomingScenes .updateItem'); return res.ok ? scrapeAll(res.items, channel) : res.status; } async function fetchScene(url, channel) { const res = await qu.get(url); if (res.ok) { if (channel.parameters?.blog) { return scrapeSceneBlog(res.item, url, channel); } return scrapeScene(res.item, url, channel); } return res.status; } module.exports = { fetchLatest, fetchScene, fetchUpcoming, };