'use strict'; const qu = require('../utils/qu'); const http = require('../utils/http'); const slugify = require('../utils/slugify'); function scrapeLatest(scenes, site) { return scenes.map(({ query }) => { // if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site); const release = {}; const titleEl = query.q('.galleryTitleText, .articleTitleText'); const [title, ...actors] = titleEl.textContent.split('|'); const date = query.date('.articlePostDateText td', 'MMM D, YYYY'); const url = query.url(titleEl, 'a'); [release.entryId] = url.split('/').slice(-2); release.url = `${site.url}${url}`; if (date) { release.title = title.trim(); release.date = date; } else { // title should contain date instead, not applicable in brief mode release.title = title.slice(title.indexOf(':') + 1).trim(); release.date = qu.ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY'); } release.actors = actors.map(actor => actor.trim()); const description = query.q('.articleCopyText', true); if (description) release.description = description.slice(0, description.lastIndexOf('(')); const duration = query.dur('.articleCopyText a:nth-child(2)'); if (duration) release.duration = duration; release.likes = parseInt(query.q('.articlePostDateText td:nth-child(3)', true), 10); const cover = query.img('a img'); release.covers = [[ cover.replace('_thumbnail', ''), cover, ]]; return release; }); } function scrapeLatestAlt(scenes, site) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('figure a', 'href', { origin: site.parameters.latest }); release.title = query.cnt('.has-text-weight-bold'); release.date = query.date('span.tag', 'YYYY-MM-DD'); release.actors = query.cnts('a.tag'); const cover = query.img('.image img'); release.poster = cover.replace('poster_noplay', 'trailer_noplay'); release.covers = [cover]; release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; return release; }); } function scrapeScene({ query }, site) { const release = {}; const titleEl = query.q('.articleTitleText'); const [title, ...actors] = titleEl.textContent.split('|'); const url = query.url(titleEl, 'a'); [release.entryId] = url.split('/').slice(-2); release.url = `${site.url}${url}`; release.title = title.trim(); release.description = query.q('.articleCopyText', true); release.actors = actors.map(actor => actor.trim()); release.date = query.date('.articlePostDateText', 'MMMM D, YYYY'); release.duration = query.dur('.articlePostDateText a:nth-child(2)'); const [cover, ...photos] = query.imgs('img[src*="images"]'); release.covers = [cover]; release.photos = photos; release.poster = query.poster(); const trailer = query.trailer(); if (trailer) release.trailer = { src: trailer }; return release; } async function scrapeSceneAlt({ query }, url, channel, session) { const release = {}; release.title = query.cnt('.columns div.is-size-5'); release.description = query.cnt('.has-background-black-ter > div:nth-child(4)'); release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD'); release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]'); release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span'); release.poster = query.img('#videoPlayer, #iodvideo', 'poster'); release.photos = query.imgs('body > div:nth-child(6) img'); release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; release.trailer = query.video(); if (!release.trailer) { const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session }); if (trailerRes.ok) { release.trailer = trailerRes.body; } } return release; } async function fetchLatest(site, page = 1) { const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`) // || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page || `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`; const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column')) // || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]')) || qu.get(url)); // JSON containing html as a property if (res.ok) { if (site.parameters?.scraper === 'alt') { return scrapeLatestAlt(res.items, site); } /* if (site.slug === 'paintoy') { return scrapeLatest(res.items, site); } */ return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site); } return res.status; } async function fetchScene(url, site) { const session = http.session(); const res = await qu.get(url, null, null, { session }); if (res.ok) { if (site.parameters?.scraper === 'alt') { return scrapeSceneAlt(res.item, url, site, session); } return scrapeScene(res.item, site); } return res.status; } module.exports = { fetchLatest, fetchScene, };