'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); function scrapeLatestBlock(scenes) { return scenes.map(({ html, query }) => { const release = {}; release.title = query.content('h4 a'); release.url = query.url('h4 a'); release.date = unprint.extractDate(html, 'MM/DD/YYYY', { match: /\d{2}\/\d{2}\/\d{4}/ }); release.actors = query.contents('.tour_update_models a'); // native videothumb entry ID does not occur on scene page, date is not available on all sites release.entryId = slugify([...[].concat(release.actors || []).sort(), release.title]); release.poster = query.dataset('.video_placeholder', 'src'); release.teaser = query.video(); return release; }); } function scrapeLatestClassic(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a'); release.title = query.content('.update_title_small') || query.content('a:nth-child(2)'); release.description = query.attribute('a', 'title'); release.date = query.date('.date_small, .update_date', 'MM/DD/YYYY'); release.duration = query.number('.update_counts') * 60; const actors = query.contents('.update_models a'); release.actors = actors.length === 0 ? query.content('.update_models')?.split(/,\s*/) // not all entries have models listed : actors; // native videothumb entry ID does not occur on scene page, date is not available on all sites release.entryId = slugify([...[].concat(release.actors || []).sort(), release.title]); const photoCount = query.number('.update_thumb', { attribute: 'cnt' }); [release.poster, ...release.photos] = Array.from({ length: photoCount }) .map((value, index) => query.attribute('.update_thumb', `src${index}_3x`) || query.attribute('.update_thumb', `src${index}_2x`) || query.attribute('.update_thumb', `src${index}_1x`)); return release; }); } function scrapeSceneBlock({ query }) { const release = {}; release.title = query.content('.indScene h2, .indScene h1'); release.description = query.content('.description'); release.date = query.date('.sceneDateP span', 'MM/DD/YYYY'); release.actors = query.all('.sceneTextLink .tour_update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); release.duration = query.number('.sceneDateP', { match: /(\d+)\s+min/i, matchIndex: 1 }) * 60; release.entryId = slugify([...release.actors.map((actor) => actor.name).sort(), release.title]); release.stars = query.number('.sceneRating'); release.poster = query.img('#trailer_thumb img[src*=content]'); release.trailer = query.video('#trailerVideo source'); release.photoCount = query.number('.sceneDateP', { match: /(\d+)\s+(photo|pic)/i, matchIndex: 1 }); return release; } async function fetchPhotos(url) { const res = await unprint.get(url); if (res.ok) { return res.context.query.imgs('.grid-gallery img'); } return []; } async function scrapeSceneClassic({ query }, context, options) { const release = {}; release.title = query.content('.update_title'); release.description = query.content('.update_description'); release.date = query.date('.update_date', 'MM/DD/YYYY'); release.actors = query.all('.update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); release.entryId = slugify([...release.actors.map((actor) => actor.name).sort(), release.title]); release.tags = query.contents('.update_tags a'); release.stars = query.number('.gallery_info', { match: /average\s+rating:\s+(\d+\.\d+)/i, matchIndex: 1 }); release.poster = query.img('#vidplayer', { attribute: 'poster' }) || query.img('#postroll_url img'); release.trailer = query.video('#vidplayer source'); const gallery = query.url('//a[img[contains(@src, "gallery.gif")]]'); if (gallery && options.includePhotos) { release.photos = await fetchPhotos(gallery); } return release; } async function fetchLatest(site, page = 1) { const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`; const res = await unprint.get(url, { selectAll: '.movieBlock, .videoBlock, .update_details, .update_details' }); return res; } async function fetchLatestClassic(site, page) { if (!site.parameters) { return null; } const res = await fetchLatest(site, page); if (res.ok) { return scrapeLatestClassic(res.context, site); } return res.status; } async function fetchLatestBlock(site, page) { if (!site.parameters) { return null; } const res = await fetchLatest(site, page); if (res.ok) { return scrapeLatestBlock(res.context, site); } return res.status; } module.exports = { fetchLatest: fetchLatestClassic, scrapeScene: scrapeSceneClassic, useUnprint: true, block: { scrapeScene: scrapeSceneBlock, fetchLatest: fetchLatestBlock, useUnprint: true, }, };