'use strict'; // ALSO USED BY THE FLOURISH const unprint = require('unprint'); const slugify = require('../utils/slugify'); const { feetInchesToCm } = require('../utils/convert'); const placeholder = /images\/p\d+\.jpe?g/i; function getEntryId(release) { return slugify(new URL(release.url).pathname.match(/\/([\w-]+)\.html/)?.[1] || [unprint.formatDate(release.date, 'YYYY-MM-DD'), release.title, ...release.actors]); } function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.url = query.url('a'); release.title = query.content('a span'); release.date = query.date('.timeDate', 'YYYY-MM-DD'); release.duration = query.duration('.timeDate'); release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); const poster = query.img('img.mainThumb'); const previewCount = query.number('img.mainThumb', { attribute: 'cnt' }); if (poster && !placeholder.test(poster)) { const posterFallbacks = [ poster.replace('-1x', '-3x'), poster.replace('-1x', '-2x'), poster.replace('-1x', '-4x'), poster, ]; release.poster = posterFallbacks; } if (previewCount) { release.photos = Array.from( { length: previewCount - 1 }, (value, index) => [3, 2, 4, 1].map((scale) => unprint.prefixUrl(query.img('img.mainThumb', { attribute: `src${index + 1}_${scale}x` }))).filter(Boolean), // 4x is unnecessarily big and possibly upscaled ).filter(Boolean); } release.photoCount = query.number('.timeDate', { match: /(\d+) photos/i, matchIndex: 1 }); release.entryId = getEntryId(release); return release; }); } function scrapeScene({ query, html }, { url, entity, baseRelease }) { const release = { url }; release.title = query.content('.title h2'); release.description = query.content('.description p'); release.date = query.date('.info p', 'MMMM D, YYYY'); release.duration = query.duration('.info p'); release.actors = query.all('.info a[href*="models/"], .info a[href*="sets.php"]').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), })); const poster = unprint.prefixUrl(query.img('.update_thumb') || html.match(/poster="(.*\.jpg)"/)?.[1], entity.url); if (poster && !placeholder.test(poster)) { const posterFallbacks = [ poster.replace('-1x', '-3x'), poster.replace('-1x', '-2x'), poster.replace('-1x', '-4x'), poster, ]; // scene page poster usually different from overview page, don't replace if (baseRelease?.poster && baseRelease.poster !== poster) { release.photos = baseRelease.photos ? [posterFallbacks, ...baseRelease.photos] : [posterFallbacks]; } else { release.poster = posterFallbacks; } } const trailer = html.match(/src="(.*\.mp4)"/)?.[1]; if (trailer) { release.trailer = unprint.prefixUrl(encodeURI(trailer), entity.url); } release.tags = query.contents('.info .tags a'); release.photoCount = query.number('.info', { match: /(\d+) photos/i, matchIndex: 1 }); release.entryId = getEntryId(release); return release; } function scrapeMovie({ query, element }, { entity, url }) { const release = { url }; release.title = query.content('.title h2'); release.description = query.content('.aboutArea p'); release.covers = [[ query.img('.update_thumb', { attribute: 'src0_2x', origin: entity.url }), query.img('.update_thumb', { attribute: 'src0_1x', origin: entity.url }), query.img('.update_thumb', { attribute: 'src0', origin: entity.url }), // usually upscaled query.img('.update_thumb', { attribute: 'src0_4x', origin: entity.url }), query.img('.update_thumb', { attribute: 'src0_3x', origin: entity.url }), ].filter(Boolean)]; release.entryId = getEntryId(release); release.scenes = scrapeAll(unprint.initAll(element, '.item-video')); return release; } function scrapeProfile({ query, element }, { url, entity }) { const profile = { url }; const bio = Object.fromEntries(query.all('.stats li') .map((row) => [ slugify(unprint.query.content(row, '.data-name, span'), '_'), unprint.query.text(row), ]) .filter(([key, value]) => key && value)); profile.description = query.content('.aboutArea p'); profile.birthPlace = bio.place_of_birth; profile.dateOfBirth = unprint.extractDate(bio.age, 'MMMM D, YYYY'); profile.height = Number(bio.height?.match(/(\d+)\s*cm/)?.[1]) || (/\d fe*t \d+ inch/i.test(bio.height) && feetInchesToCm(bio.height)) || null; profile.measurements = bio.measurements; profile.hairColor = bio.hair_color; profile.eyes = bio.eye_color; profile.avatar = [ query.img('.model_bio_thumb', { attribute: 'src0_4x', origin: entity.url }), query.img('.model_bio_thumb', { attribute: 'src0_3x', origin: entity.url }), query.img('.model_bio_thumb', { attribute: 'src0_2x', origin: entity.url }), query.img('.model_bio_thumb', { attribute: 'src0_1x', origin: entity.url }), query.img('.model_bio_thumb', { attribute: 'src0', origin: entity.url }), ].filter((avatar) => avatar && !placeholder.test(avatar)); profile.scenes = scrapeAll(unprint.initAll(element, '.item-video')); return profile; } async function fetchLatest(channel, page = 1, context) { const url = `${channel.url}${context.parameters.path || ''}/categories/movies_${page}_d.html`; const res = await unprint.get(url, { selectAll: '.item-video' }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include, parameters }) { const res = await [ actorUrl, `${entity.url}${parameters.path || ''}/models/${slugify(actorName, '-')}.html`, `${entity.url}${parameters.path || ''}/models/${slugify(actorName, '')}.html`, ].reduce(async (chain, url) => { const prevRes = await chain; if (prevRes.ok || !url) { return prevRes; } const actorRes = await unprint.get(url); if (actorRes.ok) { return { ...actorRes, url, }; } return prevRes; }, Promise.resolve({ ok: false, status: null })); if (res.ok) { return scrapeProfile(res.context, { entity, include, url: res.url }); } return res.status; } module.exports = { fetchLatest, fetchProfile, scrapeScene: { scraper: scrapeScene, unprint: true, }, scrapeMovie: { scraper: scrapeMovie, unprint: true, }, };