'use strict'; const unprint = require('unprint'); const slugify = require('../utils/slugify'); function scrapeSceneApi(scene, channel, parameters) { const release = {}; release.entryId = scene.id; release.shootId = scene.serial_number; release.url = `${channel.origin}${parameters.basePath || ''}/videos/${release.shootId}`; release.title = scene.title; release.altTitles = [scene.title_cn].filter(Boolean); release.description = scene.description; release.altDescriptions = [scene.description_cn].filter(Boolean); release.date = new Date(scene.published_at); release.duration = scene.duration; release.actors = scene.models?.map((model) => ({ name: model.name, alias: [model.name_cn].filter(Boolean), gender: model.gender, entryId: model.id, avatar: Array.from(new Set([ model.avatar, model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed ])).filter(Boolean), })).filter((actor) => actor.name?.toLowerCase() === 'amateur'); // generic name for various amateur models release.tags = scene.tags?.map((tag) => tag.name); release.poster = scene.cover; release.trailer = scene.preview_video; return release; } async function fetchLatestApi(channel, page, { parameters }) { const res = await unprint.get(`${parameters.api}/videos?page=${page}&pageSize=12&sort=published_at`); if (res.ok && res.data?.status) { return res.data.data.list.map((scene) => scrapeSceneApi(scene, channel, parameters)); } return res.status; } async function fetchSceneApi(url, channel, _baseRelease, { parameters }) { // shallow data missing actors and tags const shootId = new URL(url).pathname.match(/\/videos\/([\w-]+)/)?.[1]; if (!shootId) { return null; } const res = await unprint.get(`${parameters.api}/videos/${shootId}`); if (res.ok) { return scrapeSceneApi(res.data.data, channel, parameters); } return res.status; } function scrapeAll(scenes) { return scenes.map(({ query }) => { const release = {}; release.entryId = query.attribute(null, 'video-id'); const url = query.url(null); if (url && !url.includes('/plans')) { const { origin, pathname, searchParams } = new URL(url); release.url = `${origin}${pathname}`; release.shootId = pathname.match(/((HP)|(LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))\w*-\w+((EP)?\d+)?/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives release.actors = searchParams.get('models_name')?.split(',').map((actor) => { const [han, english] = actor.split('/').map((name) => name.trim()); if (/amateur/i.test(english)) { // not a name return null; } return { name: english || han, alias: english && han, }; }).filter(Boolean); } const rawTitle = query.content('.video-title div')?.replace(release.shootId, ''); if (rawTitle) { // find / closest to Han in case there are multiple, account for no / at all const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index; const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex; if (hanIndex && splitIndex > -1) { release.title = rawTitle.slice(0, splitIndex).trim(); release.altTitles = [rawTitle.slice(splitIndex + 1).trim()]; } else { release.title = rawTitle; } } release.duration = query.duration('.timestamp'); const poster = query.img('img', { attribute: 'data-src' }); if (poster) { release.poster = [ poster.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080'), poster, ]; } release.teaser = query.video(null, { attribute: 'data-video-src' }); return release; }); } function scrapeProfileApi(model, channel, parameters) { const profile = {}; if (model.name?.toLowerCase() === 'amateur') { return null; // generic profile for various amateur models } profile.entryId = model.id; profile.url = `${channel.origin}${parameters.basePath || ''}/models/${model.id}`; profile.description = model.description || null; profile.gender = model.gender; profile.alias = [model.name_cn].filter(Boolean); if (!model.birth_day?.includes('0001')) { profile.dateOfBirth = unprint.extractDate(model.birth_day, 'YYYY-MM-DD'); } profile.birthPlace = model.birth_place || null; profile.height = model.height_cm || null; profile.weight = model.weight_kg || null; profile.bust = model.measurements_chest; profile.waist = model.measurements_waist; profile.hip = model.measurements_hips; profile.avatar = Array.from(new Set([ model.avatar, model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed ])).filter(Boolean); profile.socials = model.socialmedia; profile.scenes = model.videos.map((scene) => scrapeSceneApi(scene, channel, parameters)); return profile; } async function getModelId(actor, parameters) { if (actor.url) { const modelId = new URL(actor.url).pathname.match(/\/models\/\d+/)?.[1]; if (modelId) { return Number(modelId); } } const res = await unprint.get(`${parameters.api}/search?keyword=${slugify(actor.name, '+')}`); if (res.ok) { const model = res.data.data?.models?.find((modelResult) => slugify(modelResult.name) === actor.slug); if (model) { return model.id; } } return null; } async function fetchProfileApi(actor, { entity, parameters }) { const modelId = await getModelId(actor, parameters); if (modelId) { const res = await unprint.get(`${parameters.api}/models/${modelId}`); if (res.ok && res.data.data) { return scrapeProfileApi(res.data.data, entity, parameters); } } return null; } function getBioXPath(field) { return [ `//span[text()="${field}"]/following-sibling::span`, `//span[text()="${field}"]/following-sibling::text()`, ]; } function scrapeProfile({ query }, url) { const profile = { url }; const avatar = query.img('div[class*="prof-pic"] > img'); if (avatar) { profile.avatar = [ avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080'), avatar, ]; } profile.description = query.content('h2') || null; // ::node()[self::span or self::text()] not supported by unprint/JSDOM profile.height = query.number(getBioXPath('Height'), { match: /(\d+) cm/, matchIndex: 1 }) || null; profile.weight = query.number(getBioXPath('Weight'), { match: /(\d+) kg/, matchIndex: 1 }) || null; profile.measurements = query.content(getBioXPath('Measurements')) || null; profile.birthPlace = query.content(getBioXPath('Birth Place')) || null; profile.banner = query.img('div[class*="banner"] > img'); profile.photos = query.imgs('#MusModelSwiper img'); return profile; } async function getCookie(channel, _parameters) { const tokenRes = await unprint.get(channel.url); if (!tokenRes.ok) { return tokenRes.status; } const csrfToken = tokenRes.context?.query.attribute('meta[name="csrf-token"]', 'content'); const cookie = tokenRes.response.headers['set-cookie']?.join(';'); if (!csrfToken || !cookie) { return null; } const confirmAdultRes = await unprint.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, { headers: { cookie, 'x-csrf-token': csrfToken, }, }); if (!confirmAdultRes.ok) { return confirmAdultRes.status; } return cookie; } async function fetchLatest(channel, page, context) { const cookie = await getCookie(channel, context.parameters); const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, { selectAll: '.row a[video-id]', headers: { cookie, }, }); if (res.ok) { return scrapeAll(res.context, channel); } return res.status; } // deep pages are paywalled async function searchProfile(actor, context, cookie) { const searchRes = await unprint.get(`${context.channel.url}${context.parameters.searchPath || '/livesearch'}?${context.parameters.searchParameter || 'keyword'}=${actor.name}`, { headers: { cookie, }, }); if (!searchRes.ok) { return searchRes.status; } return searchRes.context.query.url(`a[title="${actor.name}"]`); } async function fetchProfile(actor, context) { const cookie = await getCookie(context.entity, context.parameters); const actorUrl = actor.url || await searchProfile(actor, context, cookie); if (!actorUrl) { return null; } const res = await unprint.get(actorUrl, { headers: { cookie, }, }); if (res.ok) { return scrapeProfile(res.context, actorUrl); } return null; } module.exports = { fetchLatest, fetchProfile, api: { fetchLatest: fetchLatestApi, fetchScene: fetchSceneApi, fetchProfile: fetchProfileApi, }, };