331 lines
8.4 KiB
JavaScript
331 lines
8.4 KiB
JavaScript
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
/**
 * Convert a scene record from the JSON API into a release.
 *
 * @param {Object} scene - Scene record; reads id, serial_number, title(_cn), description(_cn),
 *   published_at, duration, models, tags, cover and preview_video.
 * @param {Object} channel - Channel entity; `origin` is the base of the scene URL.
 * @param {Object} parameters - Scraper parameters; optional `basePath` is inserted after the origin.
 * @returns {Object} Release built from the scene record.
 */
function scrapeSceneApi(scene, channel, parameters) {
	const release = {};

	release.entryId = scene.id;
	release.shootId = scene.serial_number;

	release.url = `${channel.origin}${parameters.basePath || ''}/videos/${release.shootId}`;

	release.title = scene.title;
	release.altTitles = [scene.title_cn].filter(Boolean);

	release.description = scene.description;
	release.altDescriptions = [scene.description_cn].filter(Boolean);

	release.date = new Date(scene.published_at);
	release.duration = scene.duration;

	// stays undefined when the scene has no models list
	release.actors = scene.models?.map((model) => ({
		name: model.name,
		alias: [model.name_cn].filter(Boolean),
		gender: model.gender,
		entryId: model.id,
		avatar: Array.from(new Set([
			model.avatar,
			model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed
		])).filter(Boolean),
	})).filter((actor) => actor.name?.toLowerCase() !== 'amateur'); // drop the generic name used for various amateur models, keep everyone else

	release.tags = scene.tags?.map((tag) => tag.name);

	release.poster = scene.cover;
	release.trailer = scene.preview_video;

	return release;
}
|
|
|
|
/**
 * Fetch one page of the latest scenes from the JSON API.
 *
 * @param {Object} channel - Channel entity, passed through to scrapeSceneApi.
 * @param {number} page - Page number to request.
 * @param {Object} context - Scraper context; `parameters.api` is the API base URL.
 * @returns {Promise<Object[]|number>} Scraped releases, or the HTTP status when the
 *   request failed or the payload carries no truthy `status`.
 */
async function fetchLatestApi(channel, page, { parameters }) {
	const endpoint = `${parameters.api}/videos?page=${page}&pageSize=12&sort=published_at`;
	const res = await unprint.get(endpoint);

	if (!res.ok || !res.data?.status) {
		return res.status;
	}

	const scenes = res.data.data.list;

	return scenes.map((scene) => scrapeSceneApi(scene, channel, parameters));
}
|
|
|
|
/**
 * Fetch a single scene from the JSON API, identified by the shoot ID in its URL.
 *
 * @param {string} url - Scene URL containing `/videos/<shootId>`.
 * @param {Object} channel - Channel entity, passed through to scrapeSceneApi.
 * @param {Object} _baseRelease - Unused.
 * @param {Object} context - Scraper context; `parameters.api` is the API base URL.
 * @returns {Promise<Object|number|null>} The release, the HTTP status on a failed
 *   request, or null when the URL holds no shoot ID.
 */
async function fetchSceneApi(url, channel, _baseRelease, { parameters }) {
	// shallow data missing actors and tags
	const { pathname } = new URL(url);
	const [, shootId] = pathname.match(/\/videos\/([\w-]+)/) || [];

	if (!shootId) {
		return null;
	}

	const res = await unprint.get(`${parameters.api}/videos/${shootId}`);

	return res.ok
		? scrapeSceneApi(res.data.data, channel, parameters)
		: res.status;
}
|
|
|
|
/**
 * Turn one `han/english` credit from the `models_name` query parameter into an actor.
 * Returns null when the English part matches "amateur".
 */
function parseCreditedActor(credit) {
	const [han, english] = credit.split('/').map((name) => name.trim());

	// not a name
	if (/amateur/i.test(english)) {
		return null;
	}

	return {
		name: english || han,
		alias: english && han,
	};
}

/**
 * Split a raw tile title into the part before the Han text and the Han part.
 * Returns only `title` when no usable split point is found.
 */
function splitBilingualTitle(rawTitle) {
	// find / closest to Han in case there are multiple, account for no / at all
	const hanIndex = rawTitle.match(/\p{Script_Extensions=Han}/u)?.index;
	const splitIndex = rawTitle.slice(0, hanIndex).lastIndexOf('/') || hanIndex;

	if (hanIndex && splitIndex > -1) {
		return {
			title: rawTitle.slice(0, splitIndex).trim(),
			altTitles: [rawTitle.slice(splitIndex + 1).trim()],
		};
	}

	return { title: rawTitle };
}

/**
 * Scrape the scene tiles of an overview page.
 *
 * @param {Object[]} scenes - Contexts exposing a `query` helper, one per tile.
 * @returns {Object[]} One release per tile; url, shootId and actors are only set
 *   when the tile links somewhere other than `/plans`.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = { entryId: query.attribute(null, 'video-id') };
		const link = query.url(null);

		if (link && !link.includes('/plans')) {
			const parsedLink = new URL(link);

			release.url = `${parsedLink.origin}${parsedLink.pathname}`;
			release.shootId = parsedLink.pathname.match(/((HP)|(LA)|(LT)|(MA)|(MD)|(MM)|(MS)|(MT)|(RR))\w*-\w+((EP)?\d+)?/)?.[0]; // pathname sometimes contains other text, match at least two letters to prevent false positives

			release.actors = parsedLink.searchParams
				.get('models_name')
				?.split(',')
				.map((credit) => parseCreditedActor(credit))
				.filter(Boolean);
		}

		const rawTitle = query.content('.video-title div')?.replace(release.shootId, '');

		if (rawTitle) {
			Object.assign(release, splitBilingualTitle(rawTitle));
		}

		release.duration = query.duration('.timestamp');

		const thumb = query.img('img', { attribute: 'data-src' });

		if (thumb) {
			const upscaled = thumb.replace(/w=\d+/, 'w=1920').replace(/h=\d+/, 'h=1080');

			release.poster = [upscaled, thumb];
		}

		release.teaser = query.video(null, { attribute: 'data-video-src' });

		return release;
	});
}
|
|
|
|
/**
 * Convert a model record from the JSON API into a profile.
 *
 * @param {Object} model - Model record returned by the API.
 * @param {Object} channel - Entity; `origin` is the base of the profile URL.
 * @param {Object} parameters - Scraper parameters; optional `basePath` is inserted after the origin.
 * @returns {Object|null} The profile, or null for the generic "amateur" model.
 */
function scrapeProfileApi(model, channel, parameters) {
	// generic profile for various amateur models
	if (model.name?.toLowerCase() === 'amateur') {
		return null;
	}

	// birth days containing '0001' are skipped, dateOfBirth is then left unset
	const hasBirthDay = !model.birth_day?.includes('0001');

	return {
		entryId: model.id,
		url: `${channel.origin}${parameters.basePath || ''}/models/${model.id}`,
		description: model.description || null,
		gender: model.gender,
		alias: [model.name_cn].filter(Boolean),
		...(hasBirthDay ? { dateOfBirth: unprint.extractDate(model.birth_day, 'YYYY-MM-DD') } : {}),
		birthPlace: model.birth_place || null,
		height: model.height_cm || null,
		weight: model.weight_kg || null,
		bust: model.measurements_chest,
		waist: model.measurements_waist,
		hip: model.measurements_hips,
		avatar: [...new Set([
			model.avatar,
			model.avatar?.replace('_compressed', ''), // this is often a wider image, not just uncompressed
		])].filter(Boolean),
		socials: model.socialmedia,
		scenes: model.videos.map((scene) => scrapeSceneApi(scene, channel, parameters)),
	};
}
|
|
|
|
/**
 * Resolve the API model ID for an actor.
 *
 * The ID is read straight from `actor.url` when it contains `/models/<digits>`;
 * otherwise the API search is queried and the result whose slugified name
 * equals `actor.slug` is used.
 *
 * @param {Object} actor - Actor with optional `url`, plus `name` and `slug` for the search fallback.
 * @param {Object} parameters - Scraper parameters; `api` is the API base URL.
 * @returns {Promise<number|null>} The model ID, or null when it cannot be resolved.
 */
async function getModelId(actor, parameters) {
	if (actor.url) {
		// the digits must be captured, without a group [1] is always undefined
		const modelId = new URL(actor.url).pathname.match(/\/models\/(\d+)/)?.[1];

		if (modelId) {
			return Number(modelId);
		}
	}

	const res = await unprint.get(`${parameters.api}/search?keyword=${slugify(actor.name, '+')}`);

	if (res.ok) {
		const model = res.data.data?.models?.find((modelResult) => slugify(modelResult.name) === actor.slug);

		if (model) {
			return model.id;
		}
	}

	return null;
}
|
|
|
|
/**
 * Fetch an actor profile from the JSON API.
 *
 * @param {Object} actor - Actor to look up, passed to getModelId.
 * @param {Object} context - Scraper context providing `entity` and `parameters`.
 * @returns {Promise<Object|null>} The scraped profile, or null when the model ID
 *   cannot be resolved, the request fails, or the response holds no data.
 */
async function fetchProfileApi(actor, { entity, parameters }) {
	const modelId = await getModelId(actor, parameters);

	if (!modelId) {
		return null;
	}

	const res = await unprint.get(`${parameters.api}/models/${modelId}`);

	if (!res.ok || !res.data.data) {
		return null;
	}

	return scrapeProfileApi(res.data.data, entity, parameters);
}
|
|
|
|
/**
 * Build the XPath selectors for the value that follows a bio label span.
 *
 * @param {string} field - Exact text of the label span, e.g. "Height".
 * @returns {string[]} Two selectors: the following sibling span, then the following sibling text node.
 */
function getBioXPath(field) {
	const label = `//span[text()="${field}"]`;

	return ['span', 'text()'].map((node) => `${label}/following-sibling::${node}`);
}
|
|
|
|
/**
 * Scrape an actor profile from a profile page.
 *
 * @param {Object} context - Page context exposing a `query` helper.
 * @param {string} url - URL of the profile page, stored on the profile.
 * @returns {Object} Profile with avatar (when found), description, height, weight,
 *   measurements, birth place, banner and photos.
 */
function scrapeProfile({ query }, url) {
	const profile = { url };
	const avatar = query.img('div[class*="prof-pic"] > img');

	if (avatar) {
		// request a larger rendition first, keep the original as second entry
		const enlarged = avatar.replace(/w=\d+/, 'w=720').replace(/h=\d+/, 'h=1080');

		profile.avatar = [enlarged, avatar];
	}

	// ::node()[self::span or self::text()] not supported by unprint/JSDOM
	Object.assign(profile, {
		description: query.content('h2') || null,
		height: query.number(getBioXPath('Height'), { match: /(\d+) cm/, matchIndex: 1 }) || null,
		weight: query.number(getBioXPath('Weight'), { match: /(\d+) kg/, matchIndex: 1 }) || null,
		measurements: query.content(getBioXPath('Measurements')) || null,
		birthPlace: query.content(getBioXPath('Birth Place')) || null,
		banner: query.img('div[class*="banner"] > img'),
		photos: query.imgs('#MusModelSwiper img'),
	});

	return profile;
}
|
|
|
|
/**
 * Obtain a session cookie for which the adult confirmation has been posted.
 *
 * Loads `channel.url`, reads the CSRF token meta tag and the set-cookie headers,
 * then posts both to the adult confirmation endpoint.
 *
 * @param {Object} channel - Entity whose `url` is the site root.
 * @param {Object} _parameters - Unused.
 * @returns {Promise<string|number|null>} The cookie string, the HTTP status of a
 *   failed request, or null when the token or cookie is missing.
 */
async function getCookie(channel, _parameters) {
	const landingRes = await unprint.get(channel.url);

	if (!landingRes.ok) {
		return landingRes.status;
	}

	const csrfToken = landingRes.context?.query.attribute('meta[name="csrf-token"]', 'content');
	const cookie = landingRes.response.headers['set-cookie']?.join(';');

	if (!(csrfToken && cookie)) {
		return null;
	}

	const headers = {
		cookie,
		'x-csrf-token': csrfToken,
	};

	const confirmRes = await unprint.post(`${channel.url}/adult_confirmation_and_accept_cookie`, null, { headers });

	return confirmRes.ok ? cookie : confirmRes.status;
}
|
|
|
|
/**
 * Fetch one page of the latest scenes from the site's video overview.
 *
 * @param {Object} channel - Channel entity; `url` is the site root.
 * @param {number} page - Page number to request.
 * @param {Object} context - Scraper context; `parameters` is handed to getCookie.
 * @returns {Promise<Object[]|number>} Scraped releases, or the HTTP status on failure.
 */
async function fetchLatest(channel, page, context) {
	const cookie = await getCookie(channel, context.parameters);

	const options = {
		selectAll: '.row a[video-id]',
		headers: { cookie },
	};

	const res = await unprint.get(`${channel.url}/videos?sort=published_at&page=${page}`, options);

	return res.ok
		? scrapeAll(res.context, channel)
		: res.status;
}
|
|
|
|
// deep pages are paywalled
|
|
|
|
/**
 * Look up an actor's profile URL through the site's search endpoint.
 *
 * @param {Object} actor - Actor whose `name` is searched for.
 * @param {Object} context - Scraper context; uses `channel.url` and the optional
 *   `parameters.searchPath` (default '/livesearch') and `parameters.searchParameter`
 *   (default 'keyword').
 * @param {string} cookie - Cookie header value sent with the request.
 * @returns {Promise<string|number|null|undefined>} The URL of the link whose title
 *   equals the actor name (whatever `query.url` yields when none matches), or the
 *   HTTP status when the search request fails.
 */
async function searchProfile(actor, context, cookie) {
	const searchPath = context.parameters.searchPath || '/livesearch';
	const searchParameter = context.parameters.searchParameter || 'keyword';

	// names may contain spaces, non-ASCII characters or reserved characters such as & and #
	const keyword = encodeURIComponent(actor.name);

	const searchRes = await unprint.get(`${context.channel.url}${searchPath}?${searchParameter}=${keyword}`, {
		headers: {
			cookie,
		},
	});

	if (!searchRes.ok) {
		return searchRes.status;
	}

	return searchRes.context.query.url(`a[title="${actor.name}"]`);
}
|
|
|
|
/**
 * Fetch an actor profile from the site, searching for the profile URL when the
 * actor does not carry one.
 *
 * @param {Object} actor - Actor with optional `url`, and `name` for the search fallback.
 * @param {Object} context - Scraper context; `entity` and `parameters` are handed to
 *   getCookie, the whole context to searchProfile.
 * @returns {Promise<Object|null>} The scraped profile, or null when no profile URL
 *   could be found or the profile request failed.
 */
async function fetchProfile(actor, context) {
	const cookie = await getCookie(context.entity, context.parameters);
	const actorUrl = actor.url || await searchProfile(actor, context, cookie);

	// searchProfile returns a numeric HTTP status when the search fails, that is not a URL to fetch
	if (!actorUrl || typeof actorUrl === 'number') {
		return null;
	}

	const res = await unprint.get(actorUrl, {
		headers: {
			cookie,
		},
	});

	if (res.ok) {
		return scrapeProfile(res.context, actorUrl);
	}

	return null;
}
|
|
|
|
module.exports = {
|
|
fetchLatest,
|
|
fetchProfile,
|
|
api: {
|
|
fetchLatest: fetchLatestApi,
|
|
fetchScene: fetchSceneApi,
|
|
fetchProfile: fetchProfileApi,
|
|
},
|
|
};
|