Files
traxxx/src/scrapers/acam.js
2026-06-03 05:31:02 +02:00

238 lines
5.5 KiB
JavaScript
Executable File

'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
/**
 * Derive an entry ID from a poster image URL.
 *
 * The ID is the file name (without extension) that follows `/images/` in the URL path, with
 * anything from `smak` onwards stripped, passed through `slugify` with an empty delimiter.
 *
 * @param {string|string[]|null|undefined} poster - Poster URL, or a list of candidate URLs
 *   such as the `[derived, original]` pair produced by `getPhotos`; the first non-empty
 *   candidate is used.
 * @returns {string|null} The value returned by `slugify`, or `null` when anything in the
 *   derivation throws (typically because the URL is missing or cannot be parsed).
 */
function extractEntryId(poster) {
  // Pick a candidate explicitly instead of letting `new URL()` stringify an array into a
  // comma-joined pseudo-URL that only parses by accident.
  const source = Array.isArray(poster) ? poster.find(Boolean) : poster;

  try {
    const { pathname } = new URL(source);
    const fileName = pathname.match(/\/images\/(.*?)\.[a-z]{3,4}/i)?.[1];

    return slugify(fileName?.replace(/smak.*/i, ''), '');
  } catch (error) {
    // Best effort: without a usable poster URL there is nothing to derive an ID from.
    return null;
  }
}
/**
 * Extract lower-cased tags for a scene.
 *
 * Two sources are tried in order:
 * 1. the comment text accompanying the title, when it holds tags as `<i>tag - tag</i>`;
 * 2. the title itself, starting at its first run of two capital letters, where every run of
 *    capitals, digits and whitespace counts as one tag.
 *
 * @param {string|null|undefined} title - Scene title.
 * @param {string|null|undefined} [titleComment] - Raw comment text found next to the title.
 * @returns {string[]} Trimmed, lower-cased, non-empty tags; empty when none are found.
 */
function extractTags(title, titleComment) {
  const commentTags = titleComment?.match(/<i>(.*?)<\/i>/)?.[1];

  if (commentTags) {
    return commentTags
      .split('-')
      .map((tag) => tag.trim().toLowerCase())
      .filter(Boolean);
  }

  // The comment can be present while the title is missing; there is nothing left to parse then.
  if (!title) {
    return [];
  }

  const firstTagIndex = title.match(/[A-Z]{2}/)?.index;

  // NOTE(review): a match at index 0 is treated the same as no match, as before — presumably
  // because a title that opens in capitals has no separate tag section; confirm with real data.
  if (!firstTagIndex) {
    return [];
  }

  const tagSections = title.slice(firstTagIndex).match(/([A-Z0-9\s]{2,})/g) ?? [];

  return tagSections
    .map((tag) => tag.trim().toLowerCase())
    .filter(Boolean);
}
/**
 * Build the poster and photo candidates for a scene image.
 *
 * Stripping the `smakprov…` or `_preview`/`.preview` marker from the image URL gives the
 * derived photo. The derived photo is usually uncensored and preferred as poster, but not
 * guaranteed to exist, so the original image is kept as fallback.
 *
 * @param {string|null|undefined} poster - Image URL taken from the page.
 * @returns {{ poster: (string|string[]|null|undefined), photos: string[] }} The unchanged
 *   poster with no photos when there is nothing to derive; otherwise `[derived, original]` as
 *   poster candidates and the original as the only photo.
 */
function getPhotos(poster) {
  // Without an image, optional chaining would yield `undefined`, which is not `=== null`,
  // and the function would return arrays of empty values.
  if (!poster) {
    return {
      poster,
      photos: [],
    };
  }

  // sic: `[ma]{2}` presumably tolerates a transposed spelling of the marker in file names
  const derived = poster.replace(/(s[ma]{2}kprov\d*)|([._]preview)/i, '');

  if (derived === poster) {
    return {
      poster,
      photos: [],
    };
  }

  return {
    poster: [derived, poster],
    photos: [poster],
  };
}
/**
 * Scrape the scene tiles of a listing page into base releases.
 *
 * Every release gets a `token` (taken from the tile's `/show-video/<token>` link), the
 * `forceDeep` flag, title, tags, poster and photos. With `parameters.staticUrl` the link is
 * stored as `url` and the token doubles as entry ID; otherwise the entry ID is derived from
 * the poster.
 *
 * @param {Array<{ query: object }>} scenes - Tile contexts; the fetchers in this file pass
 *   the `context` of an unprint response requested with `selectAll`.
 * @param {object} channel - Not used by this function.
 * @param {object} parameters - Entity parameters; only `staticUrl` is read here.
 * @returns {object[]} One release per tile.
 */
function scrapeAll(scenes, channel, parameters) {
  return scenes.map(({ query }) => {
    const release = {};

    // Vilde URLs are temporary tokens for some reason, seem to be handled entirely back-end
    const url = query.url('a[href*="/show-video"]');

    // A tile without a parseable link must not throw and take the whole page down with it.
    release.token = URL.canParse(url)
      ? new URL(url).pathname.match(/\/show-video\/([a-z0-9]+)/)?.[1]
      : undefined;
    release.forceDeep = true;

    release.title = query.content('a h5, .product-content p, .video_text');
    release.tags = extractTags(release.title, query.content('//a/comment()'));

    const { poster, photos } = getPhotos(query.img('img[src*="/videos/images"], img[src*="/uploads/images"]'));

    release.poster = poster;
    release.photos = photos;

    if (parameters.staticUrl) {
      release.url = url;
      release.entryId = release.token;
    } else {
      release.entryId = extractEntryId(release.poster);
    }

    return release;
  });
}
/**
 * Select the site language for entities that configure a language endpoint.
 *
 * Posts a form to `parameters.languageUrl` with a single field — named by
 * `parameters.languageKey` (default `select_language`) and set to `parameters.language`
 * (default `english`) — and hands back the cookies of that response.
 *
 * @param {object} parameters - Entity parameters.
 * @returns {Promise<*>} The `cookies` of the language response, or `null` when no
 *   `languageUrl` is configured.
 */
async function setLanguage(parameters) {
  if (!parameters.languageUrl) {
    return null;
  }

  const fieldName = parameters.languageKey || 'select_language';
  const language = parameters.language || 'english';

  const { cookies } = await unprint.post(
    parameters.languageUrl,
    { [fieldName]: language },
    { form: true },
  );

  return cookies;
}
/**
 * Fetch one page of the latest scenes through the `/pagination` form endpoint.
 *
 * @param {object} channel - Channel entity; `origin` is used as base of the endpoint.
 * @param {number} [page=1] - Page number, sent as form field `i`.
 * @param {{ parameters: object }} options - Carries the entity parameters.
 * @returns {Promise<object[]|number>} Scraped releases from the `.movi-area` tiles, or the
 *   response status when the request was not OK.
 */
async function fetchLatest(channel, page = 1, { parameters }) {
  const cookies = await setLanguage(parameters);

  const endpoint = `${channel.origin}/pagination`;
  const payload = { i: page, status: true };
  const options = { selectAll: '.movi-area', form: true, cookies };

  const res = await unprint.post(endpoint, payload, options);

  return res.ok
    ? scrapeAll(res.context, channel, parameters)
    : res.status;
}
/**
 * Fetch one page of the latest scenes from the `/Welcome/index/<offset>` listing.
 *
 * The listing is addressed by item offset; each page advances the offset by 9.
 *
 * @param {object} channel - Channel entity; `origin` is used as base of the listing URL.
 * @param {number} [page=1] - 1-based page number.
 * @param {{ parameters: object }} options - Carries the entity parameters.
 * @returns {Promise<object[]|number>} Scraped releases from the `.product-main` tiles, or the
 *   response status when the request was not OK.
 */
async function fetchLatestHooked(channel, page = 1, { parameters }) {
  const cookies = await setLanguage(parameters);

  const offset = (page - 1) * 9;
  const listingUrl = `${channel.origin}/Welcome/index/${offset}`;

  const res = await unprint.get(listingUrl, { selectAll: '.product-main', cookies });

  return res.ok
    ? scrapeAll(res.context, channel, parameters)
    : res.status;
}
/**
 * Fetch one page of the latest scenes through the `/pagination` form endpoint, using the
 * `k` / `hidden_page_no` form fields.
 *
 * @param {object} channel - Channel entity; `origin` is used as base of the endpoint.
 * @param {number} [page=1] - Page number, sent as `k`; `hidden_page_no` is sent as `page - 1`.
 * @param {{ parameters: object }} options - Carries the entity parameters.
 * @returns {Promise<object[]|number>} Scraped releases from the `.video_bx` tiles, or the
 *   response status when the request was not OK.
 */
async function fetchLatestKanal(channel, page = 1, { parameters }) {
  const cookies = await setLanguage(parameters);

  const endpoint = `${channel.origin}/pagination`;
  const payload = {
    k: page,
    hidden_page_no: page - 1,
    status: true,
  };
  const options = { selectAll: '.video_bx', form: true, cookies };

  const res = await unprint.post(endpoint, payload, options);

  return res.ok
    ? scrapeAll(res.context, channel, parameters)
    : res.status;
}
function scrapeScene({ query }, { url, baseRelease, parameters }) {
const release = {};
// URL is temporary token
if (!query.exists('.login-sec.for-browser, .video-description, .video_co_title')) {
// URL likely expired, still returns 200
return null;
}
if (query.exists('.video-description')) {
const descriptions = query.contents('.video-description p').filter(Boolean);
release.title = descriptions[0];
release.description = descriptions.slice(1).join(' ') || null;
} else {
release.title = query.content('.login-sec.for-browser h3, .video_co_title h3');
release.description = query.contents('.login-sec.for-browser h3 ~ *').join(' ') || null;
}
release.tags = extractTags(release.title);
const { poster, photos } = getPhotos(query.poster('.play_video_cont video'));
release.poster = poster;
release.photos = photos;
release.trailer = query.all('.play_video_cont source')
.map((videoEl) => ({
src: unprint.query.url(videoEl, null, { attribute: 'src' }),
quality: unprint.query.number(videoEl, null, { attribute: 'size' }),
referer: url,
}))
.toSorted((videoA, videoB) => videoB.quality - videoA.quality);
if (parameters.staticUrl) {
release.url = url;
release.entryId = baseRelease?.token || new URL(url).pathname.match(/\/show-video\/([a-z0-9]+)/)?.[1];
} else {
release.entryId = extractEntryId(release.poster);
}
return release;
}
/**
 * Fetch and scrape a single scene page.
 *
 * The page URL is `baseUrl` when given, otherwise it is rebuilt from the base release token
 * as `<entity.origin>/show-video/<token>`.
 *
 * @param {string|null} baseUrl - Scene URL, if known.
 * @param {object} entity - Entity whose `origin` is used to rebuild the URL from a token.
 * @param {object|null} baseRelease - Release from the listing; may carry a `token`.
 * @param {{ parameters: object }} options - Carries the entity parameters.
 * @returns {Promise<object|number|null>} The scraped release (or `null`, as returned by
 *   `scrapeScene`), `null` when no URL can be determined, or the response status when the
 *   request failed.
 */
async function fetchScene(baseUrl, entity, baseRelease, { parameters }) {
  let url = baseUrl;

  if (!url && baseRelease?.token) {
    url = `${entity.origin}/show-video/${baseRelease.token}`;
  }

  if (!url) {
    return null;
  }

  const cookies = await setLanguage(parameters);

  const res = await unprint.get(url, {
    headers: {
      'accept-language': 'en-US,en',
    },
    cookies,
  });

  // Anal Hooked returns 500 for valid scene pages
  const isScenePage = res.ok || res.status === 500;

  return isScenePage
    ? scrapeScene(res.context, { url, baseRelease, parameters })
    : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
hooked: {
fetchLatest: fetchLatestHooked,
fetchScene,
},
kanal: {
fetchLatest: fetchLatestKanal,
fetchScene,
},
};