// Forked from DebaucheryLibrarian/traxxx (JavaScript, 147 lines, 4.2 KiB).
'use strict';
|
|
|
|
const unprint = require('unprint');
|
|
|
|
const http = require('../utils/http');
|
|
const slugify = require('../utils/slugify');
|
|
|
|
/**
 * Split a scene title into its display part and a trailing shoot ID.
 *
 * The last whitespace-separated word of the title is treated as a shoot ID
 * when it ends in one or more word characters followed by digits (for
 * example `AB123`). An earlier revision only accepted an explicit list of
 * studio prefixes (AB, AF, GP, SZ, IV, GIO, ...); any such word is accepted now.
 *
 * @param {string|null|undefined} originalTitle - Raw title text.
 * @returns {{ shootId: (string|null), title: (string|null) }} The detected
 *   shoot ID (or `null`) and the title without it. When no ID is found the
 *   title is returned unchanged; a missing title yields `title: null`.
 */
function extractTitle(originalTitle) {
  // Scraped titles can be absent; report "no ID" rather than throwing on a null value.
  if (typeof originalTitle !== 'string') {
    return { shootId: null, title: originalTitle ?? null };
  }

  // Trim first so trailing whitespace cannot turn the last word into an empty string.
  const trimmedTitle = originalTitle.trim();
  const lastWord = trimmedTitle.split(/\s+/).pop();
  const sceneIdMatch = lastWord.match(/\w+\d+$/);

  if (!sceneIdMatch) {
    return { shootId: null, title: originalTitle };
  }

  return {
    shootId: sceneIdMatch[0],
    // Drop the whole last word (not just the matched part) plus the separator before it.
    title: trimmedTitle.slice(0, trimmedTitle.length - lastWord.length).trimEnd(),
  };
}
|
|
|
|
/**
 * Build release objects from the scene cards of a listing or profile page.
 *
 * @param {Array<{ query: object }>} scenes - One context per `.card-scene`
 *   element, each exposing a `query` helper (as produced by `unprint.get`
 *   with `selectAll`, or by `unprint.initAll`).
 * @param {{ url: string }} channel - Channel whose `url` prefixes the
 *   generated screencap URLs.
 * @returns {object[]} One release per card.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.card-scene__view > a');

    // Prefer the card's data attribute. Only fall back to parsing the link when
    // there is one: `new URL(null)` throws and would abort the whole listing.
    release.entryId = query.dataset(null, 'content')
      || (release.url ? new URL(release.url).pathname.match(/watch\/(\d+)/)?.[1] : undefined);

    release.title = query.content('.card-scene__text');
    // A card without title text simply has no shoot ID.
    release.shootId = release.title ? extractTitle(release.title).shootId : null;

    release.date = query.date('.label--time:nth-child(2)', 'YYYY-MM-DD');
    release.duration = query.duration('.label--time:first-child');

    release.poster = query.img('.card-scene__view img', { attribute: 'data-src' });

    // `data-casting` holds a JSON list; each entry becomes a screencap URL.
    const caps = query
      .json('.card-scene__view > a', { attribute: 'data-casting' })
      ?.map((timestamp) => `${channel.url}/casting/${release.entryId}/${timestamp}`);

    if (caps && release.poster) {
      release.caps = caps;
    } else if (caps) {
      // No dedicated poster: promote the first cap and keep the rest as caps.
      [release.poster, ...release.caps] = caps;
    }

    release.teaser = query.video('.card-scene__view > a', { attribute: 'data-preview' });

    return release;
  });
}
|
|
|
|
/**
 * Build a release object from a scene page.
 *
 * @param {{ query: object }} context - Page context exposing a `query` helper.
 * @param {{ url: string }} baseRelease - Object carrying the scene page URL,
 *   from which the numeric entry ID (`/watch/<id>`) is taken.
 * @returns {object} The scraped release.
 */
function scrapeScene({ query }, { url }) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/watch\/(\d+)/)?.[1];

  const featuring = query.content('.watch__title .watch__featuring_models');
  const fullTitle = query.content('.watch__title');

  if (fullTitle == null) {
    // Missing title element: keep the field but do not throw.
    release.title = null;
  } else {
    // Only strip the featuring text when there is some: String.prototype.replace
    // would coerce a null pattern to the literal string "null". Trim afterwards
    // so leftover whitespace does not hide a trailing shoot ID.
    release.title = (featuring ? fullTitle.replace(featuring, '') : fullTitle).trim();
  }

  release.description = query.content('.text-mob-more');
  release.shootId = release.title ? extractTitle(release.title).shootId : null;

  release.date = query.date('.bi-calendar3', 'YYYY-MM-DD');
  release.duration = query.duration('.bi-clock');

  release.actors = query.all('.watch__title a[href*="/model"]').map((el) => ({
    name: unprint.query.content(el),
    url: unprint.query.url(el, null),
  }));

  release.tags = query.contents('.genres-list a[href*="/genre"]');

  release.studio = slugify(query.content('.genres-list a[href*="/studios"]'), '', { removePunctuation: true });

  release.poster = query.img('.watch__video video', { attribute: 'data-poster' });
  // NOTE(review): `.bi-eye` looks like a view-count icon rather than a photo count — confirm.
  release.photoCount = query.number('.bi-eye');

  // One trailer source per <source> element, with its `size` attribute as quality.
  release.trailer = query.all('.watch__video source').map((el) => ({
    src: unprint.query.video(el, null, { attribute: 'src' }),
    quality: unprint.query.number(el, null, { attribute: 'size' }),
  }));

  return release;
}
|
|
|
|
/**
 * Build an actor profile from a model page.
 *
 * @param {{ query: object }} context - Page context exposing a `query` helper.
 * @param {string} url - Profile page URL, stored on the profile as-is.
 * @param {object} channel - Channel forwarded to `scrapeAll` for the scene cards.
 * @returns {object} Profile with `url`, `nationality`, `age`, `avatar` and `scenes`.
 */
function scrapeProfile({ query }, url, channel) {
  return {
    url,
    nationality: query.content('.model__info a[href*="/nationality"]'),
    // The age is read from the table cell that follows the cell containing "Age".
    age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
    avatar: query.img('.model__left img'),
    // Scene cards on the profile page go through the same scraper as listings.
    scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
  };
}
|
|
|
|
/**
 * Fetch one page of the channel's newest scenes.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site base URL.
 * @param {number} page - Page number appended to `/new-videos/`.
 * @returns {Promise<object[]|number>} The scraped releases, or the response
 *   status when the request was not OK.
 */
async function fetchLatest(channel, page) {
  const listingUrl = `${channel.url}/new-videos/${page}`;
  const res = await unprint.get(listingUrl, { selectAll: '.card-scene' });

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.context, channel);
}
|
|
|
|
/**
 * Resolve the profile URL for an actor.
 *
 * Uses `actor.url` when present; otherwise queries the site's autocomplete
 * search with the slugified actor name and takes the first result of type
 * `model`.
 *
 * @param {{ name: string, url: (string|undefined) }} actor - Actor to look up.
 * @param {{ url: string }} channel - Channel whose `url` is the site base URL.
 * @returns {Promise<string|number|null>} The profile URL, the response status
 *   when the search request was not OK, or `null` when no model was found.
 */
async function getActorUrl(actor, channel) {
  if (actor.url) {
    return actor.url;
  }

  const searchRes = await http.get(`${channel.url}/api/autocomplete/search?q=${slugify(actor.name, '+')}`);

  if (!searchRes.ok) {
    return searchRes.status;
  }

  // An OK response without a `terms` list is treated as "not found" instead of
  // throwing on `.find`.
  const terms = searchRes.body?.terms;

  if (!Array.isArray(terms)) {
    return null;
  }

  const result = terms.find((item) => item?.type === 'model');

  return result?.url ?? null;
}
|
|
|
|
/**
 * Fetch and scrape an actor's profile page.
 *
 * @param {object} actor - Actor passed to `getActorUrl` to resolve the profile URL.
 * @param {{ channel: object }} context - Object carrying the channel.
 * @returns {Promise<object|number|null>} The scraped profile; otherwise whatever
 *   non-string value `getActorUrl` produced, or the profile response status.
 */
async function fetchProfile(actor, { channel }) {
  const profileUrl = await getActorUrl(actor, channel);

  // Anything but a string means the lookup failed; hand that value back unchanged.
  if (typeof profileUrl !== 'string') {
    return profileUrl;
  }

  const res = await unprint.get(profileUrl);

  return res.ok
    ? scrapeProfile(res.context, profileUrl, channel)
    : res.status;
}
|
|
|
|
module.exports = {
|
|
fetchLatest,
|
|
scrapeScene: {
|
|
scraper: scrapeScene,
|
|
unprint: true,
|
|
},
|
|
fetchProfile,
|
|
};
|