traxxx/src/scrapers/analvids.js

147 lines
4.2 KiB
JavaScript

'use strict';
const unprint = require('unprint');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
/**
 * Splits a trailing shoot ID off a scene title.
 *
 * The last space-separated word of the title is treated as the shoot ID when it
 * ends in word characters followed by at least one digit (e.g. `GP1234`). Any
 * such trailing word is accepted, rather than only a fixed list of known studio
 * prefixes (AB, AF, GP, SZ, ...), so a title that simply ends in a word with
 * digits is split as well.
 *
 * @param {?string} originalTitle - Scene title as scraped; may be missing.
 * @returns {{ shootId: ?string, title: ?string }} The shoot ID (or null) and the
 *   title without it; the title is returned unchanged when no ID is found.
 */
function extractTitle(originalTitle) {
  // A selector that matched nothing yields no title; there is nothing to split
  if (typeof originalTitle !== 'string') {
    return { shootId: null, title: originalTitle ?? null };
  }

  // Trim first so trailing whitespace neither hides the ID nor ends up inside it
  const titleComponents = originalTitle.trim().split(' ');
  const lastComponent = titleComponents[titleComponents.length - 1];
  const sceneIdMatch = lastComponent.match(/\w+\d+$/);

  if (!sceneIdMatch) {
    return { shootId: null, title: originalTitle };
  }

  return {
    shootId: sceneIdMatch[0],
    title: titleComponents.slice(0, -1).join(' '),
  };
}
/**
 * Builds a release object for every scene card on a listing page.
 *
 * @param {Array<{ query: object }>} scenes - One query context per `.card-scene` element.
 * @param {{ url: string }} channel - Channel whose base URL is used to build caps URLs.
 * @returns {object[]} One release per card, in the order the cards were given.
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.card-scene__view > a');

    // Prefer the card's `content` dataset value; only fall back to parsing the
    // link when there is one, because new URL(null) throws and would abort the whole page
    release.entryId = query.dataset(null, 'content')
      || (release.url ? new URL(release.url).pathname.match(/watch\/(\d+)/)?.[1] : null);

    release.title = query.content('.card-scene__text');
    // A card without a title has no shoot ID to extract
    release.shootId = release.title ? extractTitle(release.title).shootId : null;

    release.date = query.date('.label--time:nth-child(2)', 'YYYY-MM-DD');
    release.duration = query.duration('.label--time:first-child');

    release.poster = query.img('.card-scene__view img', { attribute: 'data-src' });

    // Each entry of the data-casting JSON array is turned into a casting image URL
    const caps = query
      .json('.card-scene__view > a', { attribute: 'data-casting' })
      ?.map((timestamp) => `${channel.url}/casting/${release.entryId}/${timestamp}`);

    if (caps && release.poster) {
      release.caps = caps;
    } else if (caps) {
      // No dedicated poster: promote the first cap to poster and keep the rest as caps
      [release.poster, ...release.caps] = caps;
    }

    release.teaser = query.video('.card-scene__view > a', { attribute: 'data-preview' });

    return release;
  });
}
/**
 * Builds a release object from a scene (watch) page.
 *
 * @param {{ query: object }} context - Query context for the scene page.
 * @param {{ url: string }} source - Carries the scene page URL, from which the entry ID is taken.
 * @returns {object} The scraped release.
 */
function scrapeScene({ query }, { url }) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/watch\/(\d+)/)?.[1];

  // The featuring text is nested inside the title element, so it is cut out of the title text
  const featuring = query.content('.watch__title .watch__featuring_models');
  let title = query.content('.watch__title');

  // String.prototype.replace() stringifies a null pattern to "null", so only strip when present
  if (title && featuring) {
    title = title.replace(featuring, '');
  }

  // Trim what the removal leaves behind, so a trailing shoot ID is still the last word
  release.title = title?.trim() ?? null;
  release.description = query.content('.text-mob-more');
  release.shootId = release.title ? extractTitle(release.title).shootId : null;

  release.date = query.date('.bi-calendar3', 'YYYY-MM-DD');
  release.duration = query.duration('.bi-clock');

  release.actors = query.all('.watch__title a[href*="/model"]').map((el) => ({
    name: unprint.query.content(el),
    url: unprint.query.url(el, null),
  }));

  release.tags = query.contents('.genres-list a[href*="/genre"]');
  release.studio = slugify(query.content('.genres-list a[href*="/studios"]'), '', { removePunctuation: true });

  release.poster = query.img('.watch__video video', { attribute: 'data-poster' });
  // NOTE(review): `.bi-eye` looks like a view counter rather than a photo count — confirm
  release.photoCount = query.number('.bi-eye');

  // One trailer source per <source> element, with its `size` attribute as quality
  release.trailer = query.all('.watch__video source').map((el) => ({
    src: unprint.query.video(el, null, { attribute: 'src' }),
    quality: unprint.query.number(el, null, { attribute: 'size' }),
  }));

  return release;
}
/**
 * Builds an actor profile from a model page, including the scenes listed on it.
 *
 * @param {{ query: object }} context - Query context for the model page.
 * @param {string} url - URL of the model page, stored on the profile as-is.
 * @param {{ url: string }} channel - Channel passed through to the scene card scraper.
 * @returns {object} Profile with url, nationality, age, avatar and scenes.
 */
function scrapeProfile({ query }, url, channel) {
  return {
    url,
    nationality: query.content('.model__info a[href*="/nationality"]'),
    // XPath: the cell following the one whose text contains "Age"
    age: query.number('//td[contains(text(), "Age")]/following-sibling::td'),
    avatar: query.img('.model__left img'),
    // Scene cards on the profile page go through the same scraper as the listing pages
    scenes: scrapeAll(unprint.initAll(query.all('.card-scene')), channel),
  };
}
/**
 * Fetches one page of the channel's newest videos and scrapes its scene cards.
 *
 * @param {{ url: string }} channel - Channel whose `/new-videos/` listing is requested.
 * @param {number} page - Page number appended to the listing URL.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page) {
  const listingUrl = `${channel.url}/new-videos/${page}`;
  const res = await unprint.get(listingUrl, { selectAll: '.card-scene' });

  // With selectAll, the response context is handed to scrapeAll as the list of scene cards
  return res.ok
    ? scrapeAll(res.context, channel)
    : res.status;
}
/**
 * Resolves the profile URL for an actor.
 *
 * Uses the URL already present on the actor when there is one; otherwise queries
 * the channel's autocomplete search and takes the first result of type `model`.
 *
 * @param {{ url: ?string, name: string }} actor - Actor to look up.
 * @param {{ url: string }} channel - Channel whose search API is queried.
 * @returns {Promise<string|number|null>} The profile URL, the response status when
 *   the search request failed, or null when no model was found.
 */
async function getActorUrl(actor, channel) {
  if (actor.url) {
    return actor.url;
  }

  const searchTerm = slugify(actor.name, '+');
  const searchRes = await http.get(`${channel.url}/api/autocomplete/search?q=${searchTerm}`);

  if (!searchRes.ok) {
    return searchRes.status;
  }

  // The search returns several kinds of terms; only a model entry points to a profile
  const modelTerm = searchRes.body.terms.find(({ type }) => type === 'model');

  return modelTerm ? modelTerm.url : null;
}
/**
 * Fetches and scrapes the profile page of an actor.
 *
 * @param {{ url: ?string, name: string }} actor - Actor whose profile is wanted.
 * @param {{ channel: { url: string } }} context - Carries the channel to search and scrape on.
 * @returns {Promise<object|number|null>} The scraped profile, or whatever non-string value
 *   the URL lookup produced (status or null), or the response status of the profile request.
 */
async function fetchProfile(actor, { channel }) {
  const actorUrl = await getActorUrl(actor, channel);

  // Only a string is a usable URL; anything else is a failure value to pass back unchanged
  if (typeof actorUrl === 'string') {
    const bioRes = await unprint.get(actorUrl);

    return bioRes.ok
      ? scrapeProfile(bioRes.context, actorUrl, channel)
      : bioRes.status;
  }

  return actorUrl;
}
module.exports = {
fetchLatest,
scrapeScene: {
scraper: scrapeScene,
unprint: true,
},
fetchProfile,
};