traxxx/src/scrapers/mikeadriano.js

150 lines
4.0 KiB
JavaScript

'use strict';
const qu = require('../utils/qu');
const http = require('../utils/http');
/**
 * Converts the scene tiles of an overview page into release objects.
 *
 * @param {Array<{ query: Object }>} scenes - Initialized items; only each item's `query` helper is used.
 * @returns {Object[]} One release per tile, carrying title, url, entryId, description, date, actors, duration, poster and photos.
 * @throws {TypeError} When a tile's URL cannot be parsed by `new URL`.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.title = query.cnt('h3.title a, .content-title-wrap a');
    release.url = query.url('h3.title a, .content-title-wrap a');

    const { pathname } = new URL(release.url);

    // Prefer a numeric ID after /view/, fall back to a slug when there are no leading digits.
    release.entryId = pathname.match(/\/view\/(\d+)/)?.[1] || pathname.match(/\/view\/([\w-]+)/)?.[1];

    release.description = query.cnt('.desc, .content-description');
    release.date = query.date('.date, time, .hide', 'Do MMM YYYY');

    release.actors = query.cnts('h4.models a, .content-models a');
    release.duration = query.dur('.total-time');

    // The first linked image is used as the poster, the rest are regular photos.
    const [poster, ...primaryPhotos] = query.imgs('a img');

    const secondaryPhotos = query
      .styles('.thumb-top, .thumb-bottom, .thumb-mouseover', 'background-image')
      .map((style) => {
        // A background-image without url(...) (e.g. 'none' or '') must not abort the whole page.
        const source = style?.match(/url\((.*)\)/)?.[1];

        // CSS allows url("...") and url('...'); the quotes are not part of the address.
        return source?.trim().replace(/^["']|["']$/g, '');
      })
      .filter(Boolean);

    release.poster = poster;
    release.photos = [...primaryPhotos, ...secondaryPhotos];

    return release;
  });
}
/**
 * Builds a release object from a single scene page.
 *
 * @param {{ query: Object }} item - Initialized page item; only its `query` helper is used.
 * @param {string} url - Scene URL, the entry ID is taken from its `/view/...` path segment.
 * @returns {Promise<Object>} Release with entryId, title, description, date, actors, duration, poster and trailer.
 */
async function scrapeScene({ query }, url) {
  const { pathname } = new URL(url);

  // A numeric ID wins; otherwise the word/dash slug following /view/ is used.
  const numericId = pathname.match(/\/view\/(\d+)/)?.[1];
  const entryId = numericId || pathname.match(/\/view\/([\w-]+)/)?.[1];

  return {
    entryId,
    title: query.cnt('.content-page-info .title'),
    description: query.cnt('.content-page-info .desc'),
    date: query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY'),
    actors: query.cnts('.content-page-info .models a'),
    duration: query.dur('.content-page-info .total-time:last-child'),
    // Header video first, then the attributes on #main-player as a fallback.
    poster: query.poster('.content-page-header video, .content-page-header-inner video')
      || query.poster('#main-player', 'data-screenshot'),
    trailer: query.video('.content-page-header source, .content-page-header-inner source')
      || query.q('#main-player', 'data-url'),
  };
}
/**
 * Fetches one page of the channel's video overview and scrapes its scene tiles.
 *
 * @param {{ url: string }} channel - Channel whose URL host is prefixed with `tour.` to build the request.
 * @param {number} [page=1] - Overview page number passed as the `page` query parameter.
 * @returns {Promise<Object[]|*>} Scraped releases, or `res.status` when the response is not ok.
 * @throws {Error} When the page contains a link to stackpath.com.
 */
async function fetchLatest(channel, page = 1) {
  const { host } = new URL(channel.url);
  const res = await qu.get(`https://tour.${host}/videos?page=${page}`);

  if (!res.ok) {
    return res.status;
  }

  // A link to stackpath.com in the page is treated as a block notice rather than real content.
  if (res.item.query.exists('a[href*="stackpath.com"]')) {
    throw new Error('URL blocked by StackPath');
  }

  const tiles = qu.initAll(res.item.el, '.content-item-large, .content-item, .content-border');

  return scrapeAll(tiles, channel);
}
/**
 * Fetches the tour front page and scrapes the `#upcoming-content` element, if present.
 *
 * @param {{ url: string }} channel - Channel whose URL host is prefixed with `tour.` to build the request.
 * @returns {Promise<Object[]|null|*>} Scraped releases, `null` when there is no upcoming element,
 *   or `res.status` when the response is not ok.
 * @throws {Error} When the page contains a link to stackpath.com.
 */
async function fetchUpcoming(channel) {
  const { host } = new URL(channel.url);
  const res = await qu.get(`https://tour.${host}`);

  if (!res.ok) {
    return res.status;
  }

  // A link to stackpath.com in the page is treated as a block notice rather than real content.
  if (res.item.query.exists('a[href*="stackpath.com"]')) {
    throw new Error('URL blocked by StackPath');
  }

  const upcoming = qu.init(res.item.el, '#upcoming-content');

  return upcoming ? scrapeAll([upcoming], channel) : null;
}
/**
 * Fetches a scene page within a cookie-backed session and scrapes it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} channel - Channel the scene belongs to, forwarded to the scene scraper.
 * @returns {Promise<Object|*>} The scraped release, or `res.status` when the response is not ok.
 * @throws {Error} When the page contains a link to stackpath.com.
 */
async function fetchScene(url, channel) {
  const cookieJar = http.cookieJar();
  const session = http.session({ cookieJar });

  // Disabled experiment (marked "not working" by the original author): a preliminary request
  // with `extract: { runScripts: 'dangerously' }` whose `document.cookie` was parsed with
  // tough-cookie and stored in the jar before fetching the scene itself.
  const res = await http.get(url, { session });

  if (!res.ok) {
    return res.status;
  }

  const item = qu.init(res.document);

  // A link to stackpath.com in the page is treated as a block notice rather than real content.
  if (item.query.exists('a[href*="stackpath.com"]')) {
    throw new Error('URL blocked by StackPath');
  }

  return scrapeScene(item, url, channel);
}
/* Disabled: the search-preview API is protected
async function fetchProfile({ name: actorName }, context , site) {
const session = http.session();
await http.get(`https://tour.${site.slug}.com`, { session });
const url = `https://tour.${site.slug}.com/search-preview`;
const res = await http.post(url, { q: actorName }, {
session,
headers: {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
origin: `https://tour.${site.slug}.com`,
},
});
console.log(res.body.toString());
}
*/
// Scraper entry points exposed by this module. fetchProfile is left out on purpose:
// its implementation is commented out in this file.
module.exports = {
fetchLatest,
fetchUpcoming,
// fetchProfile,
fetchScene,
};