2019-11-09 00:22:50 +00:00
|
|
|
'use strict';
|
|
|
|
|
2020-07-16 01:47:07 +00:00
|
|
|
const qu = require('../utils/qu');
|
2020-12-28 23:42:02 +00:00
|
|
|
const http = require('../utils/http');
|
2019-11-09 00:22:50 +00:00
|
|
|
|
2020-12-28 00:29:34 +00:00
|
|
|
/**
 * Turn every scene tile of a listing page into a release object.
 *
 * Only the `query` helper of each tile is used; its methods (`cnt`, `url`,
 * `date`, `cnts`, `dur`, `imgs`, `styles`) are called with the selectors below.
 *
 * @param {Array<{ query: Object }>} scenes - One entry per tile on the page.
 * @returns {Array<Object>} One release per tile, in input order. `entryId` is
 *   `undefined` when the tile has no parseable URL or no `/view/...` segment.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {
      director: 'Mike Adriano',
    };

    release.title = query.cnt('h3.title a, .content-title-wrap a');
    release.url = query.url('h3.title a, .content-title-wrap a');

    // new URL() throws on a missing or relative link; one broken tile should
    // not abort the whole page, so such a tile simply gets no entry ID.
    let pathname = null;

    try {
      ({ pathname } = new URL(release.url));
    } catch (error) {
      pathname = null;
    }

    // Prefer a numeric ID, fall back to a slug.
    release.entryId = pathname?.match(/\/view\/(\d+)/)?.[1] || pathname?.match(/\/view\/([\w-]+)/)?.[1];

    release.description = query.cnt('.desc, .content-description');
    release.date = query.date('.date, time, .hide', 'Do MMM YYYY');

    release.actors = query.cnts('h4.models a, .content-models a');
    release.duration = query.dur('.total-time');

    // The first image is used as the poster, the remaining ones as photos.
    const [poster, ...primaryPhotos] = query.imgs('a img');

    // Pull the address out of each background-image value. Values without a
    // url(...) (e.g. `none`) are skipped instead of throwing, and the optional
    // quotes of url("...") / url('...') are removed so the result is a bare URL.
    const secondaryPhotos = query
      .styles('.thumb-top, .thumb-bottom, .thumb-mouseover', 'background-image')
      .map((style) => style?.match(/url\((.*)\)/)?.[1]?.trim().replace(/^(['"])(.*)\1$/, '$2'))
      .filter(Boolean);

    release.poster = poster;
    release.photos = [...primaryPhotos, ...secondaryPhotos];

    return release;
  });
}
|
|
|
|
|
2020-12-28 00:29:34 +00:00
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object }} context - Page wrapper; only its `query` helper is used.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from its path.
 * @returns {Promise<Object>} The scraped release.
 * @throws {TypeError} If `url` cannot be parsed by `new URL()`.
 */
async function scrapeScene({ query }, url) {
  const release = { director: 'Mike Adriano' };

  const { pathname } = new URL(url);

  // Prefer a numeric ID, fall back to a slug.
  release.entryId = pathname.match(/\/view\/(\d+)/)?.[1] || pathname.match(/\/view\/([\w-]+)/)?.[1];

  release.title = query.cnt('.content-page-info .title');
  release.description = query.cnt('.content-page-info .desc');
  release.date = query.date('.content-page-info .date, .content-page-info .hide, .post-date', 'Do MMM YYYY');

  release.actors = query.cnts('.content-page-info .models a');
  release.duration = query.dur('.content-page-info .total-time:last-child');

  // Two page layouts are supported: a <video> element in the page header, or
  // a #main-player element carrying the media locations in data attributes.
  release.poster = query.poster('.content-page-header video, .content-page-header-inner video') || query.poster('#main-player', 'data-screenshot');
  release.trailer = query.video('.content-page-header source, .content-page-header-inner source') || query.q('#main-player', 'data-url');

  return release;
}
|
|
|
|
|
2020-07-16 01:47:07 +00:00
|
|
|
/**
 * Fetch one page of the channel's video listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - Channel whose `url` host is used to build the tour URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Array<Object>|number>} The scraped releases, or the
 *   response status when the request was not OK.
 * @throws {Error} When the page contains a link to stackpath.com, which is
 *   treated as a StackPath block page.
 */
async function fetchLatest(channel, page = 1) {
  const channelHost = new URL(channel.url).host;
  const res = await qu.get(`https://tour.${channelHost}/videos?page=${page}`);

  if (!res.ok) {
    return res.status;
  }

  const isBlocked = res.item.query.exists('a[href*="stackpath.com"]');

  if (isBlocked) {
    throw new Error('URL blocked by StackPath');
  }

  const tiles = qu.initAll(res.item.el, '.content-item-large, .content-item, .content-border');

  return scrapeAll(tiles, channel);
}
|
|
|
|
|
2020-07-16 01:47:07 +00:00
|
|
|
/**
 * Fetch a scene page and scrape it.
 *
 * The page is requested twice: a first request runs with a cookie jar
 * attached, and the cookie string the jar holds for `url` afterwards is sent
 * as the `cookie` header of the second request, whose document is scraped.
 * NOTE(review): this assumes the http util stores response cookies in the
 * given jar — confirm against utils/http.
 *
 * @param {string} url - Absolute URL of the scene page.
 * @param {Object} channel - Passed through to `scrapeScene`.
 * @returns {Promise<Object|number>} The scraped release, or the response
 *   status when the second request was not OK.
 * @throws {Error} When the page contains a link to stackpath.com, which is
 *   treated as a StackPath block page.
 */
async function fetchScene(url, channel) {
  const cookieJar = http.cookieJar();
  const session = http.session({ cookieJar });

  // First request: its response is not used, only the jar is read afterwards.
  await http.get(url, {
    session,
    extract: {
      cookieJar,
    },
  });

  const cookie = cookieJar.getCookieStringSync(url);

  const res = await http.get(url, {
    headers: {
      cookie,
    },
  });

  if (res.ok) {
    const item = qu.init(res.document);

    if (item.query.exists('a[href*="stackpath.com"]')) {
      throw new Error('URL blocked by StackPath');
    }

    return scrapeScene(item, url, channel);
  }

  return res.status;
}
|
|
|
|
|
2020-05-17 02:59:09 +00:00
|
|
|
/* Disabled: API is protected
|
2020-07-20 23:44:51 +00:00
|
|
|
async function fetchProfile({ name: actorName }, context , site) {
|
2020-11-22 23:05:02 +00:00
|
|
|
const session = http.session();
|
2020-05-17 02:59:09 +00:00
|
|
|
|
2020-11-22 23:05:02 +00:00
|
|
|
await http.get(`https://tour.${site.slug}.com`, { session });
|
2020-05-17 02:59:09 +00:00
|
|
|
|
|
|
|
const url = `https://tour.${site.slug}.com/search-preview`;
|
2020-11-22 23:05:02 +00:00
|
|
|
const res = await http.post(url, { q: actorName }, {
|
|
|
|
session,
|
2020-05-17 02:59:09 +00:00
|
|
|
headers: {
|
|
|
|
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
|
|
|
origin: `https://tour.${site.slug}.com`,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
|
|
|
|
console.log(res.body.toString());
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
2019-11-09 00:22:50 +00:00
|
|
|
// Public scraper interface. fetchProfile is intentionally not exported: its
// implementation is commented out in this file.
module.exports = { fetchLatest, fetchScene };
|