traxxx/src/scrapers/killergram.js

120 lines
3.6 KiB
JavaScript

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Scrape every release listed in an episodes table.
 *
 * The listing is read as five parallel lists (URLs, posters, titles, actors
 * and channels) which are then zipped together by index.
 *
 * @param {object} context - Query context; only its `query` helper is used.
 * @returns {object[]} One release per listed episode, or an empty array when
 *   the five lists do not all have the same length.
 */
function scrapeAll({ query }) {
  const urls = query
    .urls('td > a:not([href*=joinnow])')
    .map((pathname) => `http://killergram.com/${encodeURI(pathname)}`);
  const posters = query.imgs('td > a img');
  const titles = query.all('.episodeheadertext', true);
  const actors = query
    .all('.episodetextinfo:nth-child(3)')
    .map((actorsEl) => query.all(actorsEl, 'a', true));
  const channels = query
    .all('.episodetextinfo:nth-child(2) a', true)
    .map((channelName) => slugify(channelName, ''));

  // Zipping by index is only meaningful when every list has the same number of items.
  const listSizes = [urls, posters, titles, actors, channels].map((list) => list.length);
  const isAligned = new Set(listSizes).size === 1;

  if (!isAligned) {
    return [];
  }

  return urls.map((url, index) => {
    const entryId = new URL(url).searchParams.get('id');

    return {
      url,
      entryId,
      title: titles[index],
      actors: actors[index],
      channel: channels[index],
      poster: posters[index],
    };
  });
}
/**
 * Scrape the details of a single scene page.
 *
 * @param {object} context - Query context; its `query` helper and raw `html` are used.
 * @param {string} url - Scene URL; its `id` query parameter becomes the entry ID.
 * @returns {object} Release with `entryId`, `date`, `description`, `actors`,
 *   `poster` and `photos`; `duration` (minutes found in the HTML times 60) and
 *   `channel` are only set when the page provides them.
 */
function scrapeScene({ query, html }, url) {
  const release = {
    entryId: new URL(url).searchParams.get('id'),
    date: query.date('.episodetext', 'DD MMMM YYYY', /\d{2} \w+ \d{4}/),
    description: query.q('.episodetext tr:nth-child(5) td:nth-child(2)', true),
    actors: query.all('.modelstarring a', true),
  };

  const durationMatch = html.match(/(\d+) minutes/);
  const channelUrl = query.url('a[href*="ct=site"]');

  if (durationMatch) {
    release.duration = Number(durationMatch[1]) * 60;
  }

  if (channelUrl) {
    // The channel is identified by the `site` parameter of the site link.
    const channelLink = new URL(`https://killergram.com/${channelUrl}`);

    release.channel = slugify(channelLink.searchParams.get('site'), '');
  }

  // The first matching image is used as the poster, the rest as photos.
  const [poster, ...photos] = query.imgs('img[src*="/models"]');

  release.poster = poster;
  release.photos = photos;

  return release;
}
/**
 * Collect an actor's releases from the current listing page and every
 * remaining page, fetching the pages one after another.
 *
 * Only releases whose actor list includes `actorName` are kept. Collection
 * stops at the first page that fails to load; everything gathered up to that
 * point is still returned.
 *
 * @param {object} context - Query context of the page that is already loaded.
 * @param {string} url - Listing URL; each page URL is derived from it by setting `p`.
 * @param {Array} remainingPages - `p` values of the pages still to fetch, in order.
 * @param {string} actorName - Name that must appear in a release's actors.
 * @param {object[]} [accReleases=[]] - Releases gathered so far.
 * @returns {Promise<object[]>} Accumulated releases followed by the newly found ones.
 */
async function fetchActorReleases({ query }, url, remainingPages, actorName, accReleases = []) {
  let pageQuery = query;
  let pendingPages = remainingPages;
  let collected = accReleases;

  // Pages are walked strictly in sequence: each fetch only happens once the previous page is scraped.
  for (;;) {
    const pageReleases = scrapeAll({ query: pageQuery })
      .filter((release) => release.actors.includes(actorName));

    collected = collected.concat(pageReleases);

    const hasMorePages = pendingPages.length > 0;

    if (!hasMorePages) {
      return collected;
    }

    const pageUrl = new URL(url);

    pageUrl.searchParams.set('p', pendingPages[0]);

    const nextPage = `${pageUrl.origin}${pageUrl.pathname}?${pageUrl.searchParams}`;
    const res = await qu.get(nextPage, '#episodes > table');

    if (!res.ok) {
      return collected;
    }

    pageQuery = res.item.query;
    pendingPages = pendingPages.slice(1);
  }
}
/**
 * Build an actor profile from their episodes listing page.
 *
 * The avatar URL is derived from the actor name rather than read from the
 * page, and comes with a fixed crop box. Releases are only collected when
 * `include.releases` is truthy.
 *
 * @param {object} context - Query context of the profile page.
 * @param {string} actorName - Actor name, used for the avatar URL and release filtering.
 * @param {string} url - URL the profile page was fetched from.
 * @param {object} include - Flags selecting optional data; only `releases` is read.
 * @returns {Promise<object>} Profile with `avatar` and, when requested, `releases`.
 */
async function scrapeProfile({ query }, actorName, url, include) {
  const avatarCrop = {
    top: 4,
    left: 4,
    width: 289,
    height: 125,
  };

  const profile = {
    avatar: {
      src: `http://thumbs.killergram.com/models/${encodeURI(actorName)}/modelprofilethumb.jpg`,
      process: { crop: avatarCrop },
    },
  };

  if (!include.releases) {
    return profile;
  }

  const availablePages = query.all('.pageboxdropdown option', 'value');
  const episodesTable = qu.init(query.q('#episodes > table'));

  // The first dropdown option is skipped; the already-loaded table is scraped as the first page.
  profile.releases = await fetchActorReleases(episodesTable, url, availablePages.slice(1), actorName);

  return profile;
}
/**
 * Fetch and scrape one page of a channel's latest releases.
 *
 * The `p` parameter sent to the site is `(page - 1) * 15 + 1`, i.e. 1, 16,
 * 31, ... (presumably the index of the first item on a 15-item page; verify
 * against the site).
 *
 * @param {object} channel - Channel whose `url` is extended with `&p=...`.
 * @param {number} [page=1] - One-based page number.
 * @returns {Promise<object[]|*>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1) {
  const offset = ((page - 1) * 15) + 1;
  const res = await qu.get(`${channel.url}&p=${offset}`, '#episodes > table');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.item, channel);
}
/**
 * Fetch a scene page and scrape its details.
 *
 * @param {string} url - Scene URL to request.
 * @param {object} channel - Channel the scene is fetched for; forwarded to the scraper.
 * @returns {Promise<object|*>} Scraped release, or the response status when the request failed.
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url, '#episodes > table');

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, url, channel);
}
/**
 * Fetch an actor's episodes listing and scrape their profile from it.
 *
 * @param {object} actor - Actor to look up; only `name` is read.
 * @param {object} entity - Unused; kept so the signature matches what callers pass.
 * @param {object} include - Flags selecting optional profile data, forwarded to the scraper.
 * @returns {Promise<object|*>} Scraped profile, or the response status when the request failed.
 */
async function fetchProfile({ name: actorName }, entity, include) {
  // The name is a query-parameter value, so it needs encodeURIComponent: encodeURI leaves
  // `&`, `=`, `+`, `#` and `?` untouched, which would truncate the `model` value or inject
  // extra parameters for names containing them.
  const url = `http://killergram.com/episodes.asp?page=episodes&model=${encodeURIComponent(actorName)}&ct=model`;

  const res = await qu.get(url, '#content', null, {
    followRedirects: false,
  });

  return res.ok ? scrapeProfile(res.item, actorName, url, include) : res.status;
}
// Public entry points of this scraper. The scrape* helpers and
// fetchActorReleases are not exported and stay private to this module.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};