// traxxx/src/scrapers/insex.js
// (repository-viewer header residue: 108 lines, 3.2 KiB, JavaScript; Raw / Normal View / History)

'use strict';
const bhttp = require('bhttp');
const { get, exa, ed } = require('../utils/q');
/**
 * Scrape a listing page into an array of release objects.
 *
 * Each element matched by `exa` yields one release with `entryId`, `url`,
 * `title`, `date` and `actors`, plus `description`, `duration`, `likes` and
 * `covers` when the corresponding markup is present.
 *
 * @param {string} html - Markup of the listing page.
 * @param {object} site - Site descriptor; `slug` selects the row selector and
 *   `url` is prefixed to each scene path.
 * @returns {object[]} One release object per matched row.
 */
function scrapeLatest(html, site) {
  // paintoy is queried with a different selector than the other sites
  const scenes = site.slug === 'paintoy'
    ? exa(html, '#articleTable table[cellspacing="2"]')
    : exa(html, 'body > table');

  return scenes.map(({ qu }) => {
    const release = {};

    // The title element text is pipe-separated: "<title> | <actor> | <actor> ..."
    const titleEl = qu.q('.galleryTitleText, .articleTitleText');
    const [title, ...actors] = titleEl.textContent.split('|');
    const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');

    const url = qu.url(titleEl, 'a');
    // The second-to-last path segment is used as the entry ID
    [release.entryId] = url.split('/').slice(-2);
    release.url = `${site.url}${url}`;

    if (date) {
      release.title = title.trim();
      release.date = date;
    } else {
      // title should contain date instead, not applicable in brief mode
      const colonIndex = title.indexOf(':');
      release.title = title.slice(colonIndex + 1).trim();
      release.date = ed(title.slice(0, colonIndex), 'MMM D, YYYY');
    }

    release.actors = actors.map(actor => actor.trim());

    const description = qu.q('.articleCopyText', true);
    if (description) {
      // Drop the trailing parenthesised part; when there is no '(' keep the
      // text intact (lastIndexOf returns -1, which would otherwise chop the
      // final character).
      const parenIndex = description.lastIndexOf('(');
      release.description = parenIndex === -1
        ? description
        : description.slice(0, parenIndex);
    }

    const duration = qu.dur('.articleCopyText a:nth-child(2)');
    if (duration) release.duration = duration;

    // Only record likes when the cell actually holds a number
    const likes = Number.parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
    if (!Number.isNaN(likes)) release.likes = likes;

    // A row without a thumbnail simply gets no covers instead of throwing
    const cover = qu.img('a img');
    if (cover) {
      release.covers = [[
        cover.replace('_thumbnail', ''), // the same path without the '_thumbnail' marker comes first
        cover,
      ]];
    }

    return release;
  });
}
/**
 * Build a release object from a scene page.
 *
 * @param {object} context - Query context; only its `qu` helper set is used.
 * @param {object} site - Site descriptor; `url` is prefixed to the scene path.
 * @returns {object} Release with entryId, url, title, description, actors,
 *   date, duration, covers, photos, poster and, when found, trailer.
 */
function scrapeScene({ qu }, site) {
  const heading = qu.q('.articleTitleText');
  // Heading text is pipe-separated: the title first, then the actors
  const segments = heading.textContent.split('|');
  const path = qu.url(heading, 'a');

  const scene = {
    // second-to-last path segment
    entryId: path.split('/').slice(-2)[0],
    url: `${site.url}${path}`,
    title: segments[0].trim(),
    description: qu.q('.articleCopyText', true),
    actors: segments.slice(1).map(name => name.trim()),
    date: qu.date('.articlePostDateText', 'MMMM D, YYYY'),
    duration: qu.dur('.articlePostDateText a:nth-child(2)'),
  };

  // The first matched image is the cover, the remainder are photos
  const [leadImage, ...otherImages] = qu.imgs('img[src*="images"]');
  scene.covers = [leadImage];
  scene.photos = otherImages;
  scene.poster = qu.poster();

  const trailerSrc = qu.trailer();
  if (trailerSrc) {
    scene.trailer = { src: trailerSrc };
  }

  return scene;
}
/**
 * Fetch one page of a site's latest-releases listing and scrape it.
 *
 * @param {object} site - Site descriptor; `url` is the base URL and `slug`
 *   selects the listing endpoint.
 * @param {number} [page=1] - Page number placed in the query string.
 * @returns {Promise<object[]|null>} Result of `scrapeLatest`, or null when
 *   the response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const isPaintoy = site.slug === 'paintoy';

  // paintoy's site is partially broken, use front page
  const url = isPaintoy
    ? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
    : `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;

  // NOTE(review): `type` and `page` are already in the query string; whether
  // bhttp does anything with them as request options is unverified. Kept as-is.
  const res = await bhttp.get(url, {
    type: 'brief',
    page,
  });

  if (res.statusCode === 200) {
    // paintoy's body is stringified as a whole; for other sites the markup is
    // read from the body's `html` property (presumably a decoded JSON
    // payload; confirm against the endpoint).
    return scrapeLatest(isPaintoy ? res.body.toString() : res.body.html, site);
  }

  return null;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL passed to `get`.
 * @param {object} site - Site descriptor forwarded to `scrapeScene`.
 * @returns {Promise<object|*>} The scraped release when the response is ok,
 *   otherwise the response's `status` value.
 */
async function fetchScene(url, site) {
  const response = await get(url);

  // Failed requests surface their status instead of a release
  if (!response.ok) {
    return response.status;
  }

  return scrapeScene(response.item, site);
}
// Public scraper interface: the listing fetcher and the single-scene fetcher.
module.exports = {
fetchLatest,
fetchScene,
};