// traxxx/src/scrapers/insex.js
//
// 103 lines, 2.9 KiB, JavaScript

'use strict';
const bhttp = require('bhttp');
const { get, exa, fd } = require('../utils/q');
/**
 * Scrapes the releases listed on a "brief" overview page.
 *
 * One release is produced per `body > table` element found by `exa`.
 * The query helpers (`q`, `qd`, `qi`, `qu`, `ql`) come from `../utils/q`;
 * their exact semantics are not visible here. Presumably `q(selector, true)`
 * returns text content, `qd` a parsed date, `qi` an image URL, `qu` a link
 * URL and `ql` a duration. TODO confirm against `../utils/q`.
 *
 * @param {string} html - Markup of the listing page.
 * @param {{ url: string }} site - Site descriptor; `site.url` is prefixed to scene paths.
 * @returns {Object[]} One release object per listed scene.
 */
function scrapeLatest(html, site) {
  const scenes = exa(html, 'body > table');

  return scenes.map(({ q, qd, qi, qu, ql }) => {
    const release = {};

    const titleEl = q('.galleryTitleText, .articleTitleText');
    // The heading has the form "title | actor | actor ...".
    const [title, ...actors] = titleEl.textContent.split('|');

    const date = qd('.articlePostDateText td', 'MMM D, YYYY');

    const url = qu(titleEl, 'a');
    // The entry ID is the second-to-last segment of the scene path.
    [release.entryId] = url.split('/').slice(-2);
    release.url = `${site.url}${url}`;

    if (date) {
      release.title = title.trim();
      release.date = date;
    } else {
      // title should contain date instead, not applicable in brief mode
      const separatorIndex = title.indexOf(':');

      release.title = title.slice(separatorIndex + 1).trim();
      release.date = fd(title.slice(0, separatorIndex), 'MMM D, YYYY');
    }

    release.actors = actors.map(actor => actor.trim());

    const description = q('.articleCopyText', true);

    if (description) {
      // Cut the trailing parenthesised suffix, but only when there is one:
      // lastIndexOf returns -1 otherwise, and slice(0, -1) would silently
      // drop the final character of the description.
      const suffixIndex = description.lastIndexOf('(');

      release.description = suffixIndex === -1
        ? description
        : description.slice(0, suffixIndex);
    }

    const duration = ql('.articleCopyText a:nth-child(2)');
    if (duration) release.duration = duration;

    // Only store likes when the cell holds a number, instead of NaN.
    const likes = parseInt(q('.articlePostDateText td:nth-child(3)', true), 10);
    if (!Number.isNaN(likes)) release.likes = likes;

    // A listing entry without an image must not abort the whole page.
    const cover = qi('a img');

    if (cover) {
      release.covers = [[
        cover.replace('_thumbnail', ''),
        cover,
      ]];
    }

    return release;
  });
}
/**
 * Builds a release object from a single scene page.
 *
 * The first argument is a bundle of query helpers bound to the page.
 * Their exact semantics live in `../utils/q` and are not visible here;
 * presumably `q(selector, true)` yields text, `qd` a date, `ql` a duration,
 * `qu` a URL, `qis` image URLs, `qp` a poster and `qt` a trailer.
 * TODO confirm against `../utils/q`.
 *
 * @param {Object} context - Query helpers `{ q, qd, ql, qu, qis, qp, qt }`.
 * @param {{ url: string }} site - Site descriptor; `site.url` is prefixed to the scene path.
 * @returns {Object} The scraped release.
 */
function scrapeScene({ q, qd, ql, qu, qis, qp, qt }, site) {
  const heading = q('.articleTitleText');
  // The heading has the form "title | actor | actor ...".
  const [rawTitle, ...rawActors] = heading.textContent.split('|');

  const path = qu(heading, 'a');
  // The entry ID is the second-to-last segment of the scene path.
  const [entryId] = path.split('/').slice(-2);

  const release = {
    entryId,
    url: `${site.url}${path}`,
    title: rawTitle.trim(),
    description: q('.articleCopyText', true),
    actors: rawActors.map(name => name.trim()),
    date: qd('.articlePostDateText', 'MMMM D, YYYY'),
    duration: ql('.articlePostDateText a:nth-child(2)'),
  };

  // The first matching image is used as the cover, the rest as photos.
  const [cover, ...photos] = qis('img[src*="images"]');

  Object.assign(release, {
    covers: [cover],
    photos,
    poster: qp(),
    trailer: { src: qt() },
  });

  return release;
}
/**
 * Fetches one page of the "brief" scene listing and scrapes it.
 *
 * The endpoint is expected to answer with a body that exposes the listing
 * markup under `html`; that markup is handed to `scrapeLatest`.
 *
 * @param {{ url: string }} site - Site descriptor; `site.url` is the request base.
 * @param {number} [page=1] - Listing page to request.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the request
 *   did not return status 200 or the body carries no `html` string.
 */
async function fetchLatest(site, page = 1) {
  const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;

  // NOTE(review): `type` and `page` are already part of the query string;
  // they are still passed as the second argument exactly as before. Confirm
  // whether bhttp does anything with them.
  const res = await bhttp.get(url, {
    type: 'brief',
    page,
  });

  // Treat a 200 without usable markup like any other failed request, rather
  // than passing undefined on to the scraper.
  if (res.statusCode === 200 && res.body && typeof res.body.html === 'string') {
    return scrapeLatest(res.body.html, site);
  }

  return null;
}
/**
 * Loads a scene page and scrapes it into a release.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site descriptor, passed through to `scrapeScene`.
 * @returns {Promise<Object|*>} The scraped release, or the falsy value that
 *   `get` resolved with when the page could not be loaded.
 */
async function fetchScene(url, site) {
  const context = await get(url);

  // Pass a falsy result from `get` straight back to the caller.
  if (!context) {
    return context;
  }

  return scrapeScene(context, site);
}
// Scraper entry points: the listing fetcher and the single-scene fetcher.
module.exports = { fetchLatest, fetchScene };