2020-02-12 03:39:57 +00:00
|
|
|
'use strict';
|
|
|
|
|
|
|
|
const bhttp = require('bhttp');
|
2020-02-13 03:11:32 +00:00
|
|
|
const { get, exa, ed } = require('../utils/q');
|
2020-02-12 03:39:57 +00:00
|
|
|
|
|
|
|
/**
 * Scrape the releases listed on a "latest" overview page.
 *
 * @param {*} html - Page markup, passed straight through to `exa`.
 * @param {Object} site - Site record; `slug` picks the table selector and
 *   `url` is prefixed to every scene link.
 * @returns {Object[]} One release object per table matched by the selector.
 */
function scrapeLatest(html, site) {
  const scenes = site.slug === 'paintoy'
    ? exa(html, '#articleTable table[cellspacing="2"]')
    : exa(html, 'body > table');

  return scenes.map(({ qu }) => {
    const release = {};

    // The title element text is "<title> | <actor> | <actor> ...".
    const titleEl = qu.q('.galleryTitleText, .articleTitleText');
    const [title, ...actors] = titleEl.textContent.split('|');
    const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');

    const url = qu.url(titleEl, 'a');
    // The entry ID is the second-to-last path segment of the scene link.
    [release.entryId] = url.split('/').slice(-2);
    release.url = `${site.url}${url}`;

    if (date) {
      release.title = title.trim();
      release.date = date;
    } else {
      // title should contain date instead, not applicable in brief mode
      const colonIndex = title.indexOf(':');

      release.title = title.slice(colonIndex + 1).trim();

      // Only parse a date when there actually is a "<date>: <title>" prefix;
      // without a colon the whole title would otherwise be fed to the parser
      // minus its last character.
      if (colonIndex !== -1) {
        release.date = ed(title.slice(0, colonIndex), 'MMM D, YYYY');
      }
    }

    release.actors = actors.map(actor => actor.trim());

    const description = qu.q('.articleCopyText', true);

    if (description) {
      // Strip the trailing parenthesised part, but only when one exists:
      // lastIndexOf returns -1 otherwise, and slice(0, -1) would silently
      // drop the final character of the description.
      const parenIndex = description.lastIndexOf('(');

      release.description = parenIndex === -1
        ? description
        : description.slice(0, parenIndex);
    }

    const duration = qu.dur('.articleCopyText a:nth-child(2)');
    if (duration) release.duration = duration;

    // A missing or non-numeric likes cell parses to NaN; leave the field unset then.
    const likes = Number.parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
    if (!Number.isNaN(likes)) release.likes = likes;

    const cover = qu.img('a img');

    if (cover) {
      // One cover with two candidate sources: the thumbnail URL with the
      // '_thumbnail' marker removed first, then the thumbnail itself.
      release.covers = [[
        cover.replace('_thumbnail', ''),
        cover,
      ]];
    }

    return release;
  });
}
|
|
|
|
|
2020-03-09 01:02:29 +00:00
|
|
|
/**
 * Scrape a single scene page into a release object.
 *
 * @param {Object} context - Query context for the scene page.
 * @param {Object} context.qu - Query helpers bound to the page.
 * @param {Object} site - Site record; `url` is prefixed to the scene link.
 * @returns {Object} The scraped release.
 */
function scrapeScene({ qu }, site) {
  const release = {};

  // The title element text is "<title> | <actor> | <actor> ...".
  const titleEl = qu.q('.articleTitleText');
  const [title, ...actors] = titleEl.textContent.split('|');

  const url = qu.url(titleEl, 'a');
  // The entry ID is the second-to-last path segment of the scene link.
  [release.entryId] = url.split('/').slice(-2);
  release.url = `${site.url}${url}`;

  release.title = title.trim();
  release.description = qu.q('.articleCopyText', true);

  release.actors = actors.map(actor => actor.trim());
  release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
  release.duration = qu.dur('.articlePostDateText a:nth-child(2)');

  // First matching image is the cover, the remainder are photos.
  const [cover, ...photos] = qu.imgs('img[src*="images"]');

  // With no matching images `cover` is undefined; emit an empty list rather
  // than a one-element list holding undefined.
  release.covers = cover ? [cover] : [];
  release.photos = photos;

  release.poster = qu.poster();

  const trailer = qu.trailer();
  if (trailer) release.trailer = { src: trailer };

  return release;
}
|
|
|
|
|
|
|
|
/**
 * Fetch one page of the latest-releases listing and scrape it.
 *
 * @param {Object} site - Site record; `slug` and `url` are read.
 * @param {number} [page=1] - Listing page number, interpolated into the URL.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the
 *   response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const isPaintoy = site.slug === 'paintoy';

  let url;

  if (isPaintoy) {
    // paintoy's site is partially broken, use front page
    url = `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`;
  } else {
    url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
  }

  const res = await bhttp.get(url, {
    type: 'brief',
    page,
  });

  if (res.statusCode !== 200) {
    return null;
  }

  // paintoy's body is read as raw text; for other sites the markup is taken
  // from the `html` property of the response body.
  const html = isPaintoy ? res.body.toString() : res.body.html;

  return scrapeLatest(html, site);
}
|
|
|
|
|
|
|
|
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL, passed to `get`.
 * @param {Object} site - Site record forwarded to `scrapeScene`.
 * @returns {Promise<Object|*>} The scraped release when the request result
 *   is flagged `ok`, otherwise the result's `status` value.
 */
async function fetchScene(url, site) {
  const res = await get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeScene(res.item, site);
}
|
|
|
|
|
|
|
|
// Public scraper interface: latest-listing fetcher and single-scene fetcher.
module.exports = { fetchLatest, fetchScene };
|