173 lines
5.0 KiB
JavaScript
173 lines
5.0 KiB
JavaScript
'use strict';
|
|
|
|
const qu = require('../utils/qu');
|
|
const http = require('../utils/http');
|
|
const slugify = require('../utils/slugify');
|
|
|
|
/**
 * Build release summaries from the entries of the "brief" latest-updates listing.
 *
 * @param {Array<{ query: Object }>} scenes - one context per listing entry; `query` must expose
 *   the `q`, `date`, `url`, `dur` and `img` helpers used below
 * @param {Object} site - site the listing belongs to; `site.url` is prepended to the scene path
 * @returns {Object[]} one release object per listing entry
 */
function scrapeLatest(scenes, site) {
	return scenes.map(({ query }) => {
		const release = {};

		const titleEl = query.q('.galleryTitleText, .articleTitleText');
		// the heading text is split on '|': the first part is the title, the rest are actor names
		const [title, ...actors] = titleEl.textContent.split('|');
		const date = query.date('.articlePostDateText td', 'MMM D, YYYY');

		const url = query.url(titleEl, 'a');
		// the entry ID is the second-to-last segment of the scene path
		[release.entryId] = url.split('/').slice(-2);
		release.url = `${site.url}${url}`;

		if (date) {
			release.title = title.trim();
			release.date = date;
		} else {
			// title should contain date instead, not applicable in brief mode
			const colonIndex = title.indexOf(':');

			release.title = title.slice(colonIndex + 1).trim();
			release.date = qu.ed(title.slice(0, colonIndex), 'MMM D, YYYY');
		}

		release.actors = actors.map(actor => actor.trim());

		const description = query.q('.articleCopyText', true);

		if (description) {
			// cut the trailing parenthesised part; when there is none, lastIndexOf returns -1
			// and slicing with it would silently drop the final character
			const parenIndex = description.lastIndexOf('(');

			release.description = parenIndex === -1 ? description : description.slice(0, parenIndex);
		}

		const duration = query.dur('.articleCopyText a:nth-child(2)');
		if (duration) release.duration = duration;

		// only store a like count when the cell actually holds a number
		const likes = parseInt(query.q('.articlePostDateText td:nth-child(3)', true), 10);
		if (!Number.isNaN(likes)) release.likes = likes;

		const cover = query.img('a img');

		if (cover) {
			// one cover with two candidate sources: the URL without '_thumbnail' first, the thumbnail itself second
			release.covers = [[
				cover.replace('_thumbnail', ''),
				cover,
			]];
		}

		return release;
	});
}
|
|
|
|
/**
 * Build release summaries from the listing used by sites configured with the 'alt' scraper.
 *
 * @param {Array<{ query: Object }>} scenes - one context per listing entry; `query` must expose
 *   the `url`, `cnt`, `cnts`, `date` and `img` helpers used below
 * @param {Object} site - site being scraped; `site.parameters.latest` is passed as URL origin
 * @returns {Object[]} one release object per listing entry
 */
function scrapeLatestAlt(scenes, site) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('figure a', 'href', { origin: site.parameters.latest });

		release.title = query.cnt('.has-text-weight-bold');
		release.date = query.date('span.tag', 'YYYY-MM-DD');
		release.actors = query.cnts('a.tag');

		const cover = query.img('.image img');

		// entries without an image used to throw on cover.replace; skip the image fields instead
		if (cover) {
			release.poster = cover.replace('poster_noplay', 'trailer_noplay');
			release.covers = [cover];
		}

		// no ID is read from the page here, so one is composed from the date and the slugified title
		release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

		return release;
	});
}
|
|
|
|
/**
 * Build a release from a single scene page of the default (non-'alt') layout.
 *
 * @param {{ query: Object }} context - page context; `query` must expose the `q`, `url`, `date`,
 *   `dur`, `imgs`, `poster` and `trailer` helpers used below
 * @param {Object} site - site the scene belongs to; `site.url` is prepended to the scene path
 * @returns {Object} the scraped release
 */
function scrapeScene({ query }, site) {
	const release = {};

	const titleEl = query.q('.articleTitleText');
	// the heading text is split on '|': the first part is the title, the rest are actor names
	const [title, ...actors] = titleEl.textContent.split('|');

	const url = query.url(titleEl, 'a');
	// the entry ID is the second-to-last segment of the scene path
	[release.entryId] = url.split('/').slice(-2);
	release.url = `${site.url}${url}`;

	release.title = title.trim();
	release.description = query.q('.articleCopyText', true);

	release.actors = actors.map(actor => actor.trim());
	release.date = query.date('.articlePostDateText', 'MMMM D, YYYY');
	release.duration = query.dur('.articlePostDateText a:nth-child(2)');

	// first matching image is the cover, the remainder are photos; tolerate pages without any
	const [cover, ...photos] = query.imgs('img[src*="images"]') || [];

	// avoid emitting `[undefined]` as the cover list when the page has no images
	if (cover) release.covers = [cover];
	release.photos = photos;

	release.poster = query.poster();

	const trailer = query.trailer();
	if (trailer) release.trailer = { src: trailer };

	return release;
}
|
|
|
|
/**
 * Build a release from a scene page of a site configured with the 'alt' scraper.
 *
 * @param {{ query: Object }} context - page context; `query` must expose the `cnt`, `cnts`,
 *   `date`, `img`, `imgs` and `video` helpers used below
 * @param {string} url - scene URL (accepted for the caller's signature, not read here)
 * @param {Object} channel - channel being scraped; `channel.url` is the base of the fallback trailer request
 * @param {*} session - HTTP session forwarded to the fallback trailer request
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeSceneAlt({ query }, url, channel, session) {
	const title = query.cnt('.columns div.is-size-5');
	const description = query.cnt('.has-background-black-ter > div:nth-child(4)');
	const date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');

	const release = {
		title,
		description,
		date,
		actors: query.cnts('.has-text-white-ter a.tag[href*="home.php"]'),
		tags: query.cnts('.has-background-black-ter > div:nth-child(6) > span'),
		poster: query.img('#videoPlayer, #iodvideo', 'poster'),
		photos: query.imgs('body > div:nth-child(6) img'),
		// no ID is read from the page, so one is composed from the date and the slugified title
		entryId: `${qu.formatDate(date, 'YYYY-MM-DD')}-${slugify(title)}`,
		trailer: query.video(),
	};

	// a video found on the page itself wins; nothing more to fetch
	if (release.trailer) {
		return release;
	}

	// otherwise ask the play API within the same session and use its response body as trailer
	const playRes = await http.get(`${channel.url}/api/play-api.php`, { session });

	if (playRes.ok) {
		release.trailer = playRes.body;
	}

	return release;
}
|
|
|
|
/**
 * Fetch one page of the latest releases for a site and scrape it.
 *
 * Sites whose `parameters.scraper` is 'alt' are read from their `home.php` listing;
 * all other sites are read from the `switch_tour.php` endpoint.
 *
 * NOTE: a paintoy-specific listing URL (`/corporal/punishment/gallery.php?type=brief`) and
 * selector were previously wired in here and are currently disabled.
 *
 * @param {Object} site - site to fetch; reads `site.url` and, for 'alt' sites, `site.parameters.latest`
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<Object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const isAlt = site.parameters?.scraper === 'alt';

	if (isAlt) {
		const listingUrl = `${site.parameters.latest}/home.php?o=latest&p=${page}`;
		const listingRes = await qu.getAll(listingUrl, 'body > .columns .column');

		return listingRes.ok
			? scrapeLatestAlt(listingRes.items, site)
			: listingRes.status;
	}

	const tourUrl = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
	const tourRes = await qu.get(tourUrl); // JSON containing html as a property

	if (!tourRes.ok) {
		return tourRes.status;
	}

	const entries = qu.extractAll(tourRes.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table');

	return scrapeLatest(entries, site);
}
|
|
|
|
/**
 * Fetch a scene page and scrape it with the scraper matching the site's configuration.
 *
 * A fresh HTTP session is opened for the page request; for 'alt' sites the same session
 * is handed on to the scene scraper.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - site the scene belongs to; `site.parameters.scraper` selects the scraper
 * @returns {Promise<Object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, site) {
	const session = http.session();
	const res = await qu.get(url, null, null, { session });

	if (!res.ok) {
		return res.status;
	}

	return site.parameters?.scraper === 'alt'
		? scrapeSceneAlt(res.item, url, site, session)
		: scrapeScene(res.item, site);
}
|
|
|
|
// Public interface of this scraper: the latest-releases fetcher and the single-scene fetcher.
module.exports = { fetchLatest, fetchScene };
|