forked from DebaucheryLibrarian/traxxx
Updated Full Porn Network scraper.
This commit is contained in:
@@ -3,85 +3,103 @@
|
||||
const { get, geta, ctxa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Scrape the scene tiles of an overview page into release objects.
 *
 * @param {Array<{ qu: Object }>} scenes - query contexts, one per scene tile; only `qu` is used
 * @param {{ url: string }} [site] - site whose `url` is prefixed to each scene link
 * @returns {Object[]} one release per tile, carrying whichever fields the tile provided
 */
function scrapeAll(scenes, site) {
  // tolerate a missing site, so that a call without one yields relative URLs instead of a TypeError
  const siteUrl = (site && site.url) || '';

  return scenes.map(({ qu }) => {
    const release = {};

    const path = qu.url('.scene-title a');

    if (path) {
      release.url = `${siteUrl}${path}`;

      // the entry ID is the lowercased last path segment of the scene URL;
      // the placeholder base only serves to parse a URL that is still relative
      release.entryId = new URL(release.url, 'http://localhost').pathname
        .toLowerCase()
        .replace(/\/$/, '')
        .split('/')
        .slice(-1)[0];
    }

    release.title = qu.q('.scene-title', true);

    // the details text lists the length as "<n> minutes"; convert to seconds when present
    const details = qu.q('.scene-details', true);
    const minutes = details && details.match(/(\d+) minutes/);

    if (minutes) {
      release.duration = Number(minutes[1]) * 60;
    }

    // actor names are read from a single comma-separated text
    const actors = qu.text('.update-models');

    if (actors) {
      release.actors = actors.trim().split(/\s*,\s*/).filter(Boolean);
    }

    const poster = qu.img('.scene-thumb img');

    if (poster) {
      // candidate list: the -2x variant of the thumbnail first, the thumbnail as found second
      release.poster = [
        poster.replace('-1x', '-2x'),
        poster,
      ];
    }

    return release;
  });
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ qu: Object }} context - query context of the scene page; only `qu` is used
 * @param {string} url - absolute URL of the scene page
 * @param {{ url: string }} site - site whose `url` is prefixed to the trailer source
 * @returns {Object} release with url, entryId, title, description, actors, tags and,
 *   when a video source is found, trailer
 */
function scrapeScene({ qu }, url, site) {
  const release = { url };

  // the entry ID is the lowercased last path segment of the scene URL
  release.entryId = new URL(url).pathname
    .toLowerCase()
    .replace(/\/$/, '')
    .split('/')
    .slice(-1)[0];

  release.title = qu.q('h4.text-center', true);
  release.description = qu.q('p.hide-for-small-only', true);

  // actors and tags are collected from the links pointing at model and category pages
  release.actors = qu.all('a[href*="/model"]', true);
  release.tags = qu.all('a[href*="/category"]', true);

  const trailer = qu.video('source');

  if (trailer) {
    release.trailer = { src: `${site.url}${trailer}` };
  }

  return release;
}
|
||||
|
||||
/**
 * Scrape an actor page into a profile object.
 *
 * @param {{ el: Object, qu: Object }} context - query context of the actor page
 * @param {string} actorName - name of the actor that was requested
 * @param {{ url: string }} [site] - site forwarded to scrapeAll for the scenes listed on the page
 * @returns {Object|null} profile with releases and, when found, description and avatar;
 *   null when the page heading does not match the requested actor
 */
function scrapeProfile({ el, qu }, actorName, site) {
  // the site does not answer with a 404 when the actor is not found,
  // so compare the page heading against the requested name instead
  if (slugify(qu.q('h1', true)) !== slugify(actorName)) {
    return null;
  }

  const profile = {};

  const description = qu.q('h4 + p', true);

  if (description) {
    profile.description = description;
  }

  const avatar = qu.img('main img');

  if (avatar) {
    // candidate list: the set-2x variant of the image first, the image as found second
    profile.avatar = [
      avatar.replace('set-1x', 'set-2x'),
      avatar,
    ];
  }

  // forward the site, scrapeAll builds the scene URLs from it
  profile.releases = scrapeAll(ctxa(el, '.update, .scene-update'), site);

  return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the most recent scenes and scrape its tiles.
 *
 * @param {{ url: string }} site - site to fetch from
 * @param {number} [page=1] - page number inserted into the overview URL
 * @returns {Promise<Object[]|number>} scraped releases, or the response status when the request was not OK
 */
async function fetchLatest(site, page = 1) {
  const url = `${site.url}/1/scenes/recent/${page}/`;
  const res = await geta(url, '.latest-updates .update, .scene-update');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page
 * @param {{ url: string }} site - site the scene belongs to, passed on to scrapeScene
 * @returns {Promise<Object|number>} scraped release, or the response status when the request
 *   was not OK or yielded no item (presumably because nothing matched `main` — confirm against get)
 */
async function fetchScene(url, site) {
  const res = await get(url, 'main');

  if (res.ok && res.item) {
    return scrapeScene(res.item, url, site);
  }

  return res.status;
}
|
||||
|
||||
async function fetchProfile(actorName, { site }) {
|
||||
const actorSlug = slugify(actorName, '');
|
||||
|
||||
const url = `${site.url}/models/${actorSlug}.html`;
|
||||
const url = `${site.url}/1/model/${actorSlug}`;
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeProfile(res.item, actorName) : res.status;
|
||||
|
||||
Reference in New Issue
Block a user