forked from DebaucheryLibrarian/traxxx
Added basic filename copy. Added HTTP helper to q. Fetching all actor release pages from Naughty America. Added various high res network logos.
This commit is contained in:
@@ -6,7 +6,7 @@ const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
const { ex, get } = require('../utils/q');
|
||||
|
||||
function titleExtractor(pathname) {
|
||||
const components = pathname.split('/')[2].split('-');
|
||||
@@ -100,7 +100,13 @@ function scrapeScene(html, url, site) {
|
||||
};
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
/**
 * Load a single page via the `get` helper and run its `qus` query for the
 * scene links found outside the live-scenes block.
 *
 * @param {string} url - page address handed unchanged to `get`
 * @returns {Promise<*>} the result of `qus` for the scene-link selector
 *   (presumably a list of release URLs; confirm against ../utils/q)
 */
async function fetchActorReleases(url) {
	const sceneLinkSelector = '.contain-block:not(.live-scenes) .scene-item > a:first-child';
	const { qus: queryUrls } = await get(url);

	// live scenes repeat on all pages
	return queryUrls(sceneLinkSelector);
}
|
||||
|
||||
async function scrapeProfile(html) {
|
||||
const { q, qus } = ex(html);
|
||||
const profile = {};
|
||||
|
||||
@@ -109,7 +115,11 @@ function scrapeProfile(html) {
|
||||
const avatar = q('img.performer-pic', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
profile.releases = qus('.scene-item > a:first-child');
|
||||
const releases = qus('.scene-item > a:first-child');
|
||||
const otherPages = qus('.pagination a:not([rel=next]):not([rel=prev])');
|
||||
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
||||
|
||||
profile.releases = releases.concat(olderReleases.flat());
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ async function fetchPhotos(url) {
|
||||
return [];
|
||||
}
|
||||
|
||||
function scrapeAll(html) {
|
||||
function scrapeAll(html, site) {
|
||||
return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -45,8 +45,8 @@ function scrapeAll(html) {
|
||||
|
||||
release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
|
||||
|| qd('.dt-box', 'MMM.DD YYYY');
|
||||
release.actors = qa('.model, .i-model', true);
|
||||
release.duration = ql('.i-amount');
|
||||
release.actors = site.parameters?.actors || qa('.model, .i-model', true);
|
||||
release.duration = ql('.i-amount, .amount');
|
||||
|
||||
const posterEl = q('.item-img img');
|
||||
|
||||
@@ -64,20 +64,40 @@ function scrapeAll(html) {
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url) {
|
||||
const { q, qa, qtext, qi, qd, ql, qu, qis, qp, qt } = ex(html, '#videos-page, #content');
|
||||
async function scrapeScene(html, url, site) {
|
||||
const { q, qa, qtext, qi, qd, ql, qu, qis, qp } = ex(html, '#videos-page, #content');
|
||||
const release = {};
|
||||
|
||||
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
|
||||
|
||||
release.title = q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true);
|
||||
release.title = q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|
||||
|| q('h1.m-title', true)?.split('»').slice(-1)[0].trim();
|
||||
release.description = qtext('.p-desc, .desc');
|
||||
|
||||
release.actors = qa('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
|
||||
|
||||
if (release.actors.length === 0) {
|
||||
const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent))
|
||||
const actorString = qtext(actorEl);
|
||||
|
||||
console.log(actorString);
|
||||
/*
|
||||
?.split(/, and|,/g)
|
||||
.map(actor => actor.trim())
|
||||
|| [];
|
||||
*/
|
||||
}
|
||||
|
||||
console.log(release.actors);
|
||||
|
||||
if (release.actors.length === 0) release.actors = site.parameters?.actors;
|
||||
|
||||
release.tags = qa('a[href*=tag]', true);
|
||||
|
||||
const dateEl = qa('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
|
||||
release.date = qd(dateEl, null, 'MMMM Do, YYYY');
|
||||
release.date = qd(dateEl, null, 'MMMM Do, YYYY')
|
||||
|| qd('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
|
||||
|| qd('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
|
||||
|
||||
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||
release.duration = ql(durationEl);
|
||||
@@ -94,25 +114,23 @@ async function scrapeScene(html, url) {
|
||||
]);
|
||||
}
|
||||
|
||||
const trailer = qt();
|
||||
const trailers = qa('a[href*=Trailers]');
|
||||
|
||||
if (trailer) {
|
||||
release.trailer = [
|
||||
{
|
||||
// don't rely on trailer always being 720p by default
|
||||
src: trailer.replace(/\d+p\.mp4/, '720p.mp4'),
|
||||
quality: 720,
|
||||
},
|
||||
{
|
||||
src: trailer.replace(/\d+p\.mp4/, '360p.mp4'),
|
||||
quality: 360,
|
||||
},
|
||||
];
|
||||
if (trailers) {
|
||||
release.trailer = trailers.map((trailer) => {
|
||||
const src = `https:${trailer.href}`;
|
||||
const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
|
||||
const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);
|
||||
|
||||
return format === 'mp4' ? { src, quality } : null;
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
const stars = q('.rate-box').dataset.score;
|
||||
if (stars) release.rating = { stars };
|
||||
|
||||
console.log(release);
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user