Added basic filename copy. Added HTTP helper to q. Fetching all actor release pages from Naughty America. Added various high res network logos.

This commit is contained in:
2020-02-04 00:18:53 +01:00
parent bffa6d2c9e
commit ef602a3a15
42 changed files with 1483 additions and 54 deletions

View File

@@ -6,7 +6,7 @@ const cheerio = require('cheerio');
const moment = require('moment');
const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
const { ex, get } = require('../utils/q');
function titleExtractor(pathname) {
const components = pathname.split('/')[2].split('-');
@@ -100,7 +100,13 @@ function scrapeScene(html, url, site) {
};
}
function scrapeProfile(html) {
/**
 * Request a single page and pull the scene links out of it.
 *
 * The page is requested through the `get` helper and queried with the `qus`
 * function that helper resolves with.
 *
 * @param {*} url - Page to request; passed to `get` unchanged (presumably a URL — confirm against the caller).
 * @returns {Promise<*>} Whatever `qus` returns for the scene link selector.
 */
async function fetchActorReleases(url) {
  const { qus: queryLinks } = await get(url);

  // live scenes repeat on all pages, hence the :not(.live-scenes) exclusion
  const sceneLinkSelector = '.contain-block:not(.live-scenes) .scene-item > a:first-child';

  return queryLinks(sceneLinkSelector);
}
async function scrapeProfile(html) {
const { q, qus } = ex(html);
const profile = {};
@@ -109,7 +115,11 @@ function scrapeProfile(html) {
const avatar = q('img.performer-pic', 'src');
if (avatar) profile.avatar = `https:${avatar}`;
profile.releases = qus('.scene-item > a:first-child');
const releases = qus('.scene-item > a:first-child');
const otherPages = qus('.pagination a:not([rel=next]):not([rel=prev])');
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
profile.releases = releases.concat(olderReleases.flat());
return profile;
}

View File

@@ -28,7 +28,7 @@ async function fetchPhotos(url) {
return [];
}
function scrapeAll(html) {
function scrapeAll(html, site) {
return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
const release = {};
@@ -45,8 +45,8 @@ function scrapeAll(html) {
release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
|| qd('.dt-box', 'MMM.DD YYYY');
release.actors = qa('.model, .i-model', true);
release.duration = ql('.i-amount');
release.actors = site.parameters?.actors || qa('.model, .i-model', true);
release.duration = ql('.i-amount, .amount');
const posterEl = q('.item-img img');
@@ -64,20 +64,40 @@ function scrapeAll(html) {
}).filter(Boolean);
}
async function scrapeScene(html, url) {
const { q, qa, qtext, qi, qd, ql, qu, qis, qp, qt } = ex(html, '#videos-page, #content');
async function scrapeScene(html, url, site) {
const { q, qa, qtext, qi, qd, ql, qu, qis, qp } = ex(html, '#videos-page, #content');
const release = {};
[release.entryId] = new URL(url).pathname.split('/').slice(-2);
release.title = q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true);
release.title = q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
|| q('h1.m-title', true)?.split('»').slice(-1)[0].trim();
release.description = qtext('.p-desc, .desc');
release.actors = qa('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);
if (release.actors.length === 0) {
const actorEl = qa('.stat').find(stat => /Featuring/.test(stat.textContent))
const actorString = qtext(actorEl);
console.log(actorString);
/*
?.split(/, and|,/g)
.map(actor => actor.trim())
|| [];
*/
}
console.log(release.actors);
if (release.actors.length === 0) release.actors = site.parameters?.actors;
release.tags = qa('a[href*=tag]', true);
const dateEl = qa('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
release.date = qd(dateEl, null, 'MMMM Do, YYYY');
release.date = qd(dateEl, null, 'MMMM Do, YYYY')
|| qd('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
|| qd('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
release.duration = ql(durationEl);
@@ -94,25 +114,23 @@ async function scrapeScene(html, url) {
]);
}
const trailer = qt();
const trailers = qa('a[href*=Trailers]');
if (trailer) {
release.trailer = [
{
// don't rely on trailer always being 720p by default
src: trailer.replace(/\d+p\.mp4/, '720p.mp4'),
quality: 720,
},
{
src: trailer.replace(/\d+p\.mp4/, '360p.mp4'),
quality: 360,
},
];
if (trailers) {
release.trailer = trailers.map((trailer) => {
const src = `https:${trailer.href}`;
const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);
return format === 'mp4' ? { src, quality } : null;
}).filter(Boolean);
}
const stars = q('.rate-box').dataset.score;
if (stars) release.rating = { stars };
console.log(release);
return release;
}