forked from DebaucheryLibrarian/traxxx
Fixed qu issues. Fixed media issues. Simplified and expanded date component in search query.
This commit is contained in:
@@ -5,6 +5,7 @@ const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
@@ -105,7 +106,10 @@ function scrapeScene(html, url, _site) {
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
release.channel = channel === 'bangcasting' ? 'bangbroscasting' : channel;
|
||||
|
||||
// Translate the slug taken from the /websites link into the channel name stored on
// the release. The branches must be mutually exclusive: with two independent `if`
// statements the trailing `else` would overwrite the 'bangcasting' mapping.
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
else if (channel === 'remaster') release.channel = 'bangbrosremastered';
else release.channel = channel;
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -123,8 +127,8 @@ function scrapeProfile(html) {
|
||||
}
|
||||
|
||||
function scrapeProfileSearch(html, actorName) {
|
||||
const { q } = ex(html);
|
||||
const actorLink = q(`a[title="${actorName}"]`, 'href');
|
||||
const { qu } = ex(html);
|
||||
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||
|
||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||
}
|
||||
@@ -145,7 +149,7 @@ async function fetchUpcoming(site) {
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
if (!release?.date) {
|
||||
throw new Error(`Cannot fetch Bang Bros scenes from argument URL, as scene pages do not have release dates: ${url}`);
|
||||
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
|
||||
@@ -5,11 +5,11 @@ const bhttp = require('bhttp');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { q, qa, qd, qi, qus } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const profile = {};
|
||||
|
||||
const bio = qa('.infobox tr[valign="top"]')
|
||||
.map(detail => qa(detail, 'td', true))
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map(detail => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
|
||||
|
||||
@@ -19,9 +19,9 @@ function scrapeProfile(html) {
|
||||
profile.gender = isTrans ? 'transsexual' : 'female';
|
||||
*/
|
||||
|
||||
profile.birthdate = qd('.bday', 'YYYY-MM-DD');
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
|
||||
profile.description = q('#mw-content-text > p', true);
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
@@ -62,7 +62,7 @@ function scrapeProfile(html) {
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
|
||||
const avatarThumbPath = qi('.image img');
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
@@ -73,7 +73,7 @@ function scrapeProfile(html) {
|
||||
};
|
||||
}
|
||||
|
||||
profile.social = qus('.infobox a.external');
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -27,26 +27,26 @@ function scrapeAll(html, site, origin) {
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, _site) {
|
||||
const { q, qa, qd, qm, qp, qus } = ex(html);
|
||||
const { qu } = ex(html);
|
||||
const release = {};
|
||||
|
||||
[release.entryId] = url.split('/').slice(-1);
|
||||
|
||||
release.title = qm('itemprop=name');
|
||||
release.description = q('.descr-box p', true);
|
||||
release.date = qd('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
||||
|| qd('.title-border:nth-child(2) p', 'MM.DD.YYYY');
|
||||
release.title = qu.meta('itemprop=name');
|
||||
release.description = qu.q('.descr-box p', true);
|
||||
release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
|
||||
|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY');
|
||||
|
||||
release.actors = qa('.pornstar-card > a', 'title');
|
||||
release.tags = qa('.tags-tab .tags a', true);
|
||||
release.actors = qu.all('.pornstar-card > a', 'title');
|
||||
release.tags = qu.all('.tags-tab .tags a', true);
|
||||
|
||||
release.duration = parseInt(q('.icon-video-red + span', true), 10) * 60;
|
||||
release.likes = Number(q('.icon-like-red + span', true));
|
||||
release.duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
|
||||
release.likes = Number(qu.q('.icon-like-red + span', true));
|
||||
|
||||
release.poster = qp();
|
||||
release.photos = qus('.photo-slider-guest .card a');
|
||||
release.poster = qu.poster();
|
||||
release.photos = qu.urls('.photo-slider-guest .card a');
|
||||
|
||||
release.trailer = qa('source[type="video/mp4"]').map(trailer => ({
|
||||
release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
|
||||
src: trailer.src,
|
||||
quality: Number(trailer.attributes.res.value),
|
||||
}));
|
||||
@@ -72,10 +72,10 @@ async function fetchActorReleases(urls) {
|
||||
}
|
||||
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { q, qa, qus } = ex(html);
|
||||
const { qu } = ex(html);
|
||||
|
||||
const keys = qa('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
|
||||
const values = qa('.about-info').map((el) => {
|
||||
const keys = qu.all('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
|
||||
const values = qu.all('.about-info').map((el) => {
|
||||
if (el.children.length > 0) {
|
||||
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
||||
}
|
||||
@@ -96,7 +96,7 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
name: actorName,
|
||||
};
|
||||
|
||||
profile.description = q('.description-box', true);
|
||||
profile.description = qu.q('.description-box', true);
|
||||
profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');
|
||||
|
||||
if (bio.nationality) profile.nationality = bio.nationality;
|
||||
@@ -118,10 +118,10 @@ async function scrapeProfile(html, _url, actorName) {
|
||||
|
||||
if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);
|
||||
|
||||
const avatarEl = q('.pornstar-details .card-img-top');
|
||||
const avatarEl = qu.q('.pornstar-details .card-img-top');
|
||||
if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;
|
||||
|
||||
profile.releases = await fetchActorReleases(qus('.find-me-tab li a'));
|
||||
profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -101,22 +101,24 @@ function scrapeScene(html, url, site) {
|
||||
}
|
||||
|
||||
async function fetchActorReleases(url) {
|
||||
const { qus } = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return qus('.contain-block:not(.live-scenes) .scene-item > a:first-child'); // live scenes repeat on all pages
|
||||
return res.ok
|
||||
? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
||||
: [];
|
||||
}
|
||||
|
||||
async function scrapeProfile(html) {
|
||||
const { q, qus } = ex(html);
|
||||
const { qu } = ex(html);
|
||||
const profile = {};
|
||||
|
||||
profile.description = q('.bio_about_text', true);
|
||||
profile.description = qu.q('.bio_about_text', true);
|
||||
|
||||
const avatar = q('img.performer-pic', 'src');
|
||||
const avatar = qu.q('img.performer-pic', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
const releases = qus('.scene-item > a:first-child');
|
||||
const otherPages = qus('.pagination a:not([rel=next]):not([rel=prev])');
|
||||
const releases = qu.urls('.scene-item > a:first-child');
|
||||
const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
||||
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
||||
|
||||
profile.releases = releases.concat(olderReleases.flat());
|
||||
|
||||
@@ -58,7 +58,7 @@ function scrapeAll(scenes, site, origin) {
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(qu, url, site) {
|
||||
async function scrapeScene({ qu }, url, site) {
|
||||
const release = {};
|
||||
|
||||
const { origin, pathname } = new URL(url);
|
||||
|
||||
@@ -116,23 +116,23 @@ function scrapeLatest(html) {
|
||||
}
|
||||
|
||||
function scrapeScene(html, url) {
|
||||
const { q, qa, qd, qus, ql, qm } = ex(html);
|
||||
const { qu } = ex(html);
|
||||
const release = { url };
|
||||
|
||||
// release.entryId = slugify(release.title);
|
||||
[release.entryId] = q('link[rel="canonical"]').href.match(/\d+/);
|
||||
[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);
|
||||
|
||||
release.title = qm('meta[property="og:title"]') || q('.video-page-header h1', true);
|
||||
release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
|
||||
release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);
|
||||
|
||||
release.description = qm('meta[property="og:description"]') || q('.info-video-description', true);
|
||||
release.date = qd('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
||||
release.duration = ql('.info-video-details li:nth-child(2) span');
|
||||
release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
|
||||
release.duration = qu.dur('.info-video-details li:nth-child(2) span');
|
||||
|
||||
release.actors = qa('.info-video-models a', true);
|
||||
release.tags = qa('.info-video-category a', true);
|
||||
release.actors = qu.all('.info-video-models a', true);
|
||||
release.tags = qu.all('.info-video-category a', true);
|
||||
|
||||
release.photos = qus('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
||||
release.poster = qm('meta[property="og:image"');
|
||||
release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
|
||||
// Read the poster from the og:image meta tag; the `!release.poster` branch that
// follows handles the case where this lookup yields nothing.
release.poster = qu.meta('meta[property="og:image"]');
|
||||
|
||||
if (!release.poster) {
|
||||
const previewStart = html.indexOf('preview_url');
|
||||
|
||||
Reference in New Issue
Block a user