forked from DebaucheryLibrarian/traxxx
Drastic actor page redesign. Store one avatar per actor and keep other profile photos under the 'photo' role; the first photo is no longer assumed to be the avatar.
This commit is contained in:
@@ -128,7 +128,7 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
function scrapeActorSearch(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const actorLink = document.querySelector(`a[title="${actorName}"]`);
|
||||
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
|
||||
|
||||
return actorLink;
|
||||
}
|
||||
|
||||
@@ -21,7 +21,17 @@ function scrapePhotos(html) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.map((photoIndex, photoElement) => $(photoElement).attr('src').replace('thumbs/', 'photos/'))
|
||||
.map((photoIndex, photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
if (src.match(/dl\d+/)) {
|
||||
// thumbnail URLs containing dl02/ or dl03/ don't appear to have
|
||||
// a full photo available, fall back to thumbnail
|
||||
return src;
|
||||
}
|
||||
|
||||
return src.replace('thumbs/', 'photos/');
|
||||
})
|
||||
.toArray();
|
||||
|
||||
return photos;
|
||||
@@ -34,16 +44,19 @@ async function getPhotos(entryId, site, page = 1) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = scrapePhotos(html);
|
||||
const pages = Number($('.page_totals').text().trim().match(/\d+$/)[0]);
|
||||
const pagesString = $('.page_totals').text().trim();
|
||||
const pages = pagesString.length > 0 ? Number($('.page_totals').text().trim().match(/\d+$/)[0]) : null;
|
||||
|
||||
const otherPhotos = await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
|
||||
@@ -211,11 +224,13 @@ function scrapeProfile(html, url, actorName) {
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
|
||||
|
||||
if (avatarEl) {
|
||||
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
|
||||
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
|
||||
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
|
||||
|
||||
profile.avatar = src3 || src2 || src1;
|
||||
profile.avatar = src3 || src2 || src1 || src0 || src;
|
||||
}
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
|
||||
@@ -242,13 +257,26 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const url = `https://julesjordan.com/trial/models/${actorSlug}.html`;
|
||||
const actorSlugA = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const actorSlugB = actorName.toLowerCase().replace(/\s+/g, '');
|
||||
|
||||
const res = await bhttp.get(url);
|
||||
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
|
||||
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), url, actorName);
|
||||
const resA = await bhttp.get(urlA);
|
||||
|
||||
if (resA.statusCode === 200) {
|
||||
const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
const resB = await bhttp.get(urlB);
|
||||
|
||||
if (resB.statusCode === 200) {
|
||||
const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
Reference in New Issue
Block a user