Drastic actor page redesign. Storing one avatar per actor, other profile photos as 'photo' role; no longer assuming first photo is avatar.

This commit is contained in:
2019-11-28 05:36:22 +01:00
parent 884ef248e4
commit 4be508b388
300 changed files with 1110 additions and 213 deletions

View File

@@ -128,7 +128,7 @@ async function scrapeScene(html, url, site) {
/**
 * Find the link to an actor on a search results page.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} url - URL the markup was fetched from (not used by this function).
 * @param {string} actorName - Actor name to look for in anchor `title` attributes.
 * @returns {Element|null} First anchor whose title equals the name (case-insensitively), or null when none matches.
 */
function scrapeActorSearch(html, url, actorName) {
    const { document } = new JSDOM(html).window;

    // Escape quotes and backslashes so the name cannot terminate the quoted
    // attribute value early and turn the selector into invalid CSS.
    const escapedName = String(actorName).replace(/["\\]/g, '\\$&');

    // The trailing `i` makes the attribute value comparison case-insensitive.
    const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

    return actorLink;
}

View File

@@ -21,7 +21,17 @@ function scrapePhotos(html) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
.map((photoIndex, photoElement) => $(photoElement).attr('src').replace('thumbs/', 'photos/'))
.map((photoIndex, photoElement) => {
const src = $(photoElement).attr('src');
if (src.match(/dl\d+/)) {
// thumbnail URLs containing dl02/ or dl03/ don't appear to have
// a full photo available, fall back to thumbnail
return src;
}
return src.replace('thumbs/', 'photos/');
})
.toArray();
return photos;
@@ -34,16 +44,19 @@ async function getPhotos(entryId, site, page = 1) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = scrapePhotos(html);
const pages = Number($('.page_totals').text().trim().match(/\d+$/)[0]);
const pagesString = $('.page_totals').text().trim();
const pages = pagesString.length > 0 ? Number($('.page_totals').text().trim().match(/\d+$/)[0]) : null;
const otherPhotos = await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
const pageHtml = await fetchPhotos(pageUrl);
const otherPhotos = pages
? await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
const pageHtml = await fetchPhotos(pageUrl);
return scrapePhotos(pageHtml);
}, {
concurrency: 2,
});
return scrapePhotos(pageHtml);
}, {
concurrency: 2,
})
: [];
const allPhotos = photos.concat(otherPhotos.flat());
@@ -211,11 +224,13 @@ function scrapeProfile(html, url, actorName) {
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
if (avatarEl) {
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7);
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7);
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
profile.avatar = src3 || src2 || src1;
profile.avatar = src3 || src2 || src1 || src0 || src;
}
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
@@ -242,13 +257,26 @@ async function fetchScene(url, site) {
}
async function fetchProfile(actorName) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const url = `https://julesjordan.com/trial/models/${actorSlug}.html`;
const actorSlugA = actorName.toLowerCase().replace(/\s+/g, '-');
const actorSlugB = actorName.toLowerCase().replace(/\s+/g, '');
const res = await bhttp.get(url);
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
if (res.statusCode === 200) {
return scrapeProfile(res.body.toString(), url, actorName);
const resA = await bhttp.get(urlA);
if (resA.statusCode === 200) {
const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
return profile;
}
const resB = await bhttp.get(urlB);
if (resB.statusCode === 200) {
const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
return profile;
}
return null;