Drastic actor page redesign. Storing one avatar per actor, other profile photos as 'photo' role; no longer assuming first photo is avatar.
This commit is contained in:
@@ -9,7 +9,7 @@ const whereOr = require('./utils/where-or');
|
||||
const { createActorMediaDirectory, storeAvatars } = require('./media');
|
||||
|
||||
async function curateActor(actor) {
|
||||
const [aliases, avatars, social] = await Promise.all([
|
||||
const [aliases, photos, social] = await Promise.all([
|
||||
knex('actors').where({ alias_for: actor.id }),
|
||||
knex('media')
|
||||
.where({ domain: 'actors', target_id: actor.id })
|
||||
@@ -41,14 +41,17 @@ async function curateActor(actor) {
|
||||
: null,
|
||||
ethnicity: actor.ethnicity,
|
||||
height: actor.height,
|
||||
weight: actor.weight,
|
||||
bust: actor.bust,
|
||||
waist: actor.waist,
|
||||
hip: actor.hip,
|
||||
naturalBoobs: actor.natural_boobs,
|
||||
aliases: aliases.map(({ name }) => name),
|
||||
slug: actor.slug,
|
||||
avatars,
|
||||
avatar: photos.find(photo => photo.role === 'avatar'),
|
||||
photos: photos.filter(photo => photo.role === 'photo'),
|
||||
social,
|
||||
scrapedAt: actor.scraped_at,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -97,27 +100,32 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
|
||||
return curatedActor;
|
||||
}
|
||||
|
||||
/**
 * Build a row for the `social` table from a single social media URL.
 *
 * Only the origin and path of the URL are kept; the query string and fragment
 * are dropped so tracking parameters do not produce distinct entries for the
 * same profile.
 *
 * @param {string} url - Absolute URL of the social profile. `new URL()` throws
 *   a TypeError when it cannot be parsed.
 * @param {*} actorId - Identifier of the actor the link belongs to; stored
 *   verbatim as `target_id`.
 * @returns {{ url: string, platform: (string|undefined), domain: string, target_id: * }}
 *   The curated entry. `platform` is the first known platform name contained
 *   in the hostname, or `undefined` when none is.
 */
function curateSocialEntry(url, actorId) {
  const { hostname, origin, pathname } = new URL(url);

  // plain substring test; the platform names are literals, not patterns
  const platform = ['facebook', 'twitter', 'instagram', 'tumblr', 'snapchat', 'amazon', 'youtube']
    .find(platformName => hostname.includes(platformName));

  return {
    url: `${origin}${pathname}`,
    platform,
    domain: 'actors',
    target_id: actorId,
  };
}
|
||||
|
||||
function curateSocialEntries(urls, actor) {
|
||||
async function curateSocialEntries(urls, actorId) {
|
||||
if (!urls) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return urls.reduce((acc, url) => {
|
||||
const socialEntry = curateSocialEntry(url, actor);
|
||||
const existingSocialLinks = await knex('social').where({
|
||||
domain: 'actors',
|
||||
target_id: actorId,
|
||||
});
|
||||
|
||||
if (acc.some(entry => socialEntry.url === entry.url)) {
|
||||
return urls.reduce((acc, url) => {
|
||||
const socialEntry = curateSocialEntry(url, actorId);
|
||||
|
||||
if (acc.some(entry => socialEntry.url === entry.url) || existingSocialLinks.some(entry => socialEntry.url === entry.url)) {
|
||||
// prevent duplicates
|
||||
return acc;
|
||||
}
|
||||
@@ -141,15 +149,10 @@ async function fetchActors(queryObject) {
|
||||
return curateActors(releases);
|
||||
}
|
||||
|
||||
/**
 * Curate an actor's social media URLs and insert the resulting rows into the
 * `social` table.
 *
 * The URL list is passed through `curateSocialEntries`, and whatever that
 * resolves to is inserted as-is.
 * NOTE(review): de-duplication against rows already stored for the actor is
 * presumably handled inside `curateSocialEntries` — confirm against its definition.
 *
 * @param {string[]} [urls] - Social profile URLs scraped for the actor.
 * @param {*} actorId - Identifier of the actor the links belong to.
 * @returns {Promise<void>} Resolves once the insert has completed.
 */
async function storeSocialLinks(urls, actorId) {
  const curatedSocialEntries = await curateSocialEntries(urls, actorId);

  await knex('social').insert(curatedSocialEntries);
}
|
||||
|
||||
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
@@ -159,7 +162,7 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
.insert(curatedActor)
|
||||
.returning('*');
|
||||
|
||||
await storeSocialLinks({ ...actor, ...actorEntry });
|
||||
await storeSocialLinks(actor.social, actorEntry.id);
|
||||
|
||||
console.log(`Added new entry for actor '${actor.name}'`);
|
||||
|
||||
@@ -174,7 +177,7 @@ async function updateActor(actor, scraped = false, scrapeSuccess = false) {
|
||||
.update(curatedActor)
|
||||
.returning('*');
|
||||
|
||||
await storeSocialLinks({ ...actor, ...curatedActor, ...actorEntry });
|
||||
await storeSocialLinks(actor.social, actor.id);
|
||||
|
||||
console.log(`Updated entry for actor '${actor.name}'`);
|
||||
|
||||
@@ -239,7 +242,6 @@ async function scrapeActors(actorNames) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (actorEntry && profile) {
|
||||
await createActorMediaDirectory(profile, actorEntry);
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const Promise = require('bluebird');
|
||||
|
||||
const argv = require('./argv');
|
||||
const knex = require('./knex');
|
||||
const initServer = require('./web/server');
|
||||
@@ -10,7 +12,10 @@ const { scrapeActors, scrapeBasicActors } = require('./actors');
|
||||
|
||||
async function init() {
|
||||
if (argv.url) {
|
||||
await scrapeRelease(argv.url);
|
||||
await Promise.map(argv.url, async url => scrapeRelease(url), {
|
||||
concurrency: 5,
|
||||
});
|
||||
|
||||
knex.destroy();
|
||||
|
||||
return;
|
||||
|
||||
@@ -36,7 +36,7 @@ const { argv } = yargs
|
||||
})
|
||||
.option('url', {
|
||||
describe: 'Scrape scene info from URL',
|
||||
type: 'string',
|
||||
type: 'array',
|
||||
alias: 'fetch',
|
||||
})
|
||||
.option('after', {
|
||||
|
||||
@@ -239,6 +239,8 @@ async function storeAvatars(profile, actor) {
|
||||
return !existingAvatars.some(avatar => file.hash === avatar.hash);
|
||||
});
|
||||
|
||||
const hasAvatar = existingAvatars.some(avatar => avatar.role === 'avatar');
|
||||
|
||||
await knex('media')
|
||||
.insert(newAvatars.map((file, index) => ({
|
||||
path: file.filepath,
|
||||
@@ -249,7 +251,7 @@ async function storeAvatars(profile, actor) {
|
||||
index,
|
||||
domain: 'actors',
|
||||
target_id: actor.id,
|
||||
role: 'avatar',
|
||||
role: index === 0 && !hasAvatar ? 'avatar' : 'photo',
|
||||
})));
|
||||
}
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ async function scrapeScene(html, url, site) {
|
||||
|
||||
/**
 * Find the link to an actor in a search results page.
 *
 * The page is parsed with JSDOM and queried for the first anchor whose `title`
 * attribute equals the actor name, compared case-insensitively via the `i`
 * attribute selector flag.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} url - URL the markup was fetched from; not used by this function.
 * @param {string} actorName - Actor name to look for.
 * @returns {?Element} The matching anchor element, or `null` when there is none.
 */
function scrapeActorSearch(html, url, actorName) {
  const { document } = new JSDOM(html).window;

  // escape backslashes and double quotes so the name cannot terminate the
  // quoted attribute value and produce an invalid selector
  const escapedName = actorName.replace(/["\\]/g, '\\$&');
  const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

  return actorLink;
}
|
||||
|
||||
@@ -21,7 +21,17 @@ function scrapePhotos(html) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.map((photoIndex, photoElement) => $(photoElement).attr('src').replace('thumbs/', 'photos/'))
|
||||
.map((photoIndex, photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
if (src.match(/dl\d+/)) {
|
||||
// thumbnail URLs containing dl02/ or dl03/ don't appear to have
|
||||
// a full photo available, fall back to thumbnail
|
||||
return src;
|
||||
}
|
||||
|
||||
return src.replace('thumbs/', 'photos/');
|
||||
})
|
||||
.toArray();
|
||||
|
||||
return photos;
|
||||
@@ -34,16 +44,19 @@ async function getPhotos(entryId, site, page = 1) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = scrapePhotos(html);
|
||||
const pages = Number($('.page_totals').text().trim().match(/\d+$/)[0]);
|
||||
const pagesString = $('.page_totals').text().trim();
|
||||
const pages = pagesString.length > 0 ? Number($('.page_totals').text().trim().match(/\d+$/)[0]) : null;
|
||||
|
||||
const otherPhotos = await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
|
||||
@@ -211,11 +224,13 @@ function scrapeProfile(html, url, actorName) {
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
|
||||
|
||||
if (avatarEl) {
|
||||
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7);
|
||||
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
|
||||
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
|
||||
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
|
||||
|
||||
profile.avatar = src3 || src2 || src1;
|
||||
profile.avatar = src3 || src2 || src1 || src0 || src;
|
||||
}
|
||||
|
||||
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
|
||||
@@ -242,13 +257,26 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const url = `https://julesjordan.com/trial/models/${actorSlug}.html`;
|
||||
const actorSlugA = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const actorSlugB = actorName.toLowerCase().replace(/\s+/g, '');
|
||||
|
||||
const res = await bhttp.get(url);
|
||||
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
|
||||
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), url, actorName);
|
||||
const resA = await bhttp.get(urlA);
|
||||
|
||||
if (resA.statusCode === 200) {
|
||||
const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
const resB = await bhttp.get(urlB);
|
||||
|
||||
if (resB.statusCode === 200) {
|
||||
const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -4,6 +4,13 @@ function feetInchesToCm(feet, inches) {
|
||||
return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
|
||||
}
|
||||
|
||||
/**
 * Convert a height in centimeters to whole feet and inches.
 *
 * The height is first rounded to the nearest whole inch and only then split
 * into feet and inches. Rounding the remainder separately could yield
 * `inches: 12`, and taking the remainder modulo `feet * 12` yields NaN for
 * heights under one foot.
 *
 * @param {number|string} centimeters - Height in centimeters.
 * @returns {{ feet: number, inches: number }} Whole feet and the remaining
 *   inches (0-11 for non-negative input).
 */
function cmToFeetInches(centimeters) {
  const totalInches = Math.round(Number(centimeters) / 2.54);

  const feet = Math.floor(totalInches / 12);
  const inches = totalInches - (feet * 12);

  return { feet, inches };
}
|
||||
|
||||
function heightToCm(height) {
|
||||
const [feet, inches] = height.match(/\d+/g);
|
||||
|
||||
@@ -16,8 +23,16 @@ function lbsToKg(lbs) {
|
||||
return Math.round(Number(pounds) * 0.453592);
|
||||
}
|
||||
|
||||
/**
 * Convert a weight in kilograms to pounds, rounded to the nearest pound.
 *
 * The first number found in the input is used, so both plain numbers and
 * strings such as `'55 kg'` are accepted. A decimal fraction is honoured
 * rather than truncated (`55.5` is read as 55.5, not 55).
 *
 * @param {number|string} kgs - Weight in kilograms, optionally with a unit suffix.
 * @returns {number} Weight in whole pounds.
 * @throws {TypeError} When the input contains no digits.
 */
function kgToLbs(kgs) {
  const match = String(kgs).match(/\d+(?:\.\d+)?/);

  if (!match) {
    throw new TypeError(`Cannot parse a weight in kilograms from '${kgs}'`);
  }

  return Math.round(Number(match[0]) / 0.453592);
}
|
||||
|
||||
module.exports = {
|
||||
cmToFeetInches,
|
||||
feetInchesToCm,
|
||||
heightToCm,
|
||||
lbsToKg,
|
||||
kgToLbs,
|
||||
};
|
||||
|
||||
@@ -2,7 +2,12 @@
|
||||
|
||||
/**
 * Pick up to `photoLimit` evenly distributed photos from a set of `photoTotal`
 * photos.
 *
 * The first photo is always included; the remaining picks are spread at equal
 * steps up to and including the last photo.
 *
 * @param {number} photoTotal - Number of photos in the set.
 * @param {number} photoLimit - Maximum number of photos to pick.
 * @returns {number[]} Unique photo indexes, starting at 1. Empty when the set
 *   has no photos.
 */
function pluckPhotos(photoTotal, photoLimit) {
  if (!(photoTotal >= 1)) {
    // nothing to pick from; avoid returning indexes of photos that do not exist
    return [];
  }

  const step = photoTotal / (photoLimit - 1);

  const plucked = [1]
    .concat(
      // clamp to 1: when the limit far exceeds the total, the first steps round down to 0
      Array.from({ length: photoLimit - 1 }, (value, index) => Math.max(1, Math.round((index + 1) * step))),
    );

  return Array.from(new Set(plucked)); // remove duplicates, may happen when photo total and photo limit are close
}
|
||||
|
||||
module.exports = pluckPhotos;
|
||||
|
||||
Reference in New Issue
Block a user