Drastic actor page redesign. Storing one avatar per actor, other profile photos as 'photo' role; no longer assuming first photo is avatar.

This commit is contained in:
2019-11-28 05:36:22 +01:00
parent 884ef248e4
commit 4be508b388
300 changed files with 1110 additions and 213 deletions

View File

@@ -9,7 +9,7 @@ const whereOr = require('./utils/where-or');
const { createActorMediaDirectory, storeAvatars } = require('./media');
async function curateActor(actor) {
const [aliases, avatars, social] = await Promise.all([
const [aliases, photos, social] = await Promise.all([
knex('actors').where({ alias_for: actor.id }),
knex('media')
.where({ domain: 'actors', target_id: actor.id })
@@ -41,14 +41,17 @@ async function curateActor(actor) {
: null,
ethnicity: actor.ethnicity,
height: actor.height,
weight: actor.weight,
bust: actor.bust,
waist: actor.waist,
hip: actor.hip,
naturalBoobs: actor.natural_boobs,
aliases: aliases.map(({ name }) => name),
slug: actor.slug,
avatars,
avatar: photos.find(photo => photo.role === 'avatar'),
photos: photos.filter(photo => photo.role === 'photo'),
social,
scrapedAt: actor.scraped_at,
};
}
@@ -97,27 +100,32 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
return curatedActor;
}
function curateSocialEntry(url, actor) {
function curateSocialEntry(url, actorId) {
const { hostname, origin, pathname } = new URL(url);
const platform = ['twitter', 'instagram', 'snapchat', 'modelhub', 'youtube'].find(platformName => hostname.match(platformName));
const platform = ['facebook', 'twitter', 'instagram', 'tumblr', 'snapchat', 'amazon', 'youtube'].find(platformName => hostname.match(platformName));
return {
url: `${origin}${pathname}`,
platform,
domain: 'actors',
target_id: actor.id,
target_id: actorId,
};
}
function curateSocialEntries(urls, actor) {
async function curateSocialEntries(urls, actorId) {
if (!urls) {
return [];
}
return urls.reduce((acc, url) => {
const socialEntry = curateSocialEntry(url, actor);
const existingSocialLinks = await knex('social').where({
domain: 'actors',
target_id: actorId,
});
if (acc.some(entry => socialEntry.url === entry.url)) {
return urls.reduce((acc, url) => {
const socialEntry = curateSocialEntry(url, actorId);
if (acc.some(entry => socialEntry.url === entry.url) || existingSocialLinks.some(entry => socialEntry.url === entry.url)) {
// prevent duplicates
return acc;
}
@@ -141,15 +149,10 @@ async function fetchActors(queryObject) {
return curateActors(releases);
}
async function storeSocialLinks(actor) {
const existingSocialLinks = await knex('social').where({
domain: 'actors',
target_id: actor.id,
});
async function storeSocialLinks(urls, actorId) {
const curatedSocialEntries = await curateSocialEntries(urls, actorId);
const newSocialLinks = actor.social.filter(url => !existingSocialLinks.some(existingLink => url === existingLink.url));
await knex('social').insert(curateSocialEntries(newSocialLinks, actor));
await knex('social').insert(curatedSocialEntries);
}
async function storeActor(actor, scraped = false, scrapeSuccess = false) {
@@ -159,7 +162,7 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) {
.insert(curatedActor)
.returning('*');
await storeSocialLinks({ ...actor, ...actorEntry });
await storeSocialLinks(actor.social, actorEntry.id);
console.log(`Added new entry for actor '${actor.name}'`);
@@ -174,7 +177,7 @@ async function updateActor(actor, scraped = false, scrapeSuccess = false) {
.update(curatedActor)
.returning('*');
await storeSocialLinks({ ...actor, ...curatedActor, ...actorEntry });
await storeSocialLinks(actor.social, actor.id);
console.log(`Updated entry for actor '${actor.name}'`);
@@ -239,7 +242,6 @@ async function scrapeActors(actorNames) {
return;
}
if (actorEntry && profile) {
await createActorMediaDirectory(profile, actorEntry);

View File

@@ -1,5 +1,7 @@
'use strict';
const Promise = require('bluebird');
const argv = require('./argv');
const knex = require('./knex');
const initServer = require('./web/server');
@@ -10,7 +12,10 @@ const { scrapeActors, scrapeBasicActors } = require('./actors');
async function init() {
if (argv.url) {
await scrapeRelease(argv.url);
await Promise.map(argv.url, async url => scrapeRelease(url), {
concurrency: 5,
});
knex.destroy();
return;

View File

@@ -36,7 +36,7 @@ const { argv } = yargs
})
.option('url', {
describe: 'Scrape scene info from URL',
type: 'string',
type: 'array',
alias: 'fetch',
})
.option('after', {

View File

@@ -239,6 +239,8 @@ async function storeAvatars(profile, actor) {
return !existingAvatars.some(avatar => file.hash === avatar.hash);
});
const hasAvatar = existingAvatars.some(avatar => avatar.role === 'avatar');
await knex('media')
.insert(newAvatars.map((file, index) => ({
path: file.filepath,
@@ -249,7 +251,7 @@ async function storeAvatars(profile, actor) {
index,
domain: 'actors',
target_id: actor.id,
role: 'avatar',
role: index === 0 && !hasAvatar ? 'avatar' : 'photo',
})));
}

View File

@@ -128,7 +128,7 @@ async function scrapeScene(html, url, site) {
function scrapeActorSearch(html, url, actorName) {
const { document } = new JSDOM(html).window;
const actorLink = document.querySelector(`a[title="${actorName}"]`);
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
return actorLink;
}

View File

@@ -21,7 +21,17 @@ function scrapePhotos(html) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
.map((photoIndex, photoElement) => $(photoElement).attr('src').replace('thumbs/', 'photos/'))
.map((photoIndex, photoElement) => {
const src = $(photoElement).attr('src');
if (src.match(/dl\d+/)) {
// thumbnail URLs containing dl02/ or dl03/ don't appear to have
// a full photo available, fall back to thumbnail
return src;
}
return src.replace('thumbs/', 'photos/');
})
.toArray();
return photos;
@@ -34,16 +44,19 @@ async function getPhotos(entryId, site, page = 1) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = scrapePhotos(html);
const pages = Number($('.page_totals').text().trim().match(/\d+$/)[0]);
const pagesString = $('.page_totals').text().trim();
const pages = pagesString.length > 0 ? Number($('.page_totals').text().trim().match(/\d+$/)[0]) : null;
const otherPhotos = await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
const pageHtml = await fetchPhotos(pageUrl);
const otherPhotos = pages
? await Promise.map(Array.from({ length: pages - 1 }), async (val, index) => {
const pageUrl = `https://www.julesjordan.com/trial/gallery.php?id=${entryId}&type=highres&page=${index + 2}`;
const pageHtml = await fetchPhotos(pageUrl);
return scrapePhotos(pageHtml);
}, {
concurrency: 2,
});
return scrapePhotos(pageHtml);
}, {
concurrency: 2,
})
: [];
const allPhotos = photos.concat(otherPhotos.flat());
@@ -211,11 +224,13 @@ function scrapeProfile(html, url, actorName) {
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString[0].split('-');
if (avatarEl) {
const src = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src') + 5, avatarEl.innerHTML.indexOf('set.jpg') + 7);
const src0 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0') + 6, avatarEl.innerHTML.indexOf('set.jpg') + 7);
const src1 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_1x') + 9, avatarEl.innerHTML.indexOf('1x.jpg') + 6);
const src2 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_2x') + 9, avatarEl.innerHTML.indexOf('2x.jpg') + 6);
const src3 = avatarEl.innerHTML.slice(avatarEl.innerHTML.indexOf('src0_3x') + 9, avatarEl.innerHTML.indexOf('3x.jpg') + 6);
profile.avatar = src3 || src2 || src1;
profile.avatar = src3 || src2 || src1 || src0 || src;
}
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);
@@ -242,13 +257,26 @@ async function fetchScene(url, site) {
}
async function fetchProfile(actorName) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const url = `https://julesjordan.com/trial/models/${actorSlug}.html`;
const actorSlugA = actorName.toLowerCase().replace(/\s+/g, '-');
const actorSlugB = actorName.toLowerCase().replace(/\s+/g, '');
const res = await bhttp.get(url);
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
if (res.statusCode === 200) {
return scrapeProfile(res.body.toString(), url, actorName);
const resA = await bhttp.get(urlA);
if (resA.statusCode === 200) {
const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
return profile;
}
const resB = await bhttp.get(urlB);
if (resB.statusCode === 200) {
const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
return profile;
}
return null;

View File

@@ -4,6 +4,13 @@ function feetInchesToCm(feet, inches) {
return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
}
/**
 * Convert a length in centimeters to whole feet and whole inches.
 *
 * The value is rounded to the nearest inch first and only then split into
 * feet and inches. Rounding the inch remainder on its own can produce
 * 12 inches (e.g. 182 cm -> 5'12" instead of 6'0"), and taking the remainder
 * against `feet * 12` yields NaN for anything shorter than one foot.
 *
 * @param {number|string} centimeters - length in centimeters; numeric strings are coerced
 * @returns {{ feet: number, inches: number }} whole feet and the remaining inches (0-11 for non-negative input)
 */
function cmToFeetInches(centimeters) {
  const totalInches = Math.round(centimeters / 2.54);
  const feet = Math.floor(totalInches / 12);
  const inches = totalInches - (feet * 12);

  return { feet, inches };
}
function heightToCm(height) {
const [feet, inches] = height.match(/\d+/g);
@@ -16,8 +23,16 @@ function lbsToKg(lbs) {
return Math.round(Number(pounds) * 0.453592);
}
/**
 * Convert a weight in kilograms to pounds, rounded to the nearest pound.
 *
 * Only the first run of digits in the input is used, so any unit suffix and
 * any decimal part are ignored. Throws a TypeError when the input contains
 * no digits at all.
 *
 * @param {number|string} kgs - weight in kilograms, e.g. 55 or '55kg'
 * @returns {number} weight in whole pounds
 */
function kgToLbs(kgs) {
  const KG_PER_LB = 0.453592;
  const firstDigitRun = /\d+/.exec(kgs.toString())[0];
  const pounds = Number(firstDigitRun) / KG_PER_LB;

  return Math.round(pounds);
}
module.exports = {
cmToFeetInches,
feetInchesToCm,
heightToCm,
lbsToKg,
kgToLbs,
};

View File

@@ -2,7 +2,12 @@
/**
 * Pick up to {photoLimit} evenly distributed photos from a set of {photoTotal} photos.
 *
 * The first photo is always included; the remaining picks are spread in equal
 * steps up to and including the last photo.
 *
 * @param {number} photoTotal - number of photos in the set
 * @param {number} photoLimit - maximum number of photos to pick
 * @returns {number[]} unique photo indexes, starting at 1
 */
function pluckPhotos(photoTotal, photoLimit) {
  const step = photoTotal / (photoLimit - 1);
  const spread = Array.from({ length: photoLimit - 1 }, (value, index) => Math.round((index + 1) * step));

  // when the limit is much larger than the total, the first steps round down to 0,
  // which is not a valid index since indexes start at 1
  const plucked = [1].concat(spread.filter(photoIndex => photoIndex >= 1));

  // remove duplicates, may happen when photo total and photo limit are close
  return Array.from(new Set(plucked));
}
module.exports = pluckPhotos;