Added avatars. Added PornHub and LegalPorno actor profile scrapers.

This commit is contained in:
ThePendulum 2019-11-20 04:53:36 +01:00
parent a13d92b84e
commit 9fcc40dd17
13 changed files with 475 additions and 63 deletions

View File

@ -9,10 +9,15 @@
<h2 class="title">{{ actor.name }}</h2> <h2 class="title">{{ actor.name }}</h2>
</div> </div>
<span class="description">{{ actor.description }}</span>
<div class="content-inner"> <div class="content-inner">
<h3 class="heading">Biography</h3> <div class="avatars">
<img
v-for="avatar in actor.avatars"
:key="`avatar-${avatar.id}`"
:src="`/media/${avatar.path}`"
class="avatar"
>
</div>
<ul class="bio"> <ul class="bio">
<li v-if="actor.aliases.length"> <li v-if="actor.aliases.length">
@ -63,7 +68,7 @@
<li v-if="actor.height"> <li v-if="actor.height">
<dfn class="bio-heading">Height</dfn> <dfn class="bio-heading">Height</dfn>
<span>{{ actor.height }}</span> <span>{{ actor.height }} cm</span>
</li> </li>
<li v-if="actor.boobSize || actor.boobsNatural"> <li v-if="actor.boobSize || actor.boobsNatural">
@ -73,6 +78,8 @@
</li> </li>
</ul> </ul>
<span class="description">{{ actor.description }}</span>
<Releases <Releases
:releases="releases" :releases="releases"
:context="actor.name" :context="actor.name"
@ -155,6 +162,19 @@ export default {
} }
} }
.description {
padding: 1rem;
}
.avatars {
padding: 1rem;
}
.avatar {
height: 20rem;
margin: 0 1rem 0 0;
}
.flag { .flag {
height: 1rem; height: 1rem;
border: solid 1px $shadow-weak; border: solid 1px $shadow-weak;

View File

@ -8,7 +8,6 @@
<div class="content-inner"> <div class="content-inner">
<div class="header"> <div class="header">
<a <a
v-if="network.url"
:href="network.url" :href="network.url"
target="_blank" target="_blank"
rel="noopener noreferrer" rel="noopener noreferrer"
@ -20,6 +19,7 @@
> >
<Icon <Icon
v-if="network.url"
icon="new-tab" icon="new-tab"
class="icon-href" class="icon-href"
/> />

View File

@ -97,8 +97,6 @@
<h2 class="row title">{{ release.title }}</h2> <h2 class="row title">{{ release.title }}</h2>
<div class="row"> <div class="row">
<Icon icon="star" />
<ul class="actors nolist"> <ul class="actors nolist">
<li <li
v-for="actor in release.actors" v-for="actor in release.actors"
@ -334,6 +332,11 @@ export default {
font-size: 1rem; font-size: 1rem;
} }
.actors {
display: flex;
flex-wrap: wrap;
}
.link { .link {
display: inline-block; display: inline-block;
color: $link; color: $link;

View File

@ -5,8 +5,21 @@
> >
<a <a
:href="`/actor/${actor.slug}`" :href="`/actor/${actor.slug}`"
class="name" class="link"
>{{ actor.name }}</a> >
<span class="name">{{ actor.name }}</span>
<img
v-if="actor.avatar"
:src="`/media/${actor.avatar}`"
class="avatar"
>
<span
v-else
class="avatar"
>No photo</span>
</a>
</div> </div>
</template> </template>
@ -27,14 +40,34 @@ export default {
.actor { .actor {
background: $background; background: $background;
display: inline-block; display: inline-block;
margin: 0 .25rem .25rem 0; margin: 0 .5rem .5rem 0;
box-shadow: 0 0 3px $shadow-weak; box-shadow: 0 0 3px $shadow-weak;
} }
.name { .link {
color: $link; color: $link;
display: inline-block;
padding: .5rem;
text-decoration: none; text-decoration: none;
text-align: center;
&:hover {
color: $primary;
}
}
.name {
display: block;
padding: .5rem;
font-weight: bold;
}
.avatar {
color: $shadow-weak;
background: $shadow-hint;
height: 12rem;
width: 10rem;
display: flex;
align-items: center;
justify-content: center;
object-fit: cover;
} }
</style> </style>

View File

@ -18,11 +18,13 @@ exports.up = knex => Promise.resolve()
table.date('birthdate'); table.date('birthdate');
table.string('gender', 18); table.string('gender', 18);
table.text('description');
table.string('birth_country_alpha2', 2) table.string('birth_country_alpha2', 2)
.references('alpha2') .references('alpha2')
.inTable('countries'); .inTable('countries');
table.string('ethnicity');
table.string('birth_place'); table.string('birth_place');
table.string('residence_country_alpha2', 2) table.string('residence_country_alpha2', 2)
@ -31,18 +33,18 @@ exports.up = knex => Promise.resolve()
table.string('residence_place'); table.string('residence_place');
table.string('ethnicity');
table.integer('height');
table.string('eyes');
table.string('boobs_size'); table.string('boobs_size');
table.boolean('boobs_natural'); table.boolean('boobs_natural');
table.string('piercings'); table.integer('height', 3);
table.string('tattoos'); table.integer('weight', 3);
table.string('eyes');
table.string('hair'); table.string('hair');
table.text('description'); table.boolean('has_tattoos');
table.boolean('active'); table.boolean('has_piercings');
table.string('piercings');
table.string('tattoos');
table.integer('alias_for', 12) table.integer('alias_for', 12)
.references('id') .references('id')
@ -217,6 +219,8 @@ exports.up = knex => Promise.resolve()
table.string('hash'); table.string('hash');
table.string('source', 1000); table.string('source', 1000);
table.unique(['domain', 'target_id', 'role', 'hash']);
table.datetime('created_at') table.datetime('created_at')
.defaultTo(knex.fn.now()); .defaultTo(knex.fn.now());
})) }))

View File

@ -249,14 +249,32 @@
.actor[data-v-6989dc6f] { .actor[data-v-6989dc6f] {
background: #fff; background: #fff;
display: inline-block; display: inline-block;
margin: 0 .25rem .25rem 0; margin: 0 .5rem .5rem 0;
box-shadow: 0 0 3px rgba(0, 0, 0, 0.2); box-shadow: 0 0 3px rgba(0, 0, 0, 0.2);
} }
.name[data-v-6989dc6f] { .link[data-v-6989dc6f] {
color: #cc4466; color: #cc4466;
display: inline-block;
padding: .5rem;
text-decoration: none; text-decoration: none;
text-align: center;
}
.link[data-v-6989dc6f]:hover {
color: #ff6c88;
}
.name[data-v-6989dc6f] {
display: block;
padding: .5rem;
font-weight: bold;
}
.avatar[data-v-6989dc6f] {
color: rgba(0, 0, 0, 0.2);
background: rgba(0, 0, 0, 0.1);
height: 12rem;
width: 10rem;
display: flex;
align-items: center;
justify-content: center;
-o-object-fit: cover;
object-fit: cover;
} }
/* $primary: #ff886c; */ /* $primary: #ff886c; */
@ -375,6 +393,10 @@
.duration-segment[data-v-2bc41e74] { .duration-segment[data-v-2bc41e74] {
font-size: 1rem; font-size: 1rem;
} }
.actors[data-v-2bc41e74] {
display: flex;
flex-wrap: wrap;
}
.link[data-v-2bc41e74] { .link[data-v-2bc41e74] {
display: inline-block; display: inline-block;
color: #cc4466; color: #cc4466;
@ -574,6 +596,16 @@
.bio-heading[data-v-677a8360]::after { .bio-heading[data-v-677a8360]::after {
content: ':'; content: ':';
} }
.description[data-v-677a8360] {
padding: 1rem;
}
.avatars[data-v-677a8360] {
padding: 1rem;
}
.avatar[data-v-677a8360] {
height: 20rem;
margin: 0 1rem 0 0;
}
.flag[data-v-677a8360] { .flag[data-v-677a8360] {
height: 1rem; height: 1rem;
border: solid 1px rgba(0, 0, 0, 0.2); border: solid 1px rgba(0, 0, 0, 0.2);

View File

@ -1,14 +1,18 @@
'use strict'; 'use strict';
const Promise = require('bluebird'); const Promise = require('bluebird');
const knex = require('./knex'); const knex = require('./knex');
const argv = require('./argv'); const argv = require('./argv');
const scrapers = require('./scrapers/scrapers'); const scrapers = require('./scrapers/scrapers');
const whereOr = require('./utils/where-or'); const whereOr = require('./utils/where-or');
const { createActorMediaDirectory, storeAvatars } = require('./media');
async function curateActor(actor) { async function curateActor(actor) {
const aliases = await knex('actors') const [aliases, avatars] = await Promise.all([
.where({ alias_for: actor.id }); knex('actors').where({ alias_for: actor.id }),
knex('media').where({ domain: 'actors', target_id: actor.id }),
]);
return { return {
id: actor.id, id: actor.id,
@ -37,6 +41,7 @@ async function curateActor(actor) {
boobsNatural: actor.boobs_natural, boobsNatural: actor.boobs_natural,
aliases: aliases.map(({ name }) => name), aliases: aliases.map(({ name }) => name),
slug: actor.slug, slug: actor.slug,
avatars,
}; };
} }
@ -60,10 +65,10 @@ function curateActorEntry(actor, scraped, scrapeSuccess) {
residence_country_alpha2: actor.residenceCountry, residence_country_alpha2: actor.residenceCountry,
birth_place: actor.birthPlace, birth_place: actor.birthPlace,
residence_place: actor.residencePlace, residence_place: actor.residencePlace,
active: actor.active,
boobs_size: actor.boobs && actor.boobs.size, boobs_size: actor.boobs && actor.boobs.size,
boobs_natural: actor.boobs && actor.boobs.natural, boobs_natural: actor.boobs && actor.boobs.natural,
height: actor.height, height: actor.height,
weight: actor.weight,
hair: actor.hair, hair: actor.hair,
eyes: actor.eyes, eyes: actor.eyes,
tattoos: actor.tattoos, tattoos: actor.tattoos,
@ -116,11 +121,11 @@ async function storeActor(actor, scraped = false, scrapeSuccess = false) {
return null; return null;
} }
async function updateActor(actorEntry, actor, scraped = false, scrapeSuccess = false) { async function updateActor(actor, scraped = false, scrapeSuccess = false) {
const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess); const curatedActor = curateActorEntry(actor, scraped, scrapeSuccess);
const actorEntries = await knex('actors') const actorEntries = await knex('actors')
.where({ id: actorEntry.id }) .where({ id: actor.id })
.update(curatedActor) .update(curatedActor)
.returning('*'); .returning('*');
@ -129,23 +134,74 @@ async function updateActor(actorEntry, actor, scraped = false, scrapeSuccess = f
return actorEntries[0]; return actorEntries[0];
} }
function mergeProfiles(profiles, actor) {
return profiles.reduce((prevProfile, profile) => {
if (profile === null) {
return prevProfile;
}
return {
id: actor.id,
name: actor.name,
gender: prevProfile.gender || profile.gender,
birthdate: prevProfile.birthdate || profile.birthdate,
residenceCountry: prevProfile.residenceCountry || profile.residenceCountry,
birthPlace: prevProfile.birthPlace || profile.birthPlace,
ethnicity: prevProfile.ethnicity || profile.ethnicity,
boobs: profile.boobs
? {
size: prevProfile.boobs.size || profile.boobs.size,
natural: prevProfile.boobs.natural || profile.boobs.natural,
}
: {},
height: prevProfile.height || profile.height,
weight: prevProfile.weight || profile.weight,
hair: prevProfile.hair || profile.hair,
eyes: prevProfile.eyes || profile.eyes,
piercings: prevProfile.piercings || profile.piercings,
tattoos: prevProfile.tattoos || profile.tattoos,
social: prevProfile.social.concat(profile.social || []),
avatars: prevProfile.avatars.concat(profile.avatar || []),
};
}, {
boobs: {},
social: [],
avatars: [],
...actor,
});
}
async function scrapeActors(actorNames) { async function scrapeActors(actorNames) {
await Promise.map(actorNames || argv.actors, async (actorName) => { await Promise.map(actorNames || argv.actors, async (actorName) => {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-'); const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
const [actorEntry] = await fetchActors({ slug: actorSlug }); const actorEntry = await knex('actors').where({ slug: actorSlug }).first();
const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchActor(actorEntry ? actorEntry.name : actorName))); const profiles = await Promise.all(Object.values(scrapers.actors).map(scraper => scraper.fetchProfile(actorEntry ? actorEntry.name : actorName)));
const profile = mergeProfiles(profiles, actorEntry);
if (profiles[0] === null) { if (profile === null) {
console.log(`Could not find profile for actor '${actorName}'`); console.log(`Could not find profile for actor '${actorName}'`);
return updateActor(actorEntry, actorEntry, true, false); await updateActor(profile, true, false);
return;
} }
if (actorEntry && profiles[0]) {
return updateActor(actorEntry, profiles[0], true, true); if (actorEntry && profile) {
await createActorMediaDirectory(profile, actorEntry);
await Promise.all([
updateActor(profile, true, true),
storeAvatars(profile, actorEntry),
]);
return;
} }
return storeActor(profiles[0], true, true); const newActorEntry = await storeActor(profile, true, true);
await createActorMediaDirectory(profile, newActorEntry);
await storeAvatars(profile, newActorEntry);
}, { }, {
concurrency: 1, concurrency: 1,
}); });

View File

@ -28,10 +28,19 @@ async function getThumbnail(buffer) {
.toBuffer(); .toBuffer();
} }
async function createMediaDirectory(release, releaseId) { async function createReleaseMediaDirectory(release, releaseId) {
if (release.poster || (release.photos && release.photos.length)) { if (release.poster || (release.photos && release.photos.length)) {
await fs.mkdir( await fs.mkdir(
path.join(config.media.path, release.site.network.slug, release.site.slug, releaseId.toString()), path.join(config.media.path, 'releases', release.site.network.slug, release.site.slug, releaseId.toString()),
{ recursive: true },
);
}
}
async function createActorMediaDirectory(profile, actor) {
if (profile.avatars && profile.avatars.length) {
await fs.mkdir(
path.join(config.media.path, 'actors', actor.slug),
{ recursive: true }, { recursive: true },
); );
} }
@ -46,15 +55,16 @@ async function storePoster(release, releaseId) {
console.log(`Storing poster for (${release.site.name}, ${releaseId}) "${release.title}"`); console.log(`Storing poster for (${release.site.name}, ${releaseId}) "${release.title}"`);
const res = await bhttp.get(release.poster); const res = await bhttp.get(release.poster);
const thumbnail = await getThumbnail(res.body);
if (res.statusCode === 200) { if (res.statusCode === 200) {
const thumbnail = await getThumbnail(res.body);
const { pathname } = new URL(release.poster); const { pathname } = new URL(release.poster);
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg'; const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
const extension = mime.getExtension(mimetype); const extension = mime.getExtension(mimetype);
const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster.${extension}`); const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `poster.${extension}`);
const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `poster_thumb.${extension}`); const thumbpath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `poster_thumb.${extension}`);
const hash = getHash(res.body); const hash = getHash(res.body);
await Promise.all([ await Promise.all([
@ -93,13 +103,13 @@ async function storePhotos(release, releaseId) {
try { try {
const res = await bhttp.get(photoUrl); const res = await bhttp.get(photoUrl);
const thumbnail = await getThumbnail(res.body);
if (res.statusCode === 200) { if (res.statusCode === 200) {
const thumbnail = await getThumbnail(res.body);
const extension = mime.getExtension(mimetype); const extension = mime.getExtension(mimetype);
const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}.${extension}`); const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}.${extension}`);
const thumbpath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}_thumb.${extension}`); const thumbpath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `${index + 1}_thumb.${extension}`);
const hash = getHash(res.body); const hash = getHash(res.body);
await Promise.all([ await Promise.all([
@ -153,7 +163,7 @@ async function storeTrailer(release, releaseId) {
const mimetype = release.trailer.type || mime.getType(pathname); const mimetype = release.trailer.type || mime.getType(pathname);
const res = await bhttp.get(release.trailer.src); const res = await bhttp.get(release.trailer.src);
const filepath = path.join(release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`); const filepath = path.join('releases', release.site.network.slug, release.site.slug, releaseId.toString(), `trailer${release.trailer.quality ? `_${release.trailer.quality}` : ''}.${mime.getExtension(mimetype)}`);
await Promise.all([ await Promise.all([
fs.writeFile(path.join(config.media.path, filepath), res.body), fs.writeFile(path.join(config.media.path, filepath), res.body),
@ -169,8 +179,82 @@ async function storeTrailer(release, releaseId) {
]); ]);
} }
async function storeAvatars(profile, actor) {
if (!profile.avatars || profile.avatars.length === 0) {
console.warn(`No avatars available for '${profile.name}'`);
return;
}
console.log(`Storing ${profile.avatars.length} avatars for '${profile.name}'`);
const files = await Promise.map(profile.avatars, async (avatarUrl, index) => {
const { pathname } = new URL(avatarUrl);
const mimetype = mime.getType(pathname);
try {
const res = await bhttp.get(avatarUrl);
if (res.statusCode === 200) {
const thumbnail = await getThumbnail(res.body);
const extension = mime.getExtension(mimetype);
const filepath = path.join('actors', actor.slug, `${index + 1}.${extension}`);
const thumbpath = path.join('actors', actor.slug, `${index + 1}_thumb.${extension}`);
const hash = getHash(res.body);
await Promise.all([
fs.writeFile(path.join(config.media.path, filepath), res.body),
fs.writeFile(path.join(config.media.path, thumbpath), thumbnail),
]);
return {
filepath,
thumbpath,
mimetype,
hash,
source: avatarUrl,
};
}
throw new Error(`Response ${res.statusCode} not OK`);
} catch (error) {
console.warn(`Failed to store avatar ${index + 1} for '${profile.name}'`);
return null;
}
}, {
concurrency: 2,
});
const existingAvatars = await knex('media')
.whereIn('hash', files.map(file => file.hash));
const newAvatars = files.filter((file) => {
if (!file) {
return false;
}
return !existingAvatars.some(avatar => file.hash === avatar.hash);
});
await knex('media')
.insert(newAvatars.map((file, index) => ({
path: file.filepath,
thumbnail: file.thumbpath,
mime: file.mimetype,
hash: file.hash,
source: file.source,
index,
domain: 'actors',
target_id: actor.id,
role: 'avatar',
})));
}
module.exports = { module.exports = {
createMediaDirectory, createActorMediaDirectory,
createReleaseMediaDirectory,
storeAvatars,
storePoster, storePoster,
storePhotos, storePhotos,
storeTrailer, storeTrailer,

View File

@ -7,7 +7,7 @@ const whereOr = require('./utils/where-or');
const { associateTags } = require('./tags'); const { associateTags } = require('./tags');
const { associateActors } = require('./actors'); const { associateActors } = require('./actors');
const { const {
createMediaDirectory, createReleaseMediaDirectory,
storePoster, storePoster,
storePhotos, storePhotos,
storeTrailer, storeTrailer,
@ -16,16 +16,26 @@ const {
async function curateRelease(release) { async function curateRelease(release) {
const [actors, tags, media] = await Promise.all([ const [actors, tags, media] = await Promise.all([
knex('actors_associated') knex('actors_associated')
.select('actors.id', 'actors.name', 'actors.gender', 'actors.slug') .select('actors.id', 'actors.name', 'actors.gender', 'actors.slug', 'media.thumbnail as avatar')
.where({ release_id: release.id }) .where({ release_id: release.id })
.leftJoin('actors', 'actors.id', 'actors_associated.actor_id'), .leftJoin('actors', 'actors.id', 'actors_associated.actor_id')
.leftJoin('media', (builder) => {
builder
.on('media.target_id', 'actors.id')
.andOnVal('media.domain', 'actors')
.andOnVal('media.index', '0');
})
.orderBy('actors.gender'),
knex('tags_associated') knex('tags_associated')
.select('tags.name', 'tags.slug') .select('tags.name', 'tags.slug')
.where({ release_id: release.id }) .where({ release_id: release.id })
.leftJoin('tags', 'tags.id', 'tags_associated.tag_id') .leftJoin('tags', 'tags.id', 'tags_associated.tag_id')
.orderBy('tags.priority', 'desc'), .orderBy('tags.priority', 'desc'),
knex('media') knex('media')
.where({ target_id: release.id }) .where({
target_id: release.id,
domain: 'releases',
})
.orderBy('role'), .orderBy('role'),
]); ]);
@ -184,7 +194,7 @@ async function fetchTagReleases(queryObject, options = {}) {
} }
async function storeReleaseAssets(release, releaseId) { async function storeReleaseAssets(release, releaseId) {
await createMediaDirectory(release, releaseId); await createReleaseMediaDirectory(release, releaseId);
await Promise.all([ await Promise.all([
associateActors(release, releaseId), associateActors(release, releaseId),

View File

@ -7,7 +7,7 @@ const moment = require('moment');
const knex = require('../knex'); const knex = require('../knex');
async function scrapeActorFrontpage(html, url, name) { async function scrapeProfileFrontpage(html, url, name) {
const { document } = new JSDOM(html).window; const { document } = new JSDOM(html).window;
const bioEl = document.querySelector('.dashboard-bio-list'); const bioEl = document.querySelector('.dashboard-bio-list');
@ -26,7 +26,6 @@ async function scrapeActorFrontpage(html, url, name) {
const boobsSizeString = bio['Measurements:']; const boobsSizeString = bio['Measurements:'];
const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString; const boobsSize = boobsSizeString === '??-??-??' ? null : boobsSizeString;
const boobsNatural = bio['Fake Boobs:'] === 'No'; const boobsNatural = bio['Fake Boobs:'] === 'No';
const active = bio['Career Status:'].trim() === 'Active';
const residenceCountryName = bio['Country of Origin:']; const residenceCountryName = bio['Country of Origin:'];
const countryEntry = await knex('countries').where({ name: residenceCountryName }).first(); const countryEntry = await knex('countries').where({ name: residenceCountryName }).first();
@ -59,14 +58,13 @@ async function scrapeActorFrontpage(html, url, name) {
eyes, eyes,
piercings, piercings,
tattoos, tattoos,
active,
social, social,
}, },
url: bioUrl, url: bioUrl,
}; };
} }
async function scrapeActorBio(html, frontpageBio, url, name) { async function scrapeProfileBio(html, frontpageBio, url, name) {
const { document } = new JSDOM(html).window; const { document } = new JSDOM(html).window;
const bioEl = document.querySelector('#biographyTable'); const bioEl = document.querySelector('#biographyTable');
@ -92,6 +90,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
const hair = bio['Hair Color:'].toLowerCase(); const hair = bio['Hair Color:'].toLowerCase();
const eyes = bio['Eye Color:'].toLowerCase(); const eyes = bio['Eye Color:'].toLowerCase();
const height = Number(bio['Height:'].match(/\d+/)[0]);
const weight = Number(bio['Weight:'].match(/\d+/)[0]);
const piercingsString = bio['Piercings:']; const piercingsString = bio['Piercings:'];
const piercings = piercingsString === 'None' ? null : piercingsString; const piercings = piercingsString === 'None' ? null : piercingsString;
@ -113,6 +113,8 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
size: boobsSize, size: boobsSize,
natural: boobsNatural, natural: boobsNatural,
}, },
height,
weight,
hair, hair,
eyes, eyes,
piercings, piercings,
@ -121,23 +123,34 @@ async function scrapeActorBio(html, frontpageBio, url, name) {
}; };
} }
async function fetchActor(actorName) { async function fetchProfile(actorName) {
const slug = actorName.replace(' ', '_'); const slug = actorName.replace(' ', '_');
const frontpageUrl = `https://freeones.com/html/v_links/${slug}`; const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;
const resFrontpage = await bhttp.get(frontpageUrl); const resFrontpage = await bhttp.get(frontpageUrl);
if (resFrontpage.statusCode === 200) { if (resFrontpage.statusCode === 200) {
const { url, bio } = await scrapeActorFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName); const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
const resBio = await bhttp.get(url); const resBio = await bhttp.get(url);
return scrapeActorBio(resBio.body.toString(), bio, url, actorName); return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
}
// apparently some actors are appended 'Babe' as their surname...
const fallbackSlug = `${slug}_Babe`;
const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
const resFallback = await bhttp.get(fallbackUrl);
if (resFallback.statusCode === 200) {
const { url, bio } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
const resBio = await bhttp.get(url);
return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
} }
return null; return null;
} }
module.exports = { module.exports = {
fetchActor, fetchProfile,
}; };

View File

@ -1,6 +1,7 @@
'use strict'; 'use strict';
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
const knex = require('../knex'); const knex = require('../knex');
@ -69,6 +70,31 @@ function scrapeLatest(html, site) {
}); });
} }
async function scrapeProfile(html, _url, actorName) {
const { document } = new JSDOM(html).window;
const profile = {
name: actorName,
};
const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));
const bio = entries.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});
const birthCountryName = bio.Nationality;
if (birthCountryName) {
const countryEntry = await knex('countries').where({ name: birthCountryName }).first();
if (countryEntry) profile.birthCountry = countryEntry.alpha2;
}
if (bio.Age) profile.age = bio.Age;
if (avatarEl) profile.avatar = avatarEl.src;
return profile;
}
async function scrapeScene(html, url, site, useGallery) { async function scrapeScene(html, url, site, useGallery) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const playerObject = $('script:contains("new VideoPlayer")').html(); const playerObject = $('script:contains("new VideoPlayer")').html();
@ -145,7 +171,24 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site, useGallery); return scrapeScene(res.body.toString(), url, site, useGallery);
} }
async function fetchProfile(actorName) {
const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?q=${actorName.replace(' ', '+')}`);
const data = res.body;
const result = data.terms.find(item => item.type === 'model');
if (result) {
const bioRes = await bhttp.get(result.url);
const html = bioRes.body.toString();
return scrapeProfile(html, result.url, actorName);
}
return null;
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile,
fetchScene, fetchScene,
}; };

109
src/scrapers/pornhub.js Normal file
View File

@ -0,0 +1,109 @@
'use strict';
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const knex = require('../knex');
const ethnicityMap = {
White: 'Caucasian',
};
const hairMap = {
Brunette: 'brown',
};
const countryMap = {
'United States of America': 'United States',
};
async function scrapeProfile(html, _url, actorName) {
const { document } = new JSDOM(html).window;
const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':'));
const bio = entries.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});
const profile = {
name: actorName,
boobs: {},
};
const descriptionString = document.querySelector('div[itemprop="description"]');
const birthPlaceString = bio['Birth Place'] || bio.Birthplace;
const residencePlaceString = bio['City and Country'];
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;
if (descriptionString) profile.description = descriptionString.textContent;
if (bio.Birthday) bio.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate();
if (bio.Born) bio.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
if (birthPlaceString) {
const birthPlaceSegments = birthPlaceString.split(',');
const birthCountryName = birthPlaceSegments.slice(-1)[0].trim();
const birthCountryEntry = await knex('countries').where('name', countryMap[birthCountryName] || birthCountryName).first();
profile.birthPlace = birthPlaceSegments.slice(0, -1).join(',').trim();
profile.birthCountry = birthCountryEntry ? birthCountryEntry.alpha2 : null;
}
if (residencePlaceString) {
const residencePlaceSegments = residencePlaceString.split(',');
const residenceCountryAlpha2 = residencePlaceSegments.slice(-1)[0].trim();
const residenceCountryEntry = await knex('countries').where('alpha2', residenceCountryAlpha2).first();
profile.residencePlace = residencePlaceSegments.slice(0, -1).join(',').trim();
profile.residenceCountry = residenceCountryEntry ? residenceCountryEntry.alpha2 : null;
}
if (bio.Measurements && bio.Measurements !== '--') profile.boobs.size = bio.Measurements;
if (bio['Fake Boobs']) profile.boobs.natural = bio['Fake Boobs'] === 'No';
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
if (bio.Piercings) profile.piercings = bio.Piercings === 'Yes';
if (bio.Tattoos) profile.tattoos = bio.tattoos === 'Yes';
if (avatarEl) profile.avatar = avatarEl.src;
profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
return profile;
}
async function fetchProfile(actorName) {
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
/* Model pages are not reliably associated with actual porn stars
const modelUrl = `https://pornhub.com/model/${actorSlug}`;
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
const [modelRes, pornstarRes] = await Promise.all([
bhttp.get(modelUrl),
bhttp.get(pornstarUrl),
]);
const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName);
const pornstar = pornstarRes.statusCode === 200 && await scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
if (model && pornstar) {
return {
...model,
...pornstar,
};
}
*/
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
const pornstarRes = await bhttp.get(pornstarUrl);
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
module.exports = {
fetchProfile,
};

View File

@ -10,7 +10,6 @@ const dogfart = require('./dogfart');
const evilangel = require('./evilangel'); const evilangel = require('./evilangel');
const julesjordan = require('./julesjordan'); const julesjordan = require('./julesjordan');
const kink = require('./kink'); const kink = require('./kink');
const legalporno = require('./legalporno');
const mikeadriano = require('./mikeadriano'); const mikeadriano = require('./mikeadriano');
const mofos = require('./mofos'); const mofos = require('./mofos');
const pervcity = require('./pervcity'); const pervcity = require('./pervcity');
@ -20,8 +19,12 @@ const realitykings = require('./realitykings');
const vixen = require('./vixen'); const vixen = require('./vixen');
const xempire = require('./xempire'); const xempire = require('./xempire');
// actors // releases and profiles
const legalporno = require('./legalporno');
// profiles
const freeones = require('./freeones'); const freeones = require('./freeones');
const pornhub = require('./pornhub');
module.exports = { module.exports = {
releases: { releases: {
@ -47,5 +50,7 @@ module.exports = {
}, },
actors: { actors: {
freeones, freeones,
legalporno,
pornhub,
}, },
}; };