Added Bang! actor scraper. Fixed date interpolation. Added date of death and age at death to the actor profile (shown only for deceased actors).

This commit is contained in:
ThePendulum 2020-05-17 04:59:09 +02:00
parent 7f86399033
commit f42ca7bd52
10 changed files with 183 additions and 18 deletions

View File

@ -58,15 +58,24 @@
<ul class="bio nolist"> <ul class="bio nolist">
<li <li
v-if="actor.birthdate" v-if="actor.dateOfBirth"
class="bio-item" class="bio-item"
> >
<dfn class="bio-label"><Icon icon="cake" />Birthdate</dfn> <dfn class="bio-label"><Icon icon="cake" />Date of birth</dfn>
<span <span class="birthdate">{{ formatDate(actor.dateOfBirth, 'MMMM D, YYYY') }}<span
v-if="actor.birthdate" v-if="!actor.dateOfDeath"
class="birthdate" class="age"
>{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }}<span class="age">{{ actor.age }}</span></span> >{{ actor.age }}</span></span>
</li>
<li
v-if="actor.dateOfDeath"
class="bio-item"
>
<dfn class="bio-label"><Icon icon="christian-cross" />Date of death</dfn>
<span class="birthdate">{{ formatDate(actor.dateOfDeath, 'MMMM D, YYYY') }}<span class="age">{{ actor.ageAtDeath }}</span></span>
</li> </li>
<li <li

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
id="svg6"
viewBox="0 0 19.041 23.969"
height="23.969"
width="19.041"
version="1.1">
<metadata
id="metadata12">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title>cross</dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs10" />
<title
id="title2">cross</title>
<path
id="path4"
d="m 0,6.958 v 5.021 h 7.041 v 11.99 h 5.042 v -11.99 h 6.958 V 6.958 H 12.083 V 0 H 7.041 v 6.958 z" />
</svg>

After

Width:  |  Height:  |  Size: 896 B

View File

@ -76,8 +76,10 @@ function initActorActions(store, _router) {
name name
slug slug
gender gender
birthdate: dateOfBirth dateOfBirth
dateOfDeath
age age
ageAtDeath
ethnicity ethnicity
cup cup
bust bust
@ -233,7 +235,7 @@ function initActorActions(store, _router) {
name name
slug slug
age age
birthdate: dateOfBirth dateOfBirth
gender gender
network { network {
id id

View File

@ -41,7 +41,11 @@ function getMostFrequentDate(dates) {
const month = getMostFrequent(dates.map(dateX => dateX.getMonth())); const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
const date = getMostFrequent(dates.map(dateX => dateX.getDate())); const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
return moment({ year, month, date }).toDate(); if (year && month && date) {
return moment({ year, month, date }).toDate();
}
return null;
} }
function getLongest(items) { function getLongest(items) {
@ -201,10 +205,6 @@ async function curateProfile(profile) {
curatedProfile.releases = toBaseReleases(profile.releases); curatedProfile.releases = toBaseReleases(profile.releases);
if (argv.inspect) {
console.log(curatedProfile);
}
return curatedProfile; return curatedProfile;
} catch (error) { } catch (error) {
logger.error(`Failed to curate '${profile.name}': ${error.message}`); logger.error(`Failed to curate '${profile.name}': ${error.message}`);
@ -255,6 +255,8 @@ async function interpolateProfiles(actors) {
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth); profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death); profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
console.log(valuesByProperty.date_of_birth, profile.date_of_birth);
profile.birth_city = getMostFrequent(valuesByProperty.birth_city); profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
profile.birth_state = getMostFrequent(valuesByProperty.birth_state); profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2); profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
@ -437,10 +439,17 @@ async function scrapeActors(actorNames) {
); );
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile))); const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds); if (argv.inspect) {
await interpolateProfiles(actors); console.log(profiles);
}
if (argv.save) {
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
}
} }
async function getOrCreateActors(baseActors, batchId) { async function getOrCreateActors(baseActors, batchId) {

View File

@ -49,7 +49,7 @@ const { argv } = yargs
.option('profiles', { .option('profiles', {
describe: 'Scrape profiles for new actors after fetching scenes', describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean', type: 'boolean',
alias: 'bios', alias: 'with-actors',
default: false, default: false,
}) })
.option('scene', { .option('scene', {

View File

@ -2,7 +2,9 @@
const bhttp = require('bhttp'); const bhttp = require('bhttp');
const { post } = require('../utils/http');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
const { inchesToCm } = require('../utils/convert');
const clusterId = '617fb597b659459bafe6472470d9073a'; const clusterId = '617fb597b659459bafe6472470d9073a';
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI='; const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
@ -81,6 +83,41 @@ function scrapeLatest(scenes, site) {
return scenes.map(({ _source: scene }) => scrapeScene(scene, site)); return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
} }
/**
 * Convert a raw actor document from the Bang! search index into a profile object.
 *
 * Only fields present on the source document are meaningful; absent source
 * fields end up as undefined (or are left off the profile entirely).
 *
 * @param {Object} actor - The `_source` document of an actor search hit.
 * @returns {Object} Profile with aliases, gender, ethnicity, nationality,
 *   birthPlace, hair, eyes, naturalBoobs, and optionally height, cup, bust,
 *   waist, hip, social and avatar.
 */
function scrapeProfile(actor) {
	const profile = {};

	profile.aliases = actor.aliases;
	// the source encodes gender as a single letter; anything else maps to undefined
	profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// join only the parts that are present, so a missing city does not produce
	// 'undefined, ' and a missing country does not leave a dangling ', '
	const birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;
	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const {
			cupSize,
			shoulder,
			chest,
			waist,
			height,
		} = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the source field names do not line up with the profile fields they feed;
		// this shifted mapping is intentional (presumably the index mislabels them — verify against live data)
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	// the image flag only signals that an avatar exists; the URL is derived from the identifier
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	// TODO: get releases

	return profile;
}
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, { const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50, size: 50,
@ -181,7 +218,48 @@ async function fetchScene(url, site) {
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
} }
/**
 * Look up an actor by name in the Bang! actor index and scrape their profile.
 *
 * @param {string} actorName - Exact actor name to look for.
 * @returns {Promise<Object|number|null>} The scraped profile; the response
 *   status when the request did not succeed; or null when the search
 *   returned no hit whose name equals `actorName`.
 */
async function fetchProfile(actorName) {
	const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
		size: 5,
		sort: [{
			_score: {
				order: 'desc',
			},
		}],
		query: {
			bool: {
				must: [
					{
						match: {
							name: {
								query: actorName,
								operator: 'and',
							},
						},
					},
					{
						match: {
							status: 'ok',
						},
					},
				],
			},
		},
	}, {
		Authorization: `Basic ${authKey}`,
	}, { encodeJSON: true });

	if (!res.ok) {
		return res.status;
	}

	// the hits are not guaranteed to include an exact name match, so pick it explicitly
	const actor = res.body.hits.hits.find(hit => hit._source.name === actorName); // eslint-disable-line no-underscore-dangle

	// no exact match means there is no profile; return null rather than dereferencing undefined
	if (!actor) {
		return null;
	}

	return scrapeProfile(actor._source); // eslint-disable-line no-underscore-dangle
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchProfile,
fetchScene, fetchScene,
}; };

View File

@ -4,6 +4,7 @@
const { JSDOM } = require('jsdom'); const { JSDOM } = require('jsdom');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const moment = require('moment'); const moment = require('moment');
// const bhttp = require('bhttp');
const { get } = require('../utils/http'); const { get } = require('../utils/http');
@ -233,7 +234,26 @@ async function fetchScene(url, site) {
return res.code; return res.code;
} }
/* API protected
async function fetchProfile(actorName, scraperSlug, site) {
const session = bhttp.session();
await session.get(`https://tour.${site.slug}.com`);
const url = `https://tour.${site.slug}.com/search-preview`;
const res = await session.post(url, { q: actorName }, {
headers: {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
origin: `https://tour.${site.slug}.com`,
},
});
console.log(res.body.toString());
}
*/
module.exports = { module.exports = {
fetchLatest, fetchLatest,
// fetchProfile,
fetchScene, fetchScene,
}; };

View File

@ -132,6 +132,7 @@ module.exports = {
}, },
actors: { actors: {
'21sextury': sextury, '21sextury': sextury,
allanal: mikeadriano,
analbbc: fullpornnetwork, analbbc: fullpornnetwork,
analized: fullpornnetwork, analized: fullpornnetwork,
analviolation: fullpornnetwork, analviolation: fullpornnetwork,
@ -140,6 +141,7 @@ module.exports = {
babes, babes,
baddaddypov: fullpornnetwork, baddaddypov: fullpornnetwork,
bamvisions, bamvisions,
bang,
bangbros, bangbros,
blacked: vixen, blacked: vixen,
blackedraw: vixen, blackedraw: vixen,
@ -184,6 +186,7 @@ module.exports = {
nubilefilms: nubiles, nubilefilms: nubiles,
nubiles, nubiles,
nubilesporn: nubiles, nubilesporn: nubiles,
nympho: mikeadriano,
onlyprince: fullpornnetwork, onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork, pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps, pimpxxx: cherrypimps,
@ -195,8 +198,10 @@ module.exports = {
score, score,
seehimfuck: hush, seehimfuck: hush,
sexyhub: mindgeek, sexyhub: mindgeek,
swallowed: mikeadriano,
thatsitcomshow: nubiles, thatsitcomshow: nubiles,
transangels, transangels,
trueanal: mikeadriano,
tushy: vixen, tushy: vixen,
tushyraw: vixen, tushyraw: vixen,
twistys, twistys,

View File

@ -2,6 +2,7 @@
const config = require('config'); const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename); const logger = require('./logger')(__filename);
const knex = require('./knex'); const knex = require('./knex');
const slugify = require('./utils/slugify'); const slugify = require('./utils/slugify');
@ -235,7 +236,10 @@ async function storeReleases(releases) {
// media is more error-prone, associate separately // media is more error-prone, associate separately
await associateReleaseMedia(releasesWithId); await associateReleaseMedia(releasesWithId);
await scrapeActors(actors.map(actor => actor.name));
if (argv.withActors) {
await scrapeActors(actors.map(actor => actor.name));
}
logger.info(`Stored ${storedReleaseEntries.length} releases`); logger.info(`Stored ${storedReleaseEntries.length} releases`);

View File

@ -13,6 +13,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
extend type Actor { extend type Actor {
age: Int @requires(columns: ["dateOfBirth"]) age: Int @requires(columns: ["dateOfBirth"])
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
height(units:Units): String @requires(columns: ["height"]) height(units:Units): String @requires(columns: ["height"])
weight(units:Units): String @requires(columns: ["weight"]) weight(units:Units): String @requires(columns: ["weight"])
} }
@ -24,6 +25,11 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
return moment().diff(parent.dateOfBirth, 'years'); return moment().diff(parent.dateOfBirth, 'years');
}, },
ageAtDeath(parent, _args, _context, _info) {
if (!parent.dateOfDeath) return null;
return moment(parent.dateOfDeath).diff(parent.dateOfBirth, 'years');
},
height(parent, args, _context, _info) { height(parent, args, _context, _info) {
if (!parent.height) return null; if (!parent.height) return null;