Added Bang! actor scraper. Fixed date interpolation. Added date of death and age at death to the profile (shown only if the actor has died).

This commit is contained in:
ThePendulum 2020-05-17 04:59:09 +02:00
parent 7f86399033
commit f42ca7bd52
10 changed files with 183 additions and 18 deletions

View File

@ -58,15 +58,24 @@
<ul class="bio nolist">
<li
v-if="actor.birthdate"
v-if="actor.dateOfBirth"
class="bio-item"
>
<dfn class="bio-label"><Icon icon="cake" />Birthdate</dfn>
<dfn class="bio-label"><Icon icon="cake" />Date of birth</dfn>
<span
v-if="actor.birthdate"
class="birthdate"
>{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }}<span class="age">{{ actor.age }}</span></span>
<span class="birthdate">{{ formatDate(actor.dateOfBirth, 'MMMM D, YYYY') }}<span
v-if="!actor.dateOfDeath"
class="age"
>{{ actor.age }}</span></span>
</li>
<li
v-if="actor.dateOfDeath"
class="bio-item"
>
<dfn class="bio-label"><Icon icon="christian-cross" />Date of death</dfn>
<span class="birthdate">{{ formatDate(actor.dateOfDeath, 'MMMM D, YYYY') }}<span class="age">{{ actor.ageAtDeath }}</span></span>
</li>
<li

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
id="svg6"
viewBox="0 0 19.041 23.969"
height="23.969"
width="19.041"
version="1.1">
<metadata
id="metadata12">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title>cross</dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<defs
id="defs10" />
<title
id="title2">cross</title>
<path
id="path4"
d="m 0,6.958 v 5.021 h 7.041 v 11.99 h 5.042 v -11.99 h 6.958 V 6.958 H 12.083 V 0 H 7.041 v 6.958 z" />
</svg>

After

Width:  |  Height:  |  Size: 896 B

View File

@ -76,8 +76,10 @@ function initActorActions(store, _router) {
name
slug
gender
birthdate: dateOfBirth
dateOfBirth
dateOfDeath
age
ageAtDeath
ethnicity
cup
bust
@ -233,7 +235,7 @@ function initActorActions(store, _router) {
name
slug
age
birthdate: dateOfBirth
dateOfBirth
gender
network {
id

View File

@ -41,7 +41,11 @@ function getMostFrequentDate(dates) {
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
if (year && month && date) {
return moment({ year, month, date }).toDate();
}
return null;
}
function getLongest(items) {
@ -201,10 +205,6 @@ async function curateProfile(profile) {
curatedProfile.releases = toBaseReleases(profile.releases);
if (argv.inspect) {
console.log(curatedProfile);
}
return curatedProfile;
} catch (error) {
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
@ -255,6 +255,8 @@ async function interpolateProfiles(actors) {
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
console.log(valuesByProperty.date_of_birth, profile.date_of_birth);
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
@ -437,10 +439,17 @@ async function scrapeActors(actorNames) {
);
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
if (argv.inspect) {
console.log(profiles);
}
if (argv.save) {
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
}
}
async function getOrCreateActors(baseActors, batchId) {

View File

@ -49,7 +49,7 @@ const { argv } = yargs
.option('profiles', {
describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean',
alias: 'bios',
alias: 'with-actors',
default: false,
})
.option('scene', {

View File

@ -2,7 +2,9 @@
const bhttp = require('bhttp');
const { post } = require('../utils/http');
const slugify = require('../utils/slugify');
const { inchesToCm } = require('../utils/convert');
const clusterId = '617fb597b659459bafe6472470d9073a';
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
@ -81,6 +83,41 @@ function scrapeLatest(scenes, site) {
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
}
/**
 * Map a Bang! actor document onto a profile object.
 *
 * @param {Object} actor - Actor document (the `_source` of a search hit).
 * @returns {Object} Profile; properties the document does not supply are
 * either undefined or absent.
 */
function scrapeProfile(actor) {
	const profile = {};

	profile.aliases = actor.aliases;
	// unmapped gender codes resolve to undefined
	profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// join only the parts that are present, so a missing city does not end up
	// as the literal text 'undefined' and a missing country leaves no dangling ', '
	const birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;
	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const { cupSize, shoulder, chest, waist, height } = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the shifted mapping below is deliberate; presumably the source
		// labels its measurements one position off (shoulder = bust, and so on)
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	// the avatar URL is derived from the identifier; `image` only signals that one exists
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	// TODO: get releases
	return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
@ -181,7 +218,48 @@ async function fetchScene(url, site) {
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
}
/**
 * Search the Bang! actor index for an actor and scrape the exact name match.
 *
 * @param {string} actorName - Name to search for; a hit only counts when its
 * `name` equals this value exactly.
 * @returns {Promise<Object|null|*>} The scraped profile, `null` when the search
 * succeeded but no hit carries the exact name, or `res.status` when the
 * request did not succeed.
 */
async function fetchProfile(actorName) {
	const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
		size: 5,
		sort: [{
			_score: {
				order: 'desc',
			},
		}],
		query: {
			bool: {
				must: [
					{
						match: {
							name: {
								query: actorName,
								operator: 'and',
							},
						},
					},
					{
						match: {
							status: 'ok',
						},
					},
				],
			},
		},
	}, {
		Authorization: `Basic ${authKey}`,
	}, { encodeJSON: true });

	if (!res.ok) {
		return res.status;
	}

	const actor = res.body.hits.hits.find(hit => hit._source.name === actorName);

	// find() yields undefined when the search only returned partial matches
	// (or nothing at all); report "no profile" instead of throwing a TypeError
	if (!actor) {
		return null;
	}

	return scrapeProfile(actor._source);
}
// scraper entry points exposed by this module
module.exports = { fetchLatest, fetchProfile, fetchScene };

View File

@ -4,6 +4,7 @@
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
// const bhttp = require('bhttp');
const { get } = require('../utils/http');
@ -233,7 +234,26 @@ async function fetchScene(url, site) {
return res.code;
}
/* API protected
async function fetchProfile(actorName, scraperSlug, site) {
const session = bhttp.session();
await session.get(`https://tour.${site.slug}.com`);
const url = `https://tour.${site.slug}.com/search-preview`;
const res = await session.post(url, { q: actorName }, {
headers: {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
origin: `https://tour.${site.slug}.com`,
},
});
console.log(res.body.toString());
}
*/
// scraper entry points exposed by this module
module.exports = {
fetchLatest,
// fetchProfile, // not exported while its implementation above is commented out ('API protected')
fetchScene,
};

View File

@ -132,6 +132,7 @@ module.exports = {
},
actors: {
'21sextury': sextury,
allanal: mikeadriano,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
@ -140,6 +141,7 @@ module.exports = {
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bang,
bangbros,
blacked: vixen,
blackedraw: vixen,
@ -184,6 +186,7 @@ module.exports = {
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
nympho: mikeadriano,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
@ -195,8 +198,10 @@ module.exports = {
score,
seehimfuck: hush,
sexyhub: mindgeek,
swallowed: mikeadriano,
thatsitcomshow: nubiles,
transangels,
trueanal: mikeadriano,
tushy: vixen,
tushyraw: vixen,
twistys,

View File

@ -2,6 +2,7 @@
const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const slugify = require('./utils/slugify');
@ -235,7 +236,10 @@ async function storeReleases(releases) {
// media is more error-prone, associate separately
await associateReleaseMedia(releasesWithId);
if (argv.withActors) {
await scrapeActors(actors.map(actor => actor.name));
}
logger.info(`Stored ${storedReleaseEntries.length} releases`);

View File

@ -13,6 +13,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
extend type Actor {
age: Int @requires(columns: ["dateOfBirth"])
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
height(units:Units): String @requires(columns: ["height"])
weight(units:Units): String @requires(columns: ["weight"])
}
@ -24,6 +25,11 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
return moment().diff(parent.dateOfBirth, 'years');
},
ageAtDeath(parent, _args, _context, _info) {
if (!parent.dateOfDeath) return null;
return moment(parent.dateOfDeath).diff(parent.dateOfBirth, 'years');
},
height(parent, args, _context, _info) {
if (!parent.height) return null;