Added Bang! actor scraper. Fixed date interpolation. Now showing the date of death and age at death on the profile (only if the actor has died).
This commit is contained in:
parent
7f86399033
commit
f42ca7bd52
|
@ -58,15 +58,24 @@
|
|||
|
||||
<ul class="bio nolist">
|
||||
<li
|
||||
v-if="actor.birthdate"
|
||||
v-if="actor.dateOfBirth"
|
||||
class="bio-item"
|
||||
>
|
||||
<dfn class="bio-label"><Icon icon="cake" />Birthdate</dfn>
|
||||
<dfn class="bio-label"><Icon icon="cake" />Date of birth</dfn>
|
||||
|
||||
<span
|
||||
v-if="actor.birthdate"
|
||||
class="birthdate"
|
||||
>{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }}<span class="age">{{ actor.age }}</span></span>
|
||||
<span class="birthdate">{{ formatDate(actor.dateOfBirth, 'MMMM D, YYYY') }}<span
|
||||
v-if="!actor.dateOfDeath"
|
||||
class="age"
|
||||
>{{ actor.age }}</span></span>
|
||||
</li>
|
||||
|
||||
<li
|
||||
v-if="actor.dateOfDeath"
|
||||
class="bio-item"
|
||||
>
|
||||
<dfn class="bio-label"><Icon icon="christian-cross" />Date of death</dfn>
|
||||
|
||||
<span class="birthdate">{{ formatDate(actor.dateOfDeath, 'MMMM D, YYYY') }}<span class="age">{{ actor.ageAtDeath }}</span></span>
|
||||
</li>
|
||||
|
||||
<li
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
id="svg6"
|
||||
viewBox="0 0 19.041 23.969"
|
||||
height="23.969"
|
||||
width="19.041"
|
||||
version="1.1">
|
||||
<metadata
|
||||
id="metadata12">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title>cross</dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<defs
|
||||
id="defs10" />
|
||||
<title
|
||||
id="title2">cross</title>
|
||||
<path
|
||||
id="path4"
|
||||
d="m 0,6.958 v 5.021 h 7.041 v 11.99 h 5.042 v -11.99 h 6.958 V 6.958 H 12.083 V 0 H 7.041 v 6.958 z" />
|
||||
</svg>
|
After Width: | Height: | Size: 896 B |
|
@ -76,8 +76,10 @@ function initActorActions(store, _router) {
|
|||
name
|
||||
slug
|
||||
gender
|
||||
birthdate: dateOfBirth
|
||||
dateOfBirth
|
||||
dateOfDeath
|
||||
age
|
||||
ageAtDeath
|
||||
ethnicity
|
||||
cup
|
||||
bust
|
||||
|
@ -233,7 +235,7 @@ function initActorActions(store, _router) {
|
|||
name
|
||||
slug
|
||||
age
|
||||
birthdate: dateOfBirth
|
||||
dateOfBirth
|
||||
gender
|
||||
network {
|
||||
id
|
||||
|
|
|
@ -41,7 +41,11 @@ function getMostFrequentDate(dates) {
|
|||
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
|
||||
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
|
||||
|
||||
return moment({ year, month, date }).toDate();
|
||||
if (year && month && date) {
|
||||
return moment({ year, month, date }).toDate();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function getLongest(items) {
|
||||
|
@ -201,10 +205,6 @@ async function curateProfile(profile) {
|
|||
|
||||
curatedProfile.releases = toBaseReleases(profile.releases);
|
||||
|
||||
if (argv.inspect) {
|
||||
console.log(curatedProfile);
|
||||
}
|
||||
|
||||
return curatedProfile;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
|
||||
|
@ -255,6 +255,8 @@ async function interpolateProfiles(actors) {
|
|||
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
||||
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
||||
|
||||
console.log(valuesByProperty.date_of_birth, profile.date_of_birth);
|
||||
|
||||
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
|
||||
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
|
||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
|
||||
|
@ -437,10 +439,17 @@ async function scrapeActors(actorNames) {
|
|||
);
|
||||
|
||||
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
|
||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
|
||||
await upsertProfiles(profilesWithAvatarIds);
|
||||
await interpolateProfiles(actors);
|
||||
if (argv.inspect) {
|
||||
console.log(profiles);
|
||||
}
|
||||
|
||||
if (argv.save) {
|
||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||
|
||||
await upsertProfiles(profilesWithAvatarIds);
|
||||
await interpolateProfiles(actors);
|
||||
}
|
||||
}
|
||||
|
||||
async function getOrCreateActors(baseActors, batchId) {
|
||||
|
|
|
@ -49,7 +49,7 @@ const { argv } = yargs
|
|||
.option('profiles', {
|
||||
describe: 'Scrape profiles for new actors after fetching scenes',
|
||||
type: 'boolean',
|
||||
alias: 'bios',
|
||||
alias: 'with-actors',
|
||||
default: false,
|
||||
})
|
||||
.option('scene', {
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
|
||||
const bhttp = require('bhttp');
|
||||
|
||||
const { post } = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
|
||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
@ -81,6 +83,41 @@ function scrapeLatest(scenes, site) {
|
|||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
}
|
||||
|
||||
/**
 * Map a raw Bang! actor document (the `_source` of a search hit) onto a profile object.
 *
 * Only fields that are read from `actor` below are copied; optional fields
 * (measurements, twitter, image) are added to the profile only when present.
 *
 * @param {Object} actor - raw actor document
 * @returns {Object} profile with the keys assigned below
 */
function scrapeProfile(actor) {
  const profile = {};

  profile.aliases = actor.aliases;
  // Unknown gender codes map to undefined
  profile.gender = ({ F: 'female', M: 'male' })[actor.gender];

  profile.ethnicity = actor.ethnicity;
  profile.nationality = actor.nationality;

  // Join only the parts that are present, so a missing city or country does not
  // yield 'undefined, ...' or a dangling ', '. Left unset when neither is known.
  const birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ');

  if (birthPlace) profile.birthPlace = birthPlace;

  profile.hair = actor.hairColor;
  profile.eyes = actor.eyeColor;

  profile.naturalBoobs = actor.naturalBreasts;

  if (actor.measurements) {
    const {
      cupSize,
      shoulder,
      chest,
      waist,
      height,
    } = actor.measurements;

    // NOTE(review): presumably the source reports height in inches, given the helper name; confirm
    if (height) profile.height = inchesToCm(height);
    if (cupSize) profile.cup = cupSize;

    // [SIC] the shifted mapping below (shoulder -> bust, chest -> waist, waist -> hip)
    // is deliberate in the original code; presumably the source mislabels these fields.
    // Verify against real data before "correcting" it.
    if (shoulder) profile.bust = shoulder;
    if (chest) profile.waist = chest;
    if (waist) profile.hip = waist;
  }

  if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
  // The avatar URL is derived from the identifier; `image` only signals that one exists
  if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

  // TODO: get releases

  return profile;
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
|
@ -181,7 +218,48 @@ async function fetchScene(url, site) {
|
|||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
}
|
||||
|
||||
/**
 * Look up an actor by name in the Bang! actor index and scrape the matching profile.
 *
 * The search requires all terms of `actorName` to match (`operator: 'and'`) and
 * only considers documents with status 'ok'. Of the top 5 hits by score, only a
 * hit whose name equals `actorName` exactly is accepted.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<Object|null|number>} the scraped profile, null when no hit
 *   matches the name exactly, or the response status when the request failed
 */
async function fetchProfile(actorName) {
  const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
    size: 5,
    sort: [{
      _score: {
        order: 'desc',
      },
    }],
    query: {
      bool: {
        must: [
          {
            match: {
              name: {
                query: actorName,
                operator: 'and',
              },
            },
          },
          {
            match: {
              status: 'ok',
            },
          },
        ],
      },
    },
  }, {
    Authorization: `Basic ${authKey}`,
  }, { encodeJSON: true });

  if (!res.ok) {
    return res.status;
  }

  const actor = res.body.hits.hits
    .map(({ _source: source }) => source)
    .find(source => source.name === actorName);

  // The search is fuzzy; without an exact name match there is nothing to scrape.
  // Previously this case crashed with a TypeError on `actor._source`.
  if (!actor) {
    return null;
  }

  return scrapeProfile(actor);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
const { JSDOM } = require('jsdom');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
// const bhttp = require('bhttp');
|
||||
|
||||
const { get } = require('../utils/http');
|
||||
|
||||
|
@ -233,7 +234,26 @@ async function fetchScene(url, site) {
|
|||
return res.code;
|
||||
}
|
||||
|
||||
/* API protected
|
||||
async function fetchProfile(actorName, scraperSlug, site) {
|
||||
const session = bhttp.session();
|
||||
|
||||
await session.get(`https://tour.${site.slug}.com`);
|
||||
|
||||
const url = `https://tour.${site.slug}.com/search-preview`;
|
||||
const res = await session.post(url, { q: actorName }, {
|
||||
headers: {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
||||
origin: `https://tour.${site.slug}.com`,
|
||||
},
|
||||
});
|
||||
|
||||
console.log(res.body.toString());
|
||||
}
|
||||
*/
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
// fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
|
|
@ -132,6 +132,7 @@ module.exports = {
|
|||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
allanal: mikeadriano,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
|
@ -140,6 +141,7 @@ module.exports = {
|
|||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
|
@ -184,6 +186,7 @@ module.exports = {
|
|||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
nympho: mikeadriano,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
|
@ -195,8 +198,10 @@ module.exports = {
|
|||
score,
|
||||
seehimfuck: hush,
|
||||
sexyhub: mindgeek,
|
||||
swallowed: mikeadriano,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
trueanal: mikeadriano,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
const config = require('config');
|
||||
|
||||
const argv = require('./argv');
|
||||
const logger = require('./logger')(__filename);
|
||||
const knex = require('./knex');
|
||||
const slugify = require('./utils/slugify');
|
||||
|
@ -235,7 +236,10 @@ async function storeReleases(releases) {
|
|||
|
||||
// media is more error-prone, associate separately
|
||||
await associateReleaseMedia(releasesWithId);
|
||||
await scrapeActors(actors.map(actor => actor.name));
|
||||
|
||||
if (argv.withActors) {
|
||||
await scrapeActors(actors.map(actor => actor.name));
|
||||
}
|
||||
|
||||
logger.info(`Stored ${storedReleaseEntries.length} releases`);
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
|||
|
||||
extend type Actor {
|
||||
age: Int @requires(columns: ["dateOfBirth"])
|
||||
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
|
||||
height(units:Units): String @requires(columns: ["height"])
|
||||
weight(units:Units): String @requires(columns: ["weight"])
|
||||
}
|
||||
|
@ -24,6 +25,11 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
|||
|
||||
return moment().diff(parent.dateOfBirth, 'years');
|
||||
},
|
||||
ageAtDeath(parent, _args, _context, _info) {
|
||||
if (!parent.dateOfDeath) return null;
|
||||
|
||||
return moment(parent.dateOfDeath).diff(parent.dateOfBirth, 'years');
|
||||
},
|
||||
height(parent, args, _context, _info) {
|
||||
if (!parent.height) return null;
|
||||
|
||||
|
|
Loading…
Reference in New Issue