forked from DebaucheryLibrarian/traxxx
Added Bang! actor scraper. Fixed date interpolation. Showing date and age of death on profile (only if actor has already died).
This commit is contained in:
parent
7f86399033
commit
f42ca7bd52
|
@ -58,15 +58,24 @@
|
||||||
|
|
||||||
<ul class="bio nolist">
|
<ul class="bio nolist">
|
||||||
<li
|
<li
|
||||||
v-if="actor.birthdate"
|
v-if="actor.dateOfBirth"
|
||||||
class="bio-item"
|
class="bio-item"
|
||||||
>
|
>
|
||||||
<dfn class="bio-label"><Icon icon="cake" />Birthdate</dfn>
|
<dfn class="bio-label"><Icon icon="cake" />Date of birth</dfn>
|
||||||
|
|
||||||
<span
|
<span class="birthdate">{{ formatDate(actor.dateOfBirth, 'MMMM D, YYYY') }}<span
|
||||||
v-if="actor.birthdate"
|
v-if="!actor.dateOfDeath"
|
||||||
class="birthdate"
|
class="age"
|
||||||
>{{ formatDate(actor.birthdate, 'MMMM D, YYYY') }}<span class="age">{{ actor.age }}</span></span>
|
>{{ actor.age }}</span></span>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
<li
|
||||||
|
v-if="actor.dateOfDeath"
|
||||||
|
class="bio-item"
|
||||||
|
>
|
||||||
|
<dfn class="bio-label"><Icon icon="christian-cross" />Date of death</dfn>
|
||||||
|
|
||||||
|
<span class="birthdate">{{ formatDate(actor.dateOfDeath, 'MMMM D, YYYY') }}<span class="age">{{ actor.ageAtDeath }}</span></span>
|
||||||
</li>
|
</li>
|
||||||
|
|
||||||
<li
|
<li
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
id="svg6"
|
||||||
|
viewBox="0 0 19.041 23.969"
|
||||||
|
height="23.969"
|
||||||
|
width="19.041"
|
||||||
|
version="1.1">
|
||||||
|
<metadata
|
||||||
|
id="metadata12">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title>cross</dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<defs
|
||||||
|
id="defs10" />
|
||||||
|
<title
|
||||||
|
id="title2">cross</title>
|
||||||
|
<path
|
||||||
|
id="path4"
|
||||||
|
d="m 0,6.958 v 5.021 h 7.041 v 11.99 h 5.042 v -11.99 h 6.958 V 6.958 H 12.083 V 0 H 7.041 v 6.958 z" />
|
||||||
|
</svg>
|
After Width: | Height: | Size: 896 B |
|
@ -76,8 +76,10 @@ function initActorActions(store, _router) {
|
||||||
name
|
name
|
||||||
slug
|
slug
|
||||||
gender
|
gender
|
||||||
birthdate: dateOfBirth
|
dateOfBirth
|
||||||
|
dateOfDeath
|
||||||
age
|
age
|
||||||
|
ageAtDeath
|
||||||
ethnicity
|
ethnicity
|
||||||
cup
|
cup
|
||||||
bust
|
bust
|
||||||
|
@ -233,7 +235,7 @@ function initActorActions(store, _router) {
|
||||||
name
|
name
|
||||||
slug
|
slug
|
||||||
age
|
age
|
||||||
birthdate: dateOfBirth
|
dateOfBirth
|
||||||
gender
|
gender
|
||||||
network {
|
network {
|
||||||
id
|
id
|
||||||
|
|
|
@ -41,7 +41,11 @@ function getMostFrequentDate(dates) {
|
||||||
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
|
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
|
||||||
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
|
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
|
||||||
|
|
||||||
return moment({ year, month, date }).toDate();
|
if (year && month && date) {
|
||||||
|
return moment({ year, month, date }).toDate();
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getLongest(items) {
|
function getLongest(items) {
|
||||||
|
@ -201,10 +205,6 @@ async function curateProfile(profile) {
|
||||||
|
|
||||||
curatedProfile.releases = toBaseReleases(profile.releases);
|
curatedProfile.releases = toBaseReleases(profile.releases);
|
||||||
|
|
||||||
if (argv.inspect) {
|
|
||||||
console.log(curatedProfile);
|
|
||||||
}
|
|
||||||
|
|
||||||
return curatedProfile;
|
return curatedProfile;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
|
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
|
||||||
|
@ -255,6 +255,8 @@ async function interpolateProfiles(actors) {
|
||||||
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
|
||||||
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
|
||||||
|
|
||||||
|
console.log(valuesByProperty.date_of_birth, profile.date_of_birth);
|
||||||
|
|
||||||
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
|
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
|
||||||
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
|
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
|
||||||
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
|
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
|
||||||
|
@ -437,10 +439,17 @@ async function scrapeActors(actorNames) {
|
||||||
);
|
);
|
||||||
|
|
||||||
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
|
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
|
||||||
const profilesWithAvatarIds = await associateAvatars(profiles);
|
|
||||||
|
|
||||||
await upsertProfiles(profilesWithAvatarIds);
|
if (argv.inspect) {
|
||||||
await interpolateProfiles(actors);
|
console.log(profiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argv.save) {
|
||||||
|
const profilesWithAvatarIds = await associateAvatars(profiles);
|
||||||
|
|
||||||
|
await upsertProfiles(profilesWithAvatarIds);
|
||||||
|
await interpolateProfiles(actors);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getOrCreateActors(baseActors, batchId) {
|
async function getOrCreateActors(baseActors, batchId) {
|
||||||
|
|
|
@ -49,7 +49,7 @@ const { argv } = yargs
|
||||||
.option('profiles', {
|
.option('profiles', {
|
||||||
describe: 'Scrape profiles for new actors after fetching scenes',
|
describe: 'Scrape profiles for new actors after fetching scenes',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
alias: 'bios',
|
alias: 'with-actors',
|
||||||
default: false,
|
default: false,
|
||||||
})
|
})
|
||||||
.option('scene', {
|
.option('scene', {
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
const bhttp = require('bhttp');
|
||||||
|
|
||||||
|
const { post } = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const { inchesToCm } = require('../utils/convert');
|
||||||
|
|
||||||
const clusterId = '617fb597b659459bafe6472470d9073a';
|
const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||||
|
@ -81,6 +83,41 @@ function scrapeLatest(scenes, site) {
|
||||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Map a raw actor document from the Bang! search index onto a profile object.
 *
 * Fields that cannot be derived from the document are left off the profile
 * (or left undefined where the value is copied straight across).
 *
 * @param {Object} actor - The actor document, i.e. the `_source` of a search hit.
 * @returns {Object} The scraped profile.
 */
function scrapeProfile(actor) {
    const profile = {};

    profile.aliases = actor.aliases;
    // gender arrives as a single letter; any other value maps to undefined
    profile.gender = ({ F: 'female', M: 'male' })[actor.gender];

    profile.ethnicity = actor.ethnicity;
    profile.nationality = actor.nationality;

    // join only the parts that are present, so a missing city or country does not
    // produce 'undefined, Country' or a dangling 'City, '
    const birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ');

    if (birthPlace) profile.birthPlace = birthPlace;

    profile.hair = actor.hairColor;
    profile.eyes = actor.eyeColor;

    profile.naturalBoobs = actor.naturalBreasts;

    if (actor.measurements) {
        const { cupSize, shoulder, chest, waist, height } = actor.measurements;

        if (height) profile.height = inchesToCm(height);
        if (cupSize) profile.cup = cupSize;

        // [SIC]
        // NOTE(review): the shifted mapping below is deliberate per the [SIC] marker,
        // presumably because the source labels these measurements one slot off — confirm
        if (shoulder) profile.bust = shoulder;
        if (chest) profile.waist = chest;
        if (waist) profile.hip = waist;
    }

    if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
    // the avatar URL is built from the identifier; actor.image only signals that one exists
    if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

    // TODO: get releases

    return profile;
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||||
size: 50,
|
size: 50,
|
||||||
|
@ -181,7 +218,48 @@ async function fetchScene(url, site) {
|
||||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Search the Bang! actor index for an actor and scrape the matching profile.
 *
 * Up to five hits are requested, ordered by score; only a hit whose name is
 * exactly equal to the requested name is accepted.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<Object|null|number>} The scraped profile, null when none of the
 *   hits carries exactly this name, or the response status when the request failed.
 */
async function fetchProfile(actorName) {
    const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
        size: 5,
        sort: [{
            _score: {
                order: 'desc',
            },
        }],
        query: {
            bool: {
                must: [
                    {
                        match: {
                            name: {
                                query: actorName,
                                operator: 'and',
                            },
                        },
                    },
                    {
                        match: {
                            status: 'ok',
                        },
                    },
                ],
            },
        },
    }, {
        Authorization: `Basic ${authKey}`,
    }, { encodeJSON: true });

    if (!res.ok) {
        return res.status;
    }

    const actor = res.body.hits.hits.find(hit => hit._source.name === actorName); // eslint-disable-line no-underscore-dangle

    // the search matches on terms, so it can succeed without any hit having the exact name;
    // report that as "not found" instead of dereferencing undefined
    if (!actor) {
        return null;
    }

    return scrapeProfile(actor._source); // eslint-disable-line no-underscore-dangle
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
|
fetchProfile,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
};
|
};
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
// const bhttp = require('bhttp');
|
||||||
|
|
||||||
const { get } = require('../utils/http');
|
const { get } = require('../utils/http');
|
||||||
|
|
||||||
|
@ -233,7 +234,26 @@ async function fetchScene(url, site) {
|
||||||
return res.code;
|
return res.code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* API protected
|
||||||
|
async function fetchProfile(actorName, scraperSlug, site) {
|
||||||
|
const session = bhttp.session();
|
||||||
|
|
||||||
|
await session.get(`https://tour.${site.slug}.com`);
|
||||||
|
|
||||||
|
const url = `https://tour.${site.slug}.com/search-preview`;
|
||||||
|
const res = await session.post(url, { q: actorName }, {
|
||||||
|
headers: {
|
||||||
|
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
||||||
|
origin: `https://tour.${site.slug}.com`,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(res.body.toString());
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
|
// fetchProfile,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
};
|
};
|
||||||
|
|
|
@ -132,6 +132,7 @@ module.exports = {
|
||||||
},
|
},
|
||||||
actors: {
|
actors: {
|
||||||
'21sextury': sextury,
|
'21sextury': sextury,
|
||||||
|
allanal: mikeadriano,
|
||||||
analbbc: fullpornnetwork,
|
analbbc: fullpornnetwork,
|
||||||
analized: fullpornnetwork,
|
analized: fullpornnetwork,
|
||||||
analviolation: fullpornnetwork,
|
analviolation: fullpornnetwork,
|
||||||
|
@ -140,6 +141,7 @@ module.exports = {
|
||||||
babes,
|
babes,
|
||||||
baddaddypov: fullpornnetwork,
|
baddaddypov: fullpornnetwork,
|
||||||
bamvisions,
|
bamvisions,
|
||||||
|
bang,
|
||||||
bangbros,
|
bangbros,
|
||||||
blacked: vixen,
|
blacked: vixen,
|
||||||
blackedraw: vixen,
|
blackedraw: vixen,
|
||||||
|
@ -184,6 +186,7 @@ module.exports = {
|
||||||
nubilefilms: nubiles,
|
nubilefilms: nubiles,
|
||||||
nubiles,
|
nubiles,
|
||||||
nubilesporn: nubiles,
|
nubilesporn: nubiles,
|
||||||
|
nympho: mikeadriano,
|
||||||
onlyprince: fullpornnetwork,
|
onlyprince: fullpornnetwork,
|
||||||
pervertgallery: fullpornnetwork,
|
pervertgallery: fullpornnetwork,
|
||||||
pimpxxx: cherrypimps,
|
pimpxxx: cherrypimps,
|
||||||
|
@ -195,8 +198,10 @@ module.exports = {
|
||||||
score,
|
score,
|
||||||
seehimfuck: hush,
|
seehimfuck: hush,
|
||||||
sexyhub: mindgeek,
|
sexyhub: mindgeek,
|
||||||
|
swallowed: mikeadriano,
|
||||||
thatsitcomshow: nubiles,
|
thatsitcomshow: nubiles,
|
||||||
transangels,
|
transangels,
|
||||||
|
trueanal: mikeadriano,
|
||||||
tushy: vixen,
|
tushy: vixen,
|
||||||
tushyraw: vixen,
|
tushyraw: vixen,
|
||||||
twistys,
|
twistys,
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
|
|
||||||
|
const argv = require('./argv');
|
||||||
const logger = require('./logger')(__filename);
|
const logger = require('./logger')(__filename);
|
||||||
const knex = require('./knex');
|
const knex = require('./knex');
|
||||||
const slugify = require('./utils/slugify');
|
const slugify = require('./utils/slugify');
|
||||||
|
@ -235,7 +236,10 @@ async function storeReleases(releases) {
|
||||||
|
|
||||||
// media is more error-prone, associate separately
|
// media is more error-prone, associate separately
|
||||||
await associateReleaseMedia(releasesWithId);
|
await associateReleaseMedia(releasesWithId);
|
||||||
await scrapeActors(actors.map(actor => actor.name));
|
|
||||||
|
if (argv.withActors) {
|
||||||
|
await scrapeActors(actors.map(actor => actor.name));
|
||||||
|
}
|
||||||
|
|
||||||
logger.info(`Stored ${storedReleaseEntries.length} releases`);
|
logger.info(`Stored ${storedReleaseEntries.length} releases`);
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||||
|
|
||||||
extend type Actor {
|
extend type Actor {
|
||||||
age: Int @requires(columns: ["dateOfBirth"])
|
age: Int @requires(columns: ["dateOfBirth"])
|
||||||
|
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
|
||||||
height(units:Units): String @requires(columns: ["height"])
|
height(units:Units): String @requires(columns: ["height"])
|
||||||
weight(units:Units): String @requires(columns: ["weight"])
|
weight(units:Units): String @requires(columns: ["weight"])
|
||||||
}
|
}
|
||||||
|
@ -24,6 +25,11 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
|
||||||
|
|
||||||
return moment().diff(parent.dateOfBirth, 'years');
|
return moment().diff(parent.dateOfBirth, 'years');
|
||||||
},
|
},
|
||||||
|
ageAtDeath(parent, _args, _context, _info) {
|
||||||
|
if (!parent.dateOfDeath) return null;
|
||||||
|
|
||||||
|
return moment(parent.dateOfDeath).diff(parent.dateOfBirth, 'years');
|
||||||
|
},
|
||||||
height(parent, args, _context, _info) {
|
height(parent, args, _context, _info) {
|
||||||
if (!parent.height) return null;
|
if (!parent.height) return null;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue