Added Bang! actor scraper. Fixed date interpolation. Showing date and age of death on profile (only if actor has already died).

This commit is contained in:
2020-05-17 04:59:09 +02:00
parent 7f86399033
commit f42ca7bd52
10 changed files with 183 additions and 18 deletions

View File

@@ -41,7 +41,11 @@ function getMostFrequentDate(dates) {
const month = getMostFrequent(dates.map(dateX => dateX.getMonth()));
const date = getMostFrequent(dates.map(dateX => dateX.getDate()));
return moment({ year, month, date }).toDate();
if (year && month && date) {
return moment({ year, month, date }).toDate();
}
return null;
}
function getLongest(items) {
@@ -201,10 +205,6 @@ async function curateProfile(profile) {
curatedProfile.releases = toBaseReleases(profile.releases);
if (argv.inspect) {
console.log(curatedProfile);
}
return curatedProfile;
} catch (error) {
logger.error(`Failed to curate '${profile.name}': ${error.message}`);
@@ -255,6 +255,8 @@ async function interpolateProfiles(actors) {
profile.date_of_birth = getMostFrequentDate(valuesByProperty.date_of_birth);
profile.date_of_death = getMostFrequentDate(valuesByProperty.date_of_death);
console.log(valuesByProperty.date_of_birth, profile.date_of_birth);
profile.birth_city = getMostFrequent(valuesByProperty.birth_city);
profile.birth_state = getMostFrequent(valuesByProperty.birth_state);
profile.birth_country_alpha2 = getMostFrequent(valuesByProperty.birth_country_alpha2);
@@ -437,10 +439,17 @@ async function scrapeActors(actorNames) {
);
const profiles = await Promise.all(profilesPerActor.flat().map(profile => curateProfile(profile)));
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
if (argv.inspect) {
console.log(profiles);
}
if (argv.save) {
const profilesWithAvatarIds = await associateAvatars(profiles);
await upsertProfiles(profilesWithAvatarIds);
await interpolateProfiles(actors);
}
}
async function getOrCreateActors(baseActors, batchId) {

View File

@@ -49,7 +49,7 @@ const { argv } = yargs
.option('profiles', {
describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean',
alias: 'bios',
alias: 'with-actors',
default: false,
})
.option('scene', {

View File

@@ -2,7 +2,9 @@
const bhttp = require('bhttp');
const { post } = require('../utils/http');
const slugify = require('../utils/slugify');
const { inchesToCm } = require('../utils/convert');
const clusterId = '617fb597b659459bafe6472470d9073a';
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
@@ -81,6 +83,41 @@ function scrapeLatest(scenes, site) {
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
}
/**
 * Convert a raw actor document from the search API into a scraper profile.
 *
 * Only fields that can actually be derived from the document are given a
 * meaningful value; measurement, social and avatar fields are left off the
 * profile entirely when their source data is absent.
 *
 * @param {Object} actor - actor document (the `_source` of a search hit)
 * @returns {Object} the scraped profile
 */
function scrapeProfile(actor) {
	const profile = {};

	profile.aliases = actor.aliases;
	// the API encodes gender as a single letter; anything else maps to undefined
	profile.gender = ({ F: 'female', M: 'male' })[actor.gender];
	profile.ethnicity = actor.ethnicity;
	profile.nationality = actor.nationality;

	// join only the parts that are present, so a missing city or country does
	// not leak a literal "undefined" or a dangling comma into the birth place
	const birthPlace = [actor.birthCity, actor.birthCountry].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.hair = actor.hairColor;
	profile.eyes = actor.eyeColor;
	profile.naturalBoobs = actor.naturalBreasts;

	if (actor.measurements) {
		const { cupSize, shoulder, chest, waist, height } = actor.measurements;

		if (height) profile.height = inchesToCm(height);
		if (cupSize) profile.cup = cupSize;

		// [SIC] the source fields are deliberately mapped one position over:
		// shoulder -> bust, chest -> waist, waist -> hip
		if (shoulder) profile.bust = shoulder;
		if (chest) profile.waist = chest;
		if (waist) profile.hip = waist;
	}

	if (actor.twitter) profile.social = [`https://www.twitter.com/${actor.twitter}`];
	// the avatar URL is derived from the actor identifier; `image` only signals that one exists
	if (actor.image) profile.avatar = `https://i.bang.com/pornstars/${actor.identifier}.jpg`;

	// TODO: get releases

	return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
@@ -181,7 +218,48 @@ async function fetchScene(url, site) {
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
}
/**
 * Look up an actor by name in the actor search index and scrape the profile.
 *
 * The query asks for up to five hits whose name matches every term of
 * `actorName` and whose status is 'ok'; of those, only a hit whose name is
 * exactly `actorName` is accepted.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<Object|number|null>} the scraped profile, `null` when no
 *   hit carries exactly the requested name, or the response status when the
 *   request was not OK
 */
async function fetchProfile(actorName) {
	const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
		size: 5,
		sort: [{
			_score: {
				order: 'desc',
			},
		}],
		query: {
			bool: {
				must: [
					{
						match: {
							name: {
								query: actorName,
								operator: 'and',
							},
						},
					},
					{
						match: {
							status: 'ok',
						},
					},
				],
			},
		},
	}, {
		Authorization: `Basic ${authKey}`,
	}, { encodeJSON: true });

	if (!res.ok) {
		return res.status;
	}

	const hits = (res.body && res.body.hits && res.body.hits.hits) || [];
	const actor = hits.find(hit => hit._source.name === actorName); // eslint-disable-line no-underscore-dangle

	// a fuzzy search can succeed without containing the exact actor; report
	// "no profile" instead of throwing on the missing hit
	if (!actor) {
		return null;
	}

	return scrapeProfile(actor._source); // eslint-disable-line no-underscore-dangle
}
// Scraper interface exposed by this module: latest-release, actor-profile and single-scene fetchers.
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -4,6 +4,7 @@
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
// const bhttp = require('bhttp');
const { get } = require('../utils/http');
@@ -233,7 +234,26 @@ async function fetchScene(url, site) {
return res.code;
}
/* API protected
async function fetchProfile(actorName, scraperSlug, site) {
const session = bhttp.session();
await session.get(`https://tour.${site.slug}.com`);
const url = `https://tour.${site.slug}.com/search-preview`;
const res = await session.post(url, { q: actorName }, {
headers: {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
origin: `https://tour.${site.slug}.com`,
},
});
console.log(res.body.toString());
}
*/
// Scraper interface exposed by this module. fetchProfile is not exported
// because its implementation is commented out (marked "API protected").
module.exports = {
fetchLatest,
// fetchProfile,
fetchScene,
};

View File

@@ -132,6 +132,7 @@ module.exports = {
},
actors: {
'21sextury': sextury,
allanal: mikeadriano,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
@@ -140,6 +141,7 @@ module.exports = {
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bang,
bangbros,
blacked: vixen,
blackedraw: vixen,
@@ -184,6 +186,7 @@ module.exports = {
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
nympho: mikeadriano,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
@@ -195,8 +198,10 @@ module.exports = {
score,
seehimfuck: hush,
sexyhub: mindgeek,
swallowed: mikeadriano,
thatsitcomshow: nubiles,
transangels,
trueanal: mikeadriano,
tushy: vixen,
tushyraw: vixen,
twistys,

View File

@@ -2,6 +2,7 @@
const config = require('config');
const argv = require('./argv');
const logger = require('./logger')(__filename);
const knex = require('./knex');
const slugify = require('./utils/slugify');
@@ -235,7 +236,10 @@ async function storeReleases(releases) {
// media is more error-prone, associate separately
await associateReleaseMedia(releasesWithId);
await scrapeActors(actors.map(actor => actor.name));
if (argv.withActors) {
await scrapeActors(actors.map(actor => actor.name));
}
logger.info(`Stored ${storedReleaseEntries.length} releases`);

View File

@@ -13,6 +13,7 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
extend type Actor {
age: Int @requires(columns: ["dateOfBirth"])
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
height(units:Units): String @requires(columns: ["height"])
weight(units:Units): String @requires(columns: ["weight"])
}
@@ -24,6 +25,11 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
return moment().diff(parent.dateOfBirth, 'years');
},
ageAtDeath(parent, _args, _context, _info) {
if (!parent.dateOfDeath) return null;
return moment(parent.dateOfDeath).diff(parent.dateOfBirth, 'years');
},
height(parent, args, _context, _info) {
if (!parent.height) return null;