Add Babepedia scraper #37

Closed
Ghost wants to merge 10 commits from (deleted):master into master
5 changed files with 324 additions and 114 deletions

View File

@@ -112,6 +112,12 @@ const networks = [
  url: 'https://www.babes.com',
  parent: 'mindgeek',
},
+{
+  slug: 'babepedia',
+  name: 'Babepedia',
+  url: 'https://www.babepedia.com',
+  type: 'info',
+},
{
  slug: 'badoink',
  name: 'BaDoink',

View File

@@ -322,6 +322,8 @@ function curateProfileEntry(profile) {
  avatar_media_id: profile.avatarMediaId || null,
};
+
+if (profile.update) curatedProfileEntry.updated_at = new Date().toDateString();
return curatedProfileEntry;
}
@@ -733,8 +735,9 @@ async function getActorNames(actorNames) {
SELECT *
FROM actors_profiles
WHERE actors_profiles.actor_id = actors.id
-  AND actors_profiles.updated_at <= (?)
-)
+  AND actors_profiles.updated_at >= (?)
+) AND alias_for IS NULL
+ORDER BY actors.name
`, [argv.actorsUpdate || new Date()]);

return actorsWithoutProfiles.rows.map(actor => actor.name);
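
Reading the flipped comparison together with the new alias filter: assuming the enclosing query is a NOT EXISTS check (not shown in this hunk), an actor is now selected when no profile row has been updated on or after the cutoff, so only stale, non-alias profiles are re-scraped. A hedged invocation, the date being an example value:

    npm start -- --actors-update 2021-06-01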
@@ -750,9 +753,27 @@ async function storeProfiles(profiles) {
async function scrapeActors(argNames) {
  const actorNames = await getActorNames(argNames);
+  const profiles = [];
+  const batchSize = argv.actorsBatch;
+
+  logger.info(`Scraping profiles for ${actorNames.length} actors`);
+
+  if (batchSize > 0) {
+    for (let i = 0; i < actorNames.length; i += batchSize) {
+      logger.info(`Scraping profiles ${((i / actorNames.length) * 100).toFixed(2)}%`);
+      profiles.push(...await scrapeActorsBatch(actorNames.slice(i, i + batchSize)));
+    }
+  } else {
+    profiles.push(...await scrapeActorsBatch(actorNames));
+  }
+
+  return profiles;
+}
+
+async function scrapeActorsBatch(actorNames) {
  const baseActors = toBaseActors(actorNames);
-  logger.info(`Scraping profiles for ${actorNames.length} actors`);
+  logger.info(`Actors: ${actorNames.join(', ')}`);

  const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
  const entitySlugs = sources.flat();
@@ -760,7 +781,7 @@ async function scrapeActors(argNames) {
const [entitiesBySlug, existingActorEntries] = await Promise.all([
  fetchEntitiesBySlug(entitySlugs, 'desc'),
  knex('actors')
-    .select(['id', 'name', 'slug', 'entry_id'])
+    .select(['id', 'name', 'slug', 'entry_id', 'gender'])
    .whereIn('slug', baseActors.map(baseActor => baseActor.slug))
    .whereNull('alias_for'),
]);
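
Together, these hunks batch the profile scrape and hand each scraper the actor's gender (Babepedia's fetchProfile below uses it to skip male actors). A hedged run, the batch size being an example value, alongside whatever actor-scraping flags you normally pass:

    npm start -- --actors-batch 50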

View File

@@ -23,6 +23,13 @@ function interpretAfter(after) {
  .toDate();
}
+
+function interpretActorAfter(after) {
+  if (!after) {
+    return new Date();
+  }
+
+  return interpretAfter(after);
+}
const { argv } = yargs const { argv } = yargs
  .command('npm start')
  .option('server', {
@@ -69,6 +76,11 @@ const { argv } = yargs
  default: false,
  alias: 'actor-scenes',
})
+.option('actors-batch', {
+  describe: 'Batch size for scraping actors; if not set, all actors are scraped in one pass',
+  type: 'number',
+  default: config?.actors?.batchSize === null ? 0 : config?.actors?.batchSize,
+})
.option('actor-sources', {
  describe: 'Use these scrapers for actor data',
  type: 'array',
@@ -307,6 +319,6 @@ const { argv } = yargs
  alias: ['delete-movie', 'remove-movies', 'remove-movies'],
})
.coerce('after', interpretAfter)
-.coerce('actors-update', interpretAfter);
+.coerce('actors-update', interpretActorAfter);

module.exports = argv;
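
Since the option's default comes from config?.actors?.batchSize, the setting can presumably be made permanent in the config file. A minimal sketch, with the key path inferred from the default expression above and the file location assumed:

// config/default.js (location assumed)
module.exports = {
  actors: {
    // scrape profiles 50 actors at a time; leave unset or null to scrape all in one pass
    batchSize: 50,
  },
};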

src/scrapers/babepedia.js (new file, 169 lines)
View File

@@ -0,0 +1,169 @@
'use strict';

const moment = require('moment');

const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeProfile({ query, el }, actorName, entity, include) {
  if (!query) return {};

  // the profile header holds the canonical name; bail out if it's missing
  const name = query.cnt('#babename');
  if (!name) return {};

  const profile = { name: actorName };

  if (actorName !== name) {
    profile.aliasFor = name;
  }

  const avatar = query.url('#profimg a');

  // Babepedia links a javascript:alert() placeholder when no photo is available
  if (avatar && !avatar.includes('javascript:alert(')) {
    profile.avatar = { src: `${entity.url}${avatar}`, credit: 'Babepedia' };
  }

  const aka = query.cnt('#aka');
  if (aka) profile.aliases = aka.replace('aka ', '').split('/').map(alias => alias.trim());
  // e.g. '34DD-24-36' -> { bust: 34, cup: 'DD', waist: 24, hip: 36 }
  function measurementsFromString(str) {
    const [bra, waist, hip] = str.split('-');

    if (bra && waist && hip) {
      const bust = parseInt(bra, 10);

      return {
        bust,
        cup: bust ? bra.replace(String(bust), '') : null,
        waist: parseInt(waist, 10),
        hip: parseInt(hip, 10),
      };
    }

    return null;
  }

  const allowedKeys = ['birthPlace', 'eyes', 'hair', 'birthdate', 'weight', 'height', 'naturalBoobs', 'tattoos', 'piercings'];
  // parse the 'key: value' rows of the bio list into profile fields
  const bio = query.all('#biolist li').reduce((acc, item) => {
    const keyMatch = query.cnt(item).split(':');

    if (keyMatch && keyMatch.length === 2) {
      let key = keyMatch[0].toLowerCase();
      let value = keyMatch[1].trim();

      if (key === 'birthplace') key = 'birthPlace';
      if (key === 'eye color') key = 'eyes';
      if (key === 'hair color') key = 'hair';

      if (key === 'measurements' && value) {
        const measurements = measurementsFromString(value);

        if (measurements) {
          if (measurements.bust) acc.bust = measurements.bust;
          if (measurements.cup) acc.cup = measurements.cup;
          if (measurements.waist) acc.waist = measurements.waist;
          if (measurements.hip) acc.hip = measurements.hip;
        }
      }

      if (key === 'born' && value) {
        // e.g. 'Thursday 20th of May 1993'
        key = 'birthdate';
        value = moment.utc(value.replace(' of ', ' '), 'dddd Do MMMM YYYY').toDate();
      }

      if (key === 'height' && value) {
        // keep only the metric figure, e.g. '170 cm' -> 170
        const rawHeightMatch = value.match(/\d+ cm/);
        value = rawHeightMatch ? parseInt(rawHeightMatch[0], 10) : null;
      }

      if (key === 'weight' && value) {
        // keep only the metric figure, e.g. '55 kg' -> 55
        const rawWeightMatch = value.match(/\d+ kg/);
        value = rawWeightMatch ? parseInt(rawWeightMatch[0], 10) : null;
      }

      if (key === 'boobs' && value) {
        if (/fake/i.test(value)) {
          key = 'naturalBoobs';
          value = false;
        } else if (/real/i.test(value)) {
          key = 'naturalBoobs';
          value = true;
        }
      }

      if (key === 'tattoos' && value) {
        acc.hasTattoos = !/none/i.test(value);
        if (!acc.hasTattoos) value = '';
      }

      if (key === 'piercings' && value) {
        acc.hasPiercings = !/none/i.test(value);
        if (!acc.hasPiercings) value = '';
      }

      if (allowedKeys.includes(key)) {
        acc[key] = value;
      }
    }

    return acc;
  }, {});

  return { ...profile, ...bio };
}
function scrapeSearch({ query, el }, actorName, entity, include) {
  const links = query.all('.results .thumbshot');

  // return the URLs of search results whose link text matches the actor's name
  return links.map((link) => {
    const linkName = query.cnt(link, 'a');
    const linkUrl = query.url(link, 'a');
    const actorNameMatch = new RegExp(actorName, 'g');

    return linkName?.match(actorNameMatch) ? linkUrl : null;
  }).filter(Boolean);
}
async function fetchProfile(actor, entity, include) {
  // Babepedia only lists women, so male actors can be skipped outright
  if (actor?.gender === 'male') {
    return null;
  }

  const actorName = actor.name;
  const searchName = actorName.replace('\'', '');

  let url = `${entity.url}/search/${searchName}`;
  let res = await qu.get(url);

  // an exact match redirects straight to the profile page
  const result = res.ok ? scrapeProfile(res.item, actorName, entity, include) : {};

  if (result.name === actorName) {
    return result;
  }

  if (res.ok) {
    const actorPath = scrapeSearch(res.item, actorName, entity, include);

    // only follow an unambiguous, single search result
    if (actorPath.length === 1) {
      url = `${entity.url}${actorPath[0]}`;
      res = await qu.get(url);

      return res.ok ? scrapeProfile(res.item, actorName, entity, include) : res.status;
    }
  }

  return res.status;
}

module.exports = {
  fetchProfile,
};
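
A hypothetical direct invocation, using the actor and entity shapes fetchProfile consumes above (name, gender, entity.url); in practice the scraper framework calls this for you, and the actor name here is made up:

const babepedia = require('./src/scrapers/babepedia');

(async () => {
  const profile = await babepedia.fetchProfile(
    { name: 'Example Actor', gender: 'female' },
    { url: 'https://www.babepedia.com' },
  );

  // profile object on success, HTTP status code or null otherwise
  console.log(profile);
})();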

View File

@@ -64,6 +64,7 @@ const xempire = require('./xempire');
// profiles
const boobpedia = require('./boobpedia');
const freeones = require('./freeones');
+const babepedia = require('./babepedia');

const scrapers = {
  releases: {
@@ -156,6 +157,7 @@ const scrapers = {
  anilos: nubiles,
  aziani,
  babes: mindgeek,
+  babepedia,
  babevr: badoink,
  backroomcastingcouch: elevatedx,
  baddaddypov: fullpornnetwork,