From 79b51eca6778bdec2274cdaf38e377052d8203d1 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 3 Feb 2021 21:03:35 +0100 Subject: [PATCH] Added BaDoink profile scraper. Improved convert wrapper. --- seeds/02_sites.js | 11 ++++++++++ src/actors.js | 13 ++++++++++++ src/scrapers/badoink.js | 46 ++++++++++++++++++++++++++++++++++++++++ src/scrapers/scrapers.js | 19 +++++++++++------ src/utils/convert.js | 7 ++++-- 5 files changed, 87 insertions(+), 9 deletions(-) diff --git a/seeds/02_sites.js b/seeds/02_sites.js index d18875c6..00c1b718 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -631,6 +631,9 @@ const sites = [ url: 'https://www.badoinkvr.com', tags: ['vr'], parent: 'badoink', + parameters: { + actor: 'vr-pornstar', + }, }, { name: 'VR Cosplay X', @@ -640,6 +643,7 @@ const sites = [ parent: 'badoink', parameters: { latest: 'cosplaypornvideos', + actor: 'cosplaygirl', }, }, { @@ -648,6 +652,9 @@ const sites = [ url: 'https://18vr.com', tags: ['vr'], parent: 'badoink', + parameters: { + actor: 'vrgirl', + }, }, { name: 'BabeVR', @@ -655,6 +662,9 @@ const sites = [ url: 'https://babevr.com', tags: ['vr'], parent: 'badoink', + parameters: { + actor: 'vrbabe', + }, }, { name: 'RealVR', @@ -663,6 +673,7 @@ const sites = [ tags: ['vr'], parameters: { teasers: false, + actor: 'pornstar', }, parent: 'badoink', }, diff --git a/src/actors.js b/src/actors.js index b5976d94..ad3d1611 100644 --- a/src/actors.js +++ b/src/actors.js @@ -324,6 +324,7 @@ async function curateProfile(profile, actor) { const curatedProfile = { id: profile.id, name: profile.name, + url: profile.url, avatar: profile.avatar, scraper: profile.scraper, entity: profile.entity, @@ -365,10 +366,22 @@ async function curateProfile(profile, actor) { curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null; curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null; + // separate measurement values curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match?.(/[a-zA-Z]+/)?.[0]) || null; curatedProfile.bust = Number(profile.bust) || profile.bust?.match?.(/\d+/)?.[0] || null; curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null; curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null; + + // combined measurement value + const measurements = profile.measurements?.match(/(\d+)(\w+)-(\d+)-(\d+)/); + + if (measurements) { + curatedProfile.bust = Number(measurements[1]); + curatedProfile.cup = measurements[2]; + curatedProfile.waist = Number(measurements[3]); + curatedProfile.hip = Number(measurements[4]); + } + curatedProfile.penisLength = Number(profile.penisLength) || profile.penisLength?.match?.(/\d+/)?.[0] || null; curatedProfile.penisGirth = Number(profile.penisGirth) || profile.penisGirth?.match?.(/\d+/)?.[0] || null; diff --git a/src/scrapers/badoink.js b/src/scrapers/badoink.js index 0eb2e630..f48dae3e 100644 --- a/src/scrapers/badoink.js +++ b/src/scrapers/badoink.js @@ -1,6 +1,8 @@ 'use strict'; const qu = require('../utils/qu'); +const slugify = require('../utils/slugify'); +const { convert } = require('../utils/convert'); function getPoster(posterSources) { if (posterSources?.[0]) { @@ -80,6 +82,38 @@ function scrapeScene({ query }, url, channel) { return release; } +function scrapeProfile({ query }, url, entity) { + const profile = { url }; + + const bio = query.all('.girl-details-stats-item').reduce((acc, el) => ({ + ...acc, + [slugify(qu.query.cnt(el, '.girl-details-stat'))]: qu.query.cnt(el, '.girl-details-stat-value'), + }), {}); + + profile.description = query.cnt('.girl-details-bio'); + + profile.age = bio.age; + profile.birthPlace = bio.country; + profile.ethnicity = bio.ethnicity; + + profile.height = convert(bio.height, 'cm'); + profile.weight = convert(bio.weight, 'lb', 'kg'); + + profile.measurements = bio.measurements; + + profile.hairColor = bio.hair; + profile.eyes = bio.eyes; + + const avatarSources = query.srcset('.girl-details-photo-content picture source', 'srcset') || [query.img('.girl-details-photo')]; + + profile.avatar = getPoster(avatarSources); + profile.social = query.urls('.girl-details-social-media-list a'); + + profile.scenes = scrapeAll(qu.initAll(query.all('.video-card')), entity); + + return profile; +} + async function fetchLatest(channel, page) { const res = await qu.getAll(`${channel.url}/${channel.parameters?.latest || 'vrpornvideos'}/${page}`, '.video-card', { Cookie: 'affsubid=12345-;', // required to show teaser video, exact number doesn't seem to matter @@ -92,7 +126,19 @@ async function fetchLatest(channel, page) { return res.status; } +async function fetchProfile(baseActor, { entity }) { + const url = `${entity.url}/${entity.parameters?.actor || 'pornstar'}/${slugify(baseActor.name, '')}/`; + const res = await qu.get(url); + + if (res.ok) { + return scrapeProfile(res.item, url, entity); + } + + return res.status; +} + module.exports = { fetchLatest, + fetchProfile, scrapeScene, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 60910643..af0ef0bb 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -141,6 +141,7 @@ const scrapers = { xempire, }, actors: { + '18vr': badoink, '21sextury': gamma, allanal: mikeadriano, amateureuro: porndoe, @@ -151,7 +152,9 @@ const scrapers = { anilos: nubiles, aziani, babes: mindgeek, + babevr: badoink, baddaddypov: fullpornnetwork, + badoinkvr: badoink, bamvisions, bang, bangbros, @@ -163,20 +166,19 @@ const scrapers = { brazzers: mindgeek, burningangel: gamma, cherrypimps, - pornworld: ddfnetwork, deeper: vixen, deeplush: nubiles, devilsfilm: famedigital, digitalplayground: mindgeek, - dtfsluts: fullpornnetwork, dogfartnetwork: dogfart, dorcelclub: dorcel, doubleviewcasting: firstanalquest, + dtfsluts: fullpornnetwork, elegantangel, evilangel: gamma, + exploitedcollegegirls: fcuk, eyeontheguy: hush, fakehub: mindgeek, - exploitedcollegegirls: fcuk, firstanalquest, forbondage: porndoe, freeones, @@ -202,7 +204,9 @@ const scrapers = { killergram, kink, legalporno, + letsdoeit: porndoe, littlecapricedreams, + mamacitaz: porndoe, men: mindgeek, metrohd: mindgeek, milehighmedia: mindgeek, @@ -215,31 +219,31 @@ const scrapers = { nubilesporn: nubiles, nympho: mikeadriano, onlyprince: fullpornnetwork, + pascalssubsluts, pervcity, pervertgallery: fullpornnetwork, peternorth: famedigital, pierrewoodman, pimpxxx: cherrypimps, - letsdoeit: porndoe, - mamacitaz: porndoe, - pascalssubsluts, porncz, pornhub, + pornworld: ddfnetwork, povperverts: fullpornnetwork, povpornstars: hush, private: privateNetwork, realitykings: mindgeek, + realvr: badoink, roccosiffredi: famedigital, score, seehimfuck: hush, sexyhub: mindgeek, silverstonedvd: famedigital, silviasaint: famedigital, - topwebmodels, swallowed: mikeadriano, teamskeet, teencoreclub, thatsitcomshow: nubiles, + topwebmodels, transangels: mindgeek, transbella: porndoe, trueanal: mikeadriano, @@ -248,6 +252,7 @@ const scrapers = { twistys: mindgeek, vipsexvault: porndoe, vixen, + vrcosplayx: badoink, wicked: gamma, wildoncam: cherrypimps, xempire, diff --git a/src/utils/convert.js b/src/utils/convert.js index 26e6cb06..358c7f3f 100644 --- a/src/utils/convert.js +++ b/src/utils/convert.js @@ -61,7 +61,8 @@ function kgToLbs(kgs) { function convertManyApi(input, to) { const curatedInput = input .replace('\'', 'ft') - .replace('"', 'in'); + .replace(/"|''/, 'in') + .replace(/\d+ft\s*\d+\s*$/, match => `${match}in`); // height without any inch symbol return Math.round(convertMany(curatedInput).to(to)) || null; } @@ -76,7 +77,9 @@ function convertApi(input, fromOrTo, to) { return convertManyApi(input, fromOrTo); } - return Math.round(convert(input).from(fromOrTo).to(to)) || null; + const inputNumber = Number(typeof input === 'string' ? input.match(/\d+(\.\d+)?/)?.[0] : input); + + return Math.round(convert(inputNumber).from(fromOrTo).to(to)) || null; } catch (error) { logger.error(error); return null;