Updated Hussie Pass scraper for new site design. Added cock size to profiles.

This commit is contained in:
DebaucheryLibrarian
2020-11-15 04:33:24 +01:00
parent b3a5d7f379
commit df9a6eac05
69 changed files with 267 additions and 145 deletions

View File

@@ -180,6 +180,9 @@ function curateActor(actor, withDetails = false, isProfile = false) {
waist: actor.waist,
hip: actor.hip,
naturalBoobs: actor.natural_boobs,
penisLength: actor.penis_length,
penisGirth: actor.penis_girth,
circumcised: actor.circumcised,
height: actor.height,
weight: actor.weight,
eyes: actor.eyes,
@@ -262,6 +265,9 @@ function curateProfileEntry(profile) {
bust: profile.bust,
waist: profile.waist,
hip: profile.hip,
penis_length: profile.penisLength,
penis_girth: profile.penisGirth,
circumcised: profile.circumcised,
natural_boobs: profile.naturalBoobs,
height: profile.height,
weight: profile.weight,
@@ -323,16 +329,35 @@ async function curateProfile(profile) {
curatedProfile.dateOfDeath = Number.isNaN(Number(profile.dateOfDeath)) ? null : profile.dateOfDeath;
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
curatedProfile.cup = profile.cup || (typeof profile.bust === 'string' && profile.bust?.match?.(/[a-zA-Z]+/)?.[0]) || null;
curatedProfile.bust = Number(profile.bust) || profile.bust?.match?.(/\d+/)?.[0] || null;
curatedProfile.waist = Number(profile.waist) || profile.waist?.match?.(/\d+/)?.[0] || null;
curatedProfile.hip = Number(profile.hip) || profile.hip?.match?.(/\d+/)?.[0] || null;
curatedProfile.height = Number(profile.height) || profile.height?.match?.(/\d+/)?.[0] || null;
curatedProfile.weight = Number(profile.weight) || profile.weight?.match?.(/\d+/)?.[0] || null;
curatedProfile.penisLength = Number(profile.penisLength) || profile.penisLength?.match?.(/\d+/)?.[0] || null;
curatedProfile.penisGirth = Number(profile.penisGirth) || profile.penisGirth?.match?.(/\d+/)?.[0] || null;
curatedProfile.naturalBoobs = typeof profile.naturalBoobs === 'boolean' ? profile.naturalBoobs : null;
curatedProfile.hasTattoos = typeof profile.hasTattoos === 'boolean' ? profile.hasTattoos : null;
curatedProfile.hasPiercings = typeof profile.hasPiercings === 'boolean' ? profile.hasPiercings : null;
// Normalize a scraped yes/no trait to true, false, or null when it is unknown.
// The candidates are checked one by one instead of being chained with `||`:
// an `||` chain discards every falsy result, so an explicit `false` (or a "no"
// string) would fall through to null and could never be stored as false.
const curateBooleanTrait = (value) => {
	if (typeof value === 'boolean') return value;
	if (/yes/i.test(value)) return true;
	if (/no/i.test(value)) return false; // "no" means false for every trait, including tattoos and piercings

	return null;
};

curatedProfile.circumcised = curateBooleanTrait(profile.circumcised);
curatedProfile.naturalBoobs = curateBooleanTrait(profile.naturalBoobs);
curatedProfile.hasTattoos = curateBooleanTrait(profile.hasTattoos);
curatedProfile.hasPiercings = curateBooleanTrait(profile.hasPiercings);
if (argv.resolvePlace) {
const [placeOfBirth, placeOfResidence] = await Promise.all([
@@ -437,6 +462,9 @@ async function interpolateProfiles(actorIds) {
'bust',
'waist',
'hip',
'penis_length',
'penis_girth',
'circumcised',
'natural_boobs',
'height',
'hair_color',

View File

@@ -2,11 +2,17 @@
const util = require('util');
const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
const { get, getAll, ed, formatDate, prefixUrl, ctxa } = require('../utils/q');
const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
const { feetInchesToCm, inchesToCm } = require('../utils/convert');
function deriveEntryId(release) {
if (release.date && release.url) {
const slug = new URL(release.url).pathname.match(/\/trailers\/(.*).html/)[1];
return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(slug)}`;
}
if (release.date && release.title) {
return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
}
@@ -50,21 +56,25 @@ function getImageWithFallbacks(q, selector, site, el) {
return sources.filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`);
}
function scrapeAll(scenes, site) {
return scenes.map(({ qu }) => {
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
release.title = qu.q('h3 a', 'title') || qu.q('h3 a', true);
release.url = qu.url('h3 a');
release.title = query.q('h4 a', true);
release.url = query.url('a');
release.date = qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
release.duration = qu.dur('.modeldata p');
release.date = query.date('.date', 'YYYY-MM-DD');
release.duration = query.duration('.time');
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
const count = query.number('a img', null, 'cnt');
release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);
[release.poster, ...release.photos] = Array.from({ length: count }, (value, index) => [
query.img('a img', `src${index}_3x`, { origin: channel.url }),
query.img('a img', `src${index}_2x`, { origin: channel.url }),
query.img('a img', `src${index}_1x`, { origin: channel.url }),
]);
// release.entryId = q('.modelimg img', 'id').match(/set-target-(\d+)/)[1];
release.stars = query.count('img[src*="star_full"]') + (query.count('img[src*="star_half"]') * 0.5);
release.entryId = deriveEntryId(release);
return release;
@@ -107,45 +117,37 @@ function scrapeAllT1(scenes, site, accNetworkReleases) {
}).filter(Boolean);
}
function scrapeAllTour(scenes) {
return scenes.map(({ qu }) => {
const release = {};
function scrapeScene({ html, query }, channel, url) {
const release = { url }; // url used for entry ID
release.title = qu.q('h4 a', true);
release.url = qu.url('a');
release.date = qu.date('.tour_update_models + span', 'YYYY-MM-DD');
release.title = query.cnt('.videoDetails h3');
release.description = query.cnt('.videoDetails p');
release.actors = qu.all('.tour_update_models a', true);
release.date = query.date('.videoInfo p', ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.duration = Number(query.cnt('.videoInfo p:nth-of-type(2)')?.match(/(\d+) min/i)?.[1]) * 60;
release.poster = qu.img('a img');
release.entryId = deriveEntryId(release);
return release;
});
}
function scrapeScene({ html, qu }, site, url, baseRelease) {
const release = { url };
release.title = qu.q('.centerwrap h2', true);
release.description = qu.q('.videocontent p', true);
release.date = qu.date('.videodetails .date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.duration = qu.dur('.videodetails .date');
release.actors = qu.all('.modelname a', true);
release.actors = query.cnts('.update_models a');
const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);
const poster = prefixUrl(posterPath, channel.url);
[release.poster, ...release.photos] = [poster, ...query.imgs('.item-thumb img', 'src0_1x', { origin: channel.url })]
.map(src => [
src.replace('-1x', '-3x'),
src.replace('-1x', '-2x'),
src,
]);
const trailerPath = html.match(/\/trailers\/.*.mp4/);
if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };
const stars = qu.q('.modelrates + p', true).match(/\d.\d/)?.[0];
if (stars) release.stars = Number(stars);
if (trailerPath) {
// release.trailer = { src: `${channel.parameters?.media || channel.url}${trailerPath}` };
release.trailer = prefixUrl(trailerPath, channel.parameters?.media || channel.url);
}
release.tags = query.cnts('.featuring a[href*="categories/"]');
release.stars = query.count('.stars img[src*="star_full"]') + (query.count('.stars img[src*="star_half"]') * 0.5);
// release.entryId = html.match(/set-target-(\d+)/)[1];
release.entryId = deriveEntryId(release);
return release;
@@ -193,64 +195,6 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease) {
return release;
}
/**
 * Build a release object from a single scene page of a tour-style site.
 *
 * @param {object} page - query context for the scene page
 * @param {string} page.html - raw page HTML, searched for a trailer path as fallback
 * @param {object} page.qu - query helpers (q, date, all, imgs) bound to the page
 * @param {object} site - site entry; `parameters.media` or `url` prefixes relative trailer paths
 * @param {string} [url] - scene URL, only stored on the release when truthy
 * @returns {object} the scraped release
 */
function scrapeSceneTour({ html, qu }, site, url) {
	const scene = {};

	if (url) {
		scene.url = url;
	}

	scene.title = qu.q('.update_title, .video-title', true);
	scene.description = qu.q('.latest_update_description, .video-summary', true);

	const releaseDate = qu.date('.availdate, .update_date', 'YYYY-MM-DD');

	if (releaseDate) {
		scene.date = releaseDate;
	}

	scene.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	scene.tags = qu.all('.update_tags a, .tour_update_tags a', true);

	const [firstImage, secondImage, ...otherImages] = qu.imgs('.update_image img:not(.play_icon_overlay)');
	const posterSource = secondImage || firstImage;

	if (posterSource) {
		scene.poster = posterSource;
	}

	// don't use first photo when already used as fallback poster
	if ((firstImage && secondImage) || otherImages) {
		scene.photos = secondImage ? [firstImage, ...otherImages] : otherImages;
	}

	// the entry ID is only derived when a date was found
	if (scene.date) {
		scene.entryId = deriveEntryId(scene);
	}

	const onclickCode = qu.q('.update_image a', 'onclick');
	const trailerPath = onclickCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];

	if (trailerPath) {
		// paths starting with http are used as-is, anything else is prefixed with the media or site URL
		const trailerSource = /^http/.test(trailerPath)
			? trailerPath
			: `${site.parameters?.media || site.url}${trailerPath}`;

		scene.trailer = { src: trailerSource };
	}

	return scene;
}
/**
 * Scrape an actor profile page into a profile object.
 *
 * The bio is read from the `.stats p` texts, each expected to look like
 * "Key: value"; keys are slugified with `_` as delimiter.
 *
 * @param {object} page - query context for the profile page
 * @param {object} page.el - root element, used to locate the actor's scenes
 * @param {object} page.qu - query helpers bound to the page
 * @param {object} site - site entry passed on to the avatar and release scrapers
 * @returns {object} profile with measurements, age, height, avatar and releases where available
 */
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	const bio = qu.texts('.stats p').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// a line without a "key: value" separator has no value; skip it rather than crash on undefined.trim()
		if (value === undefined) {
			return acc;
		}

		// the accumulator is local to this reduce, so it is extended in place instead of copied every step
		acc[slugify(key, '_')] = value.trim();

		return acc;
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// bust is kept as a string, waist and hip are converted to numbers
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.age) profile.age = Number(bio.age);
	if (bio.height) profile.height = feetInchesToCm(bio.height);

	profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);

	const qReleases = ctxa(el, '.modelFeatures .modelfeature');
	profile.releases = scrapeAll(qReleases, site);

	return profile;
}
function scrapeProfileT1({ el, qu }, site) {
const profile = {};
@@ -289,15 +233,16 @@ function scrapeProfileT1({ el, qu }, site) {
return profile;
}
function scrapeProfileTour({ el, qu }, site) {
function scrapeProfile({ query }, channel) {
const profile = {};
const bio = qu.texts('.model_bio').reduce((acc, info) => {
const [key, value] = info.split(':');
const bio = query.all('.stats li').reduce((acc, bioEl) => {
const key = query.cnt(bioEl, 'strong');
const value = query.url(bioEl) || query.text(bioEl);
return {
...acc,
[slugify(key, '_')]: value.trim(),
[slugify(key, '_')]: value,
};
}, {});
@@ -318,6 +263,11 @@ function scrapeProfileTour({ el, qu }, site) {
if (hip) profile.hip = Number(hip);
}
if (bio.penis_length) profile.penisLength = Number(bio.penis_length.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_length.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
if (bio.penis_girth) profile.penisGirth = Number(bio.penis_girth.match(/(\d+)\s*cm/i)?.[1] || inchesToCm(bio.penis_girth.match(/(\d+\.?\d+)\s*in/i)?.[1])) || null;
if (bio.circumcised && /yes/i.test(bio.circumcised)) profile.circumcised = true;
if (bio.circumcised && /no/i.test(bio.circumcised)) profile.circumcised = false;
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
@@ -328,33 +278,31 @@ function scrapeProfileTour({ el, qu }, site) {
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
profile.social = [bio.onlyfans, bio.twitter, bio.instagram].filter(Boolean);
const qReleases = ctxa(el, '.update_block');
profile.releases = qReleases.map((qRelease) => {
const url = qRelease.qu.url('.update_image a[href]');
const release = scrapeSceneTour(qRelease, site);
if (!/\/(signup|join)/i.test(url)) release.url = url;
release.entryId = deriveEntryId(release);
release.site = site;
return release;
});
profile.avatar = [
query.img('.profile-pic img', 'src0_3x', { origin: channel.url }),
query.img('.profile-pic img', 'src0_2x', { origin: channel.url }),
query.img('.profile-pic img', 'src0_1x', { origin: channel.url }),
];
return profile;
}
async function fetchLatest(site, page = 1, include, { uniqueReleases, duplicateReleases }) {
async function fetchLatest(site, page = 1, include, { uniqueReleases = [], duplicateReleases = [] }) {
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|| `${site.url}/categories/movies_${page}_d.html`;
const res = await geta(url, '.modelfeature, .item-video, .updateItem');
const res = await getAll(url, '.modelfeature, .item-video, .updateItem');
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeAllT1(res.items, site, [...uniqueReleases, ...duplicateReleases]);
if (site.parameters?.tour) return scrapeAllTour(res.items, site);
if (!res.ok) {
return res.status;
}
if (site.parameters?.t1) {
return scrapeAllT1(res.items, site, [...uniqueReleases, ...duplicateReleases]);
}
return scrapeAll(res.items, site, uniqueReleases);
}
@@ -362,9 +310,13 @@ async function fetchLatest(site, page = 1, include, { uniqueReleases, duplicateR
async function fetchScene(url, site, baseRelease) {
const res = await get(url);
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease);
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
if (!res.ok) {
return res.status;
}
if (site.parameters?.t1) {
return scrapeSceneT1(res.item, site, url, baseRelease);
}
return scrapeScene(res.item, site, url, baseRelease);
}
@@ -377,15 +329,19 @@ async function fetchProfile({ name: actorName }, { site }) {
const res1 = site.parameters?.profile
? await get(util.format(site.parameters.profile, actorSlugA))
: await get(`${site.url}/${t1}models/${actorSlugA}.html`);
: await get(`${site.url}/${t1}models/${actorSlugA}.html`, null, null, { followRedirects: false });
const res = (res1.ok && res1)
|| (site.parameters?.profile && await get(util.format(site.parameters.profile, actorSlugB)))
|| await get(`${site.url}/${t1}models/${actorSlugB}.html`);
|| await get(`${site.url}/${t1}models/${actorSlugB}.html`, null, null, { followRedirects: false });
if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
if (!res.ok) {
return res.status;
}
if (site.parameters?.t1) {
return scrapeProfileT1(res.item, site);
}
return scrapeProfile(res.item, site);
}

View File

@@ -26,6 +26,10 @@ function cmToFeetInches(centimeters) {
return { feet, inches };
}
/**
 * Convert a length in centimeters to inches.
 *
 * @param {number} centimeters - length in centimeters
 * @returns {number} the same length in inches, unrounded
 */
function cmToInches(centimeters) {
	const centimetersPerInch = 2.54;

	return centimeters / centimetersPerInch;
}
function heightToCm(height) {
if (!height) return null;
@@ -52,6 +56,7 @@ function kgToLbs(kgs) {
module.exports = {
cmToFeetInches,
cmToInches,
feetInchesToCm,
heightToCm,
inchesToCm,

View File

@@ -2,7 +2,7 @@
const { makeExtendSchemaPlugin, gql } = require('graphile-utils');
const moment = require('moment');
const { cmToFeetInches, kgToLbs } = require('../../utils/convert');
const { cmToFeetInches, cmToInches, kgToLbs } = require('../../utils/convert');
const schemaExtender = makeExtendSchemaPlugin(_build => ({
typeDefs: gql`
@@ -16,6 +16,8 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
ageAtDeath: Int @requires(columns: ["dateOfBirth", "dateOfDeath"])
height(units:Units): String @requires(columns: ["height"])
weight(units:Units): String @requires(columns: ["weight"])
penisLength(units:Units): String @requires(columns: ["penis_length"])
penisGirth(units:Units): String @requires(columns: ["penis_girth"])
}
`,
resolvers: {
@@ -47,6 +49,20 @@ const schemaExtender = makeExtendSchemaPlugin(_build => ({
? kgToLbs(parent.weight).toString()
: parent.weight.toString();
},
penisLength(parent, args, _context, _info) {
if (!parent.penisLength) return null;
return args.units === 'IMPERIAL'
? (Math.round(cmToInches(parent.penisLength) * 4) / 4).toString() // round to nearest quarter inch
: parent.penisLength.toString();
},
penisGirth(parent, args, _context, _info) {
if (!parent.penisGirth) return null;
return args.units === 'IMPERIAL'
? (Math.round(cmToInches(parent.penisGirth) * 4) / 4).toString() // round to nearest quarter inch
: parent.penisGirth.toString();
},
},
},
}));