Added profile scraper with releases to Hush. Added qtexts to q to return text nodes individually. Included network in profile site.
This commit is contained in:
@@ -3,8 +3,9 @@
|
||||
const util = require('util');
|
||||
|
||||
const knex = require('../knex');
|
||||
const { get, geta, fd } = require('../utils/q');
|
||||
const { get, geta, ed, fd, ctxa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
async function getChannelRegExp(site) {
|
||||
if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;
|
||||
@@ -15,7 +16,11 @@ async function getChannelRegExp(site) {
|
||||
}
|
||||
|
||||
/**
 * Derive an entry ID for a release from its date and title.
 *
 * @param {Object} release - Release being scraped; `date` and `title` are read.
 * @returns {string|null} `<slugified date>-<slugified title>`, or `null` when
 *   either the date or the title is missing.
 */
function deriveEntryId(release) {
  // Both parts are required; without them there is nothing to build an ID from.
  // `fd` is called with a 'YYYY-MM-DD' format (presumably a date formatter — confirm in utils/q).
  if (release.date && release.title) {
    return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
  }

  return null;
}
|
||||
|
||||
function extractPoster(posterPath, site, baseRelease) {
|
||||
@@ -38,7 +43,23 @@ function extractPoster(posterPath, site, baseRelease) {
|
||||
return [baseRelease?.poster || null, []];
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site) {
|
||||
/**
 * Build a list of candidate image URLs for a selector, trying the `src0_3x`,
 * `src0_2x` and `src0_1x` attributes in that order.
 *
 * @param {Function} q - Query helper; called as `q(selector, attribute)` or,
 *   when `el` is given, as `q(el, selector, attribute)`.
 * @param {string} selector - Selector of the image element.
 * @param {Object} site - Site providing the URL prefix: `site.parameters.media`
 *   when set, otherwise `site.url`.
 * @param {*} [el] - Optional element passed to `q` as its first argument.
 * @returns {string[]} Prefixed URLs for every attribute that yielded a value.
 */
function getImageWithFallbacks(q, selector, site, el) {
  const prefix = site.parameters?.media || site.url;

  return ['src0_3x', 'src0_2x', 'src0_1x']
    .map((attribute) => (el ? q(el, selector, attribute) : q(selector, attribute)))
    .filter(Boolean)
    .map((src) => `${prefix}${src}`);
}
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ q, qu, qd, ql }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -50,19 +71,15 @@ function scrapeLatest(scenes, site) {
|
||||
release.date = qd('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
|
||||
release.duration = ql('.modeldata p');
|
||||
|
||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind-the-scenes'];
|
||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
|
||||
release.poster = [
|
||||
q('.modelimg img', 'src0_3x'),
|
||||
q('.modelimg img', 'src0_2x'),
|
||||
q('.modelimg img', 'src0_1x'),
|
||||
].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`);
|
||||
release.poster = getImageWithFallbacks(q, '.modelimg img', site);
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeLatestT1(scenes, site, accSiteReleases) {
|
||||
function scrapeAllT1(scenes, site, accSiteReleases) {
|
||||
return scenes.map(({ q, qi, qd, ql, qu }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -72,6 +89,8 @@ function scrapeLatestT1(scenes, site, accSiteReleases) {
|
||||
release.date = qd('.more-info-div', 'MMM D, YYYY');
|
||||
release.duration = ql('.more-info-div');
|
||||
|
||||
if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
|
||||
|
||||
const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder');
|
||||
|
||||
if (posterPath) {
|
||||
@@ -96,7 +115,7 @@ function scrapeLatestT1(scenes, site, accSiteReleases) {
|
||||
}).filter(Boolean);
|
||||
}
|
||||
|
||||
function scrapeLatestTour(scenes) {
|
||||
function scrapeAllTour(scenes) {
|
||||
return scenes.map(({ q, qa, qu, qd, qi }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -149,11 +168,7 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
|
||||
|
||||
release.actors = qa('.models-list-thumbs a').map(el => ({
|
||||
name: q(el, 'span', true),
|
||||
avatar: [
|
||||
q(el, 'img', 'src0_3x'),
|
||||
q(el, 'img', 'src0_2x'),
|
||||
q(el, 'img', 'src0_1x'),
|
||||
].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`),
|
||||
avatar: getImageWithFallbacks(q, 'img', site, el),
|
||||
}));
|
||||
|
||||
release.tags = qa('.tags a', true);
|
||||
@@ -187,12 +202,13 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
|
||||
}
|
||||
|
||||
function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
|
||||
const release = { url };
|
||||
const release = {};
|
||||
|
||||
if (url) release.url = url;
|
||||
release.title = q('.update_title, .video-title', true);
|
||||
release.description = q('.latest_update_description, .video-summary', true);
|
||||
|
||||
const date = qd('.availdate', 'YYYY-MM-DD');
|
||||
const date = qd('.availdate, .update_date', 'YYYY-MM-DD');
|
||||
if (date) release.date = date;
|
||||
|
||||
release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
|
||||
@@ -212,6 +228,131 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Scrape an actor profile from a default-layout profile page.
 *
 * Reads `key: value` texts from `.stats p`, maps measurements, age and height
 * onto the profile, collects avatar candidates and scrapes the releases listed
 * under `.modelFeatures .modelfeature`.
 *
 * @param {Object} context - Query context of the profile page.
 * @param {*} context.el - Root element handed to `ctxa` to build release contexts.
 * @param {Function} context.q - Query helper forwarded to `getImageWithFallbacks`.
 * @param {Function} context.qtxs - Returns an array of texts for a selector
 *   (presumably one per text node — confirm in utils/q).
 * @param {Object} site - Site the profile is scraped from.
 * @returns {Object} Profile with any of `bust`, `waist`, `hip`, `age`, `height`,
 *   plus `avatar` and `releases`.
 */
function scrapeProfile({ el, q, qtxs }, site) {
  const profile = {};

  const bio = qtxs('.stats p').reduce((acc, info) => {
    const [key, value] = info.split(':');

    // Texts without a `key: value` shape have no value to trim; skip them
    // instead of throwing on `undefined.trim()`.
    if (!value) return acc;

    return {
      ...acc,
      [slugify(key, { delimiter: '_' })]: value.trim(),
    };
  }, {});

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    // Bust is kept as a string, waist and hip are converted to numbers.
    if (bust) profile.bust = bust;
    if (waist) profile.waist = Number(waist);
    if (hip) profile.hip = Number(hip);
  }

  if (bio.age) profile.age = Number(bio.age);
  if (bio.height) profile.height = feetInchesToCm(bio.height);

  profile.avatar = getImageWithFallbacks(q, '.profileimg img', site);

  const qReleases = ctxa(el, '.modelFeatures .modelfeature');
  profile.releases = scrapeAll(qReleases, site);

  return profile;
}
|
||||
|
||||
/**
 * Scrape an actor profile from a T1-layout profile page.
 *
 * Reads `key: value` texts from the `.detail-div` paragraphs, maps measurements,
 * fun fact, age and height onto the profile, collects avatar candidates and
 * scrapes the releases listed as `.item-video`.
 *
 * @param {Object} context - Query context of the profile page.
 * @param {*} context.el - Root element handed to `ctxa` to build release contexts.
 * @param {Function} context.q - Query helper forwarded to `getImageWithFallbacks`.
 * @param {Function} context.qa - Called as `qa(selector, true)`; expected to
 *   return an array of strings (confirm in utils/q).
 * @param {Object} site - Site the profile is scraped from.
 * @returns {Object} Profile with any of `bust`, `waist`, `hip`, `description`,
 *   `age`, `height`, plus `avatar` and `releases`.
 */
function scrapeProfileT1({ el, q, qa }, site) {
  const profile = {};

  const bio = qa('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
    const [key, value] = info.split(':');

    // Paragraphs that are not `key: value` pairs carry no bio data.
    if (!value) return acc;

    return {
      ...acc,
      [slugify(key, { delimiter: '_' })]: value.trim(),
    };
  }, {});

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    if (bust) profile.bust = bust;
    if (waist) profile.waist = Number(waist);
    if (hip) profile.hip = Number(hip);
  }

  if (bio.fun_fact) profile.description = bio.fun_fact;
  if (bio.age) profile.age = Number(bio.age);

  // A three-digit number (optionally followed by 'c') is taken as a metric height.
  const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);

  if (heightMetric) {
    profile.height = Number(heightMetric[1]);
  } else {
    // Only fall back to feet/inches when no metric value was found; otherwise
    // the digits of the metric value would be misread as feet and inches.
    // Whole numbers are matched so that two-digit inches (e.g. 5'10") survive.
    const heightImperial = bio.height?.match(/\d+(\.\d+)?/g);

    if (heightImperial) {
      profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
    }
  }

  profile.avatar = getImageWithFallbacks(q, '.img-div img', site);

  const qReleases = ctxa(el, '.item-video');
  profile.releases = scrapeAllT1(qReleases, site);

  return profile;
}
|
||||
|
||||
/**
 * Scrape an actor profile from a tour-layout profile page.
 *
 * Reads `key: value` texts from `.model_bio`, maps the known keys onto the
 * profile, collects avatar candidates and scrapes every `.update_block` on the
 * page as a release via `scrapeSceneTour`.
 *
 * @param {Object} context - Query context of the profile page.
 * @param {*} context.el - Root element handed to `ctxa` to build release contexts.
 * @param {Function} context.q - Query helper forwarded to `getImageWithFallbacks`.
 * @param {Function} context.qtxs - Returns an array of texts for a selector
 *   (presumably one per text node — confirm in utils/q).
 * @param {Object} site - Site the profile is scraped from; attached to each release.
 * @returns {Object} Profile with the bio fields that were found, plus `avatar`
 *   and `releases`.
 */
function scrapeProfileTour({ el, q, qtxs }, site) {
  const profile = {};

  const bio = qtxs('.model_bio').reduce((acc, info) => {
    const [key, value] = info.split(':');

    // Texts without a `key: value` shape have no value to trim; skip them
    // instead of throwing on `undefined.trim()`.
    if (!value) return acc;

    return {
      ...acc,
      [slugify(key, { delimiter: '_' })]: value.trim(),
    };
  }, {});

  // A 'no' match wins over a 'yes' match; anything else leaves the flag unset.
  const toFlag = (answer) => {
    if (!answer) return undefined;
    if (/no/i.test(answer)) return false;
    if (/yes/i.test(answer)) return true;

    return undefined;
  };

  if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
  if (bio.birthplace) profile.birthPlace = bio.birthplace;
  if (bio.fun_fact) profile.description = bio.fun_fact;

  if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

  // Only store height and weight when a leading 2-3 digit number is present,
  // so an unparseable value does not end up as NaN on the profile.
  const height = bio.height?.match(/^\d{2,3}/)?.[0];
  const weight = bio.weight?.match(/^\d{2,3}/)?.[0];

  if (height) profile.height = Number(height);
  if (weight) profile.weight = Number(weight);

  if (bio.measurements) {
    const [bust, waist, hip] = bio.measurements.split('-');

    if (bust) profile.bust = bust;
    if (waist) profile.waist = Number(waist);
    if (hip) profile.hip = Number(hip);
  }

  const flags = [
    ['natural_breasts', 'naturalBoobs'],
    ['tattoos', 'hasTattoos'],
    ['piercings', 'hasPiercings'],
  ];

  flags.forEach(([bioKey, profileKey]) => {
    const flag = toFlag(bio[bioKey]);

    if (flag !== undefined) profile[profileKey] = flag;
  });

  if (bio.aliases) profile.aliases = bio.aliases.split(',').map((alias) => alias.trim());

  profile.avatar = getImageWithFallbacks(q, '.model_picture img', site);

  const qReleases = ctxa(el, '.update_block');

  profile.releases = qReleases.map((qRelease) => {
    const url = qRelease.qu('.update_image a[href]');
    const release = scrapeSceneTour(qRelease, site);

    // Links pointing at a signup/join page are not scene URLs; a missing link
    // is left off the release rather than stored as an empty value.
    if (url && !/\/(signup|join)/i.test(url)) release.url = url;

    release.entryId = deriveEntryId(release);
    release.site = site;

    return release;
  });

  return profile;
}
|
||||
|
||||
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
|
||||
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|
||||
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|
||||
@@ -220,10 +361,10 @@ async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases)
|
||||
const qLatest = await geta(url, '.modelfeature, .item-video, .updateItem');
|
||||
|
||||
if (!qLatest) return null;
|
||||
if (site.parameters?.t1) return scrapeLatestT1(qLatest, site, accSiteReleases);
|
||||
if (site.parameters?.tour) return scrapeLatestTour(qLatest, site, accSiteReleases);
|
||||
if (site.parameters?.t1) return scrapeAllT1(qLatest, site, accSiteReleases);
|
||||
if (site.parameters?.tour) return scrapeAllTour(qLatest, site, accSiteReleases);
|
||||
|
||||
return scrapeLatest(qLatest, site, accSiteReleases);
|
||||
return scrapeAll(qLatest, site, accSiteReleases);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||
@@ -238,8 +379,24 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||
return scrapeScene(qScene, site, url, baseRelease);
|
||||
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile page.
 *
 * Two slug variants of the actor name are tried in order: one with an empty
 * delimiter and one with slugify's default delimiter. The URL comes from the
 * `site.parameters.profile` format string when set, otherwise from
 * `<site.url>/[t1/]models/<slug>.html`.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @param {string} scraperSlug - Unused here; kept for the scraper interface.
 * @param {Object} site - Site to fetch from; `parameters.t1` and
 *   `parameters.tour` select the scraper for the page layout.
 * @returns {Promise<Object|null|undefined>} The scraped profile, or the falsy
 *   result of `get` when neither URL yielded a page.
 */
async function fetchProfile(actorName, scraperSlug, site) {
  const actorSlugA = slugify(actorName, { delimiter: '' });
  const actorSlugB = slugify(actorName);

  const t1 = site.parameters?.t1 ? 't1/' : '';

  // Both attempts must go through the same URL construction; the profile
  // parameter is a format string and has to be filled in for each slug.
  const getProfileUrl = (actorSlug) => (site.parameters?.profile
    ? util.format(site.parameters.profile, actorSlug)
    : `${site.url}/${t1}models/${actorSlug}.html`);

  // The second request is only made when the first one yields nothing.
  const qProfile = await get(getProfileUrl(actorSlugA)) || await get(getProfileUrl(actorSlugB));

  if (site.parameters?.t1) return qProfile && scrapeProfileT1(qProfile, site);
  if (site.parameters?.tour) return qProfile && scrapeProfileTour(qProfile, site);

  return qProfile && scrapeProfile(qProfile, site);
}
|
||||
|
||||
module.exports = {
|
||||
beforeFetchLatest: getChannelRegExp,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user