Added profile scraper with releases to Hush. Added qtexts to q to return text nodes individually. Included the network in the profile site object.

@@ -65,6 +65,15 @@ module.exports = {
   '21sextury',
   'julesjordan',
   'naughtyamerica',
+  [
+    'hussiepass',
+    'hushpass',
+    'interracialpass',
+    'interracialpovs',
+    'povpornstars',
+    'seehimfuck',
+    'eyeontheguy',
+  ],
   [
     'cherrypimps',
     'drilledxxx',

[binary] 23 new image files added (10-114 KiB each)

@@ -985,6 +985,10 @@ const aliases = [
     name: 'blonde hair',
     for: 'blonde',
   },
+  {
+    name: 'blonde female',
+    for: 'blonde',
+  },
   {
     name: 'blondes',
     for: 'blonde',

@@ -2097,6 +2097,7 @@ const sites = [
     network: 'hussiepass',
     parameters: {
       latest: 'http://www.povpornstars.com/tour/categories/movies_%d_d.html',
+      profile: 'http://www.povpornstars.com/tour/models/%s.html',
       tour: true,
     },
   },

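The new profile parameter is a printf-style URL template. A minimal sketch of how the Hush scraper's fetchProfile (added below) is assumed to fill the %s placeholder, with a hypothetical actor name:

    util.format('http://www.povpornstars.com/tour/models/%s.html', slugify('Jane Doe', { delimiter: '' }));
    // => 'http://www.povpornstars.com/tour/models/janedoe.html'
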
@@ -2325,6 +2326,18 @@ const sites = [
       t1: true,
     },
   },
+  {
+    slug: 'mydaughtersfuckingablackdude',
+    name: 'My Daughter\'s Fucking A Black Dude',
+    url: 'https://www.mydaughtersfuckingablackdude.com/',
+    tags: ['interracial'],
+    network: 'interracialpass',
+    parameters: {
+      latest: 'https://www.interracialpass.com/t1/categories/my-daughters-fucking-a-black-dude_%d_d.html',
+      media: 'https://www.interracialpass.com',
+      t1: true,
+    },
+  },
   {
     slug: 'mymomsfuckingblackzilla',
     name: 'My Mom\'s Fucking Blackzilla',

@@ -13,7 +13,7 @@ const whereOr = require('./utils/where-or');
 const resolvePlace = require('./utils/resolve-place');
 const slugify = require('./utils/slugify');
 const capitalize = require('./utils/capitalize');
-// const { createMediaDirectory, storePhotos } = require('./media_legacy');
+const { curateSites } = require('./sites');
 const { storeMedia, associateMedia } = require('./media');
 
 async function curateActor(actor) {

@@ -313,7 +313,7 @@ async function mergeProfiles(profiles, actor) {
     residencePlace: prevProfile.residencePlace || profile.residencePlace,
     nationality: prevProfile.nationality || profile.nationality, // used to derive country when not available
     ethnicity: prevProfile.ethnicity || profile.ethnicity,
-    bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) && profile.bust),
+    bust: prevProfile.bust || (/\d+\w+/.test(profile.bust) ? profile.bust : null),
     waist: prevProfile.waist || profile.waist,
     hip: prevProfile.hip || profile.hip,
     naturalBoobs: prevProfile.naturalBoobs === undefined ? profile.naturalBoobs : prevProfile.naturalBoobs,

@@ -385,7 +385,18 @@ async function scrapeActors(actorNames) {
 
   const finalSources = argv.withReleases ? sources.flat() : sources; // ignore race-to-success grouping when scenes are requested
 
-  const [sites, networks] = await Promise.all([knex('sites').select('*').whereIn('slug', finalSources.flat()), knex('networks').select('*').whereIn('slug', finalSources.flat())]);
+  const [siteEntries, networks] = await Promise.all([
+    knex('sites')
+      .leftJoin('networks', 'sites.network_id', 'networks.id')
+      .select(
+        'sites.*',
+        'networks.name as network_name', 'networks.slug as network_slug', 'networks.url as network_url', 'networks.description as network_description', 'networks.parameters as network_parameters',
+      )
+      .whereIn('sites.slug', finalSources.flat()),
+    knex('networks').select('*').whereIn('slug', finalSources.flat()),
+  ]);
+
+  const sites = await curateSites(siteEntries, true);
   const sitesBySlug = [].concat(networks, sites).reduce((acc, site) => ({ ...acc, [site.slug]: site }), {});
 
   const profiles = await Promise.map(finalSources, async (source) => {

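The left join pulls each site's network columns alongside the site row so curateSites can nest them, giving the profile scrapers the same site.network access the release scrapers already rely on. A sketch of the curated shape this is assumed to produce (values hypothetical):

    // One entry of `sites` after curateSites(siteEntries, true):
    // {
    //   slug: 'povpornstars',
    //   name: 'POV Pornstars',
    //   network: { slug: 'hussiepass', name: '...', parameters: { ... } },
    // }
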
@@ -5,7 +5,8 @@ const knex = require('./knex');
 const initServer = require('./web/server');
 
 const scrapeSites = require('./scrape-sites');
-const { scrapeReleases } = require('./scrape-releases');
+const { scrapeReleases, deepFetchReleases } = require('./scrape-releases');
+const { storeReleases } = require('./releases');
 const { scrapeActors, scrapeBasicActors } = require('./actors');
 
 if (process.env.NODE_ENV === 'development') {

@@ -29,9 +30,10 @@ async function init() {
   const actors = await scrapeActors();
 
   if (argv.withReleases) {
-    const releases = actors.map(actor => actor?.releases || []).flat();
+    const baseReleases = actors.map(actor => actor?.releases || []).flat();
+    const releases = await deepFetchReleases(baseReleases, null, 'scene');
 
-    await scrapeReleases(releases, null, 'scene');
+    await storeReleases(releases);
   }
 }
 

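With deepFetchReleases exported from scrape-releases, the actor flow now mirrors the site flow; a condensed sketch of the assumed sequence:

    const actors = await scrapeActors();                          // profiles, each carrying release stubs
    const baseReleases = actors.map(actor => actor?.releases || []).flat();
    const releases = await deepFetchReleases(baseReleases, null); // fetch full scene data per stub
    await storeReleases(releases);
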
@@ -423,8 +423,12 @@ async function updateReleasesSearch(releaseIds) {
 }
 
 async function storeRelease(release, batchId) {
+  if (!release.site) {
+    throw new Error(`Missing site, unable to store "${release.title}" (${release.url})`);
+  }
+
   if (!release.entryId) {
-    logger.warn(`Missing entry ID, unable to store ${release.url}`);
+    logger.warn(`Missing entry ID, unable to store "${release.title}" (${release.url})`);
     return null;
   }
 

@@ -42,7 +42,7 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
   const site = basicRelease?.site || await findSite(url, release);
 
   if (!site) {
-    throw new Error(`Could not find site ${url} in database`);
+    throw new Error(`Could not find site for ${url} in database`);
   }
 
   if (!argv.deep && release) {

@@ -55,25 +55,16 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene', prefli
   const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
 
   if (!scraper) {
-    throw new Error('Could not find scraper for URL');
+    throw new Error(`Could not find scraper for ${url}`);
   }
 
-  if (type === 'scene' && !scraper.fetchScene) {
+  if ((type === 'scene' && !scraper.fetchScene) || (type === 'movie' && !scraper.fetchMovie)) {
     if (release) {
-      logger.warn(`The '${site.name}'-scraper cannot fetch individual scenes`);
+      logger.warn(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
       return null;
     }
 
-    throw new Error(`The '${site.name}'-scraper cannot fetch individual scenes`);
-  }
-
-  if (type === 'movie' && !scraper.fetchMovie) {
-    if (release) {
-      logger.warn(`The '${site.name}'-scraper cannot fetch individual movies`);
-      return null;
-    }
-
-    throw new Error(`The '${site.name}'-scraper cannot fetch individual movies`);
+    throw new Error(`The '${site.name}'-scraper cannot fetch individual ${type}s`);
   }
 
   if (!release) {

@@ -126,7 +117,41 @@ async function scrapeReleases(sources, release = null, type = 'scene', preflight
   }
 }
 
+async function deepFetchReleases(baseReleases, beforeFetchLatest) {
+  return Promise.map(baseReleases, async (release) => {
+    if (release.url || (release.path && release.site)) {
+      try {
+        const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest);
+
+        if (fullRelease) {
+          return {
+            ...release,
+            ...fullRelease,
+            deep: true,
+          };
+        }
+
+        logger.warn(`Release scraper returned empty result for ${release.url}`);
+
+        return release;
+      } catch (error) {
+        logger.error(`Failed to scrape ${release.url}: ${error}`);
+
+        return {
+          ...release,
+          deep: false,
+        };
+      }
+    }
+
+    return release;
+  }, {
+    concurrency: 2,
+  });
+}
+
 module.exports = {
+  deepFetchReleases,
   scrapeRelease,
   scrapeReleases,
 };

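deepFetchReleases upgrades listing or profile stubs into fully scraped scenes while tolerating per-release failures; a usage sketch with hypothetical input:

    // A stub needs either a url or a path plus its site.
    const stubs = [{ url: 'https://example.com/scene/1', site }];
    const releases = await deepFetchReleases(stubs, null);
    // Successful fetches are merged over their stub and marked deep: true;
    // failed fetches keep the stub data and are marked deep: false.
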
@@ -8,7 +8,7 @@ const logger = require('./logger')(__filename);
 const knex = require('./knex');
 const { fetchIncludedSites } = require('./sites');
 const scrapers = require('./scrapers/scrapers');
-const { scrapeRelease } = require('./scrape-releases');
+const { deepFetchReleases } = require('./scrape-releases');
 const { storeReleases } = require('./releases');
 
 function getAfterDate() {

@@ -99,39 +99,6 @@ async function scrapeUpcomingReleases(scraper, site, beforeFetchLatest) {
   return [];
 }
 
-async function deepFetchReleases(baseReleases, beforeFetchLatest) {
-  return Promise.map(baseReleases, async (release) => {
-    if (release.url || (release.path && release.site)) {
-      try {
-        const fullRelease = await scrapeRelease(release.url, release, 'scene', beforeFetchLatest);
-
-        if (fullRelease) {
-          return {
-            ...release,
-            ...fullRelease,
-            deep: true,
-          };
-        }
-
-        logger.warn(`Release scraper returned empty result for ${release.url}`);
-
-        return release;
-      } catch (error) {
-        logger.error(`Failed to scrape ${release.url}: ${error}`);
-
-        return {
-          ...release,
-          deep: false,
-        };
-      }
-    }
-
-    return release;
-  }, {
-    concurrency: 2,
-  });
-}
-
 async function scrapeSiteReleases(scraper, site, accSiteReleases) {
   const beforeFetchLatest = await scraper.beforeFetchLatest?.(site, accSiteReleases);
 

@@ -3,8 +3,9 @@
 const util = require('util');
 
 const knex = require('../knex');
-const { get, geta, fd } = require('../utils/q');
+const { get, geta, ed, fd, ctxa } = require('../utils/q');
 const slugify = require('../utils/slugify');
+const { feetInchesToCm } = require('../utils/convert');
 
 async function getChannelRegExp(site) {
   if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

@@ -15,7 +16,11 @@ async function getChannelRegExp(site) {
 }
 
 function deriveEntryId(release) {
-  return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
+  if (release.date && release.title) {
+    return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
+  }
+
+  return null;
 }
 
 function extractPoster(posterPath, site, baseRelease) {

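deriveEntryId now degrades gracefully on tour profile listings, where a scene block may lack a date or title; a sketch with a hypothetical release, assuming slugify lowercases and hyphenates:

    deriveEntryId({ date: new Date('2019-11-22'), title: 'Some Scene' });
    // => '2019-11-22-some-scene'
    deriveEntryId({ title: 'Some Scene' });
    // => null, which storeRelease's entry-ID check then skips with a warning
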
@@ -38,7 +43,23 @@ function extractPoster(posterPath, site, baseRelease) {
   return [baseRelease?.poster || null, []];
 }
 
-function scrapeLatest(scenes, site) {
+function getImageWithFallbacks(q, selector, site, el) {
+  const sources = el
+    ? [
+      q(el, selector, 'src0_3x'),
+      q(el, selector, 'src0_2x'),
+      q(el, selector, 'src0_1x'),
+    ]
+    : [
+      q(selector, 'src0_3x'),
+      q(selector, 'src0_2x'),
+      q(selector, 'src0_1x'),
+    ];
+
+  return sources.filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`);
+}
+
+function scrapeAll(scenes, site) {
   return scenes.map(({ q, qu, qd, ql }) => {
     const release = {};
 

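getImageWithFallbacks deduplicates the poster/avatar pattern used throughout this scraper: collect the densest available src0_3x/src0_2x/src0_1x attribute values and prefix the media host. A sketch against hypothetical markup and media host:

    // <img class="thumb" src0_1x="/img/a.jpg" src0_2x="/img/a@2x.jpg">
    getImageWithFallbacks(q, 'img.thumb', site);
    // => ['https://www.interracialpass.com/img/a@2x.jpg',
    //     'https://www.interracialpass.com/img/a.jpg']
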
@@ -50,19 +71,15 @@ function scrapeLatest(scenes, site) {
     release.date = qd('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
     release.duration = ql('.modeldata p');
 
-    if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind-the-scenes'];
+    if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
 
-    release.poster = [
-      q('.modelimg img', 'src0_3x'),
-      q('.modelimg img', 'src0_2x'),
-      q('.modelimg img', 'src0_1x'),
-    ].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`);
+    release.poster = getImageWithFallbacks(q, '.modelimg img', site);
 
     return release;
   });
 }
 
-function scrapeLatestT1(scenes, site, accSiteReleases) {
+function scrapeAllT1(scenes, site, accSiteReleases) {
   return scenes.map(({ q, qi, qd, ql, qu }) => {
     const release = {};
 

@@ -72,6 +89,8 @@ function scrapeLatestT1(scenes, site, accSiteReleases) {
     release.date = qd('.more-info-div', 'MMM D, YYYY');
     release.duration = ql('.more-info-div');
 
+    if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];
+
     const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder');
 
     if (posterPath) {

@@ -96,7 +115,7 @@ function scrapeLatestT1(scenes, site, accSiteReleases) {
   }).filter(Boolean);
 }
 
-function scrapeLatestTour(scenes) {
+function scrapeAllTour(scenes) {
   return scenes.map(({ q, qa, qu, qd, qi }) => {
     const release = {};
 

@@ -149,11 +168,7 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
 
   release.actors = qa('.models-list-thumbs a').map(el => ({
     name: q(el, 'span', true),
-    avatar: [
-      q(el, 'img', 'src0_3x'),
-      q(el, 'img', 'src0_2x'),
-      q(el, 'img', 'src0_1x'),
-    ].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`),
+    avatar: getImageWithFallbacks(q, 'img', site, el),
   }));
 
   release.tags = qa('.tags a', true);

@@ -187,12 +202,13 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha
 }
 
 function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
-  const release = { url };
+  const release = {};
 
+  if (url) release.url = url;
   release.title = q('.update_title, .video-title', true);
   release.description = q('.latest_update_description, .video-summary', true);
 
-  const date = qd('.availdate', 'YYYY-MM-DD');
+  const date = qd('.availdate, .update_date', 'YYYY-MM-DD');
   if (date) release.date = date;
 
   release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);

@@ -212,6 +228,131 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) {
   return release;
 }
 
+function scrapeProfile({ el, q, qtxs }, site) {
+  const profile = {};
+
+  const bio = qtxs('.stats p').reduce((acc, info) => {
+    const [key, value] = info.split(':');
+
+    return {
+      ...acc,
+      [slugify(key, { delimiter: '_' })]: value.trim(),
+    };
+  }, {});
+
+  if (bio.measurements) {
+    const [bust, waist, hip] = bio.measurements.split('-');
+
+    if (bust) profile.bust = bust;
+    if (waist) profile.waist = Number(waist);
+    if (hip) profile.hip = Number(hip);
+  }
+
+  if (bio.age) profile.age = Number(bio.age);
+  if (bio.height) profile.height = feetInchesToCm(bio.height);
+
+  profile.avatar = getImageWithFallbacks(q, '.profileimg img', site);
+
+  const qReleases = ctxa(el, '.modelFeatures .modelfeature');
+  profile.releases = scrapeAll(qReleases, site);
+
+  return profile;
+}
+
+function scrapeProfileT1({ el, q, qa }, site) {
+  const profile = {};
+
+  const bio = qa('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
+    const [key, value] = info.split(':');
+
+    if (!value) return acc;
+
+    return {
+      ...acc,
+      [slugify(key, { delimiter: '_' })]: value.trim(),
+    };
+  }, {});
+
+  if (bio.measurements) {
+    const [bust, waist, hip] = bio.measurements.split('-');
+
+    if (bust) profile.bust = bust;
+    if (waist) profile.waist = Number(waist);
+    if (hip) profile.hip = Number(hip);
+  }
+
+  if (bio.fun_fact) profile.description = bio.fun_fact;
+  if (bio.age) profile.age = Number(bio.age);
+
+  const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
+  const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);
+  if (heightMetric) profile.height = Number(heightMetric[1]);
+  if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));
+
+  profile.avatar = getImageWithFallbacks(q, '.img-div img', site);
+
+  const qReleases = ctxa(el, '.item-video');
+  profile.releases = scrapeAllT1(qReleases, site);
+
+  return profile;
+}
+
+function scrapeProfileTour({ el, q, qtxs }, site) {
+  const profile = {};
+
+  const bio = qtxs('.model_bio').reduce((acc, info) => {
+    const [key, value] = info.split(':');
+
+    return {
+      ...acc,
+      [slugify(key, { delimiter: '_' })]: value.trim(),
+    };
+  }, {});
+
+  if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
+  if (bio.birthplace) profile.birthPlace = bio.birthplace;
+  if (bio.fun_fact) profile.description = bio.fun_fact;
+
+  if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
+
+  if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
+  if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
+
+  if (bio.measurements) {
+    const [bust, waist, hip] = bio.measurements.split('-');
+
+    if (bust) profile.bust = bust;
+    if (waist) profile.waist = Number(waist);
+    if (hip) profile.hip = Number(hip);
+  }
+
+  if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
+  if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
+
+  if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
+  if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
+  if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
+  if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
+
+  if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
+
+  profile.avatar = getImageWithFallbacks(q, '.model_picture img', site);
+
+  const qReleases = ctxa(el, '.update_block');
+  profile.releases = qReleases.map((qRelease) => {
+    const url = qRelease.qu('.update_image a[href]');
+    const release = scrapeSceneTour(qRelease, site);
+
+    if (!/\/(signup|join)/i.test(url)) release.url = url;
+    release.entryId = deriveEntryId(release);
+    release.site = site;
+
+    return release;
+  });
+
+  return profile;
+}
+
 async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
   const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
     || (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)

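All three profile scrapers share the same bio-parsing idea: split each 'Key: Value' text fragment and reduce the fragments into one keyed object. A sketch with hypothetical fragments:

    ['Height: 170cm', 'Natural Breasts: Yes'].reduce((acc, info) => {
      const [key, value] = info.split(':');
      return { ...acc, [slugify(key, { delimiter: '_' })]: value.trim() };
    }, {});
    // => { height: '170cm', natural_breasts: 'Yes' }
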
@@ -220,10 +361,10 @@ async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases)
   const qLatest = await geta(url, '.modelfeature, .item-video, .updateItem');
 
   if (!qLatest) return null;
-  if (site.parameters?.t1) return scrapeLatestT1(qLatest, site, accSiteReleases);
-  if (site.parameters?.tour) return scrapeLatestTour(qLatest, site, accSiteReleases);
+  if (site.parameters?.t1) return scrapeAllT1(qLatest, site, accSiteReleases);
+  if (site.parameters?.tour) return scrapeAllTour(qLatest, site, accSiteReleases);
 
-  return scrapeLatest(qLatest, site, accSiteReleases);
+  return scrapeAll(qLatest, site, accSiteReleases);
 }
 
 async function fetchScene(url, site, baseRelease, beforeFetchLatest) {

@@ -238,8 +379,24 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
   return scrapeScene(qScene, site, url, baseRelease);
 }
 
+async function fetchProfile(actorName, scraperSlug, site) {
+  const actorSlugA = slugify(actorName, { delimiter: '' });
+  const actorSlugB = slugify(actorName);
+
+  const t1 = site.parameters?.t1 ? 't1/' : '';
+  const qProfile = site.parameters?.profile
+    ? (await get(util.format(site.parameters.profile, actorSlugA)) || await get(util.format(site.parameters.profile, actorSlugB)))
+    : (await get(`${site.url}/${t1}models/${actorSlugA}.html`) || await get(`${site.url}/${t1}models/${actorSlugB}.html`));
+
+  if (site.parameters?.t1) return qProfile && scrapeProfileT1(qProfile, site);
+  if (site.parameters?.tour) return qProfile && scrapeProfileTour(qProfile, site);
+
+  return qProfile && scrapeProfile(qProfile, site);
+}
+
 module.exports = {
   beforeFetchLatest: getChannelRegExp,
   fetchLatest,
   fetchScene,
+  fetchProfile,
 };

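fetchProfile tries two slug spellings because model page URLs are inconsistent across the Hush family; a sketch with a hypothetical name, assuming slugify's default delimiter is a hyphen:

    slugify('Jane Doe', { delimiter: '' }); // => 'janedoe'  (tried first)
    slugify('Jane Doe');                    // => 'jane-doe' (fallback)
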
@@ -146,6 +146,7 @@ module.exports = {
   digitalplayground,
   dtfsluts: fullpornnetwork,
   evilangel,
+  eyeontheguy: hush,
   fakehub,
   famedigital,
   freeones,

|
@ -154,7 +155,11 @@ module.exports = {
|
||||||
hergape: fullpornnetwork,
|
hergape: fullpornnetwork,
|
||||||
homemadeanalwhores: fullpornnetwork,
|
homemadeanalwhores: fullpornnetwork,
|
||||||
hotcrazymess: nubiles,
|
hotcrazymess: nubiles,
|
||||||
|
hushpass: hush,
|
||||||
|
hussiepass: hush,
|
||||||
iconmale,
|
iconmale,
|
||||||
|
interracialpass: hush,
|
||||||
|
interracialpovs: hush,
|
||||||
jamesdeen: fullpornnetwork,
|
jamesdeen: fullpornnetwork,
|
||||||
julesjordan,
|
julesjordan,
|
||||||
kellymadison,
|
kellymadison,
|
||||||
|
@@ -174,9 +179,11 @@ module.exports = {
   pimpxxx: cherrypimps,
   pornhub,
   povperverts: fullpornnetwork,
+  povpornstars: hush,
   private: privateNetwork,
   realitykings,
   score,
+  seehimfuck: hush,
   thatsitcomshow: nubiles,
   transangels,
   tushy: vixen,

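These entries let the scraper lookup in scrape-releases resolve every Hush-family channel to the shared hush module:

    // From scrape-releases above: the per-site entry wins, the network entry is the fallback.
    const scraper = scrapers.releases[site.slug] || scrapers.releases[site.network.slug];
    // e.g. a release from 'seehimfuck' now resolves directly to the hush scraper.
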
@@ -60,7 +60,7 @@ async function matchTags(rawTags) {
 }
 
 async function associateTags(release, releaseId) {
-  const siteTags = release.site.tags.filter(tag => tag.inherit === true).map(tag => tag.id);
+  const siteTags = release.site?.tags?.filter(tag => tag.inherit === true).map(tag => tag.id) || [];
 
   const rawReleaseTags = release.tags || [];
   const releaseTags = rawReleaseTags.some(tag => typeof tag === 'string')

@@ -5,6 +5,11 @@ function inchesToCm(inches) {
 }
 
 function feetInchesToCm(feet, inches) {
+  if (typeof feet === 'string' && !inches) {
+    const [feetPart, inchesPart] = feet.match(/\d+/g);
+    return feetInchesToCm(feetPart, inchesPart);
+  }
+
   return Math.round((Number(feet) * 30.48) + (Number(inches) * 2.54));
 }
 

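The string overload lets the profile scrapers pass a raw bio value straight through; a sketch:

    feetInchesToCm(5, 7);   // => 170
    feetInchesToCm(`5'7"`); // => 170; the two digit groups are extracted and re-dispatched
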
@@ -64,17 +64,24 @@ function qall(context, selector, attrArg, applyTrim = true) {
   return Array.from(context.querySelectorAll(selector));
 }
 
-function qtext(context, selector, applyTrim = true) {
+function qtexts(context, selector, applyTrim = true, filter = true) {
   const el = q(context, selector, null, applyTrim);
   if (!el) return null;
 
-  const text = Array.from(el.childNodes)
+  const nodes = Array.from(el.childNodes)
     .filter(node => node.nodeName === '#text')
-    .map(node => (applyTrim ? node.textContent : trim(node.textContent)))
-    .join(' ');
+    .map(node => (applyTrim ? trim(node.textContent) : node.textContent));
 
-  if (applyTrim) return trim(text);
-  return text;
+  return filter ? nodes.filter(Boolean) : nodes;
+}
+
+function qtext(context, selector, applyTrim = true) {
+  const nodes = qtexts(context, selector, applyTrim, true);
+  if (!nodes) return null;
+
+  const text = nodes.join(' ');
+
+  return applyTrim ? trim(text) : text;
 }
 
 function qmeta(context, selector, attrArg = 'content', applyTrim = true) {

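qtexts exposes an element's direct text nodes individually (trimmed and, by default, filtered of empties), and qtext is rebuilt on top of it; a sketch against hypothetical markup:

    // <div class="bio">Age: 25<br>Height: 5'7"</div>
    qtexts(document, '.bio'); // => ['Age: 25', `Height: 5'7"`]
    qtext(document, '.bio');  // => `Age: 25 Height: 5'7"`
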
@@ -161,6 +168,7 @@ const funcs = {
   qlength,
   qmeta,
   qtext,
+  qtexts,
   qtrailer,
   qtrailers,
   qurl,

@@ -176,6 +184,9 @@ const funcs = {
   qt: qtrailer,
   qts: qtrailers,
   qtx: qtext,
+  qtxt: qtext,
+  qtxs: qtexts,
+  qtxts: qtexts,
   qu: qurl,
   qus: qurls,
 };