Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.

This commit is contained in:
2020-05-14 04:26:05 +02:00
parent f1eb29c713
commit 11eb66f834
178 changed files with 16594 additions and 16929 deletions

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene,
};

View File

@@ -3,37 +3,37 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
/**
 * For the 'bubblegumdungeon' and 'ladygonzo' sites, reduce the release title
 * to its second `:`- or `|`-separated segment; other sites pass through.
 *
 * @param {object} release - scraped release; `title` is read.
 * @param {object} site - site entry; `slug` is read.
 * @returns {object} a copy with the curated title, or the original release
 *   when the site is not affected or the title has no second segment.
 */
function curateRelease(release, site) {
	if (!['bubblegumdungeon', 'ladygonzo'].includes(site.slug)) {
		return release;
	}

	// A title without a separator (or no title at all) has no second segment;
	// keep the release untouched instead of throwing on `undefined.trim()`.
	const curatedTitle = release.title?.split(/:|\|/)[1]?.trim();

	if (curatedTitle === undefined) {
		return release;
	}

	return {
		...release,
		title: curatedTitle,
	};
}
/**
 * Fetch a scene through the gamma `fetchScene` and run the result through
 * `curateRelease` for this network.
 *
 * @param {string} url - scene URL.
 * @param {object} site - site entry.
 * @param {object} release - passed through to `fetchScene`.
 * @returns {Promise<object>} the curated scene.
 */
async function networkFetchScene(url, site, release) {
	const scene = await fetchScene(url, site, release);

	return curateRelease(scene, site);
}
/**
 * Fetch the latest releases through the gamma API scraper and curate each one.
 *
 * @param {object} site - site entry.
 * @param {number} [page=1] - page to fetch.
 * @returns {Promise<*>} curated releases, or whatever `fetchApiLatest`
 *   returned when that is not an array.
 */
async function fetchLatest(site, page = 1) {
	const releases = await fetchApiLatest(site, page, false);

	// NOTE(review): other scrapers in this project return an HTTP status on
	// failure; presumably gamma can too, so pass non-array results through
	// rather than throwing on `.map` — confirm against ./gamma.
	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
/**
 * Fetch upcoming releases through the gamma API scraper and curate each one.
 *
 * @param {object} site - site entry.
 * @param {number} [page=1] - page to fetch.
 * @returns {Promise<*>} curated releases, or whatever `fetchApiUpcoming`
 *   returned when that is not an array.
 */
async function fetchUpcoming(site, page = 1) {
	const releases = await fetchApiUpcoming(site, page, false);

	// NOTE(review): other scrapers in this project return an HTTP status on
	// failure; presumably gamma can too, so pass non-array results through
	// rather than throwing on `.map` — confirm against ./gamma.
	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
module.exports = {
fetchLatest,
fetchProfile: fetchApiProfile,
fetchScene: networkFetchScene,
fetchUpcoming,
fetchLatest,
fetchProfile: fetchApiProfile,
fetchScene: networkFetchScene,
fetchUpcoming,
};

View File

@@ -3,47 +3,47 @@
const { fetchLatest, fetchScene } = require('./julesjordan');
/**
 * Derive actor names from the scene title when the scene lists no actors,
 * and tag scenes with more than one actor, or a threesome/threeway title,
 * as 'mff'.
 *
 * Note: `release` aliases `scene`, so the passed object is modified and returned.
 *
 * @param {object} scene - scraped scene; `title`, `actors` and `tags` are read.
 * @returns {object} the same scene object, possibly with `actors`/`tags` set.
 */
function extractActors(scene) {
	const release = scene;
	// A scene without a title simply yields no matches instead of throwing.
	const title = scene.title || '';

	if (!scene.actors || scene.actors.length === 0) {
		const introActorMatches = title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i);
		const introTwoActorMatches = title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i);
		const returnActorMatches = title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i);
		const returnTwoActorMatches = title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i);

		// Two-actor patterns take precedence over their single-actor counterparts.
		const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1);

		// Drop unmatched capture groups and captures that contain ordinary title words.
		const actors = rawActors?.filter((actor) => {
			if (!actor) return false;
			if (/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor)) return false;

			return true;
		});

		if (actors) {
			release.actors = actors;
		}
	}

	// Case-insensitive like the other title patterns, so "Threesome" is tagged too.
	if (release.actors?.length > 1 || /threesome|threeway/i.test(title)) {
		release.tags = scene.tags ? [...scene.tags, 'mff'] : ['mff'];
	}

	return release;
}
/**
 * Fetch the latest scenes through the julesjordan scraper and run each one
 * through `extractActors`.
 *
 * @param {object} site - site entry.
 * @param {number} [page=1] - page to fetch.
 * @returns {Promise<object[]>} the processed scenes.
 */
async function fetchLatestWrap(site, page = 1) {
	const latest = await fetchLatest(site, page);

	return latest.map(scene => extractActors(scene));
}
/**
 * Fetch a single scene through the julesjordan scraper and run it through
 * `extractActors`.
 *
 * @param {string} url - scene URL.
 * @param {object} site - site entry.
 * @returns {Promise<object>} the processed scene.
 */
async function fetchSceneWrap(url, site) {
	const scene = await fetchScene(url, site);

	return extractActors(scene);
}
module.exports = {
fetchLatest: fetchLatestWrap,
fetchScene: fetchSceneWrap,
fetchLatest: fetchLatestWrap,
fetchScene: fetchSceneWrap,
};

View File

@@ -3,7 +3,7 @@
const { get, geta, ctxa } = require('../utils/q');
function extractActors(actorString) {
return actorString
return actorString
?.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '') // remove Patient:, (date) and other nonsense
.split(/\band\b|\bvs\b|\/|,|&/ig)
.map(actor => actor.trim())
@@ -12,120 +12,120 @@ function extractActors(actorString) {
}
/**
 * Select the known models whose name occurs in the actor string
 * (case-insensitive).
 *
 * @param {string} actorString - text to search.
 * @param {Array<{name: string}>} models - known models.
 * @returns {Array<object>} the models whose name was found.
 */
function matchActors(actorString, models) {
	if (!actorString) {
		return [];
	}

	return (models || []).filter((model) => {
		if (!model?.name) return false;

		// Match the name literally: unescaped, "Dr. Gray" would match "DrX Gray"
		// and a name containing "(" or "+" would throw or mis-match.
		const escapedName = model.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

		return new RegExp(escapedName, 'i').test(actorString);
	});
}
/**
 * Build release objects from the scene items of an overview page.
 *
 * @param {Array<{qu: object}>} scenes - query contexts, one per scene item.
 * @param {object} site - site entry; `url` is used to build absolute URLs.
 * @param {Array<object>} models - known models, passed to `matchActors`.
 * @returns {Array<object>} scraped releases.
 */
function scrapeLatest(scenes, site, models) {
	return scenes.map(({ qu }) => {
		const release = {};

		// The first character of the link is dropped before appending it to the site URL.
		const pathname = qu.url('a.itemimg').slice(1);

		[release.entryId] = pathname.split('/').slice(-1);
		release.url = `${site.url}${pathname}`;

		release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
		release.description = qu.q('.mas_longdescription', true);
		release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);

		// Prefer known models; fall back to parsing names out of the string.
		const actorString = qu.q('.mas_description', true);
		const actors = matchActors(actorString, models);

		if (actors.length > 0) release.actors = actors;
		else release.actors = extractActors(actorString);

		// Only set a poster when the image was found, to avoid a ".../null" URL.
		const posterPath = qu.img('.itemimg img');
		if (posterPath) release.poster = `${site.url}/${posterPath}`;

		return release;
	});
}
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: object}} item - raw page HTML and its query context.
 * @param {string} url - scene URL; its last path segment becomes the entry ID.
 * @param {object} site - site entry; `url` is used to build absolute URLs.
 * @param {Array<object>} models - known models, passed to `matchActors`.
 * @returns {object} the scraped release.
 */
function scrapeScene({ html, qu }, url, site, models) {
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = qu.q('.mas_title', true);
	release.description = qu.q('.mas_longdescription', true);
	release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	// The description element also holds the date; strip it before looking for names.
	const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, '');
	const actors = matchActors(actorString, models);

	if (actors.length > 0) release.actors = actors;
	else release.actors = extractActors(actorString);

	release.tags = qu.all('.tags a', true);

	release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);

	// Look for the poster path after the 'splash:' marker. The marker's position
	// must be a number: passing the string itself as `fromIndex` makes the search
	// start at 0. Fall back to 0 when the marker is absent.
	const splashIndex = Math.max(html.indexOf('splash:'), 0);
	const posterStart = html.indexOf('faceimages/', splashIndex);
	const posterEnd = posterStart === -1 ? -1 : html.indexOf('.jpg', posterStart);

	if (posterEnd !== -1) release.poster = `${site.url}/${html.slice(posterStart, posterEnd + 4)}`;

	// Same approach for the trailer, anchored on the 'video/mp4' marker.
	const videoIndex = Math.max(html.indexOf('video/mp4'), 0);
	const trailerStart = html.indexOf('/content', videoIndex);
	const trailerEnd = trailerStart === -1 ? -1 : html.indexOf('.mp4', trailerStart);

	if (trailerEnd !== -1) release.trailer = { src: `${site.url}${html.slice(trailerStart, trailerEnd + 4)}` };

	return release;
}
/**
 * Build actor objects from the '.item' elements of a models page.
 *
 * @param {{el: object}} item - page context; `el` is handed to `ctxa`.
 * @param {object} site - site entry; `url` is used to build absolute URLs.
 * @returns {Array<object>} actors with gender, avatar, name and url.
 */
function extractModels({ el }, site) {
	const models = ctxa(el, '.item');

	return models.map(({ qu }) => {
		// Every actor produced here is marked as female.
		const actor = { gender: 'female' };

		const avatar = qu.q('.itemimg img');
		actor.avatar = `${site.url}/${avatar.src}`;

		// The name is the part of the alt text after the last ':', minus filler words.
		actor.name = avatar.alt
			.split(':').slice(-1)[0]
			.replace(/xtreme girl|nurse/ig, '')
			.trim();

		// The first character of the link is dropped before appending it to the site URL.
		const actorPath = qu.url('.itemimg');
		actor.url = `${site.url}${actorPath.slice(1)}`;

		return actor;
	});
}
/**
 * Recursively fetch all model overview pages and collect the models on them.
 *
 * @param {object} site - site entry; `url` is used to build the page URL.
 * @param {number} [page=1] - page to fetch.
 * @param {Array<object>} [accModels=[]] - models collected from earlier pages.
 * @returns {Promise<Array<object>>} all models plus a fixed 'Dr. Gray' entry,
 *   or an empty array as soon as any page request fails.
 */
async function fetchModels(site, page = 1, accModels = []) {
	const url = `${site.url}/?models/${page}`;
	const res = await get(url);

	if (res.ok) {
		const models = extractModels(res.item, site);

		// There is a next page when the pagination lists the following page number.
		const nextPage = res.item.qa('.pagenumbers', true)
			.map(pageX => Number(pageX))
			.filter(Boolean) // remove << and >>
			.includes(page + 1);

		if (nextPage) {
			return fetchModels(site, page + 1, accModels.concat(models));
		}

		// 'Dr. Gray' is always appended to the scraped list.
		return accModels.concat(models, { name: 'Dr. Gray' });
	}

	return [];
}
/**
 * Fetch an overview page and scrape the scenes on it.
 *
 * @param {object} site - site entry; `url` and `parameters.a` build the page URL.
 * @param {number} [page=1] - page to fetch.
 * @param {Array<object>} models - known models, passed to `scrapeLatest`.
 * @returns {Promise<Array<object>|number>} scraped releases, or the response
 *   status when the request failed.
 */
async function fetchLatest(site, page = 1, models) {
	const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
	const res = await geta(url, '.item');

	return res.ok ? scrapeLatest(res.items, site, models) : res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL.
 * @param {object} site - site entry.
 * @param {object} release - unused in this function.
 * @param {Array<object>} [beforeFetchLatest] - models fetched earlier; when
 *   falsy the models are fetched here via `fetchModels`.
 * @returns {Promise<object|number>} the scraped release, or the response
 *   status when the request failed.
 */
async function fetchScene(url, site, release, beforeFetchLatest) {
	const models = beforeFetchLatest || await fetchModels(site);
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
beforeFetchLatest: fetchModels,
fetchLatest,
fetchScene,
beforeFetchLatest: fetchModels,
};

View File

@@ -5,141 +5,141 @@ const { get, getAll, initAll, extractDate } = require('../utils/qu');
const { feetInchesToCm } = require('../utils/convert');
/**
 * List candidate URLs for an image, largest variant first: the '-4x', '-3x'
 * and '-2x' counterparts of a '-1x.jpg' source, followed by the source itself.
 *
 * @param {string} source - image URL.
 * @returns {string[]} four candidate URLs.
 */
function getFallbacks(source) {
	const upscaled = ['-4x.jpg', '-3x.jpg', '-2x.jpg']
		.map(suffix => source.replace('-1x.jpg', suffix));

	return [...upscaled, source];
}
/**
 * Build release objects from scene items.
 *
 * @param {Array<{qu: object}>} scenes - query contexts, one per scene item.
 * @param {object} [site] - site entry; `url` is passed to `qu.img` as the
 *   third argument. May be omitted.
 * @returns {Array<object>} scraped releases.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		// `?.[1]` leaves the entry ID undefined instead of throwing when the ID does not match.
		release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)?.[1];
		release.url = qu.url('a');

		release.title = qu.q('h5 a', true);
		release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');

		release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');

		// The 'cnt' attribute holds the number of numbered `src<N>_1x` attributes;
		// the first image becomes the poster, the rest the photos.
		const photoCount = qu.q('.stdimage', 'cnt');
		[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
			// `site` is optional here because the profile scraper calls this without one.
			const source = qu.img('.stdimage', `src${index}_1x`, site?.url);

			return getFallbacks(source);
		});

		return release;
	});
}
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: object}} item - raw page HTML and its query context.
 * @param {string} url - scene URL, stored on the release.
 * @returns {object} the scraped release.
 */
function scrapeScene({ html, qu }, url) {
	const release = { url };

	// `?.[1]` leaves the entry ID undefined instead of throwing when the ID does not match.
	release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)?.[1];

	release.title = qu.q('h2', true);
	release.description = qu.q('p', true);

	// The date is taken from the raw HTML rather than from a specific element.
	release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);

	release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
		name: qu.q(actor, null, true),
		url: qu.url(actor, null),
	}));

	release.tags = qu.all('.video_categories a', true);

	release.duration = qu.dur('.video_categories + p');

	const poster = qu.img('a img');

	release.poster = getFallbacks(poster);
	release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));

	return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {{el: object, qu: object}} item - page element and its query context.
 * @param {object} [site] - site entry, forwarded to `scrapeAll` for the
 *   releases listed on the profile.
 * @returns {object} the scraped profile.
 */
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	// The bio is laid out as <strong>Label</strong> followed by a text node;
	// pair each label with the text node (nodeType 3) right after it.
	const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
		const nextNode = nodes[index + 1];

		if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
			acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
		}

		return acc;
	}, {});

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.age) profile.age = Number(bio.age);

	// A three-digit value is used as-is; a feet/inches value is converted.
	if (bio.height && /\d{3}/.test(bio.height)) profile.height = Number(bio.height.match(/\d+/)[0]);
	if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// The bust is only kept when it is digits followed by letters.
		if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();

	if (bio.birth_location) profile.birthPlace = bio.birth_location;
	if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;

	if (bio.eye_color) profile.eyes = bio.eye_color;

	const avatar = qu.img('.tac img');
	profile.avatar = getFallbacks(avatar);

	// Forward `site`: scrapeAll reads the site URL when resolving image sources.
	profile.releases = scrapeAll(initAll(el, '.featured-video'), site);

	return profile;
}
/**
 * Fetch a page of the movies overview and scrape the scenes on it.
 *
 * @param {object} site - site entry; `url` is used to build the page URL.
 * @param {number} [page=1] - page to fetch; defaults to the first page so an
 *   omitted argument does not end up as "undefined" in the URL.
 * @returns {Promise<Array<object>|number>} scraped releases, or the response
 *   status when the request failed.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/tour/categories/movies_${page}_d.html`;
	const res = await getAll(url, '.featured-video');

	if (res.ok) {
		return scrapeAll(res.items, site);
	}

	return res.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL.
 * @param {object} site - site entry, passed on to `scrapeScene`.
 * @returns {Promise<object|number>} the scraped release, or the response
 *   status when the request failed.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.page-content .row');

	if (res.ok) {
		return scrapeScene(res.item, url, site);
	}

	return res.status;
}
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {string} actorName - actor name; slugified without a delimiter for the URL.
 * @param {string} scraperSlug - unused in this function.
 * @param {object} site - site entry; `url` is used to build the profile URL.
 * @returns {Promise<object|number>} the scraped profile, or the response
 *   status when the request failed.
 */
async function fetchProfile(actorName, scraperSlug, site) {
	const actorSlug = slugify(actorName, '');
	const url = `${site.url}/tour/models/${actorSlug}.html`;
	const res = await get(url, '.page-content .row');

	if (res.ok) {
		// Pass the site along so it is available when scraping the profile's releases.
		return scrapeProfile(res.item, site);
	}

	return res.status;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Look up an actor profile through the mindgeek scraper, fixed to the
 * 'babes' network slug.
 *
 * @param {string} actorName - actor name.
 * @returns {Promise<*>} whatever the mindgeek `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	const profile = await fetchProfile(actorName, 'babes');

	return profile;
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -6,144 +6,144 @@ const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Build release objects from episode items.
 *
 * @param {Array<{qu: object}>} scenes - query contexts, one per episode item.
 * @param {object} site - site entry; `url` prefixes relative image paths.
 * @returns {Array<object>} scraped releases.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
		release.duration = qu.dur('.item-meta li:nth-child(2)');
		release.description = qu.q('.description', true);

		release.actors = qu.all('a[href*="/models"]', true);
		if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

		// Each thumbnail yields a list of sources, largest first; the first
		// thumbnail becomes the poster, the rest the photos.
		[release.poster, ...release.photos] = qu.all('.item-thumbs img')
			.map(source => [
				source.getAttribute('src0_3x'),
				source.getAttribute('src0_2x'),
				source.getAttribute('src0_1x'),
			]
				.filter(Boolean)
				.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));

		// The entry ID is composed from the release date and the slugified title.
		release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

		return release;
	});
}
/**
 * Build a release object from an episode page.
 *
 * @param {{html: string, qu: object}} item - raw page HTML and its query context.
 * @param {string} url - scene URL, stored on the release.
 * @param {object} site - site entry; `url` prefixes relative media paths.
 * @returns {object} the scraped release.
 */
function scrapeScene({ html, qu }, url, site) {
	const release = { url };

	release.title = qu.q('.item-episode h4 a', true);
	release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.item-meta li:nth-child(2)');
	release.description = qu.q('.description', true);

	release.actors = qu.all('.item-episode a[href*="/models"]', true);
	if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

	// `[^"]*` keeps the capture inside one attribute value; a greedy `.*` runs
	// on to the last `.jpg"`/`.mp4"` on the same line.
	const posterPath = html.match(/poster="([^"]*\.jpg)"/)?.[1];
	const trailerPath = html.match(/video src="([^"]*\.mp4)"/)?.[1];

	if (posterPath) {
		const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;

		// Larger variants first, the original source last.
		release.poster = [
			poster.replace('-1x', '-3x'),
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	if (trailerPath) {
		const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;

		release.trailer = { src: trailer };
	}

	// The entry ID is composed from the release date and the slugified title.
	release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

	return release;
}
/**
 * Recursively fetch every page of an actor's sets and scrape the episodes.
 *
 * @param {string} actorId - actor ID used in the sets URL.
 * @param {object} site - site entry; `url` is used to build the page URL.
 * @param {number} [page=1] - page to fetch.
 * @param {Array<object>} [accScenes=[]] - releases collected from earlier pages.
 * @returns {Promise<Array<object>>} all releases collected so far.
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
	const res = await get(url);

	// Keep what earlier pages produced when a later page fails; on the first
	// page this is still an empty array.
	if (!res.ok) return accScenes;

	const quReleases = initAll(res.item.el, '.item-episode');
	const releases = scrapeAll(quReleases, site);

	// Continue while the page links to the following page number.
	const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

	if (nextPage) {
		return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
	}

	return accScenes.concat(releases);
}
/**
 * Build an actor profile from a model page.
 *
 * @param {{qu: object}} item - query context of the page.
 * @param {object} site - site entry; `url` prefixes relative avatar paths.
 * @param {boolean} withScenes - when truthy, the actor's releases are fetched too.
 * @returns {Promise<object>} the scraped profile.
 */
async function scrapeProfile({ qu }, site, withScenes) {
	const profile = {};

	// Stats rows are read as "Label: value". Split on the first colon only and
	// skip rows without one, which would otherwise throw on `value.trim()`.
	const bio = {};

	qu.all('.stats li', true).forEach((row) => {
		const separatorIndex = row.indexOf(':');

		if (separatorIndex !== -1) {
			bio[slugify(row.slice(0, separatorIndex), '_')] = row.slice(separatorIndex + 1).trim();
		}
	});

	if (bio.height) profile.height = feetInchesToCm(bio.height);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	// Avatar sources, largest first; relative paths get the site URL prepended.
	profile.avatar = [
		qu.q('.profile-pic img', 'src0_3x'),
		qu.q('.profile-pic img', 'src0_2x'),
		qu.q('.profile-pic img', 'src0_1x'),
	].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

	if (withScenes) {
		// The actor ID is read from the avatar's `set-target-<id>` element ID.
		const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

		if (actorId) {
			profile.releases = await fetchActorReleases(actorId, site);
		}
	}

	return profile;
}
/**
 * Fetch a page of the latest movies and scrape the episodes on it.
 *
 * @param {object} site - site entry; `url` is used to build the page URL.
 * @param {number} [page=1] - page to fetch.
 * @returns {Promise<Array<object>|number>} scraped releases, or the response
 *   status when the request failed.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies/${page}/latest/`;
	const res = await geta(url, '.item-episode');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch an episode page and scrape it.
 *
 * @param {string} url - scene URL.
 * @param {object} site - site entry.
 * @returns {Promise<object|number>} the scraped release, or the response
 *   status when the request failed.
 */
async function fetchScene(url, site) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Fetch and scrape the profile page of an actor, trying two URL variants.
 *
 * @param {string} actorName - actor name.
 * @param {string} scraperSlug - unused in this function.
 * @param {object} site - site entry; `url` is used to build the profile URLs.
 * @param {object} [include] - options; `include.scenes` requests the actor's releases.
 * @returns {Promise<object|number>} the scraped profile, or the response
 *   status of the last attempt when both requests failed.
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	// Try the slug without a delimiter first, then the default slug.
	const actorSlugA = slugify(actorName, '');
	const actorSlugB = slugify(actorName);

	const resA = await get(`${site.url}/models/${actorSlugA}.html`);
	const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);

	// `include` is optional; without it no releases are requested.
	return res.ok ? scrapeProfile(res.item, site, include?.scenes) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -8,99 +8,99 @@ const clusterId = '617fb597b659459bafe6472470d9073a';
// Value sent as `Authorization: Basic <authKey>` on the search cluster requests below.
// NOTE(review): this appears to be a hard-coded credential committed to source — consider loading it from configuration.
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
// Maps the single-letter gender codes on scene actors to gender names.
const genderMap = {
	M: 'male',
	F: 'female',
};
/**
 * Build the URL of a screenshot image.
 *
 * @param {{screenId: *}} item - screenshot entry.
 * @param {{dvd: {id: *}, order: *}} scene - scene the screenshot belongs to.
 * @returns {string} screenshot URL on i.bang.com.
 */
function getScreenUrl(item, scene) {
	const { dvd, order } = scene;
	const { screenId } = item;

	return `https://i.bang.com/screenshots/${dvd.id}/movie/${order}/${screenId}.jpg`;
}
/**
 * Encode a hexadecimal ID as base64 with '+', '/' and '=' replaced by
 * '-', '_' and ',' respectively.
 *
 * @param {string} id - hexadecimal ID.
 * @returns {string} the encoded ID, as used in video URLs.
 */
function encodeId(id) {
	const substitutions = { '+': '-', '/': '_', '=': ',' };
	const base64Id = Buffer.from(id, 'hex').toString('base64');

	return base64Id.replace(/[+\/=]/g, character => substitutions[character]);
}
/**
 * Decode an ID produced by `encodeId`: restore '+', '/' and '=' from
 * '-', '_' and ',' and convert the base64 back to hexadecimal.
 *
 * @param {string} id - encoded ID taken from a video URL.
 * @returns {string} the hexadecimal ID.
 */
function decodeId(id) {
	const restoredId = id
		.replace(/-/g, '+')
		.replace(/_/g, '/')
		.replace(/,/g, '=');

	return Buffer
		.from(restoredId, 'base64')
		.toString('hex');
}
/**
 * Convert a scene document from the search cluster into a release object.
 *
 * @param {object} scene - scene document.
 * @param {object} site - site entry, stored on the release.
 * @returns {object} the release.
 */
function scrapeScene(scene, site) {
	const release = {
		site,
		entryId: scene.id,
		title: scene.name,
		description: scene.description,
		tags: scene.genres.concat(scene.actions).map(genre => genre.name),
		duration: scene.duration,
	};

	const slug = slugify(release.title);
	release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;

	// Keep only the UTC calendar date of the release timestamp.
	const date = new Date(scene.releaseDate);
	release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));

	release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));

	if (scene.is4k) release.tags.push('4k');
	if (scene.gay) release.tags.push('gay');

	// The screenshot flagged as default is the poster; without one, the first
	// non-default screenshot is promoted and left out of the photos.
	const defaultPoster = scene.screenshots.find(photo => photo.default === true);
	const photoset = scene.screenshots.filter(photo => photo.default === false);

	const photos = defaultPoster ? photoset : photoset.slice(1);
	const poster = defaultPoster || photoset[0];

	// A scene without usable screenshots has no poster; skip it instead of throwing.
	if (poster) release.poster = getScreenUrl(poster, scene);
	release.photos = photos.map(photo => getScreenUrl(photo, scene));

	release.trailer = {
		src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
	};

	// The channel is the series name without '!', spaces and '.', with every
	// '&' spelled out (a string pattern would replace only the first one).
	release.channel = scene.series.name
		.replace(/[! .]/g, '')
		.replace(/&/g, 'and');

	return release;
}
/**
 * Convert search hits into releases by scraping each hit's `_source` document.
 *
 * @param {Array<{_source: object}>} scenes - search hits.
 * @param {object} site - site entry.
 * @returns {Array<object>} releases, in hit order.
 */
function scrapeLatest(scenes, site) {
	const releases = [];

	for (const { _source: scene } of scenes) {
		releases.push(scrapeScene(scene, site));
	}

	return releases;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
from: (page - 1) * 50,
query: {
bool: {
must: [
{
match: {
status: 'ok',
},
},
{
range: {
releaseDate: {
lte: 'now',
},
},
},
/*
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
size: 50,
from: (page - 1) * 50,
query: {
bool: {
must: [
{
match: {
status: 'ok',
},
},
{
range: {
releaseDate: {
lte: 'now',
},
},
},
/*
* global fetch
{
nested: {
@@ -122,66 +122,66 @@ async function fetchLatest(site, page = 1) {
},
},
*/
{
nested: {
path: 'series',
query: {
bool: {
must: [
{
match: {
'series.id': {
operator: 'AND',
query: site.parameters.siteId,
},
},
},
],
},
},
},
},
],
must_not: [
{
match: {
type: 'trailer',
},
},
],
},
},
sort: [
{
releaseDate: {
order: 'desc',
},
},
],
}, {
encodeJSON: true,
headers: {
Authorization: `Basic ${authKey}`,
},
});
{
nested: {
path: 'series',
query: {
bool: {
must: [
{
match: {
'series.id': {
operator: 'AND',
query: site.parameters.siteId,
},
},
},
],
},
},
},
},
],
must_not: [
{
match: {
type: 'trailer',
},
},
],
},
},
sort: [
{
releaseDate: {
order: 'desc',
},
},
],
}, {
encodeJSON: true,
headers: {
Authorization: `Basic ${authKey}`,
},
});
return scrapeLatest(res.body.hits.hits, site);
return scrapeLatest(res.body.hits.hits, site);
}
/**
 * Fetches a single video document by scene URL and scrapes it into a release.
 *
 * @param {string} url - scene URL; the element at index 2 of the '/'-split pathname is the encoded ID
 * @param {Object} site - passed through to scrapeScene
 * @returns {Promise<Object>} the result of scrapeScene for the fetched document
 */
async function fetchScene(url, site) {
	const encodedId = new URL(url).pathname.split('/')[2];
	const entryId = decodeId(encodedId);

	const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
		headers: {
			Authorization: `Basic ${authKey}`,
		},
	});

	return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
}
// Public scraper interface of this module.
module.exports = { fetchLatest, fetchScene };

View File

@@ -10,44 +10,44 @@ const slugify = require('../utils/slugify');
const { ex } = require('../utils/q');
/**
 * Scrapes all scene thumbnails (`.echThumb`) on a listing page into releases.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - attached to every release as `site`
 * @returns {Array<Object>} one release per thumbnail
 */
function scrape(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.echThumb').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.thmb_lnk');

		const title = sceneLinkElement.attr('title');
		const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
		// the link ID is split on '-' and the second part is used as shoot ID
		const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
		// first path segment of the scene URL with its first 5 characters dropped
		const entryId = url.split('/')[3].slice(5);

		const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
		const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();

		const photoElement = $(element).find('.rollover-image');
		const poster = `https:${photoElement.attr('data-original')}`;

		// rollover photos are numbered big1.jpg .. big<max>.jpg under the rollover URL
		const photosUrl = photoElement.attr('data-rollover-url');
		const photosMaxIndex = photoElement.attr('data-rollover-max-index');
		const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);

		const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
		// last path segment of the '/websites' link
		const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];

		return {
			url,
			entryId,
			shootId,
			title,
			actors,
			date,
			duration,
			poster,
			photos,
			rating: null,
			site,
			channel,
		};
	});
}
/* no dates available, breaks database
@@ -80,63 +80,63 @@ function scrapeUpcoming(html, site) {
*/
/**
 * Scrapes a scene page into a release.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; the trailing digits of its first path segment become the entry ID
 * @param {Object} _site - unused
 * @returns {Object} the scraped release
 */
function scrapeScene(html, url, _site) {
	const { qu } = ex(html, '.playerSection');
	const release = {};

	[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
	[release.entryId] = url.split('/')[3].match(/\d+$/);
	release.title = qu.q('.ps-vdoHdd h1', true);
	release.description = qu.q('.vdoDesc', true);

	release.actors = qu.all('a[href*="/model"]', true);
	release.tags = qu.all('.vdoTags a', true);

	// leading number of the like element divided by 20 (presumably a percentage mapped to 0-5 stars; confirm)
	release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;

	const poster = qu.img('img#player-overlay-image');
	release.poster = [
		poster,
		poster.replace('/big_trailer', '/members/450x340'), // load error fallback
	];

	release.trailer = { src: qu.trailer() };

	// all scenes seem to have 12 album photos available, not always included on the page
	const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
	release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));

	const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);

	// single if/else-if chain: with two separate ifs, the trailing else overwrote the 'bangcasting' mapping
	if (channel === 'bangcasting') release.channel = 'bangbroscasting';
	else if (channel === 'remaster') release.channel = 'bangbrosremastered';
	else release.channel = channel;

	return release;
}
/**
 * Scrapes an actor page into a profile with an optional avatar and the releases listed on the page.
 *
 * @param {string} html - actor page markup
 * @returns {Object} profile with `releases`, and `avatar` when a profile picture is found
 */
function scrapeProfile(html) {
	const { q } = ex(html);
	const profile = {};

	const avatar = q('.profilePic img', 'src');
	if (avatar) profile.avatar = `https:${avatar}`;

	// no site is passed, so the releases carry `site: undefined`
	profile.releases = scrape(html);

	return profile;
}
/**
 * Finds the actor page link in a search results page.
 *
 * @param {string} html - search results markup
 * @param {string} actorName - matched case-insensitively against the link's title attribute
 * @returns {string|null} absolute actor URL, or null when no matching link is found
 */
function scrapeProfileSearch(html, actorName) {
	const { qu } = ex(html);
	const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);

	return actorLink ? `https://bangbros.com${actorLink}` : null;
}
/**
 * Fetches one page of the site's listing and scrapes it into releases.
 *
 * @param {Object} site - its `url` is the listing base URL
 * @param {number} [page=1] - page number appended to the URL
 * @returns {Promise<Array<Object>>} releases scraped from the page
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/${page}`);

	return scrape(res.body.toString(), site);
}
/*
@@ -148,43 +148,43 @@ async function fetchUpcoming(site) {
*/
/**
 * Fetches and scrapes a scene page from bangbros.com.
 *
 * @param {string} url - scene URL; must be on bangbros.com or www.bangbros.com
 * @param {Object} site - passed through to scrapeScene
 * @param {Object} [release] - base release; a warning is logged when it has no date
 * @returns {Promise<Object>} the scraped release
 * @throws {Error} when the URL is not on bangbros.com
 */
async function fetchScene(url, site, release) {
	if (!release?.date) {
		logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
	}

	const { origin } = new URL(url);

	// validate before requesting, so unsupported hosts are never contacted;
	// the pattern is anchored and its dots are escaped so only the exact hosts match
	if (!/^https?:\/\/(www\.)?bangbros\.com$/.test(origin)) {
		throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
	}

	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Looks an actor up through the site search and scrapes their profile page.
 *
 * @param {string} actorName - actor name; slugified for the search URL
 * @returns {Promise<Object|null>} the scraped profile, or null when the search or
 *   profile request does not return status 200 or no matching actor link is found
 */
async function fetchProfile(actorName) {
	const actorSlug = slugify(actorName);
	const url = `https://bangbros.com/search/${actorSlug}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			if (actorRes.statusCode === 200) {
				return scrapeProfile(actorRes.body.toString());
			}
		}
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
// fetchUpcoming, no dates available
fetchLatest,
fetchScene,
fetchProfile,
// fetchUpcoming, no dates available
};

View File

@@ -5,33 +5,33 @@
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
/**
 * Fetches a scene through the shared gamma scraper and, for network sites, rewrites the
 * release URL so it points at the channel's own domain.
 *
 * @param {string} url - scene URL on blowpass.com
 * @param {Object} site - the URL is only rewritten when `site.isNetwork` is set
 * @param {Object} [baseRelease] - passed through to fetchScene
 * @returns {Promise<Object>} the release, with `url` rewritten when applicable
 */
async function fetchSceneWrapper(url, site, baseRelease) {
	const release = await fetchScene(url, site, baseRelease);

	if (site.isNetwork && release.channel) {
		const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
		// these two channels get a 'scene/' path segment, all others keep 'video/'
		const usesScenePath = ['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel);

		release.url = channelUrl.replace(/video\/\w+\//, usesScenePath ? 'scene/' : 'video/');
	}

	return release;
}
/**
 * Builds the URL of a page of an actor's releases on Blowpass.
 *
 * @param {string} actorPath - actor path, appended directly after the '0' segment
 * @param {number} [page=1] - page number
 * @returns {string} the listing URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	const url = `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;

	return url;
}
/**
 * Fetches an actor profile through the shared gamma scraper, supplying the
 * Blowpass-specific releases URL builder.
 *
 * @param {string} actorName - passed through to fetchProfile
 * @param {string} scraperSlug - passed through to fetchProfile
 * @param {Object} site - unused; null is passed to fetchProfile in its place
 * @param {*} include - passed through to fetchProfile
 * @returns {Promise<*>} whatever fetchProfile resolves to
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	return fetchProfile(
		actorName,
		scraperSlug,
		null,
		getActorReleasesUrl,
		include,
	);
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene: fetchSceneWrapper,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene: fetchSceneWrapper,
};

View File

@@ -5,90 +5,90 @@ const bhttp = require('bhttp');
const { ex } = require('../utils/q');
/**
 * Scrapes a Boobpedia actor page into a profile.
 *
 * @param {string} html - actor page markup
 * @returns {Object} profile; fields are only set when the matching infobox row is present and parseable
 */
function scrapeProfile(html) {
	const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
	const profile = {};

	// infobox rows become { Key_name: value }: the key's last character is dropped and
	// whitespace, '+', '|' and '/' are replaced by underscores
	const bio = qu.all('.infobox tr[valign="top"]')
		.map(detail => qu.all(detail, 'td', true))
		.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});

	/* unreliable, see: Syren De Mer
	const catlinks = qa('#mw-normal-catlinks a', true);
	const isTrans = catlinks.some(link => link.match(/shemale|transgender/i));
	profile.gender = isTrans ? 'transsexual' : 'female';
	*/

	profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');

	profile.description = qu.q('#mw-content-text > p', true);

	if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	if (bio.Measurements) {
		const measurements = bio.Measurements
			.match(/\d+(\w+)?-\d+-\d+/g)
			?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
			.split('-');

		// account for measurements being just e.g. '32EE'
		if (measurements) {
			const [bust, waist, hip] = measurements;

			if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included

			profile.waist = Number(waist);
			profile.hip = Number(hip);
		}

		if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
	}

	if (bio.Bra_cup_size) {
		const bust = bio.Bra_cup_size.match(/^\d+\w+/);
		if (bust) [profile.bust] = bust;
	}

	if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
	if (bio.Boobs === 'Natural') profile.naturalBoobs = true;

	if (bio.Height) {
		// last decimal number in the field, times 100 (presumably metres to cm; confirm);
		// skipped instead of throwing when the field holds no decimal number
		const lastDecimal = bio.Height.match(/\d+\.\d+/g)?.slice(-1)[0];
		if (lastDecimal) profile.height = Number(lastDecimal) * 100;
	}

	if (bio.Weight) {
		// second number in the field (presumably the metric figure; confirm);
		// skipped instead of storing NaN when fewer than two numbers are present
		const secondNumber = bio.Weight.match(/\d+/g)?.[1];
		if (secondNumber) profile.weight = Number(secondNumber);
	}

	if (bio.Eye_color) profile.eyes = bio.Eye_color;
	if (bio.Hair) [profile.hair] = bio.Hair.split(',');

	if (bio.Blood_group) profile.blood = bio.Blood_group;
	if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');

	const avatarThumbPath = qu.img('.image img');

	if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
		// drop the last path segment and the 'thumb/' directory from the thumbnail path
		const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');

		profile.avatar = {
			src: `http://www.boobpedia.com${avatarPath}`,
			copyright: null,
		};
	}

	profile.social = qu.urls('.infobox a.external');

	return profile;
}
/**
 * Fetches and scrapes an actor's Boobpedia page.
 *
 * @param {string} actorName - actor name; whitespace runs become underscores in the page URL
 * @returns {Promise<Object|null>} the scraped profile, or null when the response status is not 200
 */
async function fetchProfile(actorName) {
	// global flag: without it only the first whitespace run was replaced,
	// leaving spaces in the URL for names of three or more words
	const actorSlug = actorName.replace(/\s+/g, '_');
	const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString());
	}

	return null;
}
// Public scraper interface of this module.
module.exports = { fetchProfile };

View File

@@ -11,216 +11,216 @@ const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
// Maps hair color labels to the hair color values stored on profiles.
const hairMap = {
	Blonde: 'blonde',
	Brunette: 'brown',
	'Black Hair': 'black',
	Redhead: 'red',
};
/**
 * Scrapes the scene cards (`.release-card.scene`) on a listing page into releases.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - attached to every release as `site`
 * @param {boolean} [upcoming] - when truthy only cards flagged as upcoming are returned,
 *   otherwise only cards not flagged as upcoming
 * @returns {Array<Object>} the matching releases
 */
function scrapeAll(html, site, upcoming) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.release-card.scene').toArray();

	return sceneElements.reduce((acc, element) => {
		const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;

		// skip cards whose upcoming state does not match the requested one
		if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
			return acc;
		}

		const sceneLinkElement = $(element).find('a');

		const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
		const title = sceneLinkElement.attr('title');
		// third-to-last element of the '/'-split URL
		const entryId = url.split('/').slice(-3, -2)[0];

		const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
		const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();

		const likes = Number($(element).find('.label-rating .like-amount').text());
		const dislikes = Number($(element).find('.label-rating .dislike-amount').text());

		const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
		const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();

		const channel = slugify($(element).find('.collection').attr('title'), '');

		return acc.concat({
			url,
			entryId,
			title,
			actors,
			date,
			poster,
			photos,
			rating: {
				likes,
				dislikes,
			},
			channel,
			site,
		});
	}, []);
}
/**
 * Scrapes a Brazzers scene page into a release.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; the third-to-last element of the '/'-split URL becomes the entry ID
 * @param {Object} _site - unused
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(html, url, _site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = {};

	// the player options are embedded in an inline script; cut out the JSON object starting at "stream_info".
	// A page without that script yields null instead of throwing, which the videoData guards below expect.
	const videoJson = $('script:contains("window.videoUiOptions")').html();
	const videoData = videoJson
		? JSON.parse(videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1))
		: null;

	[release.entryId] = url.split('/').slice(-3, -2);
	release.title = $('.scene-title[itemprop="name"]').text();

	release.description = $('#scene-description p[itemprop="description"]')
		.contents()
		.first()
		.text()
		.trim();

	release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
	// the content attribute is stripped of its first and last character and multiplied by 60
	// (presumably an ISO-style minutes duration such as 'T30M'; confirm)
	release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;

	const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
		const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;

		return {
			name: $(actorElement).attr('title'),
			avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
		};
	}).toArray();

	// an empty array is truthy, so check the length to actually reach the name-only fallback
	release.actors = actorsFromCards.length > 0
		? actorsFromCards
		: $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.likes = Number($('.label-rating .like').text());
	release.dislikes = Number($('.label-rating .dislike').text());

	const siteElement = $('.niche-site-logo');
	// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
	const siteName = siteElement.attr('title');
	release.channel = siteName.replace(/\s+/g, '').toLowerCase();

	release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();

	const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
	if (posterPath) release.poster = `https:${posterPath}`;

	if (videoData) {
		// one trailer source per stream path; the quality is the first run of 3+ digits in the path key
		release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
			src: `https:${path}`,
			quality: Number(quality.match(/\d{3,}/)[0]),
		}));
	}

	return release;
}
/**
 * Finds the actor link in a search results page.
 *
 * @param {string} html - search results markup
 * @param {string} url - unused
 * @param {string} actorName - matched case-insensitively against the link's title attribute
 * @returns {string|null} the href of the first matching link, or null when there is none
 */
function scrapeActorSearch(html, url, actorName) {
	const { document } = new JSDOM(html).window;
	const actorLink = document.querySelector(`a[title="${actorName}" i]`);

	return actorLink ? actorLink.href : null;
}
/**
 * Collects an actor's releases by following the pagination's next link recursively.
 *
 * @param {Object} page - query context of the current page: `html` is scraped for releases,
 *   `qu` is used to find the next page link
 * @param {Array<Object>} [accReleases=[]] - releases gathered from previous pages
 * @returns {Promise<Array<Object>>} releases from all pages reached; stops at the first
 *   page without a next link or whose next page request is not ok
 */
async function fetchActorReleases({ qu, html }, accReleases = []) {
	const releases = scrapeAll(html);
	const next = qu.url('.pagination .next a');

	if (next) {
		const url = `https://www.brazzers.com${next}`;
		const res = await get(url);

		if (res.ok) {
			return fetchActorReleases(res.item, accReleases.concat(releases));
		}
	}

	return accReleases.concat(releases);
}
/**
 * Scrapes a Brazzers actor page into a profile, including the actor's releases.
 *
 * @param {string} html - actor page markup
 * @param {string} url - unused
 * @param {string} actorName - stored as the profile name
 * @returns {Promise<Object>} profile; bio fields are only set when present on the page
 */
async function scrapeProfile(html, url, actorName) {
	const qProfile = ex(html);
	const { q, qa } = qProfile;

	// labels and values are parallel lists; strip newlines and repeated whitespace and pair them by index
	const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim());
	const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim());

	const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {});

	const profile = {
		name: actorName,
	};

	profile.description = q('.model-profile-specs p', true);

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
	if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, YYYY').toDate();
	if (bio['Birth Location']) profile.birthPlace = bio['Birth Location'];
	if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase();

	if (bio.Height) profile.height = heightToCm(bio.Height);

	if (bio.Weight) {
		// skipped instead of throwing when the field holds no digits
		const pounds = bio.Weight.match(/\d+/)?.[0];
		if (pounds) profile.weight = lbsToKg(pounds);
	}

	if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();

	if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true;
	if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false;

	if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true;
	if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true;

	const avatarEl = q('.big-pic-model-container img');
	if (avatarEl) profile.avatar = `https:${avatarEl.src}`;

	profile.releases = await fetchActorReleases(qProfile);

	return profile;
}
/**
 * Fetches one listing page and scrapes the releases that are not flagged as upcoming.
 *
 * @param {Object} site - its `url` is the listing base URL
 * @param {number} [page=1] - page number
 * @returns {Promise<Array<Object>>} the scraped releases
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/page/${page}/`);

	return scrapeAll(res.body.toString(), site, false);
}
/**
 * Fetches the site's front page and scrapes the releases flagged as upcoming.
 *
 * @param {Object} site - its `url` is requested with a trailing slash
 * @returns {Promise<Array<Object>>} the upcoming releases
 */
async function fetchUpcoming(site) {
	const res = await bhttp.get(`${site.url}/`);

	return scrapeAll(res.body.toString(), site, true);
}
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - scene page URL
 * @param {Object} site - passed through to scrapeScene
 * @returns {Promise<Object>} the scraped release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Looks an actor up through the pornstar search and scrapes their profile page.
 *
 * @param {string} actorName - actor name; sent URI-encoded in the `textSearch` cookie
 * @returns {Promise<Object|null>} the scraped profile, or null when the search has no matching link
 */
async function fetchProfile(actorName) {
	const searchUrl = 'https://brazzers.com/pornstars-search/';
	// the search term is passed in a cookie rather than in the URL
	const searchRes = await bhttp.get(searchUrl, {
		headers: {
			Cookie: `textSearch=${encodeURIComponent(actorName)};`,
		},
	});

	const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

	if (actorLink) {
		const url = `https://brazzers.com${actorLink}`;
		const res = await bhttp.get(url);

		return scrapeProfile(res.body.toString(), url, actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -4,139 +4,139 @@ const { get, geta, ctxa, ed } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Scrapes a list of scene query contexts into releases.
 *
 * @param {Array<{qu: Object}>} scenes - one query context per scene tile
 * @param {Object} [site] - when `site.parameters.extract` is set, only scenes whose badge
 *   text equals `site.name` are kept
 * @returns {Array<Object>} the scraped releases, without the filtered-out scenes
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const url = qu.url('.text-thumb a');
		const { pathname } = new URL(url);
		const channelUrl = qu.url('.badge');

		if (site?.parameters?.extract && qu.q('.badge', true) !== site.name) {
			return null;
		}

		const release = {};

		// prefer the scene path on the badge's URL when the badge links somewhere
		release.url = channelUrl ? `${channelUrl}${pathname}` : url;
		// first all-digit run that directly follows a '/' in the path
		release.entryId = pathname.match(/\/\d+/)[0].slice(1);
		release.title = qu.q('.text-thumb a', true);

		release.date = qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
		release.duration = qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/);

		release.actors = qu.all('.category a', true);

		release.poster = qu.img('img.video_placeholder, .video-images img');
		release.teaser = { src: qu.trailer() };

		return release;
	}).filter(Boolean);
}
/**
 * Scrapes a scene page into a release.
 *
 * @param {Object} qu - query helpers for the page: `q` (single), `qd` (date) and `qa` (all)
 * @param {string} url - scene URL; the first all-digit run following a '/' in its path is the entry ID
 * @param {Object} _site - unused
 * @param {Object} [baseRelease] - previously scraped release; its duration takes precedence, and when
 *   it already has a poster the page's image is stored under `photos` instead of `poster`
 * @returns {Object} the scraped release
 */
function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
	const release = { url };

	const { pathname } = new URL(url);
	release.entryId = pathname.match(/\/\d+/)[0].slice(1);

	release.title = q('.trailer-block_title', true);
	release.description = q('.info-block:nth-child(3) .text', true);
	release.date = qd('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);

	// a missing '<n> min' yields NaN, which is falsy and therefore not stored
	const duration = baseRelease?.duration || Number(q('.info-block_data .text', true).match(/(\d+)\s+min/)?.[1]) * 60;
	if (duration) release.duration = duration;

	release.actors = qa('.info-block_data a[href*="/models"]', true);
	release.tags = qa('.info-block a[href*="/categories"]', true);

	// highest available resolution first; a page without the thumbnail element simply gets no image
	const posterEl = q('.update_thumb');
	const poster = posterEl?.getAttribute('src0_3x') || posterEl?.getAttribute('src0_2x') || posterEl?.dataset.src;

	if (poster && baseRelease?.poster) release.photos = [poster];
	else if (poster) release.poster = poster;

	return release;
}
/**
 * Scrape an actor profile page.
 *
 * The description lines are turned into a `bio` map keyed by the slugified label
 * (with '_' as separator), which is then translated into profile fields.
 *
 * @param {{ q: Function, qa: Function, qtx: Function }} context - query helpers bound to the profile page
 * @returns {object} profile including the releases listed on the page
 */
function scrapeProfile({ q, qa, qtx }) {
	const profile = {};

	const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
	const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
	const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});

	// only the metric value in parentheses is used
	if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
	if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
	if (bio.race) profile.ethnicity = bio.race;

	if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// parts containing '?' are treated as unknown and left out
		if (!/\?/.test(bust)) profile.bust = bust;
		if (!/\?/.test(waist)) profile.waist = waist;
		if (!/\?/.test(hip)) profile.hip = hip;
	}

	if (bio.hair) profile.hair = bio.hair;
	if (bio.eyes) profile.eyes = bio.eyes;

	// 'various' and 'none' only set the flag, anything else is kept as the description
	if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
	else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
	else if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
	else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
	else if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());

	// try the src0_3x attribute first, then src0_2x, then data-src
	const avatar = q('.model-img img');
	profile.avatar = avatar.getAttribute('src0_3x') || avatar.getAttribute('src0_2x') || avatar.dataset.src;

	const releases = qa('.video-thumb');
	profile.releases = scrapeAll(ctxa(releases));

	return profile;
}
/**
 * Fetch a page of the latest scenes for a site.
 *
 * @param {object} site - site entry; with `parameters.extract` set, the scenes are
 *   taken from the cherrypimps.com listing instead of `site.url`
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.extract
		? `https://cherrypimps.com/categories/movies_${page}.html`
		: `${site.url}/categories/movies_${page}.html`;

	const res = await geta(url, 'div.video-thumb');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry, passed through to the scraper
 * @param {object} [release] - base release from the listing, passed through to the scraper
 * @returns {Promise<object|number>} the release, or the response status when the request failed
 */
async function fetchScene(url, site, release) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, release) : res.status;
}
/**
 * Fetch an actor profile from the given network site.
 *
 * Two URL variants are tried in order: one built from `slugify(actorName)` and,
 * if that request fails, one built from `slugify(actorName, '')`.
 *
 * @param {string} actorName - actor name to look up
 * @param {string} scraperSlug - slug of the site; 'cherrypimps' and 'wildoncam' use .com,
 *   every other slug has 'xxx' stripped and uses the .xxx domain
 * @returns {Promise<object|number>} the profile, or the status of the second request when both failed
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName);
	const actorSlug2 = slugify(actorName, '');

	const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
		? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]
		: [`https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug}.html`, `https://${scraperSlug.replace('xxx', '')}.xxx/models/${actorSlug2}.html`];

	const res = await get(url);
	if (res.ok) return scrapeProfile(res.item);

	const res2 = await get(url2);
	return res2.ok ? scrapeProfile(res2.item) : res2.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -7,182 +7,182 @@ const slugify = require('../utils/slugify');
/* eslint-disable newline-per-chained-call */
/**
 * Scrape scene cards from a listing page.
 *
 * @param {string} html - listing page HTML
 * @param {object} [site] - site entry; its `url` is the preferred base for scene URLs
 * @param {string} [origin] - fallback base when no site is given; defaults to https://ddfnetwork.com
 * @returns {object[]} releases found in the scene cards (pornstar cards are excluded by the selector)
 */
function scrapeAll(html, site, origin) {
	return exa(html, '.card.m-1:not(.pornstar-card)').map(({ q, qa, qd }) => {
		const release = {};

		release.title = q('a', 'title');
		release.url = `${site?.url || origin || 'https://ddfnetwork.com'}${q('a', 'href')}`;
		// the last path segment of the URL serves as entry ID
		[release.entryId] = release.url.split('/').slice(-1);

		release.date = qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime');
		release.actors = qa('.card-subtitle a', true).filter(Boolean);

		// the parsed number is multiplied by 60; skipped when it is NaN or 0
		const duration = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60;
		if (duration) release.duration = duration;

		release.poster = q('img').dataset.src;

		return release;
	});
}
/**
 * Scrape a single scene page.
 *
 * @param {string} html - scene page HTML
 * @param {string} url - scene URL; its last path segment becomes the entry ID
 * @param {object} _site - unused
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, _site) {
	const { qu } = ex(html);
	const release = {};

	[release.entryId] = url.split('/').slice(-1);

	release.title = qu.meta('itemprop=name');
	release.description = qu.q('.descr-box p', true);
	// prefer the structured upload date, fall back to the date printed on the page
	release.date = qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
		|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY');

	release.actors = qu.all('.pornstar-card > a', 'title');
	release.tags = qu.all('.tags-tab .tags a', true);

	// only set a duration when one could be parsed, same as scrapeAll; avoids storing NaN
	const duration = parseInt(qu.q('.icon-video-red + span', true), 10) * 60;
	if (duration) release.duration = duration;

	release.likes = Number(qu.q('.icon-like-red + span', true));

	release.poster = qu.poster();
	release.photos = qu.urls('.photo-slider-guest .card a');

	// every mp4 source is a trailer variant, its 'res' attribute holds the quality
	release.trailer = qu.all('source[type="video/mp4"]').map(trailer => ({
		src: trailer.src,
		quality: Number(trailer.attributes.res.value),
	}));

	return release;
}
/**
 * Fetch an actor's releases from the sites linked on their profile.
 *
 * @param {string[]} urls - URLs of the actor's pages on the individual sites
 * @returns {Promise<object[]>} releases, de-duplicated by entry ID
 */
async function fetchActorReleases(urls) {
	// DDF Network and DDF Network Stream list all scenes, exclude
	const sources = urls.filter(url => !/ddfnetwork/.test(url));

	const releases = await Promise.all(sources.map(async (url) => {
		const { html } = await get(url);

		return scrapeAll(html, null, new URL(url).origin);
	}));

	// DDF cross-releases scenes between sites, filter duplicates by entryId
	return Object.values(releases
		.flat()
		.sort((releaseA, releaseB) => releaseB.date - releaseA.date) // sort by date so earliest scene remains
		.reduce((acc, release) => ({ ...acc, [release.entryId]: release }), {}));
}
/**
 * Scrape an actor profile page and the releases from the sites it links to.
 *
 * The '.about-title' / '.about-info' pairs are collected into a `bio` map keyed by
 * the slugified title (with '_' as separator); values of '-' are left out.
 *
 * @param {string} html - profile page HTML
 * @param {string} _url - unused
 * @param {string} actorName - name stored on the profile
 * @returns {Promise<object>} the scraped profile
 */
async function scrapeProfile(html, _url, actorName) {
	const { qu } = ex(html);

	const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
	const values = qu.all('.about-info').map((el) => {
		// values split over child elements are joined into one comma-separated string
		if (el.children.length > 0) {
			return Array.from(el.children, child => child.textContent.trim()).join(', ');
		}

		return el.textContent.trim();
	});

	const bio = keys.reduce((acc, key, index) => {
		if (values[index] === '-') return acc;

		return {
			...acc,
			[key]: values[index],
		};
	}, {});

	const profile = {
		name: actorName,
	};

	profile.description = qu.q('.description-box', true);
	profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');

	if (bio.nationality) profile.nationality = bio.nationality;

	if (bio.bra_size) [profile.bust] = bio.bra_size.match(/\d+\w+/);
	if (bio.waist) profile.waist = Number(bio.waist.match(/\d+/)[0]);
	if (bio.hips) profile.hip = Number(bio.hips.match(/\d+/)[0]);

	if (bio.height) profile.height = Number(bio.height.match(/\d{2,}/)[0]);

	if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false;
	if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true;

	if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true;
	if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true;

	if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase();
	if (bio.eye_color) profile.eyes = bio.eye_color.match(/\w+/)[0].toLowerCase();

	// the second '|'-separated part of the shoe size is used
	if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]);

	// only protocol-relative avatar URLs are accepted
	const avatarEl = qu.q('.pornstar-details .card-img-top');
	if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`;

	profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));

	return profile;
}
/**
 * Fetch a page of the latest scenes for a site.
 *
 * @param {object} site - site entry; with `parameters.native` set the site's own search
 *   is used, otherwise the ddfnetwork.com search filtered by the site's hostname
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]|number>} releases, or the HTTP status code when it is not 200
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.native
		? `${site.url}/videos/search/latest/ever/allsite/-/${page}`
		: `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`;

	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL, requested as given
 * @param {object} site - site entry, passed through to the scraper
 * @returns {Promise<object>} the scraped release
 */
async function fetchScene(url, site) {
	// DDF's main site moved to Porn World
	// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Look up an actor through the ddfnetwork.com search hints and scrape their profile.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<object|null>} the profile, or null when the search or profile request
 *   fails or the search has no pornstar match
 */
async function fetchProfile(actorName) {
	const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
		{
			type: 'hints',
			word: actorName,
		},
		{
			decodeJSON: true,
			headers: {
				'x-requested-with': 'XMLHttpRequest',
			},
		});

	// NOTE(review): an array-valued `list` is treated as "no results"; presumably hits come back as an object, confirm
	if (resSearch.statusCode !== 200 || Array.isArray(resSearch.body.list)) {
		return null;
	}

	if (!resSearch.body.list.pornstarsName || resSearch.body.list.pornstarsName.length === 0) {
		return null;
	}

	// only the first hint is used
	const [actor] = resSearch.body.list.pornstarsName;
	const url = `https://ddfnetwork.com${actor.href}`;

	const resActor = await bhttp.get(url);

	if (resActor.statusCode !== 200) {
		return null;
	}

	return scrapeProfile(resActor.body.toString(), url, actorName);
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the MindGeek scraper with this network's fixed arguments.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} whatever the MindGeek `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'digitalplayground', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -7,136 +7,136 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Build the list of photo pages for an album.
 *
 * The number in the last preview link's file name is taken as the photo count;
 * one page URL is generated per index, zero-padded to three digits.
 *
 * @param {string} albumUrl - URL of the album page
 * @returns {Promise<Array<{ url: string, extract: Function }>>} photo page URLs, each with an
 *   extractor that reads the image source from the fetched page
 */
async function getPhotos(albumUrl) {
	const res = await bhttp.get(albumUrl);
	const html = res.body.toString();
	const { document } = new JSDOM(html).window;

	const lastPhotoPage = Array.from(document.querySelectorAll('.preview-image-container a')).slice(-1)[0].href;
	// the dot is escaped so only a literal '.jpg' extension matches
	const lastPhotoIndex = parseInt(lastPhotoPage.match(/\d+\.jpg/)[0], 10);

	const photoUrls = Array.from({ length: lastPhotoIndex }, (value, index) => {
		const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+\.jpg/, `${(index + 1).toString().padStart(3, '0')}.jpg`)}`;

		return {
			url: pageUrl,
			extract: ({ qu }) => qu.q('.scenes-module img', 'src'),
		};
	});

	return photoUrls;
}
/**
 * Scrape the network-wide latest scenes page, keeping only scenes of the given site.
 *
 * @param {string} html - listing page HTML
 * @param {object} site - site entry; its `url` host is matched against each scene's site label
 *   and its `slug` prefixes the entry ID
 * @returns {object[]} releases belonging to the site
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const sceneElements = Array.from(document.querySelectorAll('.recent-updates'));

	return sceneElements.reduce((acc, element) => {
		const siteUrl = element.querySelector('.help-block').textContent;

		if (`www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
			// different dogfart site
			return acc;
		}

		const sceneLinkElement = element.querySelector('.thumbnail');
		const url = `https://dogfartnetwork.com${sceneLinkElement.href}`;
		const { pathname } = new URL(url);
		const entryId = `${site.slug}_${pathname.split('/')[4]}`;

		// the title is split on ',', '&' and 'and' to get the actor names
		const title = element.querySelector('.scene-title').textContent;
		const actors = title.split(/[,&]|\band\b/).map(actor => actor.trim());

		const poster = `https:${element.querySelector('img').src}`;
		const teaser = sceneLinkElement.dataset.preview_clip_url;

		return [
			...acc,
			{
				url,
				entryId,
				title,
				actors,
				poster,
				teaser: {
					src: teaser,
				},
				site,
			},
		];
	}, []);
}
/**
 * Scrape a single scene page, including the URLs of its photo pages.
 *
 * @param {string} html - scene page HTML
 * @param {string} url - scene URL; origin and path are used for the release URL and entry ID
 * @param {object} site - site entry, attached to the release
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;

	const title = document.querySelector('.description-title').textContent;
	const actors = Array.from(document.querySelectorAll('.more-scenes a')).map(({ textContent }) => textContent);

	// `content` is already the description string; use it directly and fall back to the
	// visible description when the meta tag is missing or empty
	const metaDescription = document.querySelector('meta[itemprop="description"]')?.content;
	const description = metaDescription
		|| document.querySelector('.description')
			.textContent
			.replace(/[ \t\n]{2,}/g, ' ')
			.replace('...read more', '')
			.trim();

	const channel = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
	const { origin, pathname } = new URL(url);
	const entryId = `${channel}_${pathname.split('/').slice(-2)[0]}`;

	const date = new Date(document.querySelector('meta[itemprop="uploadDate"]').content);

	// the page shows mm:ss, prefix the hours so moment parses it as a duration
	const durationText = document
		.querySelectorAll('.extra-info p')[1]
		.textContent
		.match(/\d+:\d+$/)[0];
	const duration = moment.duration(`00:${durationText}`).asSeconds();

	const trailerElement = document.querySelector('.html5-video');
	const poster = `https:${trailerElement.dataset.poster}`;
	const { trailer } = trailerElement.dataset;

	// the last pagination link leads to the page with the highest photo number
	const lastPhotosUrl = Array.from(document.querySelectorAll('.pagination a')).slice(-1)[0].href;
	const photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`);

	// the page rating is halved and rounded down
	const stars = Math.floor(Number(document.querySelector('span[itemprop="average"]')?.textContent || document.querySelector('span[itemprop="ratingValue"]')?.textContent) / 2);
	const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);

	return {
		entryId,
		url: `${origin}${pathname}`,
		title,
		description,
		actors,
		date,
		duration,
		poster,
		photos,
		trailer: {
			src: trailer,
		},
		tags,
		rating: {
			stars,
		},
		site,
		channel,
	};
}
/**
 * Fetch a page of the network-wide latest scenes and keep those of the given site.
 *
 * @param {object} site - site entry used to filter the scenes
 * @param {number} [page=1] - listing page number
 * @returns {Promise<object[]>} releases belonging to the site
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry, passed through to the scraper
 * @returns {Promise<object>} the scraped release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the MindGeek scraper with this network's fixed arguments.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} whatever the MindGeek `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'fakehub', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -1,115 +1,115 @@
'use strict';
const {
fetchLatest,
fetchApiLatest,
fetchUpcoming,
fetchApiUpcoming,
fetchScene,
fetchProfile,
fetchApiProfile,
scrapeAll,
fetchLatest,
fetchApiLatest,
fetchUpcoming,
fetchApiUpcoming,
fetchScene,
fetchProfile,
fetchApiProfile,
scrapeAll,
} = require('./gamma');
const { get } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Derive the actor list of a Low Art Films release from its title.
 *
 * The first 'solo' (case-insensitive) is removed and the title is split on commas
 * and the word 'and'; empty parts are dropped so no blank actor names are produced.
 *
 * @param {{ title: string }} release - release whose title lists the performers
 * @returns {object} copy of the release with an `actors` array; the input is not modified
 */
function extractLowArtActors(release) {
	const actors = release.title
		.replace(/solo/i, '')
		.split(/,|\band\b/ig)
		.map(actor => actor.trim())
		.filter(Boolean);

	return {
		...release,
		actors,
	};
}
/**
 * Fetch the latest releases for a site.
 *
 * @param {object} site - site entry; `parameters.api` selects the Gamma API variant
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} releases; for the 'lowartfilms' site the actors are derived from the titles
 */
async function networkFetchLatest(site, page = 1) {
	if (site.parameters?.api) return fetchApiLatest(site, page, false);

	const releases = await fetchLatest(site, page);

	if (site.slug === 'lowartfilms') {
		return releases.map(release => extractLowArtActors(release));
	}

	return releases;
}
/**
 * Fetch a single scene.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry
 * @returns {Promise<*>} the release; for the 'lowartfilms' site the actors are derived from the title
 */
async function networkFetchScene(url, site) {
	const release = await fetchScene(url, site);

	if (site.slug === 'lowartfilms') {
		return extractLowArtActors(release);
	}

	return release;
}
/**
 * Fetch the upcoming releases for a site.
 *
 * @param {object} site - site entry; `parameters.api` selects the Gamma API variant
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} whatever the selected Gamma upcoming fetcher resolves to
 */
async function networkFetchUpcoming(site, page = 1) {
	if (site.parameters?.api) return fetchApiUpcoming(site, page, true);

	return fetchUpcoming(site, page);
}
/**
 * Build the peternorth.com URL listing an actor's videos.
 *
 * @param {string} actorPath - actor path segment, including its leading slash
 * @param {number} [page=1] - listing page number
 * @returns {string} the listing URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.peternorth.com/en/videos/All-Categories/0${actorPath}/All-Dvds/0/latest/${page}`;
}
/**
 * Fetch an actor's releases from a site that only offers a pornstar dropdown.
 *
 * The actor is located by comparing the slugified option labels on the
 * /en/pornstars page with the slugified actor name.
 *
 * @param {string} actorName - actor name to look up
 * @param {string} siteSlug - site slug, used as the .com domain name
 * @returns {Promise<{ releases: * }|null>} the actor's releases, or null when a request
 *   fails or the actor is not listed
 */
async function fetchClassicProfile(actorName, siteSlug) {
	const actorSlug = slugify(actorName);

	const url = `https://${siteSlug}.com/en/pornstars`;
	const pornstarsRes = await get(url);

	if (!pornstarsRes.ok) return null;

	const actorPath = pornstarsRes.item.qa('option[value*="/pornstar"]')
		.find(el => slugify(el.textContent) === actorSlug)
		?.value;

	if (actorPath) {
		const actorUrl = `https://${siteSlug}.com${actorPath}`;
		const res = await get(actorUrl);

		if (res.ok) {
			const releases = scrapeAll(res.item, null, `https://www.${siteSlug}.com`, false);

			return { releases };
		}
	}

	return null;
}
/**
 * Fetch an actor profile by combining the Fame Digital sources.
 *
 * The Gamma profiles (devilsfilm, roccosiffredi) are always requested; peternorth,
 * silviasaint and silverstonedvd are only queried when scenes are requested.
 *
 * @param {string} actorName - actor name to look up
 * @param {string} scraperSlug - unused
 * @param {object} site - unused
 * @param {{ scenes?: boolean }} [include] - which optional data to fetch
 * @returns {Promise<object|null>} merged profile with the releases of all sources,
 *   or null when none of devilsfilm, roccosiffredi or peternorth has a profile
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	// not all Fame Digital sites offer Gamma actors
	// skipped sources resolve to null rather than [], an empty array is truthy and would count as a found profile
	const [devils, rocco, peter, silvia, silverstone] = await Promise.all([
		fetchApiProfile(actorName, 'devilsfilm', true),
		fetchApiProfile(actorName, 'roccosiffredi'),
		include?.scenes ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : null,
		include?.scenes ? fetchClassicProfile(actorName, 'silviasaint') : null,
		include?.scenes ? fetchClassicProfile(actorName, 'silverstonedvd') : null,
	]);

	if (devils || rocco || peter) {
		// the silverstonedvd releases are included too; they were fetched but never used before
		const releases = [].concat(
			devils?.releases || [],
			rocco?.releases || [],
			peter?.releases || [],
			silvia?.releases || [],
			silverstone?.releases || [],
		);

		// later spreads win: devilsfilm overrides roccosiffredi, which overrides peternorth
		return {
			...peter,
			...rocco,
			...devils,
			releases,
		};
	}

	return null;
}
module.exports = {
fetchLatest: networkFetchLatest,
fetchProfile: networkFetchProfile,
fetchScene: networkFetchScene,
fetchUpcoming: networkFetchUpcoming,
fetchLatest: networkFetchLatest,
fetchProfile: networkFetchProfile,
fetchScene: networkFetchScene,
fetchUpcoming: networkFetchUpcoming,
};

View File

@@ -4,7 +4,7 @@ const { fetchLatest, fetchUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest,
fetchScene,
fetchUpcoming,
fetchLatest,
fetchScene,
fetchUpcoming,
};

View File

@@ -5,89 +5,89 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Build an actor profile from a freeones.nl profile page.
 *
 * Bio values are read from the page's "/babes" filter links: the bracketed
 * token in each link is used as the key and the text after the first '=' as
 * the value.
 *
 * @param {string} html - Raw HTML of the profile page.
 * @param {string} actorName - Name to store on the profile.
 * @returns {object} Profile with whichever bio fields could be extracted.
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	const bioLinks = Array.from(document.querySelectorAll('a[href^="/babes"]'), el => decodeURI(el.href));

	const bio = bioLinks.reduce((acc, item) => {
		const key = item.match(/\[(\w+)\]/)?.[1];

		if (!key) {
			return acc;
		}

		const [, value] = item.split('=');

		// both hip and waist link to 'waist', assume biggest value is hip
		if (key === 'waist' && acc.waist) {
			// compare numerically; as strings '9' would sort above '10'
			if (Number.parseFloat(acc.waist) > Number.parseFloat(value)) {
				acc.hip = acc.waist;
				acc.waist = value;

				return acc;
			}

			acc.hip = value;

			return acc;
		}

		acc[key] = value;

		return acc;
	}, {});

	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();

	// only join the parts that are present, so a missing city never renders as 'undefined'
	const birthPlace = [bio.placeOfBirth, bio.country].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.eyes = bio.eyeColor;
	profile.hair = bio.hairColor;
	profile.ethnicity = bio.ethnicity;

	profile.bust = bio.bra;
	if (bio.waist) profile.waist = Number(bio.waist.split(',')[0]);
	if (bio.hip) profile.hip = Number(bio.hip.split(',')[0]);

	if (bio.height) profile.height = Number(bio.height.split(',')[0]);
	if (bio.weight) profile.weight = Number(bio.weight.split(',')[0]);

	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);

	// the image element may be absent; skip the avatar instead of throwing
	const avatar = document.querySelector('.profile-image-large img')?.src;
	if (avatar && !avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };

	return profile;
}
/**
 * Extract the first result link from a search results page.
 *
 * @param {string} html - Raw HTML of the search page.
 * @returns {string|null} The href of the first 'a.image-link', or null when there is none.
 */
function scrapeSearch(html) {
	const { document } = new JSDOM(html).window;

	return document.querySelector('a.image-link')?.href || null;
}
/**
 * Fetch an actor profile from freeones.nl.
 *
 * Tries the profile URL guessed from the actor name first and falls back to
 * the site search when that URL does not respond with 200.
 *
 * @param {string} actorName - Actor name as displayed.
 * @returns {Promise<object|null>} Scraped profile, or null when no profile page was found.
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	// encode the name so spaces and reserved characters don't corrupt the query string
	const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${encodeURIComponent(actorName)}`);
	const actorPath = scrapeSearch(searchRes.body.toString());

	if (!actorPath) {
		return null;
	}

	const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);

	if (actorRes.statusCode === 200) {
		return scrapeProfile(actorRes.body.toString(), actorName);
	}

	return null;
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Scrape the bio summary shown on an actor's front page.
 *
 * @param {string} html - Raw HTML of the front page.
 * @param {string} url - Front page URL (currently unused, kept for the call signature).
 * @param {string} name - Actor name to store on the profile.
 * @returns {Promise<{profile: object, url: string}>} The partial profile and the
 *   URL of the full biography page taken from the '.seemore' link.
 */
async function scrapeProfileFrontpage(html, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('.dashboard-bio-list');

	const bioUrl = `https:${document.querySelector('.seemore a').href}`;

	const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	// the site renders missing fields as 'Unknown' or 'Unknown (add)'
	const isKnown = value => Boolean(value) && value !== 'Unknown' && value !== 'Unknown (add)';

	const profile = {
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCity = isKnown(bio['Place of Birth:']) && bio['Place of Birth:'];
	const birthCountry = isKnown(bio['Country of Origin:']) && bio['Country of Origin:'];

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	// strip a trailing ' (...)' suffix; split() keeps the whole string when there is none
	if (isKnown(birthdateString)) profile.birthdate = moment.utc(birthdateString.split(' (')[0], 'MMMM D, YYYY').toDate();
	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';
	profile.birthPlace = [birthCity, birthCountry].filter(Boolean).join(', ');

	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

	return {
		profile,
		url: bioUrl,
	};
}
/**
 * Scrape the full biography table and merge it over the front page profile.
 *
 * @param {string} html - Raw HTML of the biography page.
 * @param {object} [frontpageProfile] - Partial profile from the front page; its
 *   fields are kept unless the biography page provides a value.
 * @param {string} url - Biography URL (currently unused, kept for the call signature).
 * @param {string} name - Actor name to store on the profile.
 * @returns {Promise<object>} The merged profile.
 */
async function scrapeProfileBio(html, frontpageProfile, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('#biographyTable');

	const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});

	// the site renders missing fields as 'Unknown' or 'Unknown (add)'
	const isKnown = value => Boolean(value) && value !== 'Unknown' && value !== 'Unknown (add)';

	const profile = {
		...frontpageProfile,
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCity = isKnown(bio['Place of Birth:']) && bio['Place of Birth:'];
	const birthCountry = isKnown(bio['Country of Origin:']) && bio['Country of Origin:'];

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	// strip a trailing ' (...)' suffix; split() keeps the whole string when there is none
	if (isKnown(birthdateString)) profile.birthdate = moment.utc(birthdateString.split(' (')[0], 'MMMM D, YYYY').toDate();
	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	// table keys include the trailing colon; the guard and the lookup must use the same key
	if (bio['Fake boobs:']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
	profile.ethnicity = bio['Ethnicity:'];

	profile.birthPlace = [birthCity, birthCountry].filter(Boolean).join(', ');

	if (bio['Hair Color:']) profile.hair = bio['Hair Color:'].toLowerCase();
	if (bio['Eye Color:']) profile.eyes = bio['Eye Color:'].toLowerCase();

	// rows may be missing or hold no digits; skip them rather than throw
	const height = bio['Height:']?.match(/\d+/)?.[0];
	const weight = bio['Weight:']?.match(/\d+/)?.[0];

	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);

	return profile;
}
/**
 * Fetch a front page and, when it exists, follow it to the biography page.
 *
 * @param {string} frontpageUrl - Candidate front page URL.
 * @param {string} actorName - Actor name to store on the profile.
 * @returns {Promise<object|null>} Merged profile, or null when the front page is not a 200.
 */
async function fetchProfileFromFrontpage(frontpageUrl, actorName) {
	const resFrontpage = await bhttp.get(frontpageUrl);

	if (resFrontpage.statusCode !== 200) {
		return null;
	}

	// scrapeProfileFrontpage resolves to { profile, url }
	const { url, profile } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
	const resBio = await bhttp.get(url);

	return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
}

/**
 * Fetch an actor profile from freeones.com.
 *
 * @param {string} actorName - Actor name as displayed.
 * @returns {Promise<object|null>} Scraped profile, or null when neither URL responds with 200.
 */
async function fetchProfile(actorName) {
	// replace every whitespace run, not only the first space
	const slug = actorName.replace(/\s+/g, '_');

	// NOTE(review): the 'v_links' / 's_links' directories are hard-coded as in the original; confirm they are correct for every name
	const profile = await fetchProfileFromFrontpage(`https://www.freeones.com/html/v_links/${slug}`, actorName);

	if (profile) {
		return profile;
	}

	// apparently some actors are appended 'Babe' as their surname...
	return fetchProfileFromFrontpage(`https://www.freeones.com/html/s_links/${slug}_Babe`, actorName);
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -4,93 +4,93 @@ const { get, geta, ctxa } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Convert scene tiles from an overview page into release objects.
 *
 * @param {Array<{el: object, qu: object}>} scenes - Query contexts, one per scene tile.
 * @returns {object[]} One release per tile.
 */
function scrapeAll(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		// fall back to the number embedded in the thumbnail ID; tolerate a missing or unexpected ID
		release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id')?.match(/\w+-\w+-(\d+)-\d+/)?.[1];
		release.url = qu.url('.title');

		release.title = qu.q('.title', true);
		release.description = qu.q('.title', 'title');

		release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
		release.duration = qu.dur('.video-data > span');

		release.actors = qu.all('.update_models a', true);

		const poster = qu.q('.update_thumb', 'src0_1x');

		// a tile without a thumbnail should not abort the whole page
		if (poster) {
			release.poster = [
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		return release;
	});
}
/**
 * Build a release from a scene page.
 *
 * @param {object} context - Query helpers ({ q, qa, qd, qtx }) bound to the scene page.
 * @param {string} url - URL of the scene page.
 * @param {object} [_site] - Unused; kept for the call signature.
 * @returns {object} The scraped release.
 */
function scrapeScene({ q, qa, qd, qtx }, url, _site) {
	const release = { url };

	// tolerate a missing image or an ID in an unexpected format
	release.entryId = q('#image_parent img', 'id')?.match(/\w+-\w+-(\d+)-\d+/)?.[1];

	release.title = q('.trailer_title', true);
	release.description = qtx('.text p');
	release.date = qd('span[data-dateadded]', 'YYYY-MM-DD', null, 'data-dateadded');

	release.actors = qa('.update_models a', true);
	release.tags = qa('.video-info a[href*="/categories"]', true);

	const poster = q('#image_parent img', 'src0_1x');

	if (poster) {
		release.poster = [
			poster.replace('-1x', '-2x'),
			poster,
		];
	}

	return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {object} context - Query context ({ el, q, qtx }) bound to the model page.
 * @returns {object} Profile with avatar sources, releases and, when present, a description.
 */
function scrapeProfile({ el, q, qtx }) {
	const profile = {};

	const description = qtx('.model-bio');
	if (description) profile.description = description;

	// listed with the 2x source ahead of the 1x source
	profile.avatar = [
		q('.model-image img', 'src0_2x'),
		q('.model-image img', 'src0_1x'),
	];

	profile.releases = scrapeAll(ctxa(el, '.update'));

	return profile;
}
/**
 * Fetch one page of the site's latest updates.
 *
 * @param {object} site - Site record; `site.url` is the base URL.
 * @param {number} [page=1] - Page number inserted into the listing URL.
 * @returns {Promise<object[]|*>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.latest-updates .update');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site record, passed through to the scraper.
 * @returns {Promise<object|*>} Scraped release, or the response status when the request failed.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.content-wrapper');

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Fetch an actor's model page from the site identified by `scraperSlug`.
 *
 * @param {string} actorName - Actor name; slugified without a separator for the URL.
 * @param {string} scraperSlug - Site slug, used as the domain name.
 * @returns {Promise<object|*>} Scraped profile, or the response status when the request failed.
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName, '');

	// povperverts is served from .net, every other slug from .com
	const url = scraperSlug === 'povperverts'
		? `https://povperverts.net/models/${actorSlug}.html`
		: `https://${scraperSlug}.com/models/${actorSlug}.html`;

	const res = await get(url);

	return res.ok ? scrapeProfile(res.item, actorName) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -8,404 +8,403 @@ const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert');
/**
 * Build a case-insensitive pattern matching any channel of the site's network.
 *
 * Only applies to the 'hushpass' and 'interracialpass' networks.
 *
 * @param {object} site - Site record with `network.slug` and `network.id`.
 * @returns {Promise<RegExp|null>} Pattern over all channels, or null when the
 *   network is not supported or has no sites.
 */
async function getChannelRegExp(site) {
	if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

	const sites = await knex('sites').where('network_id', site.network.id);

	// an empty alternation would match every string
	if (sites.length === 0) return null;

	// parameters.match is used as pattern source as-is; plain names are matched literally
	const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
	const patterns = sites.map(channel => channel.parameters?.match || escapeRegExp(channel.name));

	return new RegExp(patterns.join('|'), 'i');
}
/**
 * Derive an entry ID from a release's date and title.
 *
 * @param {object} release - Release with optional `date` and `title`.
 * @returns {string|null} '<date slug>-<title slug>', or null when either field is missing.
 */
function deriveEntryId(release) {
	if (release.date && release.title) {
		return `${slugify(fd(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
	}

	return null;
}
/**
 * Resolve poster and photo sources for a scene.
 *
 * @param {string|null|undefined} posterPath - Poster path found on the scene page.
 * @param {object} site - Site record; `parameters.media` takes precedence over `url` as base.
 * @param {object} [baseRelease] - Release from the overview page, if any.
 * @returns {Array} A `[poster, photos]` pair. With a usable path, `poster` is a
 *   list of candidate URLs and the overview poster (if any) becomes the only
 *   photo; otherwise `poster` is the overview poster or null and `photos` is empty.
 */
function extractPoster(posterPath, site, baseRelease) {
	// paths containing '400.jpg' are skipped in favour of the overview poster
	if (!posterPath || /400\.jpg/.test(posterPath)) {
		return [baseRelease?.poster || null, []];
	}

	const poster = `${site.parameters?.media || site.url}${posterPath}`;

	// the Set drops the identical entries produced when the path has no '-1x' marker
	const posterSources = [...new Set([
		poster,
		// upscaled
		poster.replace('-1x', '-2x'),
		poster.replace('-1x', '-3x'),
	])];

	return [posterSources, baseRelease?.poster ? [baseRelease.poster] : []];
}
/**
 * Collect the 3x, 2x and 1x sources of an image, in that order.
 *
 * @param {Function} q - Query helper, called as `q(selector, attribute)` or,
 *   when `el` is given, as `q(el, selector, attribute)`.
 * @param {string} selector - Selector of the image element.
 * @param {object} site - Site record; `parameters.media` takes precedence over `url` as base.
 * @param {object} [el] - Optional element to query within.
 * @returns {string[]} URLs for every source attribute that is present.
 */
function getImageWithFallbacks(q, selector, site, el) {
	const mediaBase = site.parameters?.media || site.url;

	return ['src0_3x', 'src0_2x', 'src0_1x']
		.map(attribute => (el ? q(el, selector, attribute) : q(selector, attribute)))
		.filter(Boolean)
		// sources that are already absolute must not get the base prepended
		.map(src => (/^http/.test(src) ? src : `${mediaBase}${src}`));
}
/**
 * Convert scene tiles from an overview page into release objects.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per scene tile.
 * @param {object} site - Site record, used to resolve poster URLs.
 * @returns {object[]} One release per tile.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h3 a', 'title') || qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/);
		release.duration = qu.dur('.modeldata p');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);

		// the ID is derived from date and title instead of the image's set-target ID
		release.entryId = deriveEntryId(release);

		return release;
	});
}
/**
 * Convert scene tiles of the 'T1' layout into release objects.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per scene tile.
 * @param {object} site - Site record; `parameters.accFilter` enables de-duplication.
 * @param {object[]} [accSiteReleases] - Releases already scraped from other sites.
 * @returns {object[]} Releases, minus those filtered out as already scraped.
 */
function scrapeAllT1(scenes, site, accSiteReleases) {
	// collect the known entry IDs once rather than re-mapping them for every scene
	const scrapedEntryIds = site.parameters?.accFilter && accSiteReleases
		? new Set(accSiteReleases.map(accRelease => accRelease.entryId))
		: null;

	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', 'title') || qu.q('h4 a', true);
		release.url = qu.url('h4 a');

		release.date = qu.date('.more-info-div', 'MMM D, YYYY');
		release.duration = qu.dur('.more-info-div');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		const posterPath = qu.q('.img-div img', 'src0_1x') || qu.img('img.video_placeholder');

		if (posterPath) {
			const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;

			release.poster = [
				poster.replace('-1x', '-3x'),
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		// the ID is derived from date and title instead of the image's set-target ID
		release.entryId = deriveEntryId(release);

		if (scrapedEntryIds?.has(release.entryId)) {
			// filter out releases that were already scraped from a categorized site
			return null;
		}

		return release;
	}).filter(Boolean);
}
/**
 * Convert scene tiles of the 'tour' layout into release objects.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per scene tile.
 * @returns {object[]} One release per tile.
 */
function scrapeAllTour(scenes) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', true);
		release.url = qu.url('a');
		release.date = qu.date('.tour_update_models + span', 'YYYY-MM-DD');

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.img('a img');

		release.entryId = deriveEntryId(release);

		return release;
	});
}
/**
 * Build a release from a scene page.
 *
 * @param {{html: string, qu: object}} context - Raw page HTML and query helpers.
 * @param {object} site - Site record, used to resolve media URLs.
 * @param {string} url - URL of the scene page.
 * @param {object} [baseRelease] - Release from the overview page, used as poster fallback.
 * @returns {object} The scraped release.
 */
function scrapeScene({ html, qu }, site, url, baseRelease) {
	const release = { url };

	release.title = qu.q('.centerwrap h2', true);
	release.description = qu.q('.videocontent p', true);

	release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
	release.duration = qu.dur('.videodetails .date');

	release.actors = qu.all('.modelname a', true);

	const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	// take the matched string itself and stop at the first '.mp4'
	const trailerPath = html.match(/\/trailers\/.*?\.mp4/)?.[0];
	if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	// the rating paragraph may be absent
	const stars = qu.q('.modelrates + p', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	// the ID is derived from date and title instead of the set-target ID in the HTML
	release.entryId = deriveEntryId(release);

	return release;
}
/**
 * Build a release from a scene page of the 'T1' layout.
 *
 * @param {{html: string, qu: object}} context - Raw page HTML and query helpers.
 * @param {object} site - Site record, used to resolve media URLs.
 * @param {string} url - URL of the scene page; also sent as trailer referer.
 * @param {object} [baseRelease] - Release from the overview page, used as poster fallback.
 * @param {RegExp|null} [channelRegExp] - When given, the first tag matching it
 *   is used as the release's channel.
 * @returns {object} The scraped release.
 */
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
	const release = { url };
	const mediaBase = site.parameters?.media || site.url;

	release.title = qu.q('.trailer-section-head .section-title', true);
	release.description = qu.text('.row .update-info-block');

	release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.update-info-row:nth-child(2)');

	release.actors = qu.all('.models-list-thumbs a').map(el => ({
		name: qu.q(el, 'span', true),
		avatar: getImageWithFallbacks(qu.q, 'img', site, el),
	}));

	release.tags = qu.all('.tags a', true);

	const posterPath = qu.q('.player-thumb img', 'src0_1x');
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];

	if (trailer) {
		release.trailer = {
			src: /^http/.test(trailer) ? trailer : `${mediaBase}${trailer}`,
			referer: url,
		};
	}

	// the rating element may be absent
	const stars = qu.q('.update-rating', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	if (channelRegExp) {
		const channel = release.tags.find(tag => channelRegExp.test(tag));

		if (channel) {
			release.channel = {
				force: true,
				slug: slugify(channel, ''),
			};
		}
	}

	// the ID is derived from date and title instead of the thumbnail's set-target ID
	release.entryId = deriveEntryId(release);

	return release;
}
/**
 * Build a release from a scene page (or tile) of the 'tour' layout.
 *
 * @param {{html: string, qu: object}} context - Raw HTML and query helpers.
 * @param {object} site - Site record, used to resolve relative trailer paths.
 * @param {string} [url] - Scene URL; stored on the release when given.
 * @returns {object} The scraped release.
 */
function scrapeSceneTour({ html, qu }, site, url) {
	const release = {};

	if (url) release.url = url;
	release.title = qu.q('.update_title, .video-title', true);
	release.description = qu.q('.latest_update_description, .video-summary', true);

	const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD');
	if (date) release.date = date;

	release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	release.tags = qu.all('.update_tags a, .tour_update_tags a', true);

	// the second image is the poster; the first one is only used as poster when it is the sole image
	const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)');
	if (poster || photo) release.poster = poster || photo;

	// don't use first photo when already used as fallback poster
	const gallery = poster ? [photo, ...photos] : photos;
	if (gallery.length > 0) release.photos = gallery;

	if (release.date) release.entryId = deriveEntryId(release);

	const trailerCode = qu.q('.update_image a', 'onclick');
	const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];

	if (trailerPath) {
		release.trailer = {
			src: /^http/.test(trailerPath) ? trailerPath : `${site.parameters?.media || site.url}${trailerPath}`,
		};
	}

	return release;
}
// Scrape an actor profile from a default-layout model page.
// The bio is read from "key: value" lines in `.stats p`, keyed by slugified label.
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	const bio = qu.texts('.stats p').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// skip lines without a value instead of throwing on value.trim(), as scrapeProfileT1 does
		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.age) profile.age = Number(bio.age);
	if (bio.height) profile.height = feetInchesToCm(bio.height);

	profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);

	const qReleases = ctxa(el, '.modelFeatures .modelfeature');
	profile.releases = scrapeAll(qReleases, site);

	return profile;
}
// Scrape an actor profile from a T1-layout model page.
// The bio is read from "key: value" paragraphs in the `.detail-div` blocks.
function scrapeProfileT1({ el, qu }, site) {
	const profile = {};

	const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
		const [key, value] = info.split(':');

		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.fun_fact) profile.description = bio.fun_fact;
	if (bio.age) profile.age = Number(bio.age);

	const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
	const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);

	// the imperial pattern matches any digit, so it also matches metric heights;
	// only fall back to it when no three-digit metric value was found
	if (heightMetric) profile.height = Number(heightMetric[1]);
	else if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));

	profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);

	const qReleases = ctxa(el, '.item-video');
	profile.releases = scrapeAllT1(qReleases, site);

	return profile;
}
// Scrape an actor profile from a tour-layout model page.
// The bio is read from "key: value" lines in `.model_bio`; releases come from `.update_block` elements.
function scrapeProfileTour({ el, qu }, site) {
	const profile = {};

	const bio = qu.texts('.model_bio').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// skip lines without a value instead of throwing on value.trim()
		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;
	if (bio.fun_fact) profile.description = bio.fun_fact;

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;

	// only assign when a leading number is present, Number(undefined) would store NaN
	const height = bio.height?.match(/^\d{2,3}/)?.[0];
	const weight = bio.weight?.match(/^\d{2,3}/)?.[0];

	if (height) profile.height = Number(height);
	if (weight) profile.weight = Number(weight);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
	if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;

	if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
	if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
	if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
	if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());

	profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);

	const qReleases = ctxa(el, '.update_block');
	profile.releases = qReleases.map((qRelease) => {
		const url = qRelease.qu.url('.update_image a[href]');
		const release = scrapeSceneTour(qRelease, site);

		// links to the signup or join page are not scene URLs
		if (!/\/(signup|join)/i.test(url)) release.url = url;
		release.entryId = deriveEntryId(release);
		release.site = site;

		return release;
	});

	return profile;
}
// Fetch a page of latest releases and dispatch to the scraper matching the site layout.
// Returns the HTTP status when the request fails.
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
	// a custom `latest` URL pattern takes precedence, then the T1 path, then the default path
	const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
		|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
		|| `${site.url}/categories/movies_${page}_d.html`;

	const res = await geta(url, '.modelfeature, .item-video, .updateItem');

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
	if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);

	return scrapeAll(res.items, site, accSiteReleases);
}
// Fetch a scene page and dispatch to the scraper matching the site layout.
// Returns the HTTP status when the request fails.
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
	// reuse the channel expression passed in by the caller when available
	const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
	const res = await get(url);

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
	if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);

	return scrapeScene(res.item, site, url, baseRelease);
}
// Fetch an actor profile page, trying the slug without separator first and the
// default slug second, then dispatch to the scraper matching the site layout.
// Returns the HTTP status when both requests fail.
async function fetchProfile(actorName, scraperSlug, site) {
	const actorSlugA = slugify(actorName, '');
	const actorSlugB = slugify(actorName);

	const t1 = site.parameters?.t1 ? 't1/' : '';

	// a custom `profile` URL pattern takes precedence over the default models path
	const getProfileUrl = actorSlug => (site.parameters?.profile
		? util.format(site.parameters.profile, actorSlug)
		: `${site.url}/${t1}models/${actorSlug}.html`);

	const res1 = await get(getProfileUrl(actorSlugA));
	const res = res1.ok ? res1 : await get(getProfileUrl(actorSlugB));

	if (!res.ok) return res.status;
	if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
	if (site.parameters?.tour) return scrapeProfileTour(res.item, site);

	return scrapeProfile(res.item, site);
}
module.exports = {
beforeFetchLatest: getChannelRegExp,
fetchLatest,
fetchScene,
fetchProfile,
beforeFetchLatest: getChannelRegExp,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,9 +3,9 @@
const { fetchProfile } = require('./mindgeek');
// Fetch an actor profile through the shared mindgeek scraper, using the 'iconmale' slug.
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'iconmale');
}
module.exports = {
fetchProfile: networkFetchProfile,
fetchProfile: networkFetchProfile,
};

View File

@@ -4,104 +4,104 @@ const bhttp = require('bhttp');
const { get, exa, ed } = require('../utils/q');
// Scrape the latest releases from an overview page; each scene is one table.
// Title and actors share one '|'-separated text, with the title first.
function scrapeLatest(html, site) {
	const scenes = site.slug === 'paintoy'
		? exa(html, '#articleTable table[cellspacing="2"]')
		: exa(html, 'body > table');

	return scenes.map(({ qu }) => {
		const release = {};

		const titleEl = qu.q('.galleryTitleText, .articleTitleText');
		const [title, ...actors] = titleEl.textContent.split('|');
		const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');

		// the entry ID is the second to last path segment
		const url = qu.url(titleEl, 'a');
		[release.entryId] = url.split('/').slice(-2);
		release.url = `${site.url}${url}`;

		if (date) {
			release.title = title.trim();
			release.date = date;
		} else {
			// title should contain date instead, not applicable in brief mode
			release.title = title.slice(title.indexOf(':') + 1).trim();
			release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
		}

		release.actors = actors.map(actor => actor.trim());

		// drop the trailing parenthesized part of the copy text
		const description = qu.q('.articleCopyText', true);
		if (description) release.description = description.slice(0, description.lastIndexOf('('));

		const duration = qu.dur('.articleCopyText a:nth-child(2)');
		if (duration) release.duration = duration;

		release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);

		// source without thumbnail suffix first, thumbnail as fallback; skip when no image was found
		const cover = qu.img('a img');

		if (cover) {
			release.covers = [[
				cover.replace('_thumbnail', ''),
				cover,
			]];
		}

		return release;
	});
}
// Scrape a release from a scene page.
// Title and actors share one '|'-separated text, with the title first.
function scrapeScene({ qu }, site) {
	const release = {};

	const titleEl = qu.q('.articleTitleText');
	const [title, ...actors] = titleEl.textContent.split('|');

	// the entry ID is the second to last path segment
	const url = qu.url(titleEl, 'a');
	[release.entryId] = url.split('/').slice(-2);
	release.url = `${site.url}${url}`;

	release.title = title.trim();
	release.description = qu.q('.articleCopyText', true);

	release.actors = actors.map(actor => actor.trim());
	release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
	release.duration = qu.dur('.articlePostDateText a:nth-child(2)');

	// first matching image is the cover, the remainder are photos
	const [cover, ...photos] = qu.imgs('img[src*="images"]');
	release.covers = [cover];
	release.photos = photos;

	release.poster = qu.poster();

	const trailer = qu.trailer();
	if (trailer) release.trailer = { src: trailer };

	return release;
}
// Fetch a page of latest releases in brief mode.
// Returns null when the response status is not 200.
async function fetchLatest(site, page = 1) {
	const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
		? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
		: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;

	const res = await bhttp.get(url, {
		type: 'brief',
		page,
	});

	if (res.statusCode === 200) {
		// paintoy is scraped from the raw body, the other sites from the body's html property
		return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
	}

	return null;
}
// Fetch and scrape a scene page; returns the HTTP status when the request fails.
async function fetchScene(url, site) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, site) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -9,116 +9,116 @@ const slugify = require('../utils/slugify');
const { fetchApiLatest, fetchScene } = require('./gamma');
// Read the API tokens from the site's front page markup.
// `time` is the numeric "aet" value, `token` the reversed "ah" value.
// Throws when either value is missing from the page.
async function fetchToken(site) {
	const res = await bhttp.get(site.url);
	const html = res.body.toString();

	const time = html.match(/"aet":\d+/)[0].split(':')[1];
	const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
	const token = ah.split('').reverse().join('');

	return { time, token };
}
// Fetch the stage names of the models attached to a scene.
// Returns an empty array when the API call does not succeed.
async function fetchActors(entryId, site, { token, time }) {
	const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200 && res.body.status === true) {
		// each collection entry wraps its model in a nested collection, the first item holds the stage name
		return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
	}

	return [];
}
// Resolve the trailer URL from the redirect location of the download endpoint.
// Returns null when there is no redirect or the request fails.
async function fetchTrailerLocation(entryId, site) {
	const url = `${site.url}/api/download/${entryId}/hd1080/stream`;

	try {
		// the redirect target is the trailer URL, so the redirect must not be followed
		const res = await bhttp.get(url, {
			followRedirects: false,
		});

		if (res.statusCode === 302) {
			return res.headers.location;
		}
	} catch (error) {
		logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
	}

	return null;
}
// Build a release from an API scene object, fetching actors and trailer in parallel.
async function scrapeScene(scene, site, tokens) {
	const release = {
		entryId: scene.id,
		title: scene.title,
		duration: scene.length,
		site,
		meta: {
			tokens, // attach tokens to reduce number of requests required for deep fetching
		},
	};

	release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
	release.date = new Date(scene.sites.collection[scene.id].publishDate);
	release.poster = scene._resources.primary[0].url;

	if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
	if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);

	const [actors, trailer] = await Promise.all([
		fetchActors(release.entryId, site, tokens),
		fetchTrailerLocation(release.entryId, site),
	]);

	release.actors = actors;
	if (trailer) release.trailer = { src: trailer, quality: 1080 };

	return release;
}
// Scrape all scenes, at most 10 concurrently.
function scrapeLatest(scenes, site, tokens) {
	return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
}
// Fetch a page of 50 latest releases from the site API, or through the gamma
// scraper when the site is flagged with `useGamma`.
// Returns null when the API call does not succeed.
async function fetchLatest(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const { time, token } = await fetchToken(site);

	// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
	const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200 && res.body.status) {
		return scrapeLatest(res.body.response.collection, site, { time, token });
	}

	return null;
}
// Fetch a single scene from the site API, or through the gamma scraper when
// the site is flagged with `useGamma`.
// Returns null when the API call does not succeed.
async function fetchNetworkScene(url, site, release) {
	if (site.parameters?.useGamma) {
		return fetchScene(url, site, release);
	}

	// use attached tokens when deep fetching; a base release without meta falls back to fresh tokens
	const { time, token } = release?.meta?.tokens || await fetchToken(site);
	const { pathname } = new URL(url);
	const entryId = pathname.split('/')[2];

	const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
	const res = await bhttp.get(apiUrl);

	if (res.statusCode === 200 && res.body.status) {
		return scrapeScene(res.body.response.collection[0], site, { time, token });
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene: fetchNetworkScene,
fetchLatest,
fetchScene: fetchNetworkScene,
};

View File

@@ -3,83 +3,83 @@
const { get, initAll } = require('../utils/qu');
// Scrape the latest releases from a tour page.
// `scenes` and `dates` are parallel lists; the date for a scene is looked up by index.
function scrapeLatest(scenes, dates, site) {
	return scenes.map(({ qu }, index) => {
		const release = {};

		const path = qu.url('a');
		release.url = `${site.url}/visitors/${path}`;
		release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];

		if (dates?.[index]) {
			release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
		}

		release.description = qu.q('tbody tr:nth-child(3) font', true);

		// the info line does not always mention a duration, only convert when "N min" is present
		const infoLine = qu.q('font[color="#663366"]', true);
		const minutes = infoLine?.match(/(\d+) min/)?.[1];
		if (minutes) release.duration = Number(minutes) * 60;

		// skip the poster when no image was found, rather than building a URL ending in 'null'
		const poster = qu.img('img[src*="photos/"][width="400"]');
		if (poster) release.poster = `${site.url}/visitors/${poster}`;

		release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);

		return release;
	});
}
// Scrape a release from a scene page; entry ID is derived from the trailer path in the URL.
function scrapeScene({ qu }, url, site) {
	const release = { url };

	// same pattern as scrapeLatest, so both produce the same ID and non-HD paths no longer throw
	const { pathname } = new URL(url);
	release.entryId = pathname.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];

	const actor = qu.q('font[color="#990033"] strong', true);
	release.actors = actor ? [actor] : [];

	const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
	const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');

	// only include the qualities that are actually linked on the page
	const trailers = [
		{ path: hdTrailer, quality: 1080 },
		{ path: sdTrailer, quality: 270 },
	]
		.filter(({ path }) => path)
		.map(({ path, quality }) => ({
			src: `${site.url}/visitors/videos/${path}`,
			quality,
		}));

	if (trailers.length > 0) release.trailer = trailers;

	return release;
}
// Fetch a tour page (page number zero-padded to two digits) and scrape its scenes.
// Returns the HTTP status when the request fails.
async function fetchLatest(site, page = 1) {
	const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	const { el } = res.item;

	// scenes and dates are selected separately and matched by index in scrapeLatest
	const scenes = initAll(el, 'table[width="880"]');
	const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');

	return scrapeLatest(scenes, dates, site);
}
// Fetch and scrape a scene page; returns the HTTP status when the request fails.
async function fetchScene(url, site) {
	const res = await get(url);

	if (res.ok) {
		return scrapeScene(res.item, url, site);
	}

	return res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -13,406 +13,406 @@ const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
// Fetch a photo album page and return its body as a string.
async function fetchPhotos(url) {
	const res = await bhttp.get(url);

	return res.body.toString();
}
// Extract photo sources from an album page.
// Each photo is a list of candidate URLs, ending with the original thumbnail source.
function scrapePhotos(html, type) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
		.toArray()
		.map((photoElement) => {
			const src = $(photoElement).attr('src');

			// high res often available in alternative directories, but not always, provide original as fallback
			if (type === 'caps') {
				return [
					src.replace('capthumbs/', 'caps/'),
					src,
				];
			}

			return [
				src.replace('thumbs/', 'photos/'),
				src.replace('thumbs/', '1600watermarked/'),
				src.replace('thumbs/', '1280watermarked/'),
				src.replace('thumbs/', '1024watermarked/'),
				src,
			];
		});

	return photos;
}
// Scrape photos from the legacy trial gallery, following its pagination links.
// Falls back to screencaps when a 'highres' album yields no photos.
async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
	const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;

	logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);

	const html = await fetchPhotos(albumUrl);
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// don't add first URL to pages to prevent unnecessary duplicate request
	const photos = scrapePhotos(html, type);
	const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));

	// pages is always an array, an empty one simply yields no additional photos
	const otherPhotos = await Promise.map(pages, async (pageX) => {
		const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
		const pageHtml = await fetchPhotos(pageUrl);

		return scrapePhotos(pageHtml, type);
	}, {
		concurrency: 2,
	});

	const allPhotos = photos.concat(otherPhotos.flat());

	if (allPhotos.length === 0 && type === 'highres') {
		// photos not available, try for screencaps instead
		return getPhotosLegacy(entryId, site, 'caps', 1);
	}

	return allPhotos;
}
// Scrape photo URLs from the `ptx["<quality>"]` lines of a gallery page, grouped by quality.
// Falls back from highres photos to screencaps, and from screencaps to the legacy scraper.
async function getPhotos(entryId, site, type = 'highres', page = 1) {
	const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;

	const res = await bhttp.get(albumUrl);
	const html = res.body.toString();

	const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
	const sources = sourceLines.reduce((acc, sourceLine) => {
		const quality = sourceLine.match(/\["\w+"\]/)[0].slice(2, -2);
		const sourceStart = sourceLine.match(/\/trial|\/tour|\/content/);

		if (!sourceStart) return acc;

		// an empty slice means the line contains no .jpg after the source start
		const source = sourceLine.slice(sourceStart.index, sourceLine.indexOf('.jpg') + 4);

		if (!source) return acc;
		if (!acc[quality]) acc[quality] = [];

		acc[quality].push(`${site.url}${source}`);

		return acc;
	}, {});

	// first non-empty list among the given qualities, in order of preference
	const firstAvailable = qualities => qualities
		.map(quality => sources[quality])
		.find(list => list && list.length > 0);

	if (type === 'highres') {
		const photos = firstAvailable(['1600', '1280', '1024', 'Thumbs']);

		if (photos) return photos;

		// no photos available, try for screencaps instead
		return getPhotos(entryId, site, 'caps', 1);
	}

	const caps = firstAvailable(['jpg', 'Video Cap Thumbs']);

	if (caps) return caps;

	// no screencaps available either, try legacy scraper just in case
	return getPhotosLegacy(entryId, site, 'highres', 1);
}
/**
 * Extract the scene's entry ID from the raw page HTML.
 *
 * Looks for a `showtagform(<digits>)` call first, then for the first run of
 * digits following a `setid:"` marker.
 *
 * @param {string} html - raw page HTML
 * @returns {string|null} the entry ID as a string of digits, or null when neither marker yields one
 */
function getEntryId(html) {
	const tagFormMatch = html.match(/showtagform\((\d+)\)/);

	if (tagFormMatch) {
		return tagFormMatch[1];
	}

	const setIdIndex = html.indexOf('setid:"');

	// indexOf signals "not found" with -1 (truthy), and a match at position 0 is falsy
	if (setIdIndex === -1) {
		return null;
	}

	const commaIndex = html.indexOf(',', setIdIndex);
	const setIdSnippet = html.slice(setIdIndex, commaIndex === -1 ? undefined : commaIndex);

	return setIdSnippet.match(/\d+/)?.[0] ?? null;
}
/**
 * Convert a list of scene tiles into release objects.
 *
 * @param {Array<{el: object, qu: object}>} scenes - per-scene element plus its query helper
 * @param {object} site - site record; `site.url` prefixes relative image paths and is used as referer
 * @returns {object[]} releases with entryId, url, title, date, actors, poster, photos and optionally teaser
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;

		release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
		release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
		release.date = qu.date('.update_date', 'MM/DD/YYYY');

		release.actors = qu.all('.update_models a', true);

		const dvdPhotos = qu.imgs('.dvd_preview_thumb');
		const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;

		// first image becomes the poster, the remainder the photo set
		[release.poster, ...release.photos] = dvdPhotos.length
			? dvdPhotos
			: Array.from({ length: photoCount }, (value, index) => {
				// thumbnails carry indexed attributes (src0_1x, src0, ...); fall back to the default image source
				const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');

				return src ? {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					referer: site.url,
				} : null;
			}).filter(Boolean);

		const teaserScript = qu.html('script');

		if (teaserScript) {
			const teaserStart = teaserScript.indexOf('http');
			const teaserEnd = teaserScript.indexOf('.mp4');

			// only accept a teaser when both markers are present and in order; a -1 index would slice garbage
			if (teaserStart !== -1 && teaserEnd > teaserStart) {
				release.teaser = { src: teaserScript.slice(teaserStart, teaserEnd + 4) };
			}
		}

		return release;
	});
}
/**
 * Scrape the "coming soon" carousel into release objects.
 *
 * @param {string} html - raw HTML of the page containing `#coming_soon_carousel`
 * @param {object} site - site record; `site.url` prefixes relative poster paths
 * @returns {object[]} upcoming releases (url and rating are always null)
 */
function scrapeUpcoming(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('#coming_soon_carousel').find('.table').toArray();

	return scenesElements.map((element) => {
		const entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];

		// Order matters here: the children are detached from the details element first,
		// so that the parent's remaining text is the title and the detached children's text the actors.
		const details = $(element).find('.update_details_comingsoon')
			.eq(1)
			.children()
			.remove();

		const title = details
			.end()
			.text()
			.trim();

		const actors = details
			.text()
			.trim()
			.split(', ');

		// slice(7) drops a fixed-length prefix before the date
		// NOTE(review): the prefix text is not visible here — confirm it is always 7 characters
		const date = moment
			.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
			.toDate();

		const photoElement = $(element).find('a img.thumbs');
		const posterPath = photoElement.attr('src');
		const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;

		// the teaser URL lives in the inline script that mentions the thumbnail container's class
		const videoClass = $(element).find('.update_thumbnail div').attr('class');
		const videoScript = $(element).find(`script:contains(${videoClass})`).html();
		const teaser = videoScript.slice(videoScript.indexOf('https://'), videoScript.indexOf('.mp4') + 4);

		return {
			url: null,
			entryId,
			title,
			date,
			actors,
			poster,
			teaser: {
				src: teaser,
			},
			rating: null,
			site,
		};
	});
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{html: string, qu: object}} item - raw page HTML plus the page's query helper
 * @param {string} url - scene URL, stored on the release
 * @param {object} site - site record; reads `site.url` and `site.slug`
 * @param {object} include - flags selecting optional media; reads `include.trailer` and `include.photos`
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene({ html, qu }, url, site, include) {
	const release = { url, site };

	release.entryId = getEntryId(html);
	release.title = qu.q('.title_bar_hilite', true);
	release.description = qu.q('.update_description', true);

	release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');

	release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
	release.tags = qu.all('.update_tags a', true);

	const posterPath = html.match(/useimage = "(.*)"/)?.[1];

	if (posterPath) {
		release.poster = {
			src: /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`,
			referer: site.url,
		};
	}

	// trailers are skipped entirely for the manuelferrara site
	if (include.trailer && site.slug !== 'manuelferrara') {
		const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));

		if (trailerLines.length) {
			release.trailer = trailerLines.map((trailerLine) => {
				const src = trailerLine.match(/path:"([\w:/.&=?%]+)"/)?.[1];
				const quality = trailerLine.match(/movie_height:'(\d+)/)?.[1];

				return src && {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					// a reported height of 558 is normalised to 540
					quality: quality && Number(quality.replace('558', '540')),
				};
			}).filter(Boolean);
		}
	}

	if (include.photos) release.photos = await getPhotos(release.entryId, site);

	if (qu.exists('.update_dvds a')) {
		release.movie = {
			url: qu.url('.update_dvds a'),
			title: qu.q('.update_dvds a', true),
		};
	}

	// the character class strips whitespace, '|' and every letter of "Avg Rating:", leaving the number
	const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
	if (stars) release.stars = stars;

	return release;
}
/**
 * Scrape a DVD/movie page, including the scenes listed on it.
 *
 * @param {{el: object, qu: object}} item - page root element plus its query helper
 * @param {string} url - movie URL, stored on the result
 * @param {object} site - site record, passed through to the scene scraper
 * @returns {object} movie fields plus `scenes`, sorted by ascending date
 */
function scrapeMovie({ el, qu }, url, site) {
	const movie = { url, site };

	movie.entryId = qu.q('.dvd_details_overview .rating_box')?.dataset.id;
	movie.title = qu.q('.title_bar span', true);
	movie.covers = qu.urls('#dvd-cover-flip > a');
	movie.channel = slugify(qu.q('.update_date a', true), '');

	const sceneQus = ctxa(el, '.dvd_details');
	const scenes = scrapeAll(sceneQus, site);

	// every scene references the same movie object, so the date assigned below is visible on them too
	const curatedScenes = scenes
		?.map(scene => ({ ...scene, movie }))
		.sort((sceneA, sceneB) => sceneA.date - sceneB.date);

	// the movie takes the date of its earliest scene; a movie without scenes gets no date
	movie.date = curatedScenes?.[0]?.date;

	return {
		...movie,
		...(curatedScenes && { scenes: curatedScenes }),
	};
}
/**
 * Scrape a model page into an actor profile.
 *
 * @param {string} html - raw HTML of the model page
 * @param {string} url - page URL (currently unused, kept for the call signature)
 * @param {string} actorName - name stored on the profile
 * @returns {object} profile with name, releases and whichever bio fields could be parsed
 */
function scrapeProfile(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	const bio = document.querySelector('.model_bio').textContent;
	const avatarEl = document.querySelector('.model_bio_pic img');

	const profile = {
		name: actorName,
	};

	const heightString = bio.match(/\d+ feet \d+ inches/);
	// the "Age:" label is followed by either a two-digit age or a full birth date
	const ageString = bio.match(/Age:\s*(\d{2})/);
	const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
	const measurementsString = bio.match(/\w+-\d+-\d+/);

	if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
	if (ageString) profile.age = Number(ageString[1]);

	if (heightString) profile.height = heightToCm(heightString[0]);

	if (measurementsString) {
		const [bust, waist, hip] = measurementsString[0].split('-');

		// bust stays a string because the pattern allows letters in it; waist and hip are numeric
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (avatarEl) {
		// candidate sources in the order they are tried, largest-looking variant first
		const avatarSources = [
			avatarEl.getAttribute('src0_3x'),
			avatarEl.getAttribute('src0_2x'),
			avatarEl.getAttribute('src0_1x'),
			avatarEl.getAttribute('src0'),
			avatarEl.getAttribute('src'),
		].filter(Boolean);

		if (avatarSources.length) profile.avatar = avatarSources;
	}

	profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);

	return profile;
}
/**
 * Fetch and scrape one page of the site's latest scenes.
 *
 * @param {object} site - site record; `site.parameters.latest` may hold a format string for the page URL
 * @param {number} [page=1] - overview page number
 * @returns {Promise<object[]|*>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.latest
		? util.format(site.parameters.latest, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await geta(url, '.update_details');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch and scrape the site's upcoming scenes.
 *
 * @param {object} site - site record; `site.parameters.upcoming` may be `false` to disable, or a URL to override the default
 * @returns {Promise<object[]|number|null>} upcoming releases, the HTTP status code on failure, or null when disabled
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming === false) return null;

	const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeUpcoming(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL
 * @param {object} site - site record
 * @param {object} baseRelease - unused here, kept for the call signature
 * @param {*} preflight - unused here, kept for the call signature
 * @param {object} include - media flags forwarded to the scene scraper
 * @returns {Promise<object|*>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, site, baseRelease, preflight, include) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
/**
 * Fetch a movie page and scrape it.
 *
 * @param {string} url - movie URL
 * @param {object} site - site record
 * @returns {Promise<object|*>} the scraped movie, or the response status when the request failed
 */
async function fetchMovie(url, site) {
	const res = await get(url);

	return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
/**
 * Look up an actor's profile page and scrape it.
 *
 * Model pages are tried under two slug variants: first with '-' as the
 * separator passed to slugify, then with an empty separator.
 *
 * @param {string} actorName - actor's display name
 * @returns {Promise<object|null>} the scraped profile, or null when neither URL responds with 200
 */
async function fetchProfile(actorName) {
	const actorSlugA = slugify(actorName, '-');
	const actorSlugB = slugify(actorName, '');

	const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
	const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;

	const resA = await bhttp.get(urlA);

	if (resA.statusCode === 200) {
		return scrapeProfile(resA.body.toString(), urlA, actorName);
	}

	// both variants collapse to the same URL for some names; no point requesting it twice
	if (urlB === urlA) {
		return null;
	}

	const resB = await bhttp.get(urlB);

	if (resB.statusCode === 200) {
		return scrapeProfile(resB.body.toString(), urlB, actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchMovie,
fetchProfile,
fetchUpcoming,
fetchScene,
fetchLatest,
fetchMovie,
fetchProfile,
fetchUpcoming,
fetchScene,
};

View File

@@ -7,184 +7,184 @@ const moment = require('moment');
const { feetInchesToCm } = require('../utils/convert');
// two-letter shoot ID prefix -> site slug
const siteMapByKey = {
	PF: 'pornfidelity',
	TF: 'teenfidelity',
	KM: 'kellymadison',
};

// inverse lookup: site slug -> two-letter shoot ID prefix
const siteMapBySlug = Object.fromEntries(Object.entries(siteMapByKey).map(([key, slug]) => [slug, key]));
/**
 * Concatenate the trimmed text of all direct text nodes in a node list,
 * ignoring element children.
 *
 * @param {Iterable<{nodeType: number, textContent: string}>} parentEl - a node collection (callers pass `childNodes`)
 * @returns {string} trimmed text-node contents joined without a separator
 */
function extractTextNode(parentEl) {
	return Array.from(parentEl)
		.filter(node => node.nodeType === 3) // 3 === Node.TEXT_NODE
		.map(node => node.textContent.trim())
		.join('');
}
/**
 * Scrape the network-wide episode overview, keeping only episodes of the requested site.
 *
 * @param {string} html - HTML fragment containing `.episode` cards
 * @param {object} site - site record; `site.slug` selects the episodes, `site.url` prefixes scene URLs
 * @returns {object[]} releases belonging to the given site
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	return Array.from(document.querySelectorAll('.episode'), (scene) => {
		const release = { site };

		release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();

		// the first two word characters of the shoot ID identify the site
		const siteId = release.shootId.match(/\w{2}/)[0];
		const siteSlug = siteMapByKey[siteId];

		if (site.slug !== siteSlug) {
			// using generic network overview, scene is not from the site we want
			return null;
		}

		// this link provides both the entry ID (trailing digits of its href) and the duration text
		const durationEl = scene.querySelector('.content a');

		[release.entryId] = durationEl.href.match(/\d+$/);
		release.url = `${site.url}/episodes/${release.entryId}`;

		release.title = scene.querySelector('h5 a').textContent.trim();

		const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
		const dateString = extractTextNode(dateEl);

		// dates are parsed with or without a year
		release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
		release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);

		const durationString = durationEl.textContent.match(/\d+ min/);
		if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60;

		release.poster = scene.querySelector('.card-img-top').dataset.src;
		release.teaser = {
			src: scene.querySelector('video').src,
		};

		return release;
	}).filter(scene => scene);
}
/**
 * Scrape an episode page into a release object.
 *
 * @param {string} html - raw HTML of the episode page
 * @param {string} url - episode URL; its first run of digits becomes the entry ID when no base release is given
 * @param {object} site - site record, stored on the release
 * @param {object} [baseRelease] - release from the overview, if any; decides whether the poster is stored as poster or as photo
 * @returns {object} the scraped release
 */
function scrapeScene(html, url, site, baseRelease) {
	const { document } = new JSDOM(html).window;
	const release = { url, site };

	const titleEl = document.querySelector('.card-header.row h4').childNodes;
	const titleString = extractTextNode(titleEl);

	if (!baseRelease) [release.entryId] = url.match(/\d+/);

	// the heading ends in "- <Channel> #<episode>"; strip that suffix and the "Trailer: " prefix
	release.title = titleString
		.replace('Trailer: ', '')
		.replace(/- \w+ #\d+$/, '')
		.trim();

	release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();

	const episode = titleString.match(/#\d+$/)[0];
	const siteKey = siteMapBySlug[release.channel];

	release.shootId = `${siteKey} ${episode}`;
	release.description = document.querySelector('p.card-text').textContent.trim();

	const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
	const dateString = extractTextNode(dateEl);

	release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
	release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

	const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
	const durationString = durationRaw.match(/\d+:\d+/)[0];

	// the page shows mm:ss; prefix an hours component for moment
	release.duration = moment.duration(`00:${durationString}`).asSeconds();

	// serialise the body once instead of on every lookup
	const bodyHtml = document.body.innerHTML;
	const trailerStart = bodyHtml.indexOf('player.updateSrc');
	const trailerString = bodyHtml.slice(trailerStart, bodyHtml.indexOf(');', trailerStart));

	const trailers = trailerString.match(/https:\/\/.*\.mp4/g);
	const resolutions = (trailerString.match(/res: '\d+'/g) || []).map((res) => {
		const resolution = Number(res.match(/\d+/)[0]);

		return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
	});

	// pages without trailer sources leave release.trailer unset
	if (trailers) {
		release.trailer = trailers.map((trailer, index) => ({
			src: trailer,
			quality: resolutions[index],
		}));
	}

	const posterPrefix = html.indexOf('poster:');
	const poster = html.slice(html.indexOf('http', posterPrefix), html.indexOf('.jpg', posterPrefix) + 4);

	// when the overview already supplied a poster, keep this image as an additional photo
	if (baseRelease?.poster) release.photos = [poster];
	else release.poster = poster;

	return release;
}
/**
 * Scrape a model page into an actor profile.
 *
 * @param {string} html - raw HTML of the model page
 * @param {string} actorName - name stored on the profile
 * @returns {object} profile with name and whichever bio fields are present
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// the bio table has labels in the first column (last character dropped) and values in the second
	const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
	const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
	const bio = Object.fromEntries(bioKeys.map((key, index) => [key, bioValues[index]]));

	if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

	if (bio.Height) {
		const heightParts = bio.Height.match(/\d+/g);

		// skip heights that contain no digits instead of throwing on the destructure
		if (heightParts) {
			const [feet, inches] = heightParts;
			profile.height = feetInchesToCm(feet, inches);
		}
	}

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	// the avatar is the first image whose source contains 'model'
	const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.match('model'));

	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
/**
 * Fetch one page of the network-wide episode overview and scrape the episodes of the given site.
 *
 * @param {object} site - site record used to select and label episodes
 * @param {number} [page=1] - overview page number
 * @returns {Promise<object[]|null>} scraped releases, or null when the request did not succeed
 */
async function fetchLatest(site, page = 1) {
	const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
	const res = await bhttp.get(url, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	// the response body is an object carrying a status flag and the HTML fragment
	if (res.statusCode === 200 && res.body.status === 'success') {
		return scrapeLatest(res.body.html, site);
	}

	return null;
}
/**
 * Fetch an episode page and scrape it.
 *
 * The request always goes to www.kellymadison.com, using only the path of the given URL.
 *
 * @param {string} url - episode URL on any of the network's sites
 * @param {object} site - site record
 * @param {object} [baseRelease] - release from the overview, forwarded to the scraper
 * @returns {Promise<object>} the scraped release
 */
async function fetchScene(url, site, baseRelease) {
	const { pathname } = new URL(url);

	const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	return scrapeScene(res.body.toString(), url, site, baseRelease);
}
/**
 * Fetch an actor's model page and scrape it.
 *
 * @param {string} actorName - actor's display name; lowercased and hyphenated to form the page slug
 * @returns {Promise<object|null>} the scraped profile, or null when the page does not respond with 200
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
	const res = await bhttp.get(`https://www.kellymadison.com/models/${actorSlug}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -5,116 +5,116 @@ const cheerio = require('cheerio');
const moment = require('moment');
/**
 * Scrape a shoot overview page into release objects.
 *
 * @param {string} html - raw HTML of the overview page
 * @param {object} site - site record, stored on each release
 * @returns {object[]} one release per `.shoot` element
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.shoot-list .shoot').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.shoot-thumb-title a');
		const href = sceneLinkElement.attr('href');
		const url = `https://kink.com${href}`;
		// the shoot ID is the second path segment of the link
		const shootId = href.split('/')[2];
		const title = sceneLinkElement.text().trim();

		const poster = $(element).find('.adimage').attr('src');
		const photos = $(element).find('.rollover .roll-image').map((photoIndex, photoElement) => $(photoElement).attr('data-imagesrc')).toArray();

		const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
		const actors = $(element).find('.shoot-thumb-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		const stars = $(element).find('.average-rating').attr('data-rating') / 10;

		const timestamp = $(element).find('.video span').text();
		const timestampComponents = timestamp.split(':'); // fix mixed hh:mm:ss and mm:ss format
		const duration = moment.duration(timestampComponents.length > 2 ? timestamp : `0:${timestamp}`).asSeconds();

		return {
			url,
			shootId,
			entryId: shootId,
			title,
			actors,
			date,
			photos,
			poster,
			rating: {
				stars,
			},
			duration,
			site,
		};
	});
}
/**
 * Scrape a shoot page into a release object.
 *
 * @param {string} html - raw HTML of the shoot page
 * @param {string} url - shoot URL, stored on the release
 * @param {string} shootId - shoot ID, stored as both shootId and entryId
 * @param {object} ratingRes - response of the ratings request; `ratingRes.body.average` becomes the star rating
 * @param {object} site - site record, stored on the release
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, shootId, ratingRes, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// the title is read from the favorite button's data attribute; the heading text itself
	// also contains the 'like'-heart glyph (\ue800), which would need stripping
	const title = $('h1.shoot-title span.favorite-button').attr('data-title');
	const actorsRaw = $('.shoot-info p.starring');

	const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
	const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
	const trailerPoster = $('.player video#kink-player').attr('poster');

	// the date sits in the element directly before the "starring" paragraph
	const dateString = actorsRaw
		.prev()
		.text()
		.trim()
		.replace('Date: ', '');
	const date = moment.utc(dateString, 'MMMM DD, YYYY').toDate();

	const actors = actorsRaw.find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
	const description = $('.shoot-info .description').text().trim();

	const { average: stars } = ratingRes.body;

	// the channel slug is the second path segment of the logo link, with whitespace removed
	const siteName = $('.shoot-logo a').attr('href').split('/')[2];
	const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();

	const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	const channel = siteSlug;

	return {
		url,
		shootId,
		entryId: shootId,
		title,
		date,
		actors,
		description,
		photos,
		poster: trailerPoster,
		trailer: {
			src: trailerVideo,
			quality: 480,
		},
		rating: {
			stars,
		},
		tags,
		site,
		channel,
	};
}
/**
 * Fetch one page of the site's latest scenes and scrape it.
 *
 * @param {object} site - Site to fetch from; `site.url` is used as base URL.
 * @param {number} [page=1] - Page number of the latest overview.
 * @returns {Promise<object[]>} Whatever scrapeLatest returns for the fetched page.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/latest/page/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page together with its rating and scrape both into a release.
 *
 * @param {string} url - Absolute scene URL; the shoot ID is index 2 of its '/'-split pathname.
 * @param {object} site - Site the scene belongs to; passed on to scrapeScene.
 * @returns {Promise<object>} The scraped release.
 */
async function fetchScene(url, site) {
	const shootId = new URL(url).pathname.split('/')[2];

	// the page and the rating are independent requests, so they are issued in parallel
	const [res, ratingRes] = await Promise.all([
		bhttp.get(url),
		bhttp.get(`https://kink.com/api/ratings/${shootId}`),
	]);

	return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -8,201 +8,201 @@ const moment = require('moment');
const slugify = require('../utils/slugify');
/**
 * Split a raw scene title into the actual title and a trailing shoot ID.
 *
 * The last space-separated word is treated as shoot ID when it contains one of
 * the known studio prefixes followed by digits (e.g. `GP123`); that word is
 * then dropped from the title.
 *
 * @param {string} originalTitle - Title as found on the page.
 * @returns {{ shootId: (string|null), title: string }} The matched ID (or null) and the remaining title.
 */
function extractTitle(originalTitle) {
	const titleComponents = originalTitle.split(' ');
	const lastComponent = titleComponents[titleComponents.length - 1];
	const sceneIdMatch = lastComponent.match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes

	if (!sceneIdMatch) {
		return { shootId: null, title: originalTitle };
	}

	return {
		shootId: sceneIdMatch[0],
		title: titleComponents.slice(0, -1).join(' '),
	};
}
/**
 * Resolve the poster URL for a scene thumbnail.
 *
 * Prefers the URL embedded in the element's inline `style`. Otherwise a random
 * entry from the JSON list in `data-casting` is used: a number is taken as-is,
 * a `'a-b'` string is treated as a range from which a random integer is drawn.
 *
 * @param {object} posterElement - Wrapped element exposing `attr(name)`.
 * @param {string} sceneId - Scene identifier used in the casting URL.
 * @returns {(string|null)} Poster URL, or null when no poster can be derived.
 */
function getPoster(posterElement, sceneId) {
	const posterStyle = posterElement.attr('style');

	if (posterStyle) {
		// everything between the first '(' and the final character, i.e. the argument of url(...)
		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
	}

	const posterRange = posterElement.attr('data-casting');
	// fall back to an empty list so a missing attribute yields null instead of a TypeError
	const posterRangeData = posterRange ? JSON.parse(posterRange) : [];
	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

	if (!posterTimeRange) {
		return null;
	}

	if (typeof posterTimeRange === 'number') {
		// poster time is already a single time value
		return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
	}

	// NOTE(review): the order of the two bounds in the source data is not visible here, so both orders are accepted
	const bounds = posterTimeRange.split('-').map(Number);
	const low = Math.min(...bounds);
	const high = Math.max(...bounds);
	const posterTime = Math.floor(Math.random() * (high - low + 1)) + low;

	return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
/**
 * Scrape the overview of latest scenes into a list of releases.
 *
 * @param {string} html - Markup of the overview page.
 * @param {object} site - Site the page belongs to; attached to every release.
 * @returns {object[]} One release per `.thumbnails > div` element.
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('.thumbnails > div').toArray();

	return scenesElements.map((element) => {
		const sceneLinkElement = $(element).find('.thumbnail-title a');
		const url = sceneLinkElement.attr('href');

		const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
		const { shootId, title } = extractTitle(originalTitle);
		const entryId = new URL(url).pathname.split('/')[2];

		const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();

		const sceneId = $(element).attr('data-content');
		const posterElement = $(element).find('.thumbnail-avatar');

		const poster = getPoster(posterElement, sceneId);

		return {
			url,
			shootId,
			entryId,
			title,
			date,
			poster,
			site,
		};
	});
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - Markup of the scene's gallery or screenshots page.
 * @param {string} url - Scene URL; the entry ID is index 2 of its '/'-split pathname.
 * @param {object} site - Unused here; kept for the common scraper signature.
 * @param {boolean} useGallery - Read photos from `.gallery` when true, from `.screenshots` otherwise.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene(html, url, site, useGallery) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// the player configuration is embedded as a JSON object in the inline 'new WatchPage' script
	const playerObject = $('script:contains("new WatchPage")').html();
	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
	const data = playerData && JSON.parse(playerData);

	const release = { url };

	const originalTitle = $('h1.watchpage-title').text().trim();
	const { shootId, title } = extractTitle(originalTitle);

	release.shootId = shootId;
	release.entryId = new URL(url).pathname.split('/')[2];

	release.title = title;
	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

	// the description rows are taken by position: actors, tags, description
	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();

	release.description = $('meta[name="description"]')?.attr('content')?.trim()
		|| (descriptionElement && $(descriptionElement).find('dd').text().trim());

	release.actors = $(actorsElement)
		.find('a[href*="com/model"]')
		.map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	const photos = useGallery
		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();

	release.photos = photos.map((source) => {
		// source without parameters sometimes serves larger preview photo
		const { origin, pathname } = new URL(source);

		// the parameterized thumbnail is deliberately not kept as fallback, usually enough high res photos are available
		return `${origin}${pathname}`;
	});

	// a missing style attribute must fall through to the photo fallback instead of throwing
	const posterStyle = $('#player').attr('style');
	const poster = posterStyle ? posterStyle.slice(posterStyle.indexOf('(') + 1, -1) : null;

	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback

	if (data) {
		const qualityMap = {
			web: 240,
			vga: 480,
			hd: 720,
			'1080p': 1080,
		};

		release.trailer = data.clip.qualities.map(trailer => ({
			src: trailer.src,
			type: trailer.type,
			// unknown quality labels are passed through unchanged
			quality: qualityMap[trailer.quality] || trailer.quality,
		}));
	}

	const studioName = $('.watchpage-studioname').first().text().trim();
	release.studio = slugify(studioName, '');

	return release;
}
/**
 * Scrape a model page into an actor profile.
 *
 * @param {string} html - Markup of the model page.
 * @param {string} _url - Unused; URL the page was fetched from.
 * @param {string} actorName - Name to store on the profile.
 * @returns {Promise<object>} Profile with `name`, `birthPlace` and, when present, `age` and `avatar`.
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const profile = {
		name: actorName,
	};

	const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
	// every description row is split into [key, value] on ':'
	const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));

	const bio = Object.fromEntries(entries
		.filter(entry => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
		.map(([key, value]) => [key.trim(), value.trim()]));

	profile.birthPlace = bio.Nationality;

	if (bio.Age) profile.age = bio.Age;
	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
/**
 * Fetch one page of new videos and scrape it.
 *
 * @param {object} site - Site to fetch from; `site.url` is used as base URL.
 * @param {number} [page=1] - Page number of the overview.
 * @returns {Promise<object[]>} Releases scraped from the page.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/new-videos/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - Scene URL; the gallery (or screenshots) sub-page is requested.
 * @param {object} site - Site the scene belongs to; passed on to scrapeScene.
 * @returns {Promise<object>} The scraped release.
 */
async function fetchScene(url, site) {
	const useGallery = true;

	// TODO: fall back on screenshots when gallery is not available
	const res = useGallery
		? await bhttp.get(`${url}/gallery#gallery`)
		: await bhttp.get(`${url}/screenshots#screenshots`);

	return scrapeScene(res.body.toString(), url, site, useGallery);
}
/**
 * Look up an actor through the autocomplete search and scrape their profile page.
 *
 * @param {string} actorName - Name to search for.
 * @returns {Promise<(object|null)>} The scraped profile, or null when the search returns no model.
 */
async function fetchProfile(actorName) {
	// URLSearchParams encodes every space as '+' and escapes other reserved characters
	const query = new URLSearchParams({ q: actorName });
	const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?${query}`);
	const data = res.body;

	// the search mixes result types; only the first 'model' entry is of interest
	const result = data.terms?.find(item => item.type === 'model');

	if (result) {
		const bioRes = await bhttp.get(result.url);
		const html = bioRes.body.toString();

		return scrapeProfile(html, result.url, actorName);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchProfile,
fetchScene,
fetchLatest,
fetchProfile,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared `./mindgeek` scraper.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever the mindgeek `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	// NOTE(review): 'men' and 'modelprofile' are presumably the network name and profile path — confirm against ./mindgeek
	return fetchProfile(actorName, 'men', 'modelprofile');
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared `./mindgeek` scraper.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever the mindgeek `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	// NOTE(review): 'devianthardcore' is presumably the network name — confirm against ./mindgeek
	return fetchProfile(actorName, 'devianthardcore');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -8,232 +8,232 @@ const moment = require('moment');
const { get } = require('../utils/http');
// Phrases that may occur in a scene description, mapped to the tag they imply.
const descriptionTags = {
	'anal cream pie': 'anal creampie',
	'ass to mouth': 'ass to mouth',
	'cream pie in her ass': 'anal creampie',
	'eats ass': 'ass eating',
	facial: 'facial',
	gaped: 'gaping',
	gapes: 'gaping',
	gape: 'gaping',
	'rectal cream pie': 'anal creampie',
	rimming: 'ass eating',
};

// Built once instead of on every call; longer phrases go first so 'gaped' is never shadowed by 'gape'.
const descriptionTagsPattern = new RegExp(
	Object.keys(descriptionTags).sort((phraseA, phraseB) => phraseB.length - phraseA.length).join('|'),
	'g',
);

/**
 * Derive tags from the phrases found in a scene description.
 *
 * Matching is case-insensitive (the description is lowercased first). Each tag
 * is returned once, in order of first occurrence.
 *
 * @param {(string|null|undefined)} description - Scene description; a missing value yields no tags.
 * @returns {string[]} Tags implied by the description.
 */
function deriveTagsFromDescription(description) {
	const matches = (description || '').toLowerCase().match(descriptionTagsPattern);

	if (!matches) {
		return [];
	}

	// several phrases map to the same tag, so duplicates are collapsed
	return [...new Set(matches.map(match => descriptionTags[match]))];
}
/**
 * Scrape the latest-videos overview for sites using the `.content-item` layout.
 *
 * @param {string} html - Markup of the overview page.
 * @param {object} site - Site the page belongs to; attached to every scene.
 * @returns {Promise<object[]>} One scene per `.content-item-large` / `.content-item` element.
 */
async function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-item-large, .content-item');

	return Promise.all(Array.from(sceneElements, async (element) => {
		// cheerio is only used to read the inline background-image styles of the hover thumbnails
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });

		const titleElement = element.querySelector('h3.title a');
		const title = titleElement.textContent;
		const url = titleElement.href;
		// second-to-last '/'-separated segment of the URL
		const entryId = url.split('/').slice(-2)[0];

		const descriptionElement = element.querySelector('.desc');
		const description = descriptionElement && descriptionElement.textContent.trim();
		const date = moment(element.querySelector('.date, time').textContent, 'Do MMM YYYY').toDate();

		const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		const duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		const ratingElement = element.querySelector('.rating');
		const stars = ratingElement && ratingElement.dataset.rating;

		// the first image is used as poster, the remaining ones as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-top, .thumb-bottom')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		const photos = [...primaryPhotos, ...secondaryPhotos];
		const tags = deriveTagsFromDescription(description);

		return {
			url,
			entryId,
			title,
			description,
			actors,
			director: 'Mike Adriano',
			date,
			duration,
			tags,
			poster,
			photos,
			rating: {
				stars,
			},
			site,
		};
	}));
}
/**
 * Scrape the latest-videos overview for sites using the `.content-border` layout.
 *
 * @param {string} html - Markup of the overview page.
 * @returns {Promise<object[]>} One release per `.content-border` element.
 */
async function scrapeLatestB(html) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-border');

	return Promise.all(Array.from(sceneElements, async (element) => {
		// cheerio is only used to read the inline background-image styles of the hover thumbnails
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
		const release = {
			director: 'Mike Adriano',
		};

		const titleElement = element.querySelector('.content-title-wrap a');
		release.title = titleElement.title || titleElement.textContent.trim();
		release.url = titleElement.href;
		// second-to-last '/'-separated segment of the URL
		release.entryId = release.url.split('/').slice(-2)[0];

		release.description = element.querySelector('.content-description').textContent.trim();

		// a moment object is always truthy, so the fallback has to be decided on element presence and validity
		const mobileDateElement = element.querySelector('.mobile-date');
		const mobileDate = mobileDateElement && moment(mobileDateElement.textContent, 'MM/DD/YYYY');

		if (mobileDate && mobileDate.isValid()) {
			release.date = mobileDate.toDate();
		} else {
			const dateElement = element.querySelector('.date');

			if (dateElement) {
				release.date = moment(dateElement.textContent, 'Do MMM YYYY').toDate();
			}
		}

		release.actors = Array.from(element.querySelectorAll('.content-models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		release.duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		// the first image is used as poster, the remaining ones as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('a img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-mouseover')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		release.poster = poster;
		release.photos = [...primaryPhotos, ...secondaryPhotos];

		release.tags = deriveTagsFromDescription(release.description);

		return release;
	}));
}
/**
 * Scrape a scene page for sites using the `.content-page-header` layout.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; the entry ID is its second-to-last '/'-separated segment.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeSceneA(html, url) {
	const { document } = new JSDOM(html).window;
	const element = document.querySelector('.content-page-info');
	const release = {
		url,
		director: 'Mike Adriano',
	};

	release.entryId = url.split('/').slice(-2)[0];
	release.title = element.querySelector('.title').textContent.trim();
	release.description = element.querySelector('.desc').textContent.trim();
	release.date = moment(element.querySelector('.post-date').textContent.trim(), 'Do MMM YYYY').toDate();

	release.actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	const durationString = element.querySelector('.total-time').textContent.trim();
	// timestamp is sometimes 00:00, sometimes 0:00:00
	release.duration = durationString.split(':').length === 3
		? moment.duration(durationString).asSeconds()
		: moment.duration(`00:${durationString}`).asSeconds();

	// poster and trailer are read from the header's <video> and <source> elements
	const { poster } = document.querySelector('.content-page-header video');
	const { src, type } = document.querySelector('.content-page-header source');

	release.poster = poster;
	release.trailer = { src, type };

	release.tags = deriveTagsFromDescription(release.description);

	return release;
}
/**
 * Scrape a scene page for sites using the `.content-page-header-inner` layout.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; the entry ID is its second-to-last '/'-separated segment.
 * @param {object} site - Site the scene belongs to; attached to the scene.
 * @returns {Promise<object>} The scraped scene.
 */
async function scrapeSceneB(html, url, site) {
	const { document } = new JSDOM(html).window;
	const element = document.querySelector('.content-page-info');

	const entryId = url.split('/').slice(-2)[0];
	const title = element.querySelector('.title').textContent.trim();
	const description = element.querySelector('.desc').textContent.trim();
	const date = moment(element.querySelector('.date').textContent.trim(), 'Do MMM YYYY').toDate();

	const actors = Array.from(element.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	const durationString = element.querySelector('.total-time').textContent.trim();
	// timestamp is sometimes 00:00, sometimes 0:00:00
	const duration = durationString.split(':').length === 3
		? moment.duration(durationString).asSeconds()
		: moment.duration(`00:${durationString}`).asSeconds();

	// poster and trailer are read from the header's <video> and <source> elements
	const { poster } = document.querySelector('.content-page-header-inner video');
	const { src, type } = document.querySelector('.content-page-header-inner source');

	const tags = deriveTagsFromDescription(description);

	return {
		url,
		entryId,
		title,
		description,
		actors,
		director: 'Mike Adriano',
		date,
		duration,
		tags,
		poster,
		trailer: {
			src,
			type,
		},
		site,
	};
}
/**
 * Fetch one page of the site's tour video overview and scrape it.
 *
 * @param {object} site - Site to fetch from; the host of `site.url` selects the layout.
 * @param {number} [page=1] - Page number of the overview.
 * @returns {Promise<(object[]|number)>} Scraped releases, or the response code when it is not 200.
 */
async function fetchLatest(site, page = 1) {
	const { host } = new URL(site.url);
	const url = `https://tour.${host}/videos?page=${page}`;

	const res = await get(url);

	if (res.code === 200) {
		// these two hosts use layout A, every other host layout B
		if (host === 'trueanal.com' || host === 'swallowed.com') {
			return scrapeLatestA(res.html, site);
		}

		return scrapeLatestB(res.html, site);
	}

	return res.code;
}
/**
 * Fetch a scene page and scrape it with the layout matching the site's host.
 *
 * @param {string} url - Scene URL to fetch.
 * @param {object} site - Site the scene belongs to; the host of `site.url` selects the layout.
 * @returns {Promise<(object|number)>} The scraped release, or the response code when it is not 200.
 */
async function fetchScene(url, site) {
	const { host } = new URL(site.url);
	const res = await get(url);

	if (res.code === 200) {
		// NOTE(review): fetchLatest reads res.html from the same helper while this reads res.body — confirm both exist on the response
		if (host === 'trueanal.com' || host === 'swallowed.com') {
			return scrapeSceneA(res.body.toString(), url, site);
		}

		return scrapeSceneB(res.body.toString(), url, site);
	}

	return res.code;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared `./mindgeek` scraper.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever the mindgeek `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	// NOTE(review): 'milehighmedia' is presumably the network name — confirm against ./mindgeek
	return fetchProfile(actorName, 'milehighmedia');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -13,257 +13,257 @@ const { inchesToCm, lbsToKg } = require('../utils/convert');
const { cookieToData } = require('../utils/cookies');
/**
 * Collect the XL image URLs of a scene.
 *
 * Uses `images.poster` when present; otherwise the main and secondary card
 * images, with the `.thumb` marker removed from their URLs.
 *
 * @param {object} scene - Scene data; `scene.images` holds lists of images with an `xl.url`.
 * @returns {string[]} Image URLs, or an empty list when no known image set is present.
 */
function getThumbs(scene) {
	const images = scene.images || {};

	if (images.poster) {
		return images.poster.map(image => image.xl.url);
	}

	if (images.card_main_rect) {
		return images.card_main_rect
			.concat(images.card_secondary_rect || [])
			.map(image => image.xl.url.replace('.thumb', ''));
	}

	return [];
}
/**
 * Convert one API item into a release, or reject it based on the site's `extract` parameter.
 *
 * @param {object} data - Scene item as returned by the API.
 * @param {object} site - Site being scraped; `parameters.extract` and `parameters.native` are read.
 * @returns {(object|null)} The release, or null when the item does not belong to this site.
 */
function scrapeLatestX(data, site) {
	if (site.parameters?.extract === true && data.collections.length > 0) {
		// release should not belong to any channel
		return null;
	}

	if (typeof site.parameters?.extract === 'string' && !data.collections.some(collection => collection.shortName === site.parameters.extract)) {
		// release should belong to specific channel
		return null;
	}

	const release = {
		entryId: data.id,
		title: data.title,
		description: data.description,
	};

	// scene URLs are built on the site's own URL when it is marked native, on the network's URL otherwise
	const hostname = site.parameters?.native ? site.url : site.network.url;

	release.url = `${hostname}/scene/${release.entryId}/`;
	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	release.duration = data.videos.mediabook?.length;
	// the first thumb is used as poster, the remaining ones as photos
	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	return release;
}
/**
 * Convert a list of API items into releases, dropping the ones rejected for this site.
 *
 * @param {object[]} items - Scene items as returned by the API.
 * @param {object} site - Site being scraped; passed on to scrapeLatestX.
 * @returns {Promise<object[]>} Releases for all items scrapeLatestX did not reject.
 */
async function scrapeLatest(items, site) {
	const latestReleases = await Promise.all(items.map(async data => scrapeLatestX(data, site)));

	// scrapeLatestX returns null for items that do not belong to the site
	return latestReleases.filter(Boolean);
}
/**
 * Convert a single API release object into a release record.
 *
 * @param {object} data - raw release from the API
 * @param {string|null} url - known scene URL; when falsy one is built from
 *   the network name (or the release's brand) and the entry ID
 * @param {object} _site - unused
 * @param {string} [networkName] - network slug used for the fallback URL
 * @returns {object} release record
 */
function scrapeScene(data, url, _site, networkName) {
	const release = {};

	const { id: entryId, title, description } = data;

	release.entryId = entryId;
	release.title = title;
	release.description = description;

	release.date = new Date(data.dateReleased);
	release.actors = data.actors.map(actor => ({ name: actor.name, gender: actor.gender }));

	release.tags = data.tags.map(tag => tag.name);

	// first thumb is used as the poster, the remainder as photos
	[release.poster, ...release.photos] = getThumbs(data);

	const teaserSources = data.videos.mediabook?.files;

	if (teaserSources) {
		release.teaser = Object.values(teaserSources).map(teaser => ({
			src: teaser.urls.view,
			quality: parseInt(teaser.format, 10),
		}));
	}

	// fall back to the brand when the release is not part of any collection
	const siteName = data.collections[0]?.name || data.brand;
	release.channel = slugify(siteName, '');

	release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;

	return release;
}
/**
 * Resolve the scenes overview URL for a site.
 *
 * @param {object} site - site with `url`, optional `parameters` and `network`
 * @returns {string} URL of the scenes overview
 * @throws {Error} when the site has no `?site=` URL and none of the
 *   `native`, `extract` or `siteId` parameters
 */
function getUrl(site) {
	const { search } = new URL(site.url);

	// site URL already carries the site filter
	if (search.match(/\?site=\d+/)) {
		return site.url;
	}

	if (site.parameters?.native || site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	if (site.parameters?.siteId) {
		return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
}
/**
 * Open an HTTP session against the given URL and read the instance token
 * from the cookies the response sets.
 *
 * @param {string} url - page to request for its cookies
 * @returns {Promise<{session: object, instanceToken: string}>} the session
 *   and the value of the `instance_token` cookie
 */
async function getSession(url) {
	const cookieJar = new CookieJar();
	const session = bhttp.session({ cookieJar });

	await session.get(url);

	const cookieString = await cookieJar.getCookieStringAsync(url);
	const { instance_token: instanceToken } = cookieToData(cookieString);

	return { session, instanceToken };
}
/**
 * Build an actor profile from the API actor object and the actor page HTML.
 *
 * @param {object} data - actor object from the API
 * @param {string} html - actor page HTML, used for the date of birth
 * @param {Array<object>|null} [releases] - raw API releases of the actor;
 *   null or undefined is treated as no releases
 * @param {string} [networkName] - network slug used for release URLs
 * @returns {object} profile
 */
function scrapeProfile(data, html, releases = [], networkName) {
	const { qa, qd } = ex(html);

	const profile = {
		description: data.bio,
		aliases: data.aliases,
	};

	// measurements may be absent; an empty string yields no bust/waist/hip
	const [bust, waist, hip] = (data.measurements || '').split('-');

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;

	if (profile.gender === 'female') {
		if (bust) profile.bust = bust.toUpperCase();
		if (waist) profile.waist = waist;
		if (hip) profile.hip = hip;
	}

	if (data.birthPlace) profile.birthPlace = data.birthPlace;
	if (data.height) profile.height = inchesToCm(data.height);
	if (data.weight) profile.weight = lbsToKg(data.weight);

	if (data.images.card_main_rect?.[0]) {
		// pick the largest available rendition
		profile.avatar = data.images.card_main_rect[0].xl?.url
			|| data.images.card_main_rect[0].lg?.url
			|| data.images.card_main_rect[0].md?.url
			|| data.images.card_main_rect[0].sm?.url
			|| data.images.card_main_rect[0].xs?.url;
	}

	const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
	if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');

	// the parameter default only covers undefined, callers may pass null
	profile.releases = (releases || []).map(release => scrapeScene(release, null, null, networkName));

	return profile;
}
/**
 * Fetch a page of the latest releases of a site from the releases API.
 *
 * @param {object} site - site to fetch releases for
 * @param {number} [page=1] - 1-based page number, 10 releases per page
 * @returns {Promise<Array<object>|null>} scraped releases, or null when the
 *   API does not answer with status 200 and a result
 */
async function fetchLatest(site, page = 1) {
	const url = getUrl(site);
	const { search } = new URL(url);
	const siteId = new URLSearchParams(search).get('site');

	const { session, instanceToken } = await getSession(url);

	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
	const limit = 10;
	const query = `dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	// native and extract sites query all releases, others filter by collection
	const apiUrl = site.parameters?.native || site.parameters?.extract
		? `https://site-api.project1service.com/v2/releases?${query}`
		: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&${query}`;

	const res = await session.get(apiUrl, {
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeLatest(res.body.result, site);
	}

	return null;
}
/**
 * Fetch and scrape a single scene through the releases API.
 *
 * @param {string} url - scene URL containing the numeric entry ID
 * @param {object} site - site the scene belongs to
 * @returns {Promise<object|null>} release, or null when no entry ID can be
 *   found or the API does not answer with status 200 and a result
 */
async function fetchScene(url, site) {
	// look in the path first, so digits in the hostname are not mistaken for the ID
	const idMatch = new URL(url).pathname.match(/\d+/) || url.match(/\d+/);

	if (!idMatch) {
		return null;
	}

	const entryId = idMatch[0];
	const { session, instanceToken } = await getSession(url);

	const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeScene(res.body.result, url, site);
	}

	return null;
}
/**
 * Look up an actor by name through the actors API and scrape their profile.
 *
 * @param {string} actorName - actor name, matched case-insensitively
 * @param {string} networkName - network slug, used as `www.<slug>.com`
 * @param {string} [actorPath='model'] - path segment of actor pages
 * @returns {Promise<object|null>} profile, or null when the actor is not
 *   found or the actor page cannot be fetched
 */
async function fetchProfile(actorName, networkName, actorPath = 'model') {
	const url = `https://www.${networkName}.com`;
	const { session, instanceToken } = await getSession(url);

	// encodeURIComponent, so characters like & or # in a name stay inside the search value
	const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURIComponent(actorName)}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode === 200) {
		const actorData = res.body.result?.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());

		if (actorData) {
			const actorUrl = `https://www.${networkName}.com/${actorPath}/${actorData.id}/`;
			const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;

			const [actorRes, actorReleasesRes] = await Promise.all([
				bhttp.get(actorUrl),
				session.get(actorReleasesUrl, {
					headers: {
						Instance: instanceToken,
					},
				}),
			]);

			if (actorRes.statusCode === 200 && actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
				return scrapeProfile(actorData, actorRes.body.toString(), actorReleasesRes.body.result, networkName);
			}

			if (actorRes.statusCode === 200) {
				// releases unavailable; pass an empty list rather than null
				return scrapeProfile(actorData, actorRes.body.toString(), [], networkName);
			}
		}
	}

	return null;
}
module.exports = {
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
scrapeLatestX,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile from the 'mofos' network.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<object|null>} whatever the shared fetchProfile resolves to
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'mofos');
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
fetchLatest,
fetchScene,
fetchProfile: networkFetchProfile,
};

View File

@@ -9,149 +9,149 @@ const slugify = require('../utils/slugify');
const { ex, get } = require('../utils/q');
/**
 * Derive a title and entry ID from a scene pathname.
 *
 * The third '/'-separated segment is split on '-'; the last component is the
 * entry ID, the preceding components are capitalized and joined by spaces.
 *
 * @param {string} pathname - e.g. '/scene/some-scene-title-12345'
 * @returns {{title: string, entryId: string}}
 */
function titleExtractor(pathname) {
	const components = pathname.split('/')[2].split('-');
	const entryId = components[components.length - 1];

	const title = components
		.slice(0, -1)
		.map(word => `${word.slice(0, 1).toUpperCase()}${word.slice(1)}`)
		.join(' ');

	return { title, entryId };
}
/**
 * Scrape the scene items from a latest-scenes overview page.
 *
 * @param {string} html - overview page HTML
 * @param {object} site - site the page belongs to, attached to each release
 * @returns {Array<object>} releases
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.site-list .scene-item').toArray();

	return sceneElements.map((item) => {
		const element = $(item);

		const sceneLinkElement = element.find('a').first();
		const { protocol, hostname, pathname } = new URL(sceneLinkElement.attr('href'));
		const url = `${protocol}//${hostname}${pathname}`; // strips query and hash
		const { title, entryId } = titleExtractor(pathname);

		const date = moment.utc(element.find('.entry-date').text(), 'MMM D, YYYY').toDate();
		const actors = element.find('.contain-actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

		// runtime text minus its last four characters is taken as minutes
		const duration = Number(element.find('.scene-runtime').text().slice(0, -4)) * 60;

		const posterString = sceneLinkElement.find('img[data-srcset]').attr('data-srcset') || sceneLinkElement.find('img[data-src]').attr('data-src');
		// the trailing path of the attribute is the protocol-relative image URL
		const posterMatch = posterString && posterString.match(/[\w/.]+$/);
		const poster = posterMatch ? `https:${posterMatch[0]}` : null;

		return {
			url,
			entryId,
			title,
			actors,
			date,
			duration,
			poster,
			rating: null,
			site,
		};
	});
}
/**
 * Scrape a scene page.
 *
 * @param {string} html - scene page HTML
 * @param {string} url - URL the page was fetched from
 * @param {object} site - site the scene was requested for
 * @returns {object} release
 */
function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElement = $('.scene-info');

	const { protocol, hostname, pathname } = new URL(url);
	const originalUrl = `${protocol}//${hostname}${pathname}`;

	// the entry ID is whatever follows the last '-' of the URL without query
	const entryId = originalUrl.split('-').slice(-1)[0];
	const title = sceneElement.find('h1.scene-title.grey-text').text();
	const description = sceneElement.find('.synopsis').contents().slice(2).text().replace(/[\s\n]+/g, ' ').trim();

	const date = moment.utc(sceneElement.find('span.entry-date').text(), 'MMM D, YYYY').toDate();
	const actors = $('a.scene-title.grey-text.link').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60;

	// avoid producing 'https:undefined' when the player has no poster
	const posterPath = $('video, dl8-video').attr('poster');
	const poster = posterPath ? `https:${posterPath}` : null;
	const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray();

	const trailerEl = $('source');
	const trailerSrc = trailerEl.attr('src');
	const trailerType = trailerEl.attr('type');

	const siteName = sceneElement.find('a.site-title').text();
	const channel = siteName.replace(/[\s']+/g, '').toLowerCase();

	const tags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	return {
		url,
		entryId,
		title,
		description,
		actors,
		date,
		duration,
		tags,
		photos,
		poster,
		trailer: {
			src: trailerSrc,
			type: trailerType,
		},
		rating: null,
		site,
		channel,
	};
}
/**
 * Fetch one page of an actor's releases and return the scene URLs on it.
 *
 * @param {string} url - actor releases page
 * @returns {Promise<Array<string>>} scene URLs, empty when the request fails
 */
async function fetchActorReleases(url) {
	const res = await get(url);

	return res.ok
		? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
		: [];
}
/**
 * Scrape an actor page, following its pagination links to collect the
 * scene URLs from the other pages as well.
 *
 * @param {string} html - actor page HTML
 * @returns {Promise<object>} profile with description, optional avatar and
 *   release URLs
 */
async function scrapeProfile(html) {
	const { qu } = ex(html);
	const profile = {};

	profile.description = qu.q('.bio_about_text', true);

	const avatar = qu.q('img.performer-pic', 'src');
	if (avatar) profile.avatar = `https:${avatar}`;

	const releases = qu.urls('.scene-item > a:first-child');
	// numbered pagination links only; next/prev would duplicate pages
	const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
	const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));

	profile.releases = releases.concat(olderReleases.flat());

	return profile;
}
/**
 * Fetch and scrape a page of the site's latest scenes.
 *
 * @param {object} site - site to fetch
 * @param {number} [page=1] - page number passed as `?page=`
 * @returns {Promise<Array<object>>} releases
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}?page=${page}`);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site the scene was requested for
 * @returns {Promise<object>} release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Fetch and scrape an actor's profile page by slugified name.
 *
 * @param {string} actorName - actor name
 * @returns {Promise<object|null>} profile, or null when the page does not
 *   answer with status 200
 */
async function fetchProfile(actorName) {
	const actorSlug = slugify(actorName);

	const res = await bhttp.get(`https://www.naughtyamerica.com/pornstar/${actorSlug}`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString());
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,75 +3,75 @@
const { geta, ed } = require('../utils/q');
/**
 * Scrape releases from the block-style overview layout.
 *
 * @param {Array<{html: string, qu: object}>} scenes - query contexts, one per scene
 * @returns {Array<object>} releases
 */
function scrapeBlockLatest(scenes) {
	return scenes.map(({ html, qu }) => {
		const release = {};

		// the ID is in either the class or the id attribute; a missing
		// attribute must fall through to the alternative instead of throwing
		const entryId = qu.q('div[class*="videothumb"]', 'class')?.match(/videothumb_(\d+)/)
			|| qu.q('div[id*="videothumb"]', 'id')?.match(/videothumb_(\d+)/);

		release.entryId = entryId?.[1];

		release.title = qu.q('h4 a', true);
		release.url = qu.url('h4 a');
		release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.q('div img').dataset.src;
		// highest resolution available
		release.photos = [qu.q('div img', 'src0_4x') || qu.q('div img', 'src0_3x') || qu.q('div img', 'src0_2x')];

		release.teaser = qu.video();

		return release;
	});
}
/**
 * Scrape releases from the classic overview layout.
 *
 * @param {Array<{el: object, qu: object}>} scenes - element and query
 *   context, one per scene
 * @returns {Array<object>} releases
 */
function scrapeClassicLatest(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid;
		release.url = qu.url('a');

		release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);

		const description = qu.q('a', 'title');
		if (description) release.description = description;

		const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
		if (date) release.date = date;

		// the counts line does not always mention minutes
		const durationMatch = qu.q('.update_counts', true)?.match(/(\d+) min/i);
		if (durationMatch) release.duration = Number(durationMatch[1]) * 60;

		// actors are either linked, or listed as comma-separated text
		const actors = qu.all('.update_models a', true);
		release.actors = actors.length > 0
			? actors
			: (qu.q('.update_models', true) || '').split(/,\s*/).filter(Boolean);

		const photoCount = Number(qu.q('.update_thumb', 'cnt')) || 0;

		// best available resolution per photo; the first one is the poster
		[release.poster, ...release.photos] = Array.from({ length: photoCount }, (value, index) => qu.q('.update_thumb', `src${index}_3x`)
			|| qu.q('.update_thumb', `src${index}_2x`)
			|| qu.q('.update_thumb', `src${index}_1x`));

		return release;
	});
}
/**
 * Fetch a page of latest releases and scrape it with the layout-specific scraper.
 *
 * @param {object} site - site; requires `parameters.siteId`, and
 *   `parameters.block` selects the block layout scraper
 * @param {number} [page=1] - page number
 * @returns {Promise<Array<object>|number|null>} releases, the response status
 *   when the request fails, or null when the site has no parameters
 */
async function fetchLatest(site, page = 1) {
	if (!site.parameters) {
		return null;
	}

	const url = `${site.url}/tour_${site.parameters.siteId}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');

	if (res.ok && site.parameters.block) {
		return scrapeBlockLatest(res.items, site);
	}

	return res.ok ? scrapeClassicLatest(res.items, site) : res.status;
}
module.exports = {
fetchLatest,
fetchLatest,
};

View File

@@ -5,161 +5,161 @@ const slugify = require('../utils/slugify');
const { heightToCm } = require('../utils/convert');
// origins of sites that are not at https://www.<slug>.com
const slugUrlMap = {
	nubiles: 'https://www.nubiles.net',
	nubilesporn: 'https://www.nubiles-porn.com',
};
/**
 * Fetch a photo album page and collect the `srcset` of each thumbnail's source.
 *
 * @param {string} albumUrl - album page URL
 * @returns {Promise<Array<string>>} srcset values, empty when the request fails
 */
async function getPhotos(albumUrl) {
	const res = await geta(albumUrl, '.photo-thumb');

	return res.ok
		? res.items.map(({ q }) => q('source').srcset)
		: [];
}
/**
 * Scrape the scene tiles of an overview, upcoming or profile page.
 *
 * @param {Array<{qu: object}>} scenes - query contexts, one per tile
 * @param {object|null} site - site the tiles were fetched from, if any
 * @param {string} [origin] - origin used for relative URLs when neither a
 *   channel link nor a site URL is available
 * @returns {Array<object>} releases
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('.title a', true);

		const url = qu.url('.title a').split('?')[0];
		const channelUrl = qu.url('.site-link');

		if (/^http/.test(url)) {
			// absolute link; re-root it on the channel when one is linked
			const { pathname } = new URL(url);
			release.entryId = pathname.split('/')[3];

			if (channelUrl) release.url = `${channelUrl}${pathname}`;
			else release.url = url;
		} else if (!/\/join/.test(url)) {
			// relative link to the scene
			release.entryId = url.split('/')[3];

			if (channelUrl) release.url = `${channelUrl}${url}`;
			else if (site?.url) release.url = `${site.url}${url}`;
			else if (origin) release.url = `${origin}${url}`;
		} else {
			// link leads to the join page; take the ID from the thumbnail, no URL
			release.entryId = qu.q('a img', 'tube_tour_thumb_id');
		}

		release.date = qu.date('.date', 'MMM D, YYYY');
		release.actors = qu.all('.models a.model', true);

		// larger rendition first, original as fallback
		const poster = qu.q('img').dataset.original;
		release.poster = [
			poster.replace('_640', '_1280'),
			poster,
		];

		release.stars = Number(qu.q('.rating', true));
		release.likes = Number(qu.q('.likes', true));

		return release;
	});
}
/**
 * Scrape a scene page, including its photo album when one is linked.
 *
 * @param {{qu: object}} context - query context of the scene page
 * @param {string} url - URL the page was fetched from
 * @param {object} site - site the scene belongs to; `site.url` prefixes the album link
 * @returns {Promise<object>} release
 */
async function scrapeScene({ qu }, url, site) {
	const release = {};

	const { origin, pathname } = new URL(url);
	release.url = `${origin}${pathname}`; // strips query and hash

	release.entryId = pathname.split('/')[3];
	release.title = qu.q('.content-pane-title h2', true);
	release.description = qu.q('.content-pane-column div', true);

	// qu.date parses the text; qu.q would treat the format as an attribute name
	release.date = qu.date('.date', 'MMM D, YYYY');

	release.actors = qu.all('.content-pane-performers .model', true);
	release.tags = qu.all('.categories a', true);

	release.poster = qu.poster() || qu.img('.fake-video-player img');
	release.trailer = qu.all('source').map(source => ({
		src: source.src,
		quality: Number(source.getAttribute('res')),
	}));

	release.stars = Number(qu.q('.score', true));
	release.likes = Number(qu.q('#likecount', true));

	const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
	if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);

	return release;
}
/**
 * Scrape a model profile page.
 *
 * @param {{qu: object}} context - query context of the profile page
 * @param {string} _actorName - unused
 * @param {string} origin - site origin, used for relative release URLs
 * @returns {object} profile
 */
function scrapeProfile({ qu }, _actorName, origin) {
	const profile = {};

	// the bio is a list of <h5> labels, each followed by a <p> value
	const keys = qu.all('.model-profile h5', true);
	const values = qu.all('.model-profile h5 + p', true);

	const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});

	profile.age = Number(bio.age);
	profile.description = qu.q('.model-bio', true);

	profile.residencePlace = bio.location;

	profile.height = heightToCm(bio.height);

	// not every profile lists a figure
	if (bio.figure) {
		[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
	}

	profile.avatar = qu.img('.model-profile img');

	const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
	profile.releases = scrapeAll(ctxa(releases), null, origin);

	return profile;
}
/**
 * Fetch a page of the site's video gallery.
 *
 * @param {object} site - site to fetch
 * @param {number} [page=1] - 1-based page; the gallery is offset in steps of 12
 * @returns {Promise<Array<object>|number>} releases, or the response status
 *   when the request fails
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/video/gallery/${(page - 1) * 12}`;
	const res = await geta(url, '.content-grid-item');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
/**
 * Fetch upcoming releases for sites flagged with `parameters.upcoming`.
 *
 * @param {object} site - site to fetch
 * @returns {Promise<Array<object>|number>} releases, the response status when
 *   the request fails, or an empty list when the site has no upcoming page
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming) {
		const url = `${site.url}/video/upcoming`;
		const res = await geta(url, '.content-grid-item');

		return res.ok ? scrapeAll(res.items, site) : res.status;
	}

	return [];
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site the scene belongs to
 * @returns {Promise<object|number>} release, or the response status when the
 *   request fails
 */
async function fetchScene(url, site) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site) : res.status;
}
/**
 * Find a model in the site's alphabetical index and scrape their profile.
 *
 * @param {string} actorName - actor name, compared by slug
 * @param {string} siteSlug - site slug; mapped through slugUrlMap, otherwise
 *   `https://www.<slug>.com` is assumed
 * @returns {Promise<object|number|null>} profile, a response status when a
 *   request fails, or null when the model is not listed
 */
async function fetchProfile(actorName, siteSlug) {
	const firstLetter = actorName.charAt(0).toLowerCase();
	const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;

	const url = `${origin}/model/alpha/${firstLetter}`;
	const resModels = await get(url);

	if (!resModels.ok) return resModels.status;

	// NOTE(review): this is the matched link element rather than a string; the
	// template below relies on it stringifying to its path — confirm
	const modelPath = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === slugify(actorName));

	if (modelPath) {
		const modelUrl = `${origin}${modelPath}`;
		const resModel = await get(modelUrl);

		return resModel.ok ? scrapeProfile(resModel.item, actorName, origin) : resModel.status;
	}

	return null;
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};

View File

@@ -7,143 +7,143 @@ const knex = require('../knex');
const { ex, ctxa } = require('../utils/q');
/**
 * Query the slugs of all sites belonging to the 'perfectgonzo' network.
 *
 * @returns {Promise<Array<string>>} site slugs
 */
async function getSiteSlugs() {
	return knex('sites')
		.pluck('sites.slug')
		.join('networks', 'networks.id', 'sites.network_id')
		.where('networks.slug', 'perfectgonzo');
}
/**
 * Hash an identifier with BLAKE2b using an 8-byte digest.
 *
 * @param {string} identifier - value to hash
 * @returns {string} hex-encoded digest
 */
function getHash(identifier) {
	const hash = blake2.createHash('blake2b', { digestLength: 8 });

	hash.update(Buffer.from(identifier));

	return hash.digest('hex');
}
/**
 * Collect the names listed after the 'Male Models' label in a tag container.
 *
 * The container's non-empty child nodes are scanned; text nodes (nodeType 3)
 * act as section labels, and every node between the 'Male Models' label and
 * the next label is treated as a model name.
 *
 * @param {?{childNodes: Iterable}} tagContainer - DOM element, or a falsy value
 * @returns {string[]} model names; empty when there is no container or label
 */
function extractMaleModelsFromTags(tagContainer) {
	if (!tagContainer) {
		return [];
	}

	const tagEls = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() }))
		.filter(node => node.text.length > 0);
	const modelLabelIndex = tagEls.findIndex(node => node.text === 'Male Models');

	if (modelLabelIndex === -1) {
		return [];
	}

	const nextLabelIndex = tagEls.findIndex((node, index) => index > modelLabelIndex && node.type === 3);
	// When 'Male Models' is the last label there is no following text node;
	// slicing to -1 would silently drop the final model, so run to the end.
	const endIndex = nextLabelIndex === -1 ? tagEls.length : nextLabelIndex;

	return tagEls.slice(modelLabelIndex + 1, endIndex).map(model => model.text);
}
/**
 * Find which site slug occurs in a photo URL.
 *
 * @param {string} photo - photo URL to inspect
 * @param {string[]} [metaSiteSlugs] - known site slugs; fetched via getSiteSlugs() when omitted
 * @returns {Promise<?string>} the first slug found in the URL, or null
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
	const siteSlugs = metaSiteSlugs || await getSiteSlugs();

	// An empty alternation would compile to a regex matching the empty string.
	if (!photo || !siteSlugs || siteSlugs.length === 0) {
		return null;
	}

	// Slugs are matched literally; escape any regex metacharacters.
	const pattern = siteSlugs
		.map(slug => slug.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
		.join('|');
	const channelMatch = photo.match(new RegExp(pattern));

	return channelMatch ? channelMatch[0] : null;
}
/**
 * Scrape the release tiles from a latest-movies overview page.
 *
 * @param {string} html - overview page markup
 * @param {object} site - site record; `url` and `slug` are read
 * @returns {Promise<object[]>} one release per '#content-main .itemm' element
 */
async function scrapeLatest(html, site) {
	const siteSlugs = await getSiteSlugs();
	const { element } = ex(html);

	return ctxa(element, '#content-main .itemm').map(({
		q, qa, qlength, qdate, qimages,
	}) => {
		const release = {
			site,
			// carried along so the scene scraper does not have to query the slugs again
			meta: {
				siteSlugs,
			},
		};

		const sceneLink = q('a');

		release.title = sceneLink.title;
		release.url = `${site.url}${sceneLink.href}`;
		release.date = qdate('.nm-date', 'MM/DD/YYYY');

		// entry ID is a hash of site slug, URL slug and release date
		const slug = new URL(release.url).pathname.split('/')[2];
		release.entryId = getHash(`${site.slug}${slug}${release.date.toISOString()}`);

		// the tile title is used as the actor list, separated by '&'
		release.actors = release.title.split('&').map(actor => actor.trim());

		[release.poster, ...release.photos] = qimages('.bloc-link img');

		release.tags = qa('.dropdown ul a', true).slice(1);
		release.duration = qlength('.dropdown p:first-child');

		return release;
	});
}
/**
 * Scrape a single scene page.
 *
 * @param {string} html - scene page markup
 * @param {object} site - site record the scene was requested for
 * @param {string} url - URL the page was fetched from
 * @param {string[]} [metaSiteSlugs] - known site slugs, passed on to extractChannelFromPhoto()
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
	const {
		q, qa, qlength, qdate, qposter, qtrailer,
	} = ex(html);

	const release = { url, site };

	release.title = q('#movie-header h2', true);
	release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	release.description = q('.container .mg-md', true);
	release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');

	release.actors = qa('#video-info a', true).concat(extractMaleModelsFromTags(q('.tag-container')));
	release.tags = qa('.tag-container a', true);

	const uhd = q('#video-ribbon .container > div > span:nth-child(2)', true);
	if (/4K/.test(uhd)) release.tags = release.tags.concat('4k');

	release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
	release.poster = qposter();

	const trailer = qtrailer();
	if (trailer) release.trailer = { src: trailer };

	// the channel is derived from the first photo URL
	if (release.photos.length > 0) release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);

	if (release.channel) {
		// rewrite the URL to the channel's own domain and derive the entry ID from it
		const { pathname } = new URL(url);
		release.url = `https://${release.channel}.com${pathname}`;

		const slug = pathname.split('/')[2];
		release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);
	}

	return release;
}
/**
 * Fetch and scrape one page of the site's movie overview.
 *
 * @param {object} site - site record; `url` is read
 * @param {number} [page=1] - overview page number
 * @returns {Promise<object[]>} scraped releases, or an empty array on a non-200 response
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/movies/page-${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return [];
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site record
 * @param {object} [release] - base release; `meta.siteSlugs` is reused when present
 * @returns {Promise<object|Array>} the scraped release, or an empty array on a non-200 response
 */
async function fetchScene(url, site, release) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		// a base release does not necessarily carry `meta`; guard both levels
		return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
	}

	// NOTE(review): an array is returned here although success yields an object — confirm callers expect this
	return [];
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Request the trailer and poster details for a set from pervcity.com.
 *
 * @param {string} entryId - set ID, posted as `setId`
 * @returns {Promise<?{poster: string, trailer: string}>} null on a non-200 response
 */
async function getTrailer(entryId) {
	const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
		setId: entryId,
	});

	if (trailerRes.statusCode === 200) {
		return {
			poster: trailerRes.body.TrailerImg,
			trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback,
		};
	}

	return null;
}
/**
 * Scrape one scene tile from the latest-updates response.
 *
 * @param {string} html - markup of a single scene entry
 * @param {object} site - site record; `url` is read
 * @returns {object} the scraped release
 */
function scrapeLatestScene(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const entryId = $('li').attr('id');
	const sceneLinkElement = $('#scene_title_border a');
	const url = `${site.url}/${sceneLinkElement.attr('href')}`;
	const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes

	const actors = $('.home_model_name a')
		.toArray()
		.map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
	const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();

	const poster = $('a:nth-child(2) > img').attr('src');
	const photos = $('.sample-picker img')
		.map((index, element) => $(element).attr('src').replace('tourpics', 'trailer'))
		.toArray();

	// the rating is the number of star images in the tile
	const stars = $('img[src*="/star.png"]').length;

	return {
		url,
		entryId,
		title,
		actors,
		date,
		poster,
		photos,
		rating: {
			stars,
		},
		site,
	};
}
/**
 * Scrape a scene page and attach its trailer and poster.
 *
 * @param {string} html - scene page markup
 * @param {string} url - URL the page was fetched from
 * @param {object} site - site record
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;

	const release = { url, site };

	release.entryId = document.querySelector('input#set_ID').value;

	release.title = document.querySelector('title').textContent;
	release.description = document.querySelector('.player_data').textContent.trim();

	// the first two numbers in the duration text are read as minutes and seconds
	const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
	const [minutes, seconds] = durationString.match(/\d+/g);

	release.duration = Number(minutes) * 60 + Number(seconds);
	release.tags = document.querySelector('meta[name="keywords"]').content.split(',');

	// getTrailer() resolves to null on a non-200 response; do not destructure blindly
	const trailerInfo = await getTrailer(release.entryId);

	if (trailerInfo) {
		release.poster = trailerInfo.poster;
		if (trailerInfo.trailer) release.trailer = { src: trailerInfo.trailer };
	}

	return release;
}
/**
 * Read the set ID from a landing page.
 *
 * @param {string} html - landing page markup
 * @returns {string} value of the `input#set_ID` element
 */
function scrapeFallbackLanding(html) {
	const { document } = new JSDOM(html).window;

	return document.querySelector('input#set_ID').value;
}
/**
 * Scrape the scene popup markup returned by set_popupvideo.php.
 *
 * @param {string} html - popup markup
 * @param {string} entryId - set ID of the scene
 * @param {string} url - original scene URL
 * @param {object} site - site record
 * @returns {Promise<object>} the scraped release
 */
async function scrapeFallbackScene(html, entryId, url, site) {
	const { document } = new JSDOM(html).window;
	const release = { url, entryId, site };

	release.title = document.querySelector('.popup_data_set_head label').textContent;
	release.description = document.querySelector('.popup_data_set_des p').textContent.trim();
	release.date = moment.utc(document.querySelector('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
	release.actors = Array.from(document.querySelectorAll('.popup_data_set_models a'), el => el.textContent);

	// getTrailer() resolves to null on a non-200 response; do not destructure blindly
	const trailerInfo = await getTrailer(release.entryId);

	if (trailerInfo) {
		release.poster = trailerInfo.poster;
		if (trailerInfo.trailer) release.trailer = { src: trailerInfo.trailer };
	}

	// the channel is taken from the alt text of the logo image
	release.channel = document.querySelector('.popup_left_top div img').alt;

	return release;
}
/**
 * Fetch and scrape the latest updates for a site.
 *
 * The first page and later pages are served by different endpoints.
 *
 * @param {object} site - site record; `url` and `parameters.tourId` are read
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	const res = page === 1
		? await bhttp.get(`${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`)
		: await bhttp.get(`${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`);
	const elements = JSON.parse(res.body.toString());

	// total_arr is a key-value object for final_load_latestupdate_grid_view.php
	return Object.values(elements.total_arr).map(html => scrapeLatestScene(html, site));
}
/**
 * Fetch and scrape a scene page.
 *
 * When `site.isNetwork` is set, only the set ID is taken from the page and the
 * scene details are loaded from pervcity.com's popup endpoint.
 *
 * @param {string} url - scene URL
 * @param {object} site - site record; `isNetwork` is read
 * @returns {Promise<?object>} the scraped release, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return null;
	}

	if (site.isNetwork) {
		const entryId = scrapeFallbackLanding(res.body.toString(), url);

		const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', {
			setId: entryId,
		});

		return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
	}

	return scrapeScene(res.body.toString(), url, site);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -5,56 +5,56 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
// Maps ethnicity labels from the profile page to the values stored on the profile.
const ethnicityMap = {
	White: 'Caucasian',
};

// Maps hair colour labels from the profile page to the values stored on the profile.
const hairMap = {
	Brunette: 'brown',
};
/**
 * Scrape an actor profile page.
 *
 * @param {string} html - profile page markup
 * @param {string} _url - profile URL (unused)
 * @param {string} actorName - name stored on the resulting profile
 * @returns {Promise<object>} the scraped profile
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	// each .infoPiece is read as 'Key: Value'; entries without a value are skipped
	const entries = Array.from(document.querySelectorAll('.infoPiece'), el => el.textContent.replace(/\n|\t/g, '').split(':'));
	const bio = entries.reduce((acc, [key, value]) => (key && value !== undefined ? { ...acc, [key.trim()]: value.trim() } : acc), {});

	const profile = {
		name: actorName,
	};

	const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
	const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

	if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
	// bio keys keep the page's capitalisation; the lowercase key never matched
	if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

	if (descriptionString) profile.description = descriptionString.textContent;

	if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
	if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

	profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
	profile.residencePlace = bio['City and Country'];

	if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

	// height and weight are taken from the number following the opening parenthesis
	const heightMatch = bio.Height?.match(/\(\d+/);
	const weightMatch = bio.Weight?.match(/\(\d+/);

	if (heightMatch) profile.height = Number(heightMatch[0].slice(1));
	if (weightMatch) profile.weight = Number(weightMatch[0].slice(1));
	if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
	if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
	if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

	if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
	profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason

	return profile;
}
/**
 * Fetch and scrape the pornstar page for an actor.
 *
 * @param {string} actorName - actor name; lowercased and hyphenated to build the URL
 * @returns {Promise<object>} the scraped profile
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	// Model pages (https://pornhub.com/model/<slug>) are not reliably associated
	// with actual porn stars, so only the pornstar page is requested.
	const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
	const pornstarRes = await bhttp.get(pornstarUrl);

	return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
}
module.exports = {
fetchProfile,
fetchProfile,
};

View File

@@ -9,193 +9,193 @@ const { get, geta } = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Fetch the high-resolution gallery of a scene.
 *
 * @param {string} entryId - scene ID, passed as the `id` query parameter
 * @param {object} site - site record; the hostname of `url` is used
 * @returns {Promise<string[]>} photo URLs (`data-src`, falling back to `href`)
 */
async function getPhotos(entryId, site) {
	const { hostname } = new URL(site.url);

	const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
	const html = res.body.toString();

	const $ = cheerio.load(html, { normalizeWhitespace: true });

	return $('a.fakethumb')
		.map((photoIndex, photoElement) => $(photoElement).attr('data-src') || $(photoElement).attr('href'))
		.toArray();
}
/**
 * Scrape the scene tiles from an overview page.
 *
 * @param {string} html - page markup
 * @param {object} [site] - site record attached to every scene
 * @returns {object[]} one scene per '.content-wrapper .scene' element
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.content-wrapper .scene').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('h3 a');
		const thumbnailElement = $(element).find('a img');

		const url = sceneLinkElement.attr('href');
		const entryId = url.split('/').slice(-1)[0];

		// the title is the part of the thumbnail's alt text after the first colon
		const titleText = thumbnailElement.attr('alt');
		const title = titleText.slice(titleText.indexOf(':') + 1).trim();

		// moment needs the element's text, not the cheerio selection itself
		const date = moment.utc($(element).find('.scene-date').text(), ['MM/DD/YYYY', 'YYYY-MM-DD']).toDate();

		const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		const likes = Number($(element).find('.scene-votes').text());

		// preview photos are stored on the thumbnail as src1..srcN attributes
		const photoCount = Number(thumbnailElement.attr('thumbs_num'));
		const poster = thumbnailElement.attr('src');
		const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`));

		return {
			url,
			entryId,
			title,
			actors,
			date,
			poster,
			photos,
			rating: {
				likes,
			},
			site,
		};
	});
}
/**
 * Scrape a scene page, including its high-resolution gallery.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; its last path segment is the entry ID
 * @param {object} site - site record, passed on to getPhotos()
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = $('.video-wrapper meta[itemprop="name"]').attr('content');
	release.description = $('.video-wrapper meta[itemprop="description"]').attr('content');

	release.date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
	release.actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	const timestamp = $('.video-wrapper meta[itemprop="duration"]').attr('content');

	if (timestamp) {
		// the first two numbers in the duration value are read as minutes and seconds
		const [minutes, seconds] = timestamp.match(/\d+/g);
		release.duration = Number(minutes) * 60 + Number(seconds);
	}

	release.tags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.likes = Number($('.content-desc #social-actions #likes').text());

	// poster fallbacks: og:image meta, first .jpg URL in the player script, finished-trailer image
	const posterScript = $('script:contains(poster)').html();
	const posterLink = posterScript?.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
	release.poster = $('meta[property="og:image"]').attr('content') || posterLink || $('#trailer_player_finished img').attr('src');

	const trailer = $('meta[property="og:video"]').attr('content') || $('#videojs-trailer source').attr('src');

	if (trailer) release.trailer = { src: trailer };

	release.photos = await getPhotos(release.entryId, site);
	release.movie = $('a[data-track="FULL MOVIE"]').attr('href');

	// a cheerio selection is always truthy; check that it matched something
	const siteElement = $('.content-wrapper .logos-sites a');
	if (siteElement.length > 0) release.channel = slugify(siteElement.text(), '');

	return release;
}
/**
 * Scrape a model profile page.
 *
 * @param {object} ctx - query context of the profile page
 * @param {string} ctx.html - page markup, reused to scrape the listed releases
 * @param {Function} ctx.q - single-element query helper
 * @param {Function} ctx.qa - multi-element query helper
 * @param {Function} ctx.qtx - text query helper
 * @returns {object} the scraped profile
 */
function scrapeProfile({ html, q, qa, qtx }) {
	const profile = {};

	// facts are read as 'Key: Value'; entries without a colon or with an empty value are skipped
	const bio = qa('.model-facts li:not(.model-facts-long)', true).reduce((acc, fact) => {
		const separatorIndex = fact.indexOf(':');
		if (separatorIndex === -1) return acc;

		const key = fact.slice(0, separatorIndex);
		const trimmedValue = fact.slice(separatorIndex + 1).trim();

		if (trimmedValue.length === 0 || trimmedValue === '-') return acc;
		return { ...acc, [slugify(key, '_')]: trimmedValue };
	}, {});

	const description = q('.model-facts-long', true);
	if (description) profile.description = description;

	const aliases = qtx('.aka')?.split(/,\s*/);
	if (aliases) profile.aliases = aliases;

	if (bio.birth_place) profile.birthPlace = bio.birth_place;
	if (bio.nationality) profile.nationality = bio.nationality;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	// weight and height use the leading number of the fact value
	const weightMatch = bio.weight?.match(/^\d+/);
	const heightMatch = bio.height?.match(/^\d+/);

	if (weightMatch) profile.weight = Number(weightMatch[0]);
	if (heightMatch) profile.height = Number(heightMatch[0]);

	if (bio.hair_color) profile.hair = bio.hair_color;
	if (bio.eye_color) profile.eye = bio.eye_color;

	if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	const avatarEl = q('.img-pornstar img');
	if (avatarEl) profile.avatar = avatarEl.dataset.src;

	profile.releases = scrapeLatest(html);

	return profile;
}
/**
 * Fetch and scrape one page of latest scenes.
 *
 * Hosts containing 'private.com' list scenes directly under the site URL;
 * other hosts use the '/scenes/' path.
 *
 * @param {object} site - site record; `url` is read
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]>} scraped scenes
 */
async function fetchLatest(site, page = 1) {
	const { hostname } = new URL(site.url);

	// literal substring test; String#match would treat the '.' as a regex wildcard
	const url = hostname.includes('private.com')
		? `${site.url}/${page}/`
		: `${site.url}/scenes/${page}/`;

	const res = await bhttp.get(url);

	return scrapeLatest(res.body.toString(), site);
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - scene URL
 * @param {object} site - site record
 * @returns {Promise<object>} the scraped release
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	return scrapeScene(res.body.toString(), url, site);
}
/**
 * Search private.com for an actor and scrape the matching model page.
 *
 * @param {string} actorName - actor name to search for
 * @returns {Promise<object|number|null>} the profile, the response status when
 *   the model page request fails, or null when the search fails or has no exact match
 */
async function fetchProfile(actorName) {
	const actorSearchSlug = slugify(actorName, '+');
	const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
	const modelRes = await geta(url, '.model h3 a');

	if (!modelRes.ok) {
		return null;
	}

	// only accept a search result whose slugified name equals the requested name
	const actorSlug = slugify(actorName);
	const model = modelRes.items.find(({ text }) => slugify(text) === actorSlug);

	if (!model) {
		return null;
	}

	const res = await get(model.el.href);

	return res.ok ? scrapeProfile(res.item) : res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,7 +3,7 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -4,49 +4,49 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
// Shared MindGeek scrapers reused by this module.
const {
	scrapeLatestX,
	fetchLatest,
	fetchScene,
	fetchProfile,
} = require('./mindgeek');
/**
 * Scrape releases from the `initialState` object embedded in a scenes page.
 *
 * @param {string} html - page markup
 * @param {object} site - site record, passed on to scrapeLatestX()
 * @returns {object[]} one release per entry in `entities.releases`
 */
function scrapeLatestClassic(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const stateTag = $('script:contains("initialState")').html();

	const prefix = 'initialState = {';
	const suffix = '};';
	const prefixIndex = stateTag.indexOf(prefix);

	// keep the opening '{' of the prefix and the '}' of the suffix, drop the ';'
	const stateString = stateTag.slice(prefixIndex + prefix.length - 1, stateTag.indexOf(suffix, prefixIndex) + suffix.length - 1);
	const data = JSON.parse(stateString);

	return Object.values(data.entities.releases).map(scene => scrapeLatestX(scene, site));
}
/**
 * Fetch one page of the site's `/scenes` listing and scrape it.
 *
 * @param {Object} site - site entry; `site.url` is the base URL
 * @param {number} page - value for the `page` query parameter
 * @returns {Promise<Array|null>} scraped releases, or null on a non-200 response
 */
async function fetchClassic(site, page) {
	const res = await bhttp.get(`${site.url}/scenes?page=${page}`);

	if (res.statusCode === 200) {
		return scrapeLatestClassic(res.body.toString(), site);
	}

	return null;
}
/**
 * Pick the listing fetcher for a site: the classic HTML scraper when
 * `site.parameters.classic` is set, the shared mindgeek fetcher otherwise.
 *
 * @param {Object} site - site entry
 * @param {number} [page=1] - listing page
 * @returns {Promise<*>} whatever the selected fetcher resolves to
 */
async function fetchLatestWrap(site, page = 1) {
	if (site.parameters?.classic) {
		return fetchClassic(site, page);
	}

	return fetchLatest(site, page);
}
/**
 * Fetch an actor profile through the shared mindgeek fetcher, pinned to the
 * 'realitykings' network slug.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} result of the mindgeek fetchProfile
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'realitykings');
}
module.exports = {
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest: fetchLatestWrap,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -7,255 +7,255 @@ const slugify = require('../utils/slugify');
const { heightToCm, lbsToKg } = require('../utils/convert');
/**
 * Collect photo sources from a photo album page.
 *
 * @param {string} html - album page markup
 * @returns {Array<Array<string>>} per photo, a [derived, original] pair of
 *   sources; the derived one swaps 'x_800' for 'x_xl' and drops '_tn'
 */
function scrapePhotos(html) {
	// NOTE(review): `qis` presumably returns the image sources under '#photos-page' as strings — confirm in the query utils
	const { qis } = ex(html, '#photos-page');
	const photos = qis('img');

	return photos.map(photo => [
		photo
			.replace('x_800', 'x_xl')
			.replace('_tn', ''),
		photo,
	]);
}
/**
 * Download a photo album page and scrape its photos.
 *
 * @param {string} url - album page URL
 * @returns {Promise<Array>} scraped photo pairs, or an empty array on a non-200 response
 */
async function fetchPhotos(url) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		// scrapePhotos only takes the markup; the former second argument was ignored
		return scrapePhotos(res.body.toString());
	}

	return [];
}
/**
 * Scrape every video tile on a listing page into a release object.
 *
 * @param {string} html - listing page markup
 * @param {Object} [site] - site entry; `site.parameters.actors`, when set,
 *   overrides the actors found on the tile
 * @returns {Array<Object>} releases; photo album tiles are dropped
 */
function scrapeAll(html, site) {
	return exa(html, '.container .video, .container-fluid .video').map(({ q, qa, qd, ql }) => {
		const release = {};

		release.title = q('.title, .i-title', true);

		const linkEl = q('a');
		const url = new URL(linkEl.href);
		release.url = `${url.origin}${url.pathname}`; // strip query string and hash

		// this is a photo album, not a scene (used for profiles)
		if (/photos\//.test(url)) return null;

		// second-to-last path segment
		[release.entryId] = url.pathname.split('/').slice(-2);

		release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/)
			|| qd('.dt-box', 'MMM.DD YYYY');

		release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
		release.duration = ql('.i-amount, .amount');

		const posterEl = q('.item-img img');

		if (posterEl) {
			release.poster = `https:${posterEl.src}`;
		}

		if (posterEl?.dataset.gifPreview) {
			release.teaser = {
				src: `https:${posterEl.dataset.gifPreview}`,
			};
		}

		return release;
	}).filter(Boolean);
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; the entry ID is its second-to-last path segment
 * @param {Object} site - site entry; `site.parameters.actors` is the fallback
 *   when no actors are found on the page
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const { qu } = ex(html, '#videos-page, #content');
	const release = {};

	[release.entryId] = new URL(url).pathname.split('/').slice(-2);

	release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
		|| qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
	release.description = qu.text('.p-desc, .desc');

	release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);

	if (release.actors.length === 0) {
		// no actor links, fall back to the plain-text 'Featuring' stat
		const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
		const actorString = qu.text(actorEl);

		release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
	}

	if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;

	release.tags = qu.all('a[href*=tag]', true);

	const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
	release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
		|| qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
		|| qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

	// '.value' is a class like everywhere else in this function; the bare 'value' tag selector never matched
	const durationEl = qu.all('.value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
	release.duration = qu.dur(durationEl);

	release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape

	const photosUrl = qu.url('.stat a[href*=photos]');

	if (photosUrl) {
		release.photos = await fetchPhotos(photosUrl);
	} else {
		release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
			photo.replace('_tn', ''),
			photo,
		]);
	}

	const trailers = qu.all('a[href*=Trailers]');

	if (trailers) {
		// link text is expected to start with the format and end with the quality; only mp4 is kept
		release.trailer = trailers.map((trailer) => {
			const src = `https:${trailer.href}`;
			const format = trailer.textContent.trim().match(/^\w+/)[0].toLowerCase();
			const quality = parseInt(trailer.textContent.trim().match(/\d+([a-zA-Z]+)?$/)[0], 10);

			return format === 'mp4' ? { src, quality } : null;
		}).filter(Boolean);
	}

	// the rating box may be missing; don't let that abort the whole scene
	const stars = qu.q('.rate-box')?.dataset.score;
	if (stars) release.rating = { stars };

	return release;
}
/**
 * Find an actor's profile link on a model index page.
 *
 * @param {string} html - model index page markup
 * @param {string} actorName - must equal the link's `title` exactly
 * @returns {string|null} the link's href, or null when no link matches
 */
function scrapeModels(html, actorName) {
	const { qa } = ex(html);
	const model = qa('.model a').find(link => link.title === actorName);

	return model?.href || null;
}
/**
 * Recursively collect an actor's releases across paginated listing pages.
 *
 * @param {string} url - listing page URL to fetch
 * @param {Array} [accReleases=[]] - releases gathered from previous pages
 * @returns {Promise<Array|null>} all gathered releases, or null when a page
 *   fails to load (releases gathered so far are discarded in that case)
 */
async function fetchActorReleases(url, accReleases = []) {
	const res = await get(url);

	if (res.ok) {
		const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML));
		const nextPage = res.item.qu.url('.next-pg');

		if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page
			return fetchActorReleases(nextPage, releases);
		}

		return releases;
	}

	return null;
}
/**
 * Scrape an actor profile page.
 *
 * @param {string} html - profile page markup
 * @param {string} actorUrl - profile URL, used to build the scenes listing URL
 * @param {boolean} [withReleases] - when truthy, also fetch the actor's releases
 * @returns {Promise<Object>} profile; gender is always set to 'female'
 */
async function scrapeProfile(html, actorUrl, withReleases) {
	const { q, qa, qi } = ex(html, '#model-page');
	const profile = { gender: 'female' };

	// turn each '.stat' row into a { slugified_label: value } entry
	const bio = qa('.stat').reduce((acc, el) => {
		const prop = q(el, '.label', true).slice(0, -1); // drop the label's last character
		const key = slugify(prop, '_');
		const value = q(el, '.value', true);

		return {
			...acc,
			[key]: value,
		};
	}, {});

	if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

	if (bio.birthday) {
		const birthMonth = bio.birthday.match(/^\w+/)[0].toLowerCase();
		const [birthDay] = bio.birthday.match(/\d+/);

		profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate
	}

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.hair_color) profile.hair = bio.hair_color;

	if (bio.height) profile.height = heightToCm(bio.height);
	if (bio.weight) profile.weight = lbsToKg(bio.weight);

	if (bio.bra_size) profile.bust = bio.bra_size;
	// first segment of the measurements is skipped; bust comes from bra_size
	if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-');

	if (bio.occupation) profile.occupation = bio.occupation;

	const avatar = qi('img');
	if (avatar) profile.avatar = avatar;

	if (withReleases) {
		const { origin, pathname } = new URL(actorUrl);
		profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
	}

	return profile;
}
/**
 * Fetch and scrape one page of a site's latest releases.
 *
 * @param {Object} site - site entry; `site.parameters.path` overrides the
 *   default '/big-boob-videos' listing path
 * @param {number} [page=1] - listing page
 * @returns {Promise<Array|number>} scraped releases, or the HTTP status code
 *   on a non-200 response
 */
async function fetchLatest(site, page = 1) {
	const latestPath = site.parameters?.path || '/big-boob-videos';
	const url = `${site.url}${latestPath}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry, passed through to scrapeScene
 * @returns {Promise<Object|null>} scraped release, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url, site);
	}

	return null;
}
/**
 * Look an actor up in the alphabetical model indexes and scrape the profile.
 *
 * Pages of the current source are walked until the actor is found or a page
 * returns a non-200 status (redirects are not followed), after which the next
 * source is tried from page 1.
 *
 * @param {string} actorName - actor name; its first letter selects the index
 * @param {string} scraperSlug - only forwarded to recursive calls
 * @param {Object} site - only forwarded to recursive calls
 * @param {Object} [include] - `include.scenes` toggles fetching the actor's releases
 * @param {number} [page=1] - index page to fetch
 * @param {number} [source=0] - index into the list of source sites
 * @returns {Promise<Object|null>} scraped profile, or null when not found or
 *   the profile page fails to load
 */
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
	const letter = actorName.charAt(0).toUpperCase();

	const sources = [
		`https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`,
		`https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`,
	];

	const url = sources[source];

	const res = await bhttp.get(url, {
		followRedirects: false,
	});

	if (res.statusCode === 200) {
		const actorUrl = scrapeModels(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			if (actorRes.statusCode === 200) {
				// include may be omitted by the caller; treat that as 'no releases'
				return scrapeProfile(actorRes.body.toString(), actorUrl, include?.scenes);
			}

			return null;
		}

		// actor not on this page, try the next one
		return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
	}

	if (sources[source + 1]) {
		return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -65,143 +65,143 @@ const freeones = require('./freeones');
// const freeoneslegacy = require('./freeones_legacy');
module.exports = {
releases: {
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
adulttime,
amateurallure,
assylum,
aziani,
babes,
bamvisions,
bang,
bangbros,
blowpass,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
digitalplayground,
dogfart,
dogfartnetwork: dogfart,
evilangel,
fakehub,
famedigital,
fantasymassage,
fullpornnetwork,
girlsway,
girlgirl: julesjordan,
hussiepass: hush,
hushpass: hush,
insex,
interracialpass: hush,
jayrock,
jesseloadsmonsterfacials,
julesjordan,
kellymadison,
kink,
legalporno,
men,
metrohd,
mikeadriano,
milehighmedia,
mindgeek,
mofos,
naughtyamerica,
newsensations,
nubiles,
perfectgonzo,
pervcity,
pimpxxx: cherrypimps,
pornpros: whalemember,
private: privateNetwork,
puretaboo,
realitykings,
score,
sexyhub: mindgeek,
swallowsalon: julesjordan,
teamskeet,
twistys,
vivid,
vixen,
vogov,
whalemember,
wicked,
xempire,
},
actors: {
'21sextury': sextury,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
anilos: nubiles,
aziani,
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bangbros,
blacked: vixen,
blackedraw: vixen,
blowpass,
boobpedia,
brattysis: nubiles,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
deeper: vixen,
deeplush: nubiles,
digitalplayground,
dtfsluts: fullpornnetwork,
evilangel,
eyeontheguy: hush,
fakehub,
famedigital,
freeones,
gangbangcreampie: aziani,
girlfaction: fullpornnetwork,
gloryholesecrets: aziani,
hergape: fullpornnetwork,
homemadeanalwhores: fullpornnetwork,
hotcrazymess: nubiles,
hushpass: hush,
hussiepass: hush,
iconmale,
interracialpass: hush,
interracialpovs: hush,
jamesdeen: fullpornnetwork,
julesjordan,
kellymadison,
legalporno,
men,
metrohd,
milehighmedia,
mofos,
mugfucked: fullpornnetwork,
naughtyamerica,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
pornhub,
povperverts: fullpornnetwork,
povpornstars: hush,
private: privateNetwork,
realitykings,
score,
seehimfuck: hush,
sexyhub: mindgeek,
thatsitcomshow: nubiles,
transangels,
tushy: vixen,
tushyraw: vixen,
twistys,
vixen,
wicked,
xempire,
},
releases: {
'21naturals': naturals,
'21sextreme': sextreme,
'21sextury': sextury,
adulttime,
amateurallure,
assylum,
aziani,
babes,
bamvisions,
bang,
bangbros,
blowpass,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
digitalplayground,
dogfart,
dogfartnetwork: dogfart,
evilangel,
fakehub,
famedigital,
fantasymassage,
fullpornnetwork,
girlsway,
girlgirl: julesjordan,
hussiepass: hush,
hushpass: hush,
insex,
interracialpass: hush,
jayrock,
jesseloadsmonsterfacials,
julesjordan,
kellymadison,
kink,
legalporno,
men,
metrohd,
mikeadriano,
milehighmedia,
mindgeek,
mofos,
naughtyamerica,
newsensations,
nubiles,
perfectgonzo,
pervcity,
pimpxxx: cherrypimps,
pornpros: whalemember,
private: privateNetwork,
puretaboo,
realitykings,
score,
sexyhub: mindgeek,
swallowsalon: julesjordan,
teamskeet,
twistys,
vivid,
vixen,
vogov,
whalemember,
wicked,
xempire,
},
actors: {
'21sextury': sextury,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
anilos: nubiles,
aziani,
babes,
baddaddypov: fullpornnetwork,
bamvisions,
bangbros,
blacked: vixen,
blackedraw: vixen,
blowpass,
boobpedia,
brattysis: nubiles,
brazzers,
burningangel,
cherrypimps,
ddfnetwork,
deeper: vixen,
deeplush: nubiles,
digitalplayground,
dtfsluts: fullpornnetwork,
evilangel,
eyeontheguy: hush,
fakehub,
famedigital,
freeones,
gangbangcreampie: aziani,
girlfaction: fullpornnetwork,
gloryholesecrets: aziani,
hergape: fullpornnetwork,
homemadeanalwhores: fullpornnetwork,
hotcrazymess: nubiles,
hushpass: hush,
hussiepass: hush,
iconmale,
interracialpass: hush,
interracialpovs: hush,
jamesdeen: fullpornnetwork,
julesjordan,
kellymadison,
legalporno,
men,
metrohd,
milehighmedia,
mofos,
mugfucked: fullpornnetwork,
naughtyamerica,
nfbusty: nubiles,
nubilefilms: nubiles,
nubiles,
nubilesporn: nubiles,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
pimpxxx: cherrypimps,
pornhub,
povperverts: fullpornnetwork,
povpornstars: hush,
private: privateNetwork,
realitykings,
score,
seehimfuck: hush,
sexyhub: mindgeek,
thatsitcomshow: nubiles,
transangels,
tushy: vixen,
tushyraw: vixen,
twistys,
vixen,
wicked,
xempire,
},
};

View File

@@ -5,176 +5,176 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Derive a title from a scene URL path: take the second-to-last path segment,
 * split it on underscores and capitalize the first letter of each word.
 *
 * @param {string} pathname - URL pathname, e.g. '/view/some_scene_title/123'
 * @returns {string} space-separated title, e.g. 'Some Scene Title'
 */
function extractTitle(pathname) {
	return pathname
		.split('/')
		.slice(-2)[0]
		.split('_')
		.map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`)
		.join(' ');
}
/**
 * Split an actor listing such as 'A, B and C' into separate names.
 *
 * @param {string} str - names separated by commas and/or the word 'and'
 * @returns {string[]} trimmed names; empty entries and entries containing
 *   '...' (truncated listings) are dropped
 */
function extractActors(str) {
	return str
		.split(/,|\band\b/ig)
		.filter(actor => !/\.{3}/.test(actor))
		.map(actor => actor.trim())
		.filter(actor => actor.length > 0);
}
/**
 * Scrape the updates list into release objects.
 *
 * @param {string} html - markup containing the '#updatesList' items
 * @param {Object} site - site entry, attached to every release
 * @returns {Array<Object>} one release per list item
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

	return scenes.map((scene) => {
		const release = { site };

		const link = scene.querySelector('.info a');
		const poster = scene.querySelector('img');
		// the anchor element is coerced to a string by the URL constructor
		const { pathname } = new URL(link);

		[release.entryId] = poster.id.match(/\d+/);

		release.url = `https://www.teamskeet.com${pathname}`;
		release.title = extractTitle(pathname);

		release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

		// build 01.jpg through 05.jpg from the poster's source; the first becomes the poster
		const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
		[release.poster] = photos;
		release.photos = photos.slice(1);

		const actors = scene.querySelector('div span[rel="test"]').textContent;
		release.actors = extractActors(actors);

		return release;
	});
}
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - scene page markup
 * @param {Object} site - site entry, attached to the release
 * @param {string} url - scene URL, stored on the release as-is
 * @returns {Object} the scraped release
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
	release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

	// the page title is read as 'actors | title | channel'
	const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

	release.url = url;
	release.title = title;
	release.actors = extractActors(actors);
	release.channel = channel.toLowerCase();
	release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

	const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
	release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

	const { poster } = document.querySelector('video');
	// posters whose URL contains 'gen' are skipped; otherwise offer [hi, low] variants
	if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

	const siteId = document.querySelector('#story-and-tags img').src.match(/\w+.jpg/)[0].replace('.jpg', '');
	const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

	release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

	// the trailer link is optional; a missing element must not abort the scrape
	const trailer = document.querySelector('div.right.gray a')?.href;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
/**
 * Scrape a scene for sites using the 'A' layout, either from a listing tile
 * or from a full scene page.
 *
 * @param {string|null} html - scene page markup; only parsed when `sceneX` is not given
 * @param {Object} site - site entry, attached to the release
 * @param {Element} [sceneX] - listing tile element; when given, URL, title and
 *   images are read from the tile
 * @param {string} [url] - scene URL, used when scraping a full page
 * @returns {Object} the scraped release
 */
function scrapeSceneA(html, site, sceneX, url) {
	const scene = sceneX || new JSDOM(html).window.document;
	const release = { site };

	release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();

	release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
	release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();

	const durationString = scene.querySelector('.time').textContent.trim();
	const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
	release.duration = moment.duration(duration).asSeconds();

	if (sceneX) {
		const titleEl = scene.querySelector(':scope > a');

		release.url = titleEl.href;
		release.title = titleEl.title;

		const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
		release.poster = [poster.replace('bio_big', 'video'), poster];
		release.photos = photos;
	} else {
		release.title = scene.querySelector('.title span').textContent;
		release.url = url;

		release.poster = scene.querySelector('video').poster;
		release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
	}

	// the entry ID is the first path segment, or the second when the first is 'scenes'
	const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
	release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;

	return release;
}
/**
 * Scrape every '.scenewrapper' tile on an 'A' layout listing page.
 *
 * @param {string} html - listing page markup
 * @param {Object} site - site entry, attached to every release
 * @returns {Array<Object>} one release per tile
 */
function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('.scenewrapper'));

	return scenes.map(scene => scrapeSceneA(null, site, scene));
}
/**
 * Fetch one page of newest updates for a site from the teamskeet.com updates
 * endpoint, filtered by `site.parameters.id`.
 *
 * @param {Object} site - site entry
 * @param {number} [page=1] - listing page
 * @returns {Promise<Array|null>} scraped releases, or null on a non-200 response
 */
async function fetchLatestTeamSkeet(site, page = 1) {
	const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
/**
 * Fetch the `/scenes` listing of a site using the 'A' layout. No page
 * parameter is sent, so every call requests the same listing.
 *
 * @param {Object} site - site entry; `site.url` is the base URL
 * @returns {Promise<Array|null>} scraped releases, or null on a non-200 response
 */
async function fetchLatestA(site) {
	const url = `${site.url}/scenes`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatestA(res.body.toString(), site);
	}

	return null;
}
/**
 * Dispatch to the listing fetcher that matches the site's parameters.
 *
 * @param {Object} site - site entry; `parameters.id` selects the TeamSkeet
 *   updates endpoint, `parameters.scraper === 'A'` the 'A' layout
 * @param {number} [page=1] - listing page
 * @returns {Promise<Array|null>} scraped releases, or null when no fetcher applies
 */
async function fetchLatest(site, page = 1) {
	// parameters may be absent; guard like fetchScene does instead of throwing
	if (site.parameters?.id) {
		return fetchLatestTeamSkeet(site, page);
	}

	if (site.parameters?.scraper === 'A') {
		return fetchLatestA(site, page);
	}

	return null;
}
/**
 * Fetch a scene page and scrape it with the scraper matching the site's layout.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry; `parameters.scraper === 'A'` selects scrapeSceneA
 * @returns {Promise<Object|null>} scraped release, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const session = bhttp.session(); // resolve redirects
	const res = await session.get(url);

	// don't feed an error page to the scrapers; matches the other fetchers in this file
	if (res.statusCode !== 200) {
		return null;
	}

	if (site.parameters?.scraper === 'A') {
		return scrapeSceneA(res.body.toString(), site, null, url);
	}

	return scrapeScene(res.body.toString(), site, url);
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,9 +3,9 @@
const { fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek fetcher, pinned to the
 * 'transangels' network slug.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} result of the mindgeek fetchProfile
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'transangels');
}
module.exports = {
fetchProfile: networkFetchProfile,
fetchProfile: networkFetchProfile,
};

View File

@@ -3,11 +3,11 @@
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
/**
 * Fetch an actor profile through the shared mindgeek fetcher, pinned to the
 * 'twistys' network slug.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} result of the mindgeek fetchProfile
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'twistys');
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchScene,
};

View File

@@ -8,128 +8,128 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = requir
const slugify = require('../utils/slugify');
/**
 * Map scene records from the site's native videos API to release objects.
 *
 * @param {Array<Object>} scenes - API scene records
 * @param {Object} site - site entry; `site.url` prefixes each scene's URL
 * @returns {Array<Object>} one release per scene
 */
function scrapeLatestNative(scenes, site) {
	return scenes.map((scene) => {
		const release = {};

		release.entryId = scene.id;
		release.url = `${site.url}${scene.url}`;

		release.title = scene.name;

		release.date = ed(scene.release_date, 'YYYY-MM-DD');
		release.duration = parseInt(scene.runtime, 10) * 60; // presumably runtime is in minutes — confirm against the API

		release.actors = scene.cast?.map(actor => ({
			name: actor.stagename,
			// gender may be missing on a cast entry; don't let it abort the whole listing
			gender: actor.gender?.toLowerCase(),
			avatar: actor.placard,
		})) || [];

		release.stars = Number(scene.rating);
		release.poster = scene.placard_800 || scene.placard;

		return release;
	});
}
/**
 * Scrape a native (non-gamma) scene page.
 *
 * @param {Object} context - query context for the page
 * @param {string} context.html - raw page markup, searched for media URLs
 * @param {Function} context.q - single-element query helper
 * @param {Function} context.qa - multi-element query helper
 * @param {string} url - scene URL; the entry ID is its third path segment
 * @param {Object} _site - unused
 * @returns {Object} the scraped release
 */
function scrapeSceneNative({ html, q, qa }, url, _site) {
	const release = { url };

	release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

	release.title = q('.scene-h2-heading', true);
	release.description = q('.indie-model-p', true);

	const dateString = qa('h5').find(el => /Released/.test(el.textContent)).textContent;
	release.date = ed(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	const duration = qa('h5').find(el => /Runtime/.test(el.textContent)).textContent;
	const [hours, minutes] = duration.match(/\d+/g);

	if (minutes) release.duration = (hours * 3600) + (minutes * 60);
	else release.duration = hours * 60; // scene shorter than 1hr, the single match is the minutes

	release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
	release.tags = qa('h5 a[href*="/categories"]', true);

	// match() returns null when the page has no media URLs; fall back to an empty list
	const [poster, trailer] = html.match(/https:\/\/content.vivid.com(.*)(.jpg|.mp4)/g) || [];
	release.poster = poster;

	if (trailer) {
		release.trailer = {
			src: trailer,
		};
	}

	const channel = q('h5 a[href*="/sites"]', true);
	// strip the first '.xxx' suffix (e.g. a TLD) from the channel name
	if (channel) release.channel = channel.replace(/\.\w+/, '');

	return release;
}
/**
 * Fetch a page of the latest scenes, either through the Gamma API or the
 * site's own videos API.
 *
 * @param {object} site - Site with `url` and optional `parameters.useGamma`.
 * @param {number} [page=1] - 1-based page number; 50 scenes are requested per page.
 * @returns {Promise<Array|null>} Scraped releases, or null when the API does not report success.
 */
async function fetchLatestNative(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
	const res = await bhttp.get(apiUrl, {
		decodeJSON: true,
	});

	// Both the HTTP status and the status code inside the JSON body must be 200.
	if (res.statusCode === 200 && res.body.code === 200) {
		return scrapeLatestNative(res.body.responseData, site);
	}

	return null;
}
/**
 * Fetch upcoming scenes; only available when the site uses the Gamma API.
 *
 * @param {object} site - Site with optional `parameters.useGamma`.
 * @returns {Promise<Array|null>} The Gamma API result, or null for native sites.
 */
async function fetchUpcomingNative(site) {
	if (site.parameters?.useGamma) {
		return fetchApiUpcoming(site);
	}

	return null;
}
/**
 * Fetch a scene through the Gamma scraper or from the native scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site with optional `parameters.useGamma`.
 * @param {object} release - Base release, passed through to the Gamma scraper.
 * @returns {Promise<object|number>} The scraped release, or the response status when the page request failed.
 */
async function fetchSceneNative(url, site, release) {
	if (site.parameters?.useGamma) {
		return fetchScene(url, site, release);
	}

	const res = await get(url);

	return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
}
/**
 * Fetch a scene through the Gamma scraper and, for scenes dated on or before
 * `site.parameters.lastNative`, replace its URL and date with those of the
 * matching entry in the site's own search API.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site with `url` and optional `parameters.lastNative`.
 * @param {object} release - Base release, passed through to the Gamma scraper.
 * @returns {Promise<object>} The scene, with `url` and `date` overridden when a match was found.
 */
async function fetchSceneWrapper(url, site, release) {
	const scene = await fetchScene(url, site, release);

	// Nothing to enrich when the scraper did not produce a release object.
	if (!scene || typeof scene !== 'object') {
		return scene;
	}

	if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
		// scene is probably still available on Vivid site, use search API to get URL and original date
		// encodeURIComponent (not encodeURI) so that '&', '#', '?' and '=' in a title cannot break the query string
		const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURIComponent(scene.title)}`;
		const searchRes = await bhttp.get(searchUrl, {
			decodeJSON: true,
		});

		if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
			const sceneSlug = slugify(scene.title);
			const sceneMatch = searchRes.body.responseData.find(item => slugify(item.name) === sceneSlug);

			if (sceneMatch) {
				return {
					...scene,
					url: `${site.url}${sceneMatch.url}`,
					date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
				};
			}
		}
	}

	return scene;
}
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene: fetchSceneWrapper,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchUpcoming: fetchApiUpcoming,
fetchScene: fetchSceneWrapper,
};

View File

@@ -8,246 +8,246 @@ const { get, post } = require('../utils/http');
const slugify = require('../utils/slugify');
// Maps the API's single-letter `sex` field to gender names.
const genderMap = {
	F: 'female',
	M: 'male',
	T: 'transsexual', // not yet observed
};
/**
 * Build an ordered list of poster source URLs from the landscape images,
 * tallest image first.
 *
 * @param {Array<object>} poster - Images with `name`, `height`, `src` and optional `highdpi['2x'|'3x']`.
 * @returns {Array<string>} Source URLs in order of preference; missing high DPI variants are omitted.
 */
function getPosterFallbacks(poster) {
	return poster
		.filter(image => /landscape/i.test(image.name))
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap((image) => {
			const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

			// high DPI images for full HD source are huge, only prefer for smaller fallback sources
			return image.height === 1080 ? sources : sources.reverse();
		})
		.filter(Boolean); // an image without a high DPI variant would otherwise contribute undefined entries
}
/**
 * Build teaser video sources from the landscape previews.
 *
 * @param {Array<object>} teaser - Videos with `name`, `src`, `type` and `height`.
 * @returns {Array<{src: string, type: string, quality: number}>} One source per landscape video.
 */
function getTeaserFallbacks(teaser) {
	return teaser
		.filter(video => /landscape/i.test(video.name))
		.map(video => ({
			src: video.src,
			type: video.type,
			// a height of 353 is reported as the conventional 360 quality
			quality: Number(String(video.height).replace('353', '360')),
		}));
}
/**
 * Build an ordered list of avatar source URLs, tallest image first and, per
 * image, highest DPI variant first.
 *
 * @param {Array<object>} avatar - Images with `height`, `src` and optional `highdpi['2x'|'3x']`.
 * @returns {Array<string>} Source URLs in order of preference; missing variants are omitted.
 */
function getAvatarFallbacks(avatar) {
	return [...avatar] // sort() works in place, copy so the caller's array keeps its order
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.flatMap(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.filter(Boolean);
}
/**
 * Request the tokenized trailer sources for a scene.
 *
 * @param {object} scene - Scene data with `previewVideoUrl1080P`.
 * @param {object} site - Site with `url`.
 * @param {string} url - Scene page URL, sent as referer.
 * @returns {Promise<Array<{src: string, quality: number}>|null>} Available trailer sources, or null when either request fails.
 */
async function getTrailer(scene, site, url) {
	const qualities = [360, 480, 720, 1080, 2160];

	// First request yields the path of the endpoint that lists the trailer tokens.
	const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
		file: scene.previewVideoUrl1080P,
		sizes: qualities.join('+'),
		type: 'trailer',
	}, { referer: url });

	if (!tokenRes.ok) {
		return null;
	}

	const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
	const trailersRes = await post(trailerUrl, null, { referer: url });

	if (trailersRes.ok) {
		// The response body is keyed by quality; keep only the qualities present.
		return qualities.map(quality => (trailersRes.body[quality] ? {
			src: trailersRes.body[quality].token,
			quality,
		} : null)).filter(Boolean);
	}

	return null;
}
/**
 * Convert a list of API scenes into releases.
 *
 * @param {Array<object>} scenes - Scenes from the videos API.
 * @param {object|null} site - Site whose `url` prefixes the scene path; may be null.
 * @param {string} [origin] - Fallback URL prefix used when `site` has no `url`.
 * @returns {Array<object>} One release per scene.
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map((scene) => {
		const release = {};

		release.title = scene.title;

		release.entryId = String(scene.newId);
		release.url = `${site?.url || origin}${scene.targetUrl}`;

		release.date = moment.utc(scene.releaseDate).toDate();
		release.shootDate = moment.utc(scene.shootDate).toDate();

		release.actors = scene.models;
		release.stars = Number(scene.textRating) / 2; // NOTE(review): presumably the source rating is out of 10 - confirm

		release.poster = getPosterFallbacks(scene.images.poster);
		release.teaser = getTeaserFallbacks(scene.previews.poster);

		return release;
	});
}
/**
 * Convert the API's next scene into a single-item release list.
 *
 * @param {object|null} scene - `nextScene` from the API.
 * @param {object} site - Site with `url`.
 * @returns {Array<object>|null} A list with one release, or null when there is no scene or it is in its pre-release period.
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) return null;

	const release = {};

	// The title is rebuilt from the URL slug, e.g. '/some-title' becomes 'Some Title'.
	release.title = scene.targetUrl
		.slice(1)
		.split('-')
		.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.url = `${site.url}${scene.targetUrl}`;

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.actors = scene.models;

	release.poster = getPosterFallbacks(scene.images.poster);
	release.teaser = getTeaserFallbacks(scene.previews.poster);

	// The entry ID is taken from the first numeric path segment of a media URL.
	// Teaser entries are objects, so their `src` must be used; calling match() on the object itself throws.
	release.entryId = (release.poster[0] || release.teaser[0]?.src)?.match(/\/(\d+)/)?.[1];

	return [release];
}
/**
 * Convert the API data of a scene page into a release, including trailer
 * sources when they can be retrieved.
 *
 * @param {object} data - API data with `video` and `pictureset`.
 * @param {string} url - Scene URL.
 * @param {object} site - Site, used for the trailer requests.
 * @param {object} [baseRelease] - Previously scraped release; its actors take precedence.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene(data, url, site, baseRelease) {
	const scene = data.video;

	const release = {
		url,
		title: scene.title,
		description: scene.description,
		actors: baseRelease?.actors || scene.models,
		director: scene.directorNames,
		duration: scene.runLength,
		stars: scene.totalRateVal,
		tags: scene.tags,
	};

	// Stringified so the ID has the same type as the one produced by scrapeAll.
	release.entryId = String(scene.newId);

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.poster = getPosterFallbacks(scene.images.poster);
	release.photos = data.pictureset.map(photo => photo.main[0].src);

	release.teaser = getTeaserFallbacks(scene.previews.poster);

	const trailer = await getTrailer(scene, site, url);
	if (trailer) release.trailer = trailer;

	return release;
}
/**
 * Fetch and scrape the given pages of an actor's releases.
 *
 * @param {Array<number>} pages - Page numbers to fetch.
 * @param {object} model - Actor data with `targetUrl`.
 * @param {string} origin - Site origin, used for the API URL and the release URLs.
 * @returns {Promise<Array<object>>} Releases of all pages; pages that fail contribute nothing.
 */
async function fetchActorReleases(pages, model, origin) {
	// NOTE(review): Promise.map with a concurrency option is not a native Promise method;
	// presumably Promise is Bluebird in this file - confirm against the imports.
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await get(url);

		if (res.code === 200) {
			return scrapeAll(res.body.data.videos.videos, null, origin);
		}

		return [];
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
/**
 * Convert the API data of an actor page into a profile.
 *
 * @param {object} data - API data with `model` and `videos`.
 * @param {string} origin - Site origin, used for release URLs and follow-up requests.
 * @param {boolean} [withReleases] - When truthy, the remaining release pages are fetched as well.
 * @returns {Promise<object>} The scraped profile.
 */
async function scrapeProfile(data, origin, withReleases) {
	const { model } = data;
	const profile = {};

	// new Date(null) would yield the Unix epoch, so only set the birthdate when one is provided.
	if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
	profile.gender = genderMap[model.sex];

	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	if (model.biography?.trim().length > 0) profile.description = model.biography;

	// Property names follow the API's spelling ('Measurment').
	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	profile.avatar = getAvatarFallbacks(model.images.listing);
	profile.poster = getAvatarFallbacks(model.images.profile);
	profile.banner = getAvatarFallbacks(model.images.poster);

	const releases = scrapeAll(data.videos.videos, null, origin);

	if (withReleases) {
		// The first page is already included in the data; the page count assumes 6 videos per page.
		const pageCount = Math.ceil(data.videos.count / 6);
		const otherPages = Array.from({ length: pageCount - 1 }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(otherPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
/**
 * Fetch a page of the latest scenes from the site's API.
 *
 * @param {object} site - Site with `url`.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<Array<object>|number>} Scraped releases, or the response code on failure.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/api/videos?page=${page}`;
	const res = await get(url);

	if (res.code === 200) {
		return scrapeAll(res.body.data.videos, site);
	}

	return res.code;
}
/**
 * Fetch the upcoming scene from the site's API.
 *
 * @param {object} site - Site with `url`.
 * @returns {Promise<Array<object>|null|number>} The result of scrapeUpcoming, or the response code on failure.
 */
async function fetchUpcoming(site) {
	const apiUrl = `${site.url}/api`;
	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeUpcoming(res.body.data.nextScene, site);
	}

	return res.code;
}
/**
 * Fetch a scene by requesting the API counterpart of its page URL.
 *
 * @param {string} url - Scene page URL.
 * @param {object} site - Site, passed on to the scraper.
 * @param {object} [baseRelease] - Previously scraped release, passed on to the scraper.
 * @returns {Promise<object|number>} The scraped release, or the response code on failure.
 */
async function fetchScene(url, site, baseRelease) {
	const { origin, pathname } = new URL(url);
	const apiUrl = `${origin}/api${pathname}`; // the API mirrors page paths under /api

	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeScene(res.body.data, url, site, baseRelease);
	}

	return res.code;
}
/**
 * Fetch an actor profile from the API of the site named by the scraper slug.
 *
 * @param {string} actorName - Actor name, slugified into the API path.
 * @param {string} scraperSlug - Used as the domain name: https://www.<slug>.com.
 * @param {object} site - Unused.
 * @param {object} [include] - Options; `include.scenes` requests all of the actor's releases.
 * @returns {Promise<object|null>} The scraped profile, or null on failure.
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const origin = `https://www.${scraperSlug}.com`;
	const actorSlug = slugify(actorName);
	const url = `${origin}/api/${actorSlug}`;
	const res = await get(url);

	if (res.code === 200) {
		return scrapeProfile(res.body.data, origin, include?.scenes);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
fetchLatest,
fetchUpcoming,
fetchScene,
fetchProfile,
};

View File

@@ -5,199 +5,199 @@ const { ex, ctxa } = require('../utils/q');
// const slugify = require('../utils/slugify');
/**
 * Derive the numeric key used by decodeTrailerUrl from the `license_code`
 * value embedded in the page.
 *
 * NOTE(review): the single-letter names and comma-chained loop headers look
 * like a port of an obfuscated player routine; the statement order is kept
 * as-is on purpose - confirm against the upstream script before refactoring.
 *
 * @param {string} html - Page HTML containing `license_code: '...'`.
 * @returns {string} Digit string, four digits per processed position.
 */
function getLicenseCode(html) {
	const licensePrefix = 'license_code: \'';
	const licenseStart = html.indexOf(licensePrefix);
	const licenseCode = html.slice(licenseStart + licensePrefix.length, html.indexOf('\'', licenseStart + licensePrefix.length));

	const c = '16px';
	let f;
	let g;
	let h;
	let i;
	let j;
	let k;
	let l;
	let m;
	let n;

	// Collect the digits after the first character; zeros and non-digits count as 1.
	for (f = '', g = 1; g < licenseCode.length; g += 1) {
		f += parseInt(licenseCode[g], 10) ? parseInt(licenseCode[g], 10) : 1;
	}

	// The loop initializer turns f into String(2 * (|l - k| + |k - l|)), where k and l
	// are the overlapping first and second halves of the digit string, and sets the
	// modulus i to (16 / 2) + 2 = 10.
	for (j = parseInt(f.length / 2, 10),
		k = parseInt(f.substring(0, j + 1), 10),
		l = parseInt(f.substring(j), 10),
		g = l - k,
		g < 0 && (g = -g),
		f = g,
		g = k - l,
		g < 0 && (g = -g),
		f += g,
		f *= 2,
		f = String(f),
		i = (parseInt(c, 10) / 2) + 2,
		m = '',
		g = 0; g < j + 1; g += 1) {
		for (h = 1; h <= 4; h += 1) {
			n = parseInt(licenseCode[g + h], 10) + parseInt(f[g], 10);

			if (n >= i) n -= i;
			m += n;
		}
	}

	return m;
}
/**
 * Decode an obfuscated trailer URL by un-shuffling the first 32 characters of
 * its sixth path segment, using the key derived from the page's license code.
 *
 * NOTE(review): as with getLicenseCode, the terse structure appears to mirror
 * an upstream player routine and is deliberately left intact.
 *
 * @param {string} html - Page HTML, used to derive the license key.
 * @param {string} encodedTrailerUrl - Encoded URL; everything from the third '/'-separated part on is kept.
 * @returns {string} The decoded URL, without the two leading parts of the encoded one.
 */
function decodeTrailerUrl(html, encodedTrailerUrl) {
	const licenseCode = getLicenseCode(html);
	const i = licenseCode;

	let j;
	let k;
	let l;
	let m;
	let n;
	let o;

	const d = '16px';
	const g = encodedTrailerUrl.split('/').slice(2);

	// Only the first 2 * 16 = 32 characters of the segment are shuffled.
	let h = g[5].substring(0, 2 * parseInt(d, 10));

	// Walk the positions from last to first, swapping each with a key-derived partner.
	for (j = h, k = h.length - 1; k >= 0; k -= 1) {
		// Partner index: k plus the sum of the key digits from position k on...
		for (l = k, m = k; m < i.length; m += 1) {
			l += parseInt(i[m], 10);
		}

		// ...wrapped into the range of h.
		for (; l >= h.length;) {
			l -= h.length;
		}

		// Rebuild h with the characters at k and l swapped.
		for (n = '', o = 0; o < h.length; o += 1) {
			if (o === k) {
				n += h[l];
			} else {
				n += (o === l ? h[k] : h[o]);
			}
		}

		h = n;
	}

	// j still holds the original shuffled prefix; substitute the decoded one.
	g[5] = g[5].replace(j, h);
	const trailer = g.join('/');

	return trailer;
}
/**
 * Scrape the releases listed on a latest videos page.
 *
 * @param {string} html - Page HTML.
 * @returns {Array<object>} One release per `.video-post` element.
 */
function scrapeLatest(html) {
	const { document } = ex(html);

	return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
		const release = {};

		// The favorite button carries the site's video ID.
		release.entryId = q('.ico-fav-0').dataset.favVideoId;

		const titleEl = q('.video-title-title');
		release.title = titleEl.title;
		release.url = titleEl.href;

		release.date = qd('.video-data em', 'MMM DD, YYYY');
		release.actors = qa('.video-model-list a', true);

		const posterData = q('img.thumb').dataset;
		release.poster = posterData.src;
		release.trailer = posterData.preview;

		return release;
	});
}
/**
 * Scrape a scene page, including the trailer sources decoded from the
 * player's flashvars.
 *
 * @param {string} html - Page HTML.
 * @param {string} url - Scene URL.
 * @returns {object} The scraped release.
 */
function scrapeScene(html, url) {
	const { qu } = ex(html);
	const release = { url };

	// The entry ID is the first number in the canonical URL.
	[release.entryId] = qu.q('link[rel="canonical"]').href.match(/\d+/);

	release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
	release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);

	release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
	release.duration = qu.dur('.info-video-details li:nth-child(2) span');

	release.actors = qu.all('.info-video-models a', true);
	release.tags = qu.all('.info-video-category a', true);

	release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
	release.poster = qu.meta('meta[property="og:image"]');

	if (!release.poster) {
		// Fall back to the preview image URL found in the player variables.
		const previewStart = html.indexOf('preview_url');
		release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
	}

	const varsPrefix = 'flashvars = {';
	const varsStart = html.indexOf(varsPrefix);
	const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));

	// Naive `key: 'value'` parser for the flashvars object literal.
	const vars = varsString.split(',').reduce((acc, item) => {
		const [prop, value] = item.split(': ');

		// Skip fragments that are not a `key: value` pair, such as a trailing comma.
		if (value === undefined) return acc;

		acc[prop.trim()] = value.trim().replace(/'/g, '');

		return acc;
	}, {});

	// Not every scene offers all alternative qualities; decoding a missing URL would throw.
	release.trailer = ['video_url', 'video_alt_url', 'video_alt_url2', 'video_alt_url3', 'video_alt_url4']
		.filter(key => vars[key])
		.map(key => ({
			src: decodeTrailerUrl(html, vars[key]),
			quality: parseInt(vars[`${key}_text`], 10),
		}));

	return release;
}
/**
 * Fetch and scrape a page of the latest videos.
 *
 * @param {object} site - Passed on to the scraper; the URL itself is fixed.
 * @param {number} [page=1] - Value of the `from` query parameter.
 * @returns {Promise<Array<object>|null>} Scraped releases, or null on a non-200 response.
 */
async function fetchLatest(site, page = 1) {
	const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - Scene URL.
 * @returns {Promise<object|null>} The scraped release, or null on a non-200 response.
 */
async function fetchScene(url) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -5,86 +5,86 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Scrape the releases from the last `.video-releases-list` on the page.
 *
 * @param {string} html - Page HTML.
 * @param {object} site - Site with `url`; attached to every release.
 * @returns {Array<object>} One release per card, or an empty list when the page has no release list.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const { origin } = new URL(site.url);

	// Only the last release list on the page is used.
	const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];

	if (!videos) {
		return [];
	}

	return Array.from(videos.querySelectorAll('.card'), (scene) => {
		const release = { site };

		release.url = `${origin}${scene.querySelector(':scope > a').href}`;
		release.entryId = scene.dataset.videoId;
		release.title = scene.querySelector('.card-title').textContent;
		release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
		release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);

		// Image sources are prefixed with the scheme.
		release.poster = `https:${scene.querySelector('.single-image').src}`;
		release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);

		const trailerEl = scene.querySelector('source');
		if (trailerEl) release.trailer = { src: trailerEl.dataset.src };

		return release;
	});
}
/**
 * Scrape a scene page.
 *
 * @param {string} html - Page HTML.
 * @param {object} site - Attached to the release.
 * @param {string} url - Scene URL.
 * @returns {object} The scraped release.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	const scene = document.querySelector('#t2019-2col');

	release.url = url;
	release.title = scene.querySelector('.t2019-stitle').textContent.trim();
	release.description = scene.querySelector('#t2019-description').textContent.trim();
	release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);

	const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));

	// With two spans the first holds the date and the second the duration in minutes;
	// a single span holds only the duration.
	if (durationEls.length > 1) {
		release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
		release.duration = Number(durationEls[1].textContent.match(/\d+/)[0]) * 60;
	} else if (durationEls.length === 1) {
		release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
	}

	release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);

	const posterEl = scene.querySelector('#no-player-image');
	const videoEl = scene.querySelector('video');

	if (posterEl) release.poster = `https:${posterEl.src}`;
	else if (videoEl) release.poster = `https:${videoEl.poster}`;

	const trailerEl = scene.querySelector('#t2019-video source');
	if (trailerEl) release.trailer = { src: trailerEl.src };

	return release;
}
/**
 * Fetch and scrape a page of the site's latest releases.
 *
 * @param {object} site - Site with `url`.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<Array<object>>} Scraped releases, or an empty list on a non-200 response.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return [];
}
/**
 * Fetch and scrape a scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Attached to the release.
 * @returns {Promise<object|null>} The scraped release, or null on a non-200 response.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), site, url);
	}

	return null;
}
module.exports = {
fetchLatest,
fetchScene,
fetchLatest,
fetchScene,
};

View File

@@ -3,8 +3,8 @@
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -5,31 +5,31 @@ const bhttp = require('bhttp');
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
/**
 * Fetch a scene through the Gamma scraper and derive its channel from the
 * page's Twitter domain meta tag.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Passed on to the Gamma scraper.
 * @returns {Promise<object|null>} The scraped release, or null on a non-200 response.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	// Do not try to scrape an error page.
	if (res.statusCode !== 200) {
		return null;
	}

	const release = await scrapeScene(res.body.toString(), url, site);

	const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
	const siteSlug = siteDomain.split('.')[0].toLowerCase();

	release.channel = siteSlug;
	release.director = 'Mason';

	return release;
}
/**
 * Build the URL of a page of an actor's releases.
 *
 * @param {string} actorPath - Actor path, appended as-is after the category filter.
 * @param {number} [page=1] - Page number.
 * @returns {string} The releases URL.
 */
function getActorReleasesUrl(actorPath, page = 1) {
	return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
}
/**
 * Fetch an actor profile through the Gamma scraper, using this network's
 * releases URL builder.
 *
 * @param {string} actorName - Actor name.
 * @param {string} scraperSlug - Scraper slug, passed on to the Gamma scraper.
 * @param {object} site - Unused; null is passed to the Gamma scraper instead.
 * @param {object} include - Options, passed on to the Gamma scraper.
 * @returns {Promise<*>} The result of the Gamma profile scraper.
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
}
module.exports = {
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene,
fetchLatest,
fetchProfile: networkFetchProfile,
fetchUpcoming,
fetchScene,
};