forked from DebaucheryLibrarian/traxxx
Switched to tabs. Adding missing actor entries when scraping actors, with batch ID.
This commit is contained in:
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,37 +3,37 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
/**
 * Strip the leading "<prefix>:" or "<prefix> |" segment from release titles
 * for the sites whose titles carry such a prefix.
 *
 * @param {object} release - Scraped release; only `title` is inspected.
 * @param {object} site - Site descriptor; only `slug` is inspected.
 * @returns {object} A copy of the release with the trimmed title segment, or
 *   the untouched input when the site is not affected, the release has no
 *   string title, or the title contains no separator.
 */
function curateRelease(release, site) {
	if (!release || typeof release.title !== 'string') {
		return release;
	}

	if (['bubblegumdungeon', 'ladygonzo'].includes(site.slug)) {
		// The wanted title is the segment right after the first ':' or '|'.
		const [, title] = release.title.split(/:|\|/);

		// Without a separator there is no second segment; keep the title as-is
		// instead of throwing on `undefined.trim()`.
		if (title === undefined) {
			return release;
		}

		return {
			...release,
			title: title.trim(),
		};
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch a scene through the shared `fetchScene` and curate its title.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site descriptor, forwarded to the fetcher and curator.
 * @param {object} [release] - Base release, forwarded to the fetcher.
 * @returns {Promise<*>} The curated scene, or the fetcher's result unchanged
 *   when it is not an object (e.g. a failure value).
 */
async function networkFetchScene(url, site, release) {
	const scene = await fetchScene(url, site, release);

	// Only curate actual scene objects; pass anything else through untouched.
	if (!scene || typeof scene !== 'object') {
		return scene;
	}

	return curateRelease(scene, site);
}
|
||||
|
||||
/**
 * Fetch the latest releases via the shared API fetcher and curate each title.
 *
 * @param {object} site - Site descriptor.
 * @param {number} [page=1] - Page number to fetch.
 * @returns {Promise<*>} Curated releases, or the fetcher's result unchanged
 *   when it is not an array.
 */
async function fetchLatest(site, page = 1) {
	const releases = await fetchApiLatest(site, page, false);

	// Do not call .map on a non-array result (e.g. a failure value).
	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
|
||||
|
||||
/**
 * Fetch upcoming releases via the shared API fetcher and curate each title.
 *
 * @param {object} site - Site descriptor.
 * @param {number} [page=1] - Page number to fetch.
 * @returns {Promise<*>} Curated releases, or the fetcher's result unchanged
 *   when it is not an array.
 */
async function fetchUpcoming(site, page = 1) {
	const releases = await fetchApiUpcoming(site, page, false);

	// Do not call .map on a non-array result (e.g. a failure value).
	if (!Array.isArray(releases)) {
		return releases;
	}

	return releases.map(release => curateRelease(release, site));
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,47 +3,47 @@
|
||||
const { fetchLatest, fetchScene } = require('./julesjordan');
|
||||
|
||||
/**
 * Fill in `actors` from the scene title when the scene has none, and add the
 * 'mff' tag when more than one actor is present or the title mentions a
 * threesome/threeway.
 *
 * Note: the scene object is modified in place and returned (same reference).
 *
 * @param {object} scene - Scraped scene; reads `title`, `actors`, `tags`.
 * @returns {object} The same scene object, possibly with `actors`/`tags` set.
 */
function extractActors(scene) {
	const release = scene;

	// A missing title simply yields no matches instead of throwing.
	const title = typeof scene.title === 'string' ? scene.title : '';

	if (!scene.actors || scene.actors.length === 0) {
		const introActorMatches = title.match(/(?:presents|introduces|features|welcomes) (\w+ \w+)/i);
		const introTwoActorMatches = title.match(/(?:presents|introduces|features|welcomes) (?:(\w+)|(\w+ \w+)) and (\w+ \w+)/i);
		const returnActorMatches = title.match(/(?:(^\w+)|(\w+ \w+))(?:,| (?:return|visit|pov|give|suck|lick|milk|love|enjoy|service|is))/i);
		const returnTwoActorMatches = title.match(/(\w+ \w+) and (?:(\w+)|(\w+ \w+)) (?:return|visit|give|suck|lick|milk|love|enjoy|service|are)/i);

		// Two-actor patterns take precedence over their single-actor variants;
		// slice(1) drops the full match and keeps only the capture groups.
		const rawActors = (introTwoActorMatches || introActorMatches || returnTwoActorMatches || returnActorMatches)?.slice(1);

		const actors = rawActors?.filter((actor) => {
			// Unmatched alternation groups are undefined.
			if (!actor) return false;

			// Discard captures that contain common non-name words.
			if (/swallow|\bcum|fuck|suck|give|giving|take|takes|taking|head|teen|babe|cute|beaut|naughty|teacher|nanny|adorable|brunette|blonde|bust|audition|from|\band\b|\bto\b/i.test(actor)) return false;

			return true;
		});

		if (actors) {
			release.actors = actors;
		}
	}

	if (release.actors?.length > 1 || /threesome|threeway/.test(title)) {
		release.tags = scene.tags ? [...scene.tags, 'mff'] : ['mff'];
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch the latest scenes via the wrapped `fetchLatest` and derive actors
 * (and the 'mff' tag) from each title.
 *
 * @param {object} site - Site descriptor.
 * @param {number} [page=1] - Page number to fetch.
 * @returns {Promise<*>} Processed scenes, or the wrapped fetcher's result
 *   unchanged when it is not an array.
 */
async function fetchLatestWrap(site, page = 1) {
	const latest = await fetchLatest(site, page);

	// Do not call .map on a non-array result (e.g. a failure value).
	if (!Array.isArray(latest)) {
		return latest;
	}

	return latest.map(scene => extractActors(scene));
}
|
||||
|
||||
/**
 * Fetch a scene via the wrapped `fetchScene` and derive actors (and the
 * 'mff' tag) from its title.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site descriptor.
 * @returns {Promise<*>} The processed scene, or the wrapped fetcher's result
 *   unchanged when it is not an object.
 */
async function fetchSceneWrap(url, site) {
	const scene = await fetchScene(url, site);

	// Only post-process actual scene objects.
	if (!scene || typeof scene !== 'object') {
		return scene;
	}

	return extractActors(scene);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchScene: fetchSceneWrap,
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchScene: fetchSceneWrap,
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const { get, geta, ctxa } = require('../utils/q');
|
||||
|
||||
function extractActors(actorString) {
|
||||
return actorString
|
||||
return actorString
|
||||
?.replace(/.*:|\(.*\)|\d+(-|\s)year(-|\s)old|nurses?|tangled/ig, '') // remove Patient:, (date) and other nonsense
|
||||
.split(/\band\b|\bvs\b|\/|,|&/ig)
|
||||
.map(actor => actor.trim())
|
||||
@@ -12,120 +12,120 @@ function extractActors(actorString) {
|
||||
}
|
||||
|
||||
/**
 * Select the known models whose name occurs (case-insensitively) in a string.
 *
 * @param {string} actorString - Free-form text that may mention model names.
 * @param {Array<{name: string}>} models - Known models.
 * @returns {Array<object>} The models whose name appears in the text, in the
 *   order they appear in `models`; empty when the text is empty or missing.
 */
function matchActors(actorString, models) {
	if (!actorString) {
		return [];
	}

	return models.filter((model) => {
		// A missing name would compile to an empty pattern that matches anything.
		if (!model?.name) return false;

		// Names are literal text, not patterns: escape regex metacharacters so
		// e.g. the '.' in 'Dr. Gray' only matches a dot, and 'C++' is valid.
		const escapedName = model.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

		return new RegExp(escapedName, 'i').test(actorString);
	});
}
|
||||
|
||||
/**
 * Build release objects from the scene items of an overview page.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per scene item.
 * @param {object} site - Site descriptor; `url` is used to build absolute URLs.
 * @param {Array<object>} models - Known models used to recognise actors.
 * @returns {Array<object>} One release per scene item.
 */
function scrapeLatest(scenes, site, models) {
	return scenes.map(({ qu }) => {
		const release = {};

		// Drop the first character of the link path before appending it to the
		// site URL; the last path segment serves as the entry ID.
		const pathname = qu.url('a.itemimg').slice(1);
		[release.entryId] = pathname.split('/').slice(-1);
		release.url = `${site.url}${pathname}`;

		release.title = qu.q('.itemimg img', 'alt') || qu.q('h4 a', true);
		release.description = qu.q('.mas_longdescription', true);
		release.date = qu.date('.movie_info2', 'MM/DD/YY', /\d{2}\/\d{2}\/\d{2}/);

		// Prefer known models mentioned in the text; otherwise parse names
		// out of the free-form string.
		const actorString = qu.q('.mas_description', true);
		const actors = matchActors(actorString, models);

		if (actors.length > 0) release.actors = actors;
		else release.actors = extractActors(actorString);

		// Only set a poster when an image path was found, so a missing image
		// does not produce a URL ending in 'null' or 'undefined'.
		const posterPath = qu.img('.itemimg img');
		if (posterPath) release.poster = `${site.url}/${posterPath}`;

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: object}} item - Raw page HTML and query context.
 * @param {string} url - Scene URL; its last path segment is the entry ID.
 * @param {object} site - Site descriptor; `url` is used to build absolute URLs.
 * @param {Array<object>} models - Known models used to recognise actors.
 * @returns {object} The scraped release.
 */
function scrapeScene({ html, qu }, url, site, models) {
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = qu.q('.mas_title', true);
	release.description = qu.q('.mas_longdescription', true);
	release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	// The description element also holds the date; remove it before looking
	// for actor names. Fall back to an empty string if the element is missing.
	const actorString = (qu.q('.mas_description', true) || '').replace(/\w+ \d{1,2}, \d{4}/, '');
	const actors = matchActors(actorString, models);

	if (actors.length > 0) release.actors = actors;
	else release.actors = extractActors(actorString);

	release.tags = qu.all('.tags a', true);

	release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`);

	// The poster path is the 'faceimages/….jpg' span following the 'splash:'
	// marker. The marker's *position* must be the search offset; passing the
	// marker string itself as fromIndex coerces to 0.
	const posterIndex = html.indexOf('splash:');

	if (posterIndex !== -1) {
		const posterStart = html.indexOf('faceimages/', posterIndex);
		const posterEnd = posterStart === -1 ? -1 : html.indexOf('.jpg', posterStart);

		if (posterEnd !== -1) {
			release.poster = `${site.url}/${html.slice(posterStart, posterEnd + 4)}`;
		}
	}

	// The trailer path is the '/content….mp4' span following 'video/mp4'.
	const trailerIndex = html.indexOf('video/mp4');

	if (trailerIndex !== -1) {
		const trailerStart = html.indexOf('/content', trailerIndex);
		const trailerEnd = trailerStart === -1 ? -1 : html.indexOf('.mp4', trailerStart);

		if (trailerEnd !== -1) {
			release.trailer = { src: `${site.url}${html.slice(trailerStart, trailerEnd + 4)}` };
		}
	}

	return release;
}
|
||||
|
||||
/**
 * Build actor objects from the '.item' entries of a models page.
 *
 * @param {{el: object}} item - Page context; `el` is searched for '.item'.
 * @param {object} site - Site descriptor; `url` is used to build absolute URLs.
 * @returns {Array<object>} Actors with `gender`, `avatar`, `name` and `url`.
 */
function extractModels({ el }, site) {
	const models = ctxa(el, '.item');

	return models.map(({ qu }) => {
		// Every listed model is recorded as female.
		const actor = { gender: 'female' };

		const avatar = qu.q('.itemimg img');
		actor.avatar = `${site.url}/${avatar.src}`;

		// The name is the part of the alt text after the last ':', with the
		// 'xtreme girl' / 'nurse' labels removed.
		actor.name = avatar.alt
			.split(':').slice(-1)[0]
			.replace(/xtreme girl|nurse/ig, '')
			.trim();

		// Drop the first character of the link path before appending it.
		const actorPath = qu.url('.itemimg');
		actor.url = `${site.url}${actorPath.slice(1)}`;

		return actor;
	});
}
|
||||
|
||||
/**
 * Fetch all models of a site by following the models pagination.
 *
 * @param {object} site - Site descriptor; `url` is the base of the request.
 * @param {number} [page=1] - Page to fetch.
 * @param {Array<object>} [accModels=[]] - Models gathered from earlier pages.
 * @returns {Promise<Array<object>>} All gathered models plus a fixed
 *   'Dr. Gray' entry; empty when not even the first page could be fetched.
 */
async function fetchModels(site, page = 1, accModels = []) {
	const url = `${site.url}/?models/${page}`;
	const res = await get(url);

	if (!res.ok) {
		// Keep what earlier pages yielded instead of discarding everything
		// when a later page fails. With nothing gathered, this is still [].
		return accModels.length > 0
			? accModels.concat({ name: 'Dr. Gray' })
			: [];
	}

	const models = extractModels(res.item, site);
	const nextPage = res.item.qa('.pagenumbers', true)
		.map(pageX => Number(pageX))
		.filter(Boolean) // remove << and >>
		.includes(page + 1);

	if (nextPage) {
		return fetchModels(site, page + 1, accModels.concat(models));
	}

	// 'Dr. Gray' is appended by name only, once, after the last page.
	return accModels.concat(models, { name: 'Dr. Gray' });
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest scenes.
 *
 * @param {object} site - Site descriptor; reads `url` and `parameters.a`.
 * @param {number} [page=1] - Page number to fetch.
 * @param {Array<object>} models - Known models used to recognise actors.
 * @returns {Promise<Array<object>|*>} Scraped releases, or `res.status` when
 *   the request was not OK.
 */
async function fetchLatest(site, page = 1, models) {
	const url = `${site.url}/show.php?a=${site.parameters.a}_${page}`;
	const res = await geta(url, '.item');

	return res.ok ? scrapeLatest(res.items, site, models) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site descriptor.
 * @param {object} release - Unused here; kept for the scraper interface.
 * @param {Array<object>} [beforeFetchLatest] - Models gathered beforehand;
 *   when absent they are fetched now.
 * @returns {Promise<object|*>} The scraped release, or `res.status` when the
 *   request was not OK.
 */
async function fetchScene(url, site, release, beforeFetchLatest) {
	const models = beforeFetchLatest || await fetchModels(site);
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, models) : res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
beforeFetchLatest: fetchModels,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
beforeFetchLatest: fetchModels,
|
||||
};
|
||||
|
||||
@@ -5,141 +5,141 @@ const { get, getAll, initAll, extractDate } = require('../utils/qu');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
/**
 * List the candidate URLs for an image, largest first: the '-4x', '-3x' and
 * '-2x' variants of a '-1x.jpg' source, followed by the source itself.
 *
 * @param {string} source - Image URL, normally ending in '-1x.jpg'.
 * @returns {Array<string>} Distinct candidate URLs in order of preference;
 *   empty when no source is given.
 */
function getFallbacks(source) {
	if (!source) {
		return [];
	}

	const candidates = [
		source.replace('-1x.jpg', '-4x.jpg'),
		source.replace('-1x.jpg', '-3x.jpg'),
		source.replace('-1x.jpg', '-2x.jpg'),
		source,
	];

	// A source without '-1x.jpg' yields four identical entries; keep each
	// URL once so the same image is not tried repeatedly.
	return [...new Set(candidates)];
}
|
||||
|
||||
/**
 * Build release objects from the scene items of an overview or profile page.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per scene item.
 * @param {object} [site] - Site descriptor; `url` is passed to the image
 *   lookup. May be omitted (the profile scraper calls this without a site).
 * @returns {Array<object>} One release per scene item.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];
		release.url = qu.url('a');

		release.title = qu.q('h5 a', true);
		release.date = qu.date('.icon-calendar + strong', 'MM/DD/YYYY');

		release.actors = qu.q('h3', true).replace(/featuring:\s?/i, '').split(', ');

		// The image element carries `cnt` numbered `src<N>_1x` attributes; the
		// first becomes the poster, the rest the photos.
		const photoCount = qu.q('.stdimage', 'cnt');
		[release.poster, ...release.photos] = Array.from({ length: Number(photoCount) }, (value, index) => {
			// `site` is optional here, so do not dereference it blindly.
			const source = qu.img('.stdimage', `src${index}_1x`, site?.url);

			return getFallbacks(source);
		});

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: object}} item - Raw page HTML and query context.
 * @param {string} url - Scene URL, stored on the release.
 * @returns {object} The scraped release.
 */
function scrapeScene({ html, qu }, url) {
	const release = { url };

	release.entryId = qu.q('.stdimage', 'id', true).match(/set-target-(\d+)/)[1];

	release.title = qu.q('h2', true);
	release.description = qu.q('p', true);

	// The date is located with a regex over the raw HTML.
	release.date = extractDate(html, 'MM/DD/YYYY', /\b\d{2}\/\d{2}\/\d{4}\b/);

	release.actors = qu.all('h5:not(.video_categories) a').map(actor => ({
		name: qu.q(actor, null, true),
		url: qu.url(actor, null),
	}));

	release.tags = qu.all('.video_categories a', true);

	release.duration = qu.dur('.video_categories + p');

	const poster = qu.img('a img');

	release.poster = getFallbacks(poster);
	release.photos = qu.imgs('.featured-video img', 'src0_1x').map(source => getFallbacks(source));

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {{el: object, qu: object}} item - Page element and query context.
 * @param {object} [site] - Site descriptor, forwarded to `scrapeAll` for the
 *   releases listed on the page. Optional for backward compatibility.
 * @returns {object} The scraped profile, including `avatar` and `releases`.
 */
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	// The bio is a run of <strong>Label</strong> elements, each followed by a
	// text node (nodeType 3) holding the value; collect them keyed by the
	// slugified label.
	const bio = Array.from(qu.q('.widget-content').childNodes).reduce((acc, node, index, nodes) => {
		const nextNode = nodes[index + 1];

		if (node.tagName === 'STRONG' && nextNode?.nodeType === 3) {
			acc[slugify(node.textContent, '_')] = nextNode.textContent.trim();
		}

		return acc;
	}, {});

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.age) profile.age = Number(bio.age);

	// A three-digit number is taken as the height directly; use the same
	// three-digit run that was tested for, not merely the first digit run.
	const heightMatch = bio.height?.match(/\d{3}/);
	if (heightMatch) profile.height = Number(heightMatch[0]);
	// A feet/inches notation (digit, ; or ', digit) is converted and wins.
	if (bio.height && /\d[;']\d/.test(bio.height)) profile.height = feetInchesToCm(bio.height);

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// Only accept a bust value made of digits followed by letters.
		if (bust && /\d+[a-zA-Z]+/.test(bust)) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.bust_size && !profile.bust) profile.bust = bio.bust_size.toUpperCase();

	if (bio.birth_location) profile.birthPlace = bio.birth_location;
	if (bio.status_married_or_single) profile.relationship = bio.status_married_or_single;

	if (bio.eye_color) profile.eyes = bio.eye_color;

	const avatar = qu.img('.tac img');
	profile.avatar = getFallbacks(avatar);

	// Pass the site along: scrapeAll uses its URL for the image lookup.
	profile.releases = scrapeAll(initAll(el, '.featured-video'), site);

	return profile;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest scenes.
 *
 * @param {object} site - Site descriptor; `url` is the base of the request.
 * @param {number} [page=1] - Page number; defaults to the first page so the
 *   URL never contains 'undefined'.
 * @returns {Promise<Array<object>|*>} Scraped releases, or `res.status` when
 *   the request was not OK.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/tour/categories/movies_${page}_d.html`;
	const res = await getAll(url, '.featured-video');

	if (res.ok) {
		return scrapeAll(res.items, site);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site descriptor, forwarded to the scraper.
 * @returns {Promise<object|*>} The scraped release, or `res.status` when the
 *   request was not OK.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.page-content .row');

	if (res.ok) {
		return scrapeScene(res.item, url, site);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape the profile page of an actor.
 *
 * @param {string} actorName - Actor name; slugified without a delimiter to
 *   form the page name.
 * @param {string} scraperSlug - Unused here; kept for the scraper interface.
 * @param {object} site - Site descriptor; `url` is the base of the request.
 * @returns {Promise<object|*>} The scraped profile, or `res.status` when the
 *   request was not OK.
 */
async function fetchProfile(actorName, scraperSlug, site) {
	const actorSlug = slugify(actorName, '');
	const url = `${site.url}/tour/models/${actorSlug}.html`;
	const res = await get(url, '.page-content .row');

	if (res.ok) {
		// Hand the site to the profile scraper as well; scraping the releases
		// listed on the profile reads the site URL.
		return scrapeProfile(res.item, site);
	}

	return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared MindGeek scraper, fixed to the
 * 'babes' slug.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever the shared `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'babes');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -6,144 +6,144 @@ const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
/**
 * Build release objects from the episode items of an overview page.
 *
 * @param {Array<{qu: object}>} scenes - Query contexts, one per episode item.
 * @param {object} site - Site descriptor; `url` prefixes relative image paths.
 * @returns {Array<object>} One release per episode item.
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h3 a', true);
		release.url = qu.url('h3 a');

		release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
		release.duration = qu.dur('.item-meta li:nth-child(2)');
		release.description = qu.q('.description', true);

		release.actors = qu.all('a[href*="/models"]', true);
		if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

		// Each thumbnail yields a list of fallbacks (3x, 2x, 1x; missing
		// attributes dropped, relative paths prefixed with the site URL). The
		// first thumbnail is the poster, the rest are photos.
		[release.poster, ...release.photos] = qu.all('.item-thumbs img')
			.map(source => [
				source.getAttribute('src0_3x'),
				source.getAttribute('src0_2x'),
				source.getAttribute('src0_1x'),
			]
				.filter(Boolean)
				.map(fallback => (/^http/.test(fallback) ? fallback : `${site.url}${fallback}`)));

		// The entry ID is built from the release date and the slugified title.
		release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: object}} item - Raw page HTML and query context.
 * @param {string} url - Scene URL, stored on the release.
 * @param {object} site - Site descriptor; `url` prefixes relative media paths.
 * @returns {object} The scraped release.
 */
function scrapeScene({ html, qu }, url, site) {
	const release = { url };

	release.title = qu.q('.item-episode h4 a', true);
	release.date = qu.date('.item-meta li', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.item-meta li:nth-child(2)');
	release.description = qu.q('.description', true);

	release.actors = qu.all('.item-episode a[href*="/models"]', true);
	if (/bts/i.test(release.title)) release.tags = ['behind the scenes'];

	// Capture only within the attribute's own quotes. A greedy '.*' would run
	// on to the last matching extension on the line, swallowing neighbouring
	// attributes; the dot before the extension is matched literally.
	const posterPath = html.match(/poster="([^"]*\.jpg)"/)?.[1];
	const trailerPath = html.match(/video src="([^"]*\.mp4)"/)?.[1];

	if (posterPath) {
		const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;

		// Prefer the larger variants; drop repeats when the path has no '-1x'.
		release.poster = [...new Set([
			poster.replace('-1x', '-3x'),
			poster.replace('-1x', '-2x'),
			poster,
		])];
	}

	if (trailerPath) {
		const trailer = /^http/.test(trailerPath) ? trailerPath : `${site.url}${trailerPath}`;

		release.trailer = { src: trailer };
	}

	// The entry ID is built from the release date and the slugified title.
	release.entryId = `${formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;

	return release;
}
|
||||
|
||||
/**
 * Fetch all releases of an actor by following the sets pagination.
 *
 * @param {string} actorId - Actor ID used in the sets query.
 * @param {object} site - Site descriptor; `url` is the base of the request.
 * @param {number} [page=1] - Page to fetch.
 * @param {Array<object>} [accScenes=[]] - Releases gathered from earlier pages.
 * @returns {Promise<Array<object>>} All releases gathered so far.
 */
async function fetchActorReleases(actorId, site, page = 1, accScenes = []) {
	const url = `${site.url}/sets.php?id=${actorId}&page=${page}`;
	const res = await get(url);

	// On failure keep what earlier pages yielded rather than discarding it;
	// for a failed first page this is still an empty list.
	if (!res.ok) return accScenes;

	const quReleases = initAll(res.item.el, '.item-episode');
	const releases = scrapeAll(quReleases, site);

	// A link to the following page number means there is more to fetch.
	const nextPage = res.item.qu.q(`a[href*="page=${page + 1}"]`);

	if (nextPage) {
		return fetchActorReleases(actorId, site, page + 1, accScenes.concat(releases));
	}

	return accScenes.concat(releases);
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {{qu: object}} item - Query context of the profile page.
 * @param {object} site - Site descriptor; `url` prefixes relative image paths.
 * @param {boolean} [withScenes] - When truthy, also fetch the actor's releases.
 * @returns {Promise<object>} The scraped profile.
 */
async function scrapeProfile({ qu }, site, withScenes) {
	const profile = {};

	// Stats rows read 'Label: value'; key them by the slugified label.
	const bio = qu.all('.stats li', true).reduce((acc, row) => {
		const [key, value] = row.split(':');

		// Rows without a colon carry no value; skip them instead of throwing
		// on `undefined.trim()`.
		if (value === undefined) return acc;

		return { ...acc, [slugify(key, '_')]: value.trim() };
	}, {});

	if (bio.height) profile.height = feetInchesToCm(bio.height);
	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	// Avatar fallbacks, largest first; missing attributes are dropped and
	// relative paths are prefixed with the site URL.
	profile.avatar = [
		qu.q('.profile-pic img', 'src0_3x'),
		qu.q('.profile-pic img', 'src0_2x'),
		qu.q('.profile-pic img', 'src0_1x'),
	].filter(Boolean).map(source => (/^http/.test(source) ? source : `${site.url}${source}`));

	if (withScenes) {
		// The actor ID is embedded in the avatar's id attribute.
		const actorId = qu.q('.profile-pic img', 'id')?.match(/set-target-(\d+)/)?.[1];

		if (actorId) {
			profile.releases = await fetchActorReleases(actorId, site);
		}
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest episodes.
 *
 * @param {object} site - Site descriptor; `url` is the base of the request.
 * @param {number} [page=1] - Page number to fetch.
 * @returns {Promise<Array<object>|*>} Scraped releases, or `res.status` when
 *   the request was not OK.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/categories/movies/${page}/latest/`;
	const res = await geta(url, '.item-episode');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site, include) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const resA = await get(`${site.url}/models/${actorSlugA}.html`);
|
||||
const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);
|
||||
const resA = await get(`${site.url}/models/${actorSlugA}.html`);
|
||||
const res = resA.ok ? resA : await get(`${site.url}/models/${actorSlugB}.html`);
|
||||
|
||||
return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
|
||||
return res.ok ? scrapeProfile(res.item, site, include.scenes) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -8,99 +8,99 @@ const clusterId = '617fb597b659459bafe6472470d9073a';
|
||||
const authKey = 'YmFuZy1yZWFkOktqVDN0RzJacmQ1TFNRazI=';
|
||||
|
||||
const genderMap = {
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
M: 'male',
|
||||
F: 'female',
|
||||
};
|
||||
|
||||
function getScreenUrl(item, scene) {
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
|
||||
return `https://i.bang.com/screenshots/${scene.dvd.id}/movie/${scene.order}/${item.screenId}.jpg`;
|
||||
}
|
||||
|
||||
function encodeId(id) {
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
.replace(/\+/g, '-')
|
||||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
return Buffer
|
||||
.from(id, 'hex')
|
||||
.toString('base64')
|
||||
.replace(/\+/g, '-')
|
||||
.replace(/\//g, '_')
|
||||
.replace(/=/g, ',');
|
||||
}
|
||||
|
||||
function decodeId(id) {
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
.replace(/,/g, '=');
|
||||
const restoredId = id
|
||||
.replace(/-/g, '+')
|
||||
.replace(/_/g, '/')
|
||||
.replace(/,/g, '=');
|
||||
|
||||
return Buffer
|
||||
.from(restoredId, 'base64')
|
||||
.toString('hex');
|
||||
return Buffer
|
||||
.from(restoredId, 'base64')
|
||||
.toString('hex');
|
||||
}
|
||||
|
||||
function scrapeScene(scene, site) {
|
||||
const release = {
|
||||
site,
|
||||
entryId: scene.id,
|
||||
title: scene.name,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map(genre => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
const release = {
|
||||
site,
|
||||
entryId: scene.id,
|
||||
title: scene.name,
|
||||
description: scene.description,
|
||||
tags: scene.genres.concat(scene.actions).map(genre => genre.name),
|
||||
duration: scene.duration,
|
||||
};
|
||||
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
const slug = slugify(release.title);
|
||||
release.url = `https://www.bang.com/video/${encodeId(release.entryId)}/${slug}`;
|
||||
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
const date = new Date(scene.releaseDate);
|
||||
release.date = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
||||
|
||||
release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
release.actors = scene.actors.map(actor => ({ name: actor.name, gender: genderMap[actor.gender] }));
|
||||
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
if (scene.is4k) release.tags.push('4k');
|
||||
if (scene.gay) release.tags.push('gay');
|
||||
|
||||
const defaultPoster = scene.screenshots.find(photo => photo.default === true);
|
||||
const photoset = scene.screenshots.filter(photo => photo.default === false);
|
||||
const defaultPoster = scene.screenshots.find(photo => photo.default === true);
|
||||
const photoset = scene.screenshots.filter(photo => photo.default === false);
|
||||
|
||||
const photos = defaultPoster ? photoset : photoset.slice(1);
|
||||
const poster = defaultPoster || photoset[0];
|
||||
const photos = defaultPoster ? photoset : photoset.slice(1);
|
||||
const poster = defaultPoster || photoset[0];
|
||||
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = photos.map(photo => getScreenUrl(photo, scene));
|
||||
release.poster = getScreenUrl(poster, scene);
|
||||
release.photos = photos.map(photo => getScreenUrl(photo, scene));
|
||||
|
||||
release.trailer = {
|
||||
src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
|
||||
};
|
||||
release.trailer = {
|
||||
src: `https://i.bang.com/v/${scene.dvd.id}/${scene.identifier}/preview.mp4`,
|
||||
};
|
||||
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
release.channel = scene.series.name
|
||||
.replace(/[! .]/g, '')
|
||||
.replace('&', 'and');
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site) {
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
return scenes.map(({ _source: scene }) => scrapeScene(scene, site));
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now',
|
||||
},
|
||||
},
|
||||
},
|
||||
/*
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
status: 'ok',
|
||||
},
|
||||
},
|
||||
{
|
||||
range: {
|
||||
releaseDate: {
|
||||
lte: 'now',
|
||||
},
|
||||
},
|
||||
},
|
||||
/*
|
||||
* global fetch
|
||||
{
|
||||
nested: {
|
||||
@@ -122,66 +122,66 @@ async function fetchLatest(site, page = 1) {
|
||||
},
|
||||
},
|
||||
*/
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
{
|
||||
nested: {
|
||||
path: 'series',
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
'series.id': {
|
||||
operator: 'AND',
|
||||
query: site.parameters.siteId,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
must_not: [
|
||||
{
|
||||
match: {
|
||||
type: 'trailer',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
sort: [
|
||||
{
|
||||
releaseDate: {
|
||||
order: 'desc',
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeLatest(res.body.hits.hits, site);
|
||||
return scrapeLatest(res.body.hits.hits, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
|
||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
return scrapeScene(res.body._source, site); // eslint-disable-line no-underscore-dangle
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -10,44 +10,44 @@ const slugify = require('../utils/slugify');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.echThumb').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.echThumb').toArray();
|
||||
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
const entryId = url.split('/')[3].slice(5);
|
||||
|
||||
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
|
||||
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
|
||||
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
|
||||
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
|
||||
|
||||
const photoElement = $(element).find('.rollover-image');
|
||||
const poster = `https:${photoElement.attr('data-original')}`;
|
||||
const photoElement = $(element).find('.rollover-image');
|
||||
const poster = `https:${photoElement.attr('data-original')}`;
|
||||
|
||||
const photosUrl = photoElement.attr('data-rollover-url');
|
||||
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
|
||||
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
|
||||
const photosUrl = photoElement.attr('data-rollover-url');
|
||||
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
|
||||
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
|
||||
|
||||
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
|
||||
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
|
||||
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
|
||||
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
duration,
|
||||
poster,
|
||||
photos,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
});
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
shootId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
duration,
|
||||
poster,
|
||||
photos,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/* no dates available, breaks database
|
||||
@@ -80,63 +80,63 @@ function scrapeUpcoming(html, site) {
|
||||
*/
|
||||
|
||||
function scrapeScene(html, url, _site) {
|
||||
const { qu } = ex(html, '.playerSection');
|
||||
const release = {};
|
||||
const { qu } = ex(html, '.playerSection');
|
||||
const release = {};
|
||||
|
||||
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||
release.description = qu.q('.vdoDesc', true);
|
||||
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||
release.description = qu.q('.vdoDesc', true);
|
||||
|
||||
release.actors = qu.all('a[href*="/model"]', true);
|
||||
release.tags = qu.all('.vdoTags a', true);
|
||||
release.actors = qu.all('a[href*="/model"]', true);
|
||||
release.tags = qu.all('.vdoTags a', true);
|
||||
|
||||
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
|
||||
const poster = qu.img('img#player-overlay-image');
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
const poster = qu.img('img#player-overlay-image');
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
|
||||
release.trailer = { src: qu.trailer() };
|
||||
release.trailer = { src: qu.trailer() };
|
||||
|
||||
// all scenes seem to have 12 album photos available, not always included on the page
|
||||
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
// all scenes seem to have 12 album photos available, not always included on the page
|
||||
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
|
||||
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||
else release.channel = channel;
|
||||
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||
else release.channel = channel;
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
profile.releases = scrape(html);
|
||||
profile.releases = scrape(html);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
function scrapeProfileSearch(html, actorName) {
|
||||
const { qu } = ex(html);
|
||||
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||
const { qu } = ex(html);
|
||||
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
|
||||
|
||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||
return actorLink ? `https://bangbros.com${actorLink}` : null;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.get(`${site.url}/${page}`);
|
||||
const res = await bhttp.get(`${site.url}/${page}`);
|
||||
|
||||
return scrape(res.body.toString(), site);
|
||||
return scrape(res.body.toString(), site);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -148,43 +148,43 @@ async function fetchUpcoming(site) {
|
||||
*/
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
if (!release?.date) {
|
||||
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||
}
|
||||
if (!release?.date) {
|
||||
logger.warn(`Scraping Bang Bros scene from URL without release date: ${url}`);
|
||||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
const { origin } = new URL(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
const res = await bhttp.get(url);
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
||||
if (res.statusCode === 200) {
|
||||
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
||||
|
||||
if (actorUrl) {
|
||||
const actorRes = await bhttp.get(actorUrl);
|
||||
if (actorUrl) {
|
||||
const actorRes = await bhttp.get(actorUrl);
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
// fetchUpcoming, no dates available
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
// fetchUpcoming, no dates available
|
||||
};
|
||||
|
||||
@@ -5,33 +5,33 @@
|
||||
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
|
||||
|
||||
async function fetchSceneWrapper(url, site, baseRelease) {
|
||||
const release = await fetchScene(url, site, baseRelease);
|
||||
const release = await fetchScene(url, site, baseRelease);
|
||||
|
||||
if (site.isFallback && release.channel) {
|
||||
const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
|
||||
if (site.isNetwork && release.channel) {
|
||||
const channelUrl = url.replace('blowpass.com', `${release.channel}.com`);
|
||||
|
||||
if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
|
||||
return release;
|
||||
}
|
||||
if (['onlyteenblowjobs', 'mommyblowsbest'].includes(release.channel)) {
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'scene/');
|
||||
return release;
|
||||
}
|
||||
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'video/');
|
||||
}
|
||||
release.url = channelUrl.replace(/video\/\w+\//, 'video/');
|
||||
}
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function getActorReleasesUrl(actorPath, page = 1) {
|
||||
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
|
||||
return `https://www.blowpass.com/en/videos/blowpass/latest/All-Categories/0${actorPath}/${page}`;
|
||||
}
|
||||
|
||||
async function networkFetchProfile(actorName, scraperSlug, site, include) {
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
|
||||
@@ -5,90 +5,90 @@ const bhttp = require('bhttp');
|
||||
const { ex } = require('../utils/q');
|
||||
|
||||
function scrapeProfile(html) {
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const profile = {};
|
||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||
const profile = {};
|
||||
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map(detail => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
const bio = qu.all('.infobox tr[valign="top"]')
|
||||
.map(detail => qu.all(detail, 'td', true))
|
||||
.reduce((acc, [key, value]) => ({ ...acc, [key.slice(0, -1).replace(/[\s+|/]/g, '_')]: value }), {});
|
||||
|
||||
|
||||
/* unreliable, see: Syren De Mer
|
||||
/* unreliable, see: Syren De Mer
|
||||
const catlinks = qa('#mw-normal-catlinks a', true);
|
||||
const isTrans = catlinks.some(link => link.match(/shemale|transgender/i));
|
||||
profile.gender = isTrans ? 'transsexual' : 'female';
|
||||
*/
|
||||
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
profile.birthdate = qu.date('.bday', 'YYYY-MM-DD');
|
||||
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
profile.description = qu.q('#mw-content-text > p', true);
|
||||
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
if (bio.Born) profile.birthPlace = bio.Born.slice(bio.Born.lastIndexOf(')') + 1);
|
||||
if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;
|
||||
|
||||
if (bio.Measurements) {
|
||||
const measurements = bio.Measurements
|
||||
.match(/\d+(\w+)?-\d+-\d+/g)
|
||||
if (bio.Measurements) {
|
||||
const measurements = bio.Measurements
|
||||
.match(/\d+(\w+)?-\d+-\d+/g)
|
||||
?.slice(-1)[0] // allow for both '34C-25-36' and '86-64-94 cm / 34-25-37 in'
|
||||
.split('-');
|
||||
|
||||
// account for measuemrents being just e.g. '32EE'
|
||||
if (measurements) {
|
||||
const [bust, waist, hip] = measurements;
|
||||
// account for measuemrents being just e.g. '32EE'
|
||||
if (measurements) {
|
||||
const [bust, waist, hip] = measurements;
|
||||
|
||||
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
|
||||
if (/[a-zA-Z]/.test(bust)) profile.bust = bust; // only use bust if cup size is included
|
||||
|
||||
profile.waist = Number(waist);
|
||||
profile.hip = Number(hip);
|
||||
}
|
||||
profile.waist = Number(waist);
|
||||
profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
|
||||
}
|
||||
if (/^\d+\w+$/.test(bio.Measurements)) profile.bust = bio.Measurements;
|
||||
}
|
||||
|
||||
if (bio.Bra_cup_size) {
|
||||
const bust = bio.Bra_cup_size.match(/^\d+\w+/);
|
||||
if (bust) [profile.bust] = bust;
|
||||
}
|
||||
if (bio.Bra_cup_size) {
|
||||
const bust = bio.Bra_cup_size.match(/^\d+\w+/);
|
||||
if (bust) [profile.bust] = bust;
|
||||
}
|
||||
|
||||
if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
|
||||
if (bio.Boobs === 'Natural') profile.naturalBoobs = true;
|
||||
if (bio.Boobs === 'Enhanced') profile.naturalBoobs = false;
|
||||
if (bio.Boobs === 'Natural') profile.naturalBoobs = true;
|
||||
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\d+/g)[1]);
|
||||
if (bio.Height) profile.height = Number(bio.Height.match(/\d+\.\d+/g).slice(-1)[0]) * 100;
|
||||
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\d+/g)[1]);
|
||||
|
||||
if (bio.Eye_color) profile.eyes = bio.Eye_color;
|
||||
if (bio.Hair) [profile.hair] = bio.Hair.split(',');
|
||||
if (bio.Eye_color) profile.eyes = bio.Eye_color;
|
||||
if (bio.Hair) [profile.hair] = bio.Hair.split(',');
|
||||
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
if (bio.Blood_group) profile.blood = bio.Blood_group;
|
||||
if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');
|
||||
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
const avatarThumbPath = qu.img('.image img');
|
||||
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
|
||||
const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
|
||||
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
};
|
||||
}
|
||||
profile.avatar = {
|
||||
src: `http://www.boobpedia.com${avatarPath}`,
|
||||
copyright: null,
|
||||
};
|
||||
}
|
||||
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
profile.social = qu.urls('.infobox a.external');
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.replace(/\s+/, '_');
|
||||
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
const actorSlug = actorName.replace(/\s+/, '_');
|
||||
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString());
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString());
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -11,216 +11,216 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
const hairMap = {
|
||||
Blonde: 'blonde',
|
||||
Brunette: 'brown',
|
||||
'Black Hair': 'black',
|
||||
Redhead: 'red',
|
||||
Blonde: 'blonde',
|
||||
Brunette: 'brown',
|
||||
'Black Hair': 'black',
|
||||
Redhead: 'red',
|
||||
};
|
||||
|
||||
function scrapeAll(html, site, upcoming) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const sceneElements = $('.release-card.scene').toArray();
|
||||
|
||||
return sceneElements.reduce((acc, element) => {
|
||||
const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;
|
||||
return sceneElements.reduce((acc, element) => {
|
||||
const isUpcoming = $(element).find('.icon-upcoming.active').length === 1;
|
||||
|
||||
if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
|
||||
return acc;
|
||||
}
|
||||
if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
const sceneLinkElement = $(element).find('a');
|
||||
const sceneLinkElement = $(element).find('a');
|
||||
|
||||
const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const entryId = url.split('/').slice(-3, -2)[0];
|
||||
const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const entryId = url.split('/').slice(-3, -2)[0];
|
||||
|
||||
const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
|
||||
const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
|
||||
const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate();
|
||||
const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray();
|
||||
|
||||
const likes = Number($(element).find('.label-rating .like-amount').text());
|
||||
const dislikes = Number($(element).find('.label-rating .dislike-amount').text());
|
||||
const likes = Number($(element).find('.label-rating .like-amount').text());
|
||||
const dislikes = Number($(element).find('.label-rating .dislike-amount').text());
|
||||
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
|
||||
const channel = slugify($(element).find('.collection').attr('title'), '');
|
||||
const channel = slugify($(element).find('.collection').attr('title'), '');
|
||||
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
return acc.concat({
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
date,
|
||||
poster,
|
||||
photos,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
channel,
|
||||
site,
|
||||
});
|
||||
}, []);
|
||||
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * @param {string} html - Raw HTML of the scene page.
 * @param {string} url - Scene URL; its third-to-last `/`-separated segment becomes the entry ID.
 * @param {Object} _site - Unused.
 * @returns {Promise<Object>} Release with entryId, title, description, date, actors, likes,
 *   dislikes, tags and photos, plus duration, channel, poster and trailer when the page
 *   provides them.
 * @throws {SyntaxError} When the embedded video options are present but not valid JSON.
 */
async function scrapeScene(html, url, _site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = {};

	// The player options are embedded in an inline script; the script or the JSON marker
	// may be absent, in which case the scene simply has no video data.
	const videoJson = $('script:contains("window.videoUiOptions")').html();
	const videoStart = videoJson ? videoJson.indexOf('{"stream_info":') : -1;
	const videoData = videoStart >= 0
		? JSON.parse(videoJson.slice(videoStart, videoJson.lastIndexOf('},') + 1))
		: null;

	[release.entryId] = url.split('/').slice(-3, -2);
	release.title = $('.scene-title[itemprop="name"]').text();

	release.description = $('#scene-description p[itemprop="description"]')
		.contents()
		.first()
		.text()
		.trim();

	release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();

	// The content attribute wraps the minute count in one leading and one trailing character.
	const durationAttr = $('.scene-length[itemprop="duration"]').attr('content');
	if (durationAttr) release.duration = Number(durationAttr.slice(1, -1)) * 60;

	const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
		const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;

		return {
			name: $(actorElement).attr('title'),
			avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
		};
	}).toArray();

	// An empty array is truthy, so test the length; otherwise the plain-name fallback is never used.
	release.actors = actorsFromCards.length > 0
		? actorsFromCards
		: $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.likes = Number($('.label-rating .like').text());
	release.dislikes = Number($('.label-rating .dislike').text());

	const siteName = $('.niche-site-logo').attr('title');
	if (siteName) release.channel = siteName.replace(/\s+/g, '').toLowerCase();

	release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();

	const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img');
	if (posterPath) release.poster = `https:${posterPath}`;

	if (videoData) {
		release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({
			src: `https:${path}`,
			quality: Number(quality.match(/\d{3,}/)[0]),
		}));
	}

	return release;
}
|
||||
|
||||
/**
 * Find the profile link for an actor in a search results page.
 *
 * @param {string} html - Raw HTML of the search results.
 * @param {string} url - Unused.
 * @param {string} actorName - Name to look up; compared case-insensitively with link titles.
 * @returns {?string} The `href` of the first matching link, or null when none matches.
 */
function scrapeActorSearch(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	// Escape backslashes and double quotes so names containing them cannot break out of
	// the quoted attribute value and make querySelector throw on an invalid selector.
	const escapedName = String(actorName).replace(/["\\]/g, '\\$&');
	const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

	return actorLink ? actorLink.href : null;
}
|
||||
|
||||
/**
 * Collect an actor's releases, following the pagination's "next" link page by page.
 *
 * @param {Object} page - Query context with the `qu` helpers and the page `html`.
 * @param {Array} accReleases - Releases gathered from previous pages.
 * @returns {Promise<Array>} Releases from all pages that could be fetched.
 */
async function fetchActorReleases({ qu, html }, accReleases = []) {
	const collected = accReleases.concat(scrapeAll(html));
	const nextPath = qu.url('.pagination .next a');

	if (!nextPath) {
		return collected;
	}

	const res = await get(`https://www.brazzers.com${nextPath}`);

	// Stop at the first page that fails to load and keep what was gathered so far.
	if (!res.ok) {
		return collected;
	}

	return fetchActorReleases(res.item, collected);
}
|
||||
|
||||
/**
 * Scrape an actor profile page, including the actor's paginated releases.
 *
 * @param {string} html - Raw HTML of the profile page.
 * @param {string} url - Unused.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Promise<Object>} Profile with name, description, releases and whichever
 *   bio fields the spec list provides.
 */
async function scrapeProfile(html, url, actorName) {
	const qProfile = ex(html);
	const { q, qa } = qProfile;

	// Labels and values are separate parallel lists; strip line breaks and runs of whitespace.
	const tidy = text => text.replace(/\n+|\s{2,}/g, '').trim();
	const bioKeys = qa('.profile-spec-list label', true).map(tidy);
	const bioValues = qa('.profile-spec-list var', true).map(tidy);
	const bio = Object.fromEntries(bioKeys.map((key, index) => [key, bioValues[index]]));

	const profile = { name: actorName };
	profile.description = q('.model-profile-specs p', true);

	if (bio.Ethnicity) {
		profile.ethnicity = bio.Ethnicity;
	}

	if (bio.Measurements && /\d+[A-Z]+-\d+-\d+/.test(bio.Measurements)) {
		const [bust, waist, hip] = bio.Measurements.split('-');

		profile.bust = bust;
		profile.waist = waist;
		profile.hip = hip;
	}

	const birthdate = bio['Date of Birth'];
	if (birthdate && birthdate !== 'Unknown') {
		profile.birthdate = moment.utc(birthdate, 'MMMM DD, YYYY').toDate();
	}

	if (bio['Birth Location']) {
		profile.birthPlace = bio['Birth Location'];
	}

	// Only the last comma-separated entry is kept.
	if (bio['Pussy Type']) {
		profile.pussy = bio['Pussy Type'].split(',').pop().toLowerCase();
	}

	if (bio.Height) {
		profile.height = heightToCm(bio.Height);
	}

	if (bio.Weight) {
		profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]);
	}

	const hairColor = bio['Hair Color'];
	if (hairColor) {
		profile.hair = hairMap[hairColor] || hairColor.toLowerCase();
	}

	// "Enhanced" is checked last, so it wins when both words appear.
	const titsType = bio['Tits Type'];
	if (titsType) {
		if (/Natural/.test(titsType)) profile.naturalBoobs = true;
		if (/Enhanced/.test(titsType)) profile.naturalBoobs = false;
	}

	const bodyArt = bio['Body Art'];
	if (bodyArt) {
		if (/Tattoo/.test(bodyArt)) profile.hasTattoos = true;
		if (/Piercing/.test(bodyArt)) profile.hasPiercings = true;
	}

	const avatarEl = q('.big-pic-model-container img');
	if (avatarEl) {
		profile.avatar = `https:${avatarEl.src}`;
	}

	profile.releases = await fetchActorReleases(qProfile);

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the site's listing and scrape it as released scenes.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing URL.
 * @param {number} page - Page number, defaults to 1.
 * @returns {Promise<*>} Whatever `scrapeAll` returns for the page.
 */
async function fetchLatest(site, page = 1) {
	const pageUrl = `${site.url}/page/${page}/`;
	const { body } = await bhttp.get(pageUrl);
	const html = body.toString();

	return scrapeAll(html, site, false);
}
|
||||
|
||||
/**
 * Fetch the site's front page and scrape it with the upcoming flag set.
 *
 * @param {Object} site - Site record; `site.url` is the base URL.
 * @returns {Promise<*>} Whatever `scrapeAll` returns for the page.
 */
async function fetchUpcoming(site) {
	const frontPageUrl = `${site.url}/`;
	const { body } = await bhttp.get(frontPageUrl);
	const html = body.toString();

	return scrapeAll(html, site, true);
}
|
||||
|
||||
/**
 * Download a scene page and hand its HTML to `scrapeScene`.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site record, passed through to the scraper.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	const { body } = await bhttp.get(url);
	const html = body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
/**
 * Look an actor up through the pornstar search page and scrape the matching profile.
 *
 * @param {string} actorName - Name to search for.
 * @returns {Promise<?Object>} The scraped profile, or null when the search has no matching link.
 */
async function fetchProfile(actorName) {
	const searchUrl = 'https://brazzers.com/pornstars-search/';

	// The search term is sent to the site in a cookie.
	const headers = { Cookie: `textSearch=${encodeURIComponent(actorName)};` };
	const searchRes = await bhttp.get(searchUrl, { headers });

	const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

	if (!actorLink) {
		return null;
	}

	const url = `https://brazzers.com${actorLink}`;
	const res = await bhttp.get(url);

	return scrapeProfile(res.body.toString(), url, actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,139 +4,139 @@ const { get, geta, ctxa, ed } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Turn scene thumbnail contexts into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - One query context per scene thumbnail.
 * @param {Object} [site] - When `site.parameters.extract` is set, only scenes whose badge
 *   text equals `site.name` are kept.
 * @returns {Array<Object>} Releases with url, entryId, title, date, duration, actors,
 *   poster and teaser.
 */
function scrapeAll(scenes, site) {
	const mustMatchBadge = Boolean(site?.parameters?.extract);

	return scenes.reduce((releases, { qu }) => {
		const sceneUrl = qu.url('.text-thumb a');
		const scenePath = new URL(sceneUrl).pathname;
		const badgeUrl = qu.url('.badge');

		if (mustMatchBadge && qu.q('.badge', true) !== site.name) {
			return releases;
		}

		// The entry ID is the first run of digits that follows a slash in the path.
		const [entrySegment] = scenePath.match(/\/\d+/);

		releases.push({
			// Prefer the URL on the badge's own domain when the badge links somewhere.
			url: badgeUrl ? `${badgeUrl}${scenePath}` : sceneUrl,
			entryId: entrySegment.slice(1),
			title: qu.q('.text-thumb a', true),
			date: qu.date('.date', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/),
			duration: qu.dur('.date', /(\d{2}:)?\d{2}:\d{2}/),
			actors: qu.all('.category a', true),
			poster: qu.img('img.video_placeholder, .video-images img'),
			teaser: { src: qu.trailer() },
		});

		return releases;
	}, []);
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object.
 *
 * @param {Object} page - Query helpers `q`, `qd` and `qa` bound to the scene page.
 * @param {string} url - Scene URL; the first run of digits after a slash in its path is the entry ID.
 * @param {Object} _site - Unused.
 * @param {Object} [baseRelease] - Release scraped earlier; its duration is preferred, and when
 *   it already has a poster, the page's image is stored under `photos` instead of `poster`.
 * @returns {Object} Release with url, entryId, title, description, date, actors and tags,
 *   plus duration and poster/photos when available.
 */
function scrapeScene({ q, qd, qa }, url, _site, baseRelease) {
	const release = { url };

	const { pathname } = new URL(url);
	release.entryId = pathname.match(/\/\d+/)[0].slice(1);

	release.title = q('.trailer-block_title', true);
	release.description = q('.info-block:nth-child(3) .text', true);
	release.date = qd('.info-block_data .text', 'MMMM D, YYYY', /\w+ \d{1,2}, \d{4}/);

	// Duration is optional: a missing info block or missing "N min" text yields NaN, which is skipped.
	const duration = baseRelease?.duration || Number(q('.info-block_data .text', true)?.match(/(\d+)\s+min/)?.[1]) * 60;
	if (duration) release.duration = duration;

	release.actors = qa('.info-block_data a[href*="/models"]', true);
	release.tags = qa('.info-block a[href*="/categories"]', true);

	// The thumbnail is optional too; prefer the 3x source, then 2x, then the data-src attribute.
	const posterEl = q('.update_thumb');
	const poster = posterEl?.getAttribute('src0_3x') || posterEl?.getAttribute('src0_2x') || posterEl?.dataset.src;

	if (poster && baseRelease?.poster) release.photos = [poster];
	else if (poster) release.poster = poster;

	return release;
}
|
||||
|
||||
/**
 * Scrape a model page into a profile object.
 *
 * @param {Object} page - Query helpers `q`, `qa` and `qtx` bound to the model page.
 * @returns {Object} Profile with the bio fields that could be parsed, the avatar and the
 *   releases listed on the page.
 */
function scrapeProfile({ q, qa, qtx }) {
	const profile = {};

	// Labels and values are read as parallel lists; labels are slugified into bio keys.
	const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
	const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
	const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});

	// Height and weight are only stored when the metric value in parentheses is present.
	const heightCm = bio.height?.match(/\((\d+)cm\)/)?.[1];
	if (heightCm) profile.height = Number(heightCm);

	const weightKg = bio.weight?.match(/\((\d+)kg\)/)?.[1];
	if (weightKg) profile.weight = Number(weightKg);

	if (bio.race) profile.ethnicity = bio.race;

	if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
	if (bio.birthplace) profile.birthPlace = bio.birthplace;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// Skip parts that are missing or contain a question mark.
		if (bust && !/\?/.test(bust)) profile.bust = bust;
		if (waist && !/\?/.test(waist)) profile.waist = waist;
		if (hip && !/\?/.test(hip)) profile.hip = hip;
	}

	if (bio.hair) profile.hair = bio.hair;
	if (bio.eyes) profile.eyes = bio.eyes;

	if (/various/i.test(bio.tattoos)) profile.hasTattoos = true;
	else if (/none/i.test(bio.tattoos)) profile.hasTattoos = false;
	else if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (/various/i.test(bio.piercings)) profile.hasPiercings = true;
	else if (/none/i.test(bio.piercings)) profile.hasPiercings = false;
	else if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());

	// The avatar image may be absent; prefer the 3x source, then 2x, then the data-src attribute.
	const avatar = q('.model-img img');
	profile.avatar = avatar?.getAttribute('src0_3x') || avatar?.getAttribute('src0_2x') || avatar?.dataset.src;

	const releases = qa('.video-thumb');
	profile.releases = scrapeAll(ctxa(releases));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the movie listing and scrape its scene thumbnails.
 *
 * @param {Object} site - Site record; sites with `parameters.extract` are read from the
 *   cherrypimps.com listing rather than their own URL.
 * @param {number} page - Page number, defaults to 1.
 * @returns {Promise<Array<Object>|*>} Scraped releases, or the response status on failure.
 */
async function fetchLatest(site, page = 1) {
	const listingOrigin = site.parameters?.extract ? 'https://cherrypimps.com' : site.url;
	const res = await geta(`${listingOrigin}/categories/movies_${page}.html`, 'div.video-thumb');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site record, passed through to the scraper.
 * @param {Object} [release] - Release scraped earlier, passed on as the base release.
 * @returns {Promise<Object|*>} The scraped release, or the response status on failure.
 */
async function fetchScene(url, site, release) {
	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, site, release);
}
|
||||
|
||||
/**
 * Fetch an actor's model page, trying two slug variants of the name in turn.
 *
 * @param {string} actorName - Name of the actor.
 * @param {string} scraperSlug - Scraper slug that determines the domain: `.com` for
 *   cherrypimps and wildoncam, otherwise `.xxx` with the first "xxx" removed from the slug.
 * @returns {Promise<Object|*>} The scraped profile, or the status of the second attempt
 *   when both fail.
 */
async function fetchProfile(actorName, scraperSlug) {
	const slugVariants = [slugify(actorName), slugify(actorName, '')];

	const origin = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
		? `https://${scraperSlug}.com`
		: `https://${scraperSlug.replace('xxx', '')}.xxx`;

	const [primaryUrl, fallbackUrl] = slugVariants.map(slug => `${origin}/models/${slug}.html`);

	const primaryRes = await get(primaryUrl);

	if (primaryRes.ok) {
		return scrapeProfile(primaryRes.item);
	}

	const fallbackRes = await get(fallbackUrl);

	if (fallbackRes.ok) {
		return scrapeProfile(fallbackRes.item);
	}

	return fallbackRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -7,182 +7,182 @@ const slugify = require('../utils/slugify');
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
/**
 * Scrape the scene cards on a listing page into release objects.
 *
 * @param {string} html - Raw HTML of the listing page.
 * @param {Object} [site] - Site record; `site.url` is the preferred base for scene URLs.
 * @param {string} [origin] - Base used when no site URL is given; falls back to
 *   https://ddfnetwork.com.
 * @returns {Array<Object>} Releases with title, url, entryId, date, actors and poster,
 *   plus duration when it parses to a non-zero number.
 */
function scrapeAll(html, site, origin) {
	const baseUrl = site?.url || origin || 'https://ddfnetwork.com';
	const cards = exa(html, '.card.m-1:not(.pornstar-card)');

	return cards.map(({ q, qa, qd }) => {
		const title = q('a', 'title');
		const url = `${baseUrl}${q('a', 'href')}`;

		const release = {
			title,
			url,
			// The entry ID is the last path segment of the scene URL.
			entryId: url.split('/').pop(),
			date: qd('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime'),
			actors: qa('.card-subtitle a', true).filter(Boolean),
		};

		const seconds = parseInt(q('.card-info div:nth-child(2) .card-text', true), 10) * 60;

		if (seconds) {
			release.duration = seconds;
		}

		release.poster = q('img').dataset.src;

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object.
 *
 * @param {string} html - Raw HTML of the scene page.
 * @param {string} url - Scene URL; its last path segment is the entry ID.
 * @param {Object} _site - Unused.
 * @returns {Promise<Object>} Release with entryId, title, description, date, actors, tags,
 *   duration, likes, poster, photos and trailer sources.
 */
async function scrapeScene(html, url, _site) {
	const { qu } = ex(html);

	return {
		entryId: url.split('/').pop(),
		title: qu.meta('itemprop=name'),
		description: qu.q('.descr-box p', true),
		// Prefer the upload date from the metadata, fall back to the date printed on the page.
		date: qu.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content')
			|| qu.date('.title-border:nth-child(2) p', 'MM.DD.YYYY'),
		actors: qu.all('.pornstar-card > a', 'title'),
		tags: qu.all('.tags-tab .tags a', true),
		duration: parseInt(qu.q('.icon-video-red + span', true), 10) * 60,
		likes: Number(qu.q('.icon-like-red + span', true)),
		poster: qu.poster(),
		photos: qu.urls('.photo-slider-guest .card a'),
		trailer: qu.all('source[type="video/mp4"]').map((source) => {
			const quality = Number(source.attributes.res.value);

			return { src: source.src, quality };
		}),
	};
}
|
||||
|
||||
/**
 * Fetch and scrape an actor's releases from each linked site, one release per entry ID.
 *
 * @param {Array<string>} urls - Listing URLs of the actor on the individual sites.
 * @returns {Promise<Array<Object>>} Releases with duplicates (same entryId) removed.
 */
async function fetchActorReleases(urls) {
	// DDF Network and DDF Network Stream list all scenes, exclude
	const sources = urls.filter(url => !/ddfnetwork/.test(url));

	const pages = await Promise.all(sources.map(async (url) => {
		const { html } = await get(url);
		const { origin } = new URL(url);

		return scrapeAll(html, null, origin);
	}));

	// Newest first, so that for a duplicated entry ID the earliest-dated release is written last.
	const newestFirst = pages
		.flat()
		.sort((releaseA, releaseB) => releaseB.date - releaseA.date);

	// DDF cross-releases scenes between sites, filter duplicates by entryId
	const byEntryId = Object.create(null);

	newestFirst.forEach((release) => {
		byEntryId[release.entryId] = release;
	});

	return Object.values(byEntryId);
}
|
||||
|
||||
/**
 * Scrape an actor profile page, including releases from the sites it links to.
 *
 * @param {string} html - Raw HTML of the profile page.
 * @param {string} _url - Unused.
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {Promise<Object>} Profile with name, description, birthdate, releases and
 *   whichever bio fields could be parsed.
 */
async function scrapeProfile(html, _url, actorName) {
	const { qu } = ex(html);

	// First match of `pattern` in `text`, or null when the text is missing or nothing matches.
	const firstMatch = (text, pattern) => text?.match(pattern)?.[0] ?? null;

	const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
	const values = qu.all('.about-info').map((el) => {
		// Values split over child elements are joined into one comma-separated string.
		if (el.children.length > 0) {
			return Array.from(el.children, child => child.textContent.trim()).join(', ');
		}

		return el.textContent.trim();
	});

	// Entries whose value is a lone dash are left out of the bio.
	const bio = keys.reduce((acc, key, index) => {
		if (values[index] === '-') return acc;

		return {
			...acc,
			[key]: values[index],
		};
	}, {});

	const profile = {
		name: actorName,
	};

	profile.description = qu.q('.description-box', true);
	// NOTE(review): `ed` also receives undefined when the page has no birthday - confirm it tolerates that.
	profile.birthdate = ed(bio.birthday, 'MMMM DD, YYYY');

	if (bio.nationality) profile.nationality = bio.nationality;

	// Each field is only stored when its pattern actually matches; a non-matching value used to throw.
	const bust = firstMatch(bio.bra_size, /\d+\w+/);
	if (bust) profile.bust = bust;

	const waist = firstMatch(bio.waist, /\d+/);
	if (waist) profile.waist = Number(waist);

	const hip = firstMatch(bio.hips, /\d+/);
	if (hip) profile.hip = Number(hip);

	const height = firstMatch(bio.height, /\d{2,}/);
	if (height) profile.height = Number(height);

	// "Natural" is checked last, so it wins when both words appear.
	if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false;
	if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true;

	if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true;
	if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true;

	if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase();

	const eyes = firstMatch(bio.eye_color, /\w+/);
	if (eyes) profile.eyes = eyes.toLowerCase();

	// The shoe size is the second `|`-separated part; skip it when that part is missing or not numeric.
	if (bio.shoe_size) {
		const shoes = Number(bio.shoe_size.split('|')[1]);

		if (!Number.isNaN(shoes)) profile.shoes = shoes;
	}

	// Only protocol-relative avatar sources are used; the element or its data-src may be absent.
	const avatarSrc = qu.q('.pornstar-details .card-img-top')?.dataset.src;
	if (avatarSrc?.startsWith('//')) profile.avatar = `https:${avatarSrc}`;

	profile.releases = await fetchActorReleases(qu.urls('.find-me-tab li a'));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of a site's latest videos and scrape it.
 *
 * @param {Object} site - Site record; with `parameters.native` the site's own search page is
 *   used, otherwise the ddfnetwork.com search filtered by the site's hostname.
 * @param {number} page - Page number, defaults to 1.
 * @returns {Promise<Array<Object>|number>} Scraped releases, or the HTTP status code when
 *   the response is not 200.
 */
async function fetchLatest(site, page = 1) {
	let url;

	if (site.parameters?.native) {
		url = `${site.url}/videos/search/latest/ever/allsite/-/${page}`;
	} else {
		const { hostname } = new URL(site.url);

		url = `https://ddfnetwork.com/videos/search/latest/ever/${hostname}/-/${page}`;
	}

	const res = await bhttp.get(url);

	return res.statusCode === 200
		? scrapeAll(res.body.toString(), site)
		: res.statusCode;
}
|
||||
|
||||
/**
 * Download a scene page and hand its HTML to `scrapeScene`.
 *
 * @param {string} url - Scene URL, fetched as given.
 * @param {Object} site - Site record, passed through to the scraper.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	// DDF's main site moved to Porn World, so the URL is no longer rewritten to ddfnetwork.com.
	const { body } = await bhttp.get(url);
	const html = body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
/**
 * Look up an actor through the ddfnetwork.com search hints endpoint and scrape the
 * first matching profile page.
 *
 * @param {string} actorName - name sent as the search word
 * @returns {Promise<*>} the result of `scrapeProfile`, or null when the search fails,
 *   `list` is an array, no `pornstarsName` entries are returned, or the profile page
 *   does not answer with HTTP 200
 */
async function fetchProfile(actorName) {
	const searchPayload = {
		type: 'hints',
		word: actorName,
	};

	const searchOptions = {
		decodeJSON: true,
		headers: {
			'x-requested-with': 'XMLHttpRequest',
		},
	};

	const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax', searchPayload, searchOptions);

	if (resSearch.statusCode !== 200) {
		return null;
	}

	const { list } = resSearch.body;

	// an array-shaped `list` carries no `pornstarsName` key to read from
	if (Array.isArray(list)) {
		return null;
	}

	const candidates = list.pornstarsName;

	if (!candidates || candidates.length === 0) {
		return null;
	}

	// only the first search hit is followed
	const [actor] = candidates;
	const profileUrl = `https://ddfnetwork.com${actor.href}`;

	const resActor = await bhttp.get(profileUrl);

	return resActor.statusCode === 200
		? scrapeProfile(resActor.body.toString(), profileUrl, actorName)
		: null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared `fetchProfile` imported from `./mindgeek`,
 * always passing the fixed arguments 'digitalplayground' and 'modelprofile'.
 *
 * @param {string} actorName - actor to look up
 * @returns {Promise<*>} whatever the shared `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	const profile = await fetchProfile(actorName, 'digitalplayground', 'modelprofile');

	return profile;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,136 +7,136 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Build the list of photo page descriptors for an album.
 *
 * The album page is downloaded, the last `.preview-image-container a` link is taken,
 * and its numbered `NNN.jpg` file name gives the photo count. One descriptor is
 * produced per index from 1 up to that count, each pointing at the matching photo
 * page on blacksonblondes.com.
 *
 * @param {string} albumUrl - URL of the album page
 * @returns {Promise<Array<{url: string, extract: Function}>>} descriptors with the photo
 *   page URL and a callback reading the `.scenes-module img` source; an empty array
 *   when the album has no numbered preview link
 */
async function getPhotos(albumUrl) {
	const res = await bhttp.get(albumUrl);
	const { document } = new JSDOM(res.body.toString()).window;

	const previewLinks = Array.from(document.querySelectorAll('.preview-image-container a'));
	const lastPhotoPage = previewLinks[previewLinks.length - 1]?.href;

	// the dot is escaped so only a literal '.jpg' suffix is accepted
	const lastPhotoMatch = lastPhotoPage?.match(/\d+\.jpg/);

	if (!lastPhotoMatch) {
		// no preview links, or the last one is not a numbered photo: nothing to enumerate
		return [];
	}

	const lastPhotoIndex = parseInt(lastPhotoMatch[0], 10);

	return Array.from({ length: lastPhotoIndex }, (value, index) => {
		// photo pages are numbered from 001, zero-padded to three digits
		const photoFile = `${(index + 1).toString().padStart(3, '0')}.jpg`;
		const pageUrl = `https://blacksonblondes.com${lastPhotoPage.replace(/\d+\.jpg/, photoFile)}`;

		return {
			url: pageUrl,
			extract: ({ qu }) => qu.q('.scenes-module img', 'src'),
		};
	});
}
|
||||
|
||||
/**
 * Extract the releases belonging to `site` from a network-wide scene listing.
 *
 * A `.recent-updates` element is kept only when its `.help-block` text, lowercased and
 * prefixed with `www.`, equals the host of `site.url`.
 *
 * @param {string} html - listing page HTML
 * @param {object} site - site entry; `url` and `slug` are read, and the object is
 *   attached to every release
 * @returns {Array<object>} releases with url, entryId, title, actors, poster, teaser and site
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const releases = [];

	for (const sceneEl of Array.from(document.querySelectorAll('.recent-updates'))) {
		const siteUrl = sceneEl.querySelector('.help-block').textContent;
		const belongsToSite = `www.${siteUrl.toLowerCase()}` === new URL(site.url).host;

		if (belongsToSite) {
			const linkEl = sceneEl.querySelector('.thumbnail');
			const url = `https://dogfartnetwork.com${linkEl.href}`;

			// the fifth path segment identifies the scene within the site
			const pathSegments = new URL(url).pathname.split('/');
			const entryId = `${site.slug}_${pathSegments[4]}`;

			// the title doubles as the actor list, separated by commas, '&' or 'and'
			const title = sceneEl.querySelector('.scene-title').textContent;
			const actors = title.split(/[,&]|\band\b/).map(actor => actor.trim());

			const poster = `https:${sceneEl.querySelector('img').src}`;
			const teaserSrc = linkEl.dataset.preview_clip_url;

			releases.push({
				url,
				entryId,
				title,
				actors,
				poster,
				teaser: {
					src: teaserSrc,
				},
				site,
			});
		}
		// otherwise: different dogfart site, skipped
	}

	return releases;
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * The description is taken from the `meta[itemprop="description"]` tag when it has
 * content; otherwise the visible `.description` text is used, with runs of whitespace
 * collapsed and the '...read more' marker removed.
 *
 * @param {string} html - scene page HTML
 * @param {string} url - scene URL; its origin and pathname form the release URL and entry ID
 * @param {object} site - site entry attached to the release
 * @returns {Promise<object>} release with entryId, url, title, description, actors, date,
 *   duration (seconds), poster, photos, trailer, tags, rating, site and channel
 */
async function scrapeScene(html, url, site) {
	const { document } = new JSDOM(html).window;

	const title = document.querySelector('.description-title').textContent;
	const actors = Array.from(document.querySelectorAll('.more-scenes a')).map(({ textContent }) => textContent);

	// read the meta tag's content defensively: the tag may be absent or empty, in which
	// case the on-page description is used instead
	const metaDescription = document.querySelector('meta[itemprop="description"]')?.content;
	const description = metaDescription
		|| document.querySelector('.description')
			.textContent
			.replace(/[ \t\n]{2,}/g, ' ')
			.replace('...read more', '')
			.trim();

	// the channel is the part of the displayed site name before the first dot
	const channel = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
	const { origin, pathname } = new URL(url);
	const entryId = `${channel}_${pathname.split('/').slice(-2)[0]}`;

	const date = new Date(document.querySelector('meta[itemprop="uploadDate"]').content);

	// the second '.extra-info p' ends in a 'mm:ss' runtime; prefix hours for moment
	const runtime = document
		.querySelectorAll('.extra-info p')[1]
		.textContent
		.match(/\d+:\d+$/)[0];
	const duration = moment.duration(`00:${runtime}`).asSeconds();

	const trailerElement = document.querySelector('.html5-video');
	const poster = `https:${trailerElement.dataset.poster}`;
	const { trailer } = trailerElement.dataset;

	const lastPhotosUrl = Array.from(document.querySelectorAll('.pagination a')).slice(-1)[0].href;
	const photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`);

	// the page value is halved and floored to give the star rating
	const ratingText = document.querySelector('span[itemprop="average"]')?.textContent
		|| document.querySelector('span[itemprop="ratingValue"]')?.textContent;
	const stars = Math.floor(Number(ratingText) / 2);

	const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);

	return {
		entryId,
		url: `${origin}${pathname}`,
		title,
		description,
		actors,
		date,
		duration,
		poster,
		photos,
		trailer: {
			src: trailer,
		},
		tags,
		rating: {
			stars,
		},
		site,
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch one page of the network-wide scene listing and scrape the releases for `site`.
 *
 * @param {object} site - site entry forwarded to `scrapeLatest`
 * @param {number} [page=1] - value of the `p` query parameter
 * @returns {Promise<*>} whatever `scrapeLatest` returns
 */
async function fetchLatest(site, page = 1) {
	const listingUrl = `https://dogfartnetwork.com/tour/scenes/?p=${page}`;
	const { body } = await bhttp.get(listingUrl);

	return scrapeLatest(body.toString(), site);
}
|
||||
|
||||
/**
 * Download a scene page and pass its HTML to `scrapeScene`.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry forwarded to `scrapeScene`
 * @returns {Promise<*>} whatever `scrapeScene` resolves to
 */
async function fetchScene(url, site) {
	const { body } = await bhttp.get(url);
	const html = body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared `fetchProfile` imported from `./mindgeek`,
 * always passing the fixed arguments 'fakehub' and 'modelprofile'.
 *
 * @param {string} actorName - actor to look up
 * @returns {Promise<*>} whatever the shared `fetchProfile` resolves to
 */
async function networkFetchProfile(actorName) {
	const profile = await fetchProfile(actorName, 'fakehub', 'modelprofile');

	return profile;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -1,115 +1,115 @@
|
||||
'use strict';
|
||||
|
||||
const {
|
||||
fetchLatest,
|
||||
fetchApiLatest,
|
||||
fetchUpcoming,
|
||||
fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
fetchLatest,
|
||||
fetchApiLatest,
|
||||
fetchUpcoming,
|
||||
fetchApiUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchApiProfile,
|
||||
scrapeAll,
|
||||
} = require('./gamma');
|
||||
const { get } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Derive the actor list of a release from its title.
 *
 * The first standalone word 'solo' (any case) is removed, the remainder is split on
 * commas and the word 'and', and each name is trimmed. Empty names, such as the one
 * left behind by a trailing ', Solo', are discarded.
 *
 * @param {object} release - release with a `title` string
 * @returns {object} shallow copy of the release with an added `actors` array
 */
function extractLowArtActors(release) {
	const actors = release.title
		// word boundaries keep names that merely contain 'solo' (e.g. 'Solomon') intact
		.replace(/\bsolo\b/i, '')
		.split(/,|\band\b/i)
		.map(actor => actor.trim())
		.filter(Boolean);

	return {
		...release,
		actors,
	};
}
|
||||
|
||||
/**
 * Fetch the latest releases for a site, using the API scraper when the site has a
 * truthy `parameters.api` and the regular scraper otherwise.
 *
 * Releases of the 'lowartfilms' site additionally get their actors derived from the title.
 *
 * @param {object} site - site entry; `parameters.api` and `slug` are read
 * @param {number} [page=1] - page to fetch
 * @returns {Promise<*>} the scraped releases
 */
async function networkFetchLatest(site, page = 1) {
	if (site.parameters?.api) {
		return fetchApiLatest(site, page, false);
	}

	const latest = await fetchLatest(site, page);

	return site.slug === 'lowartfilms'
		? latest.map(release => extractLowArtActors(release))
		: latest;
}
|
||||
|
||||
/**
 * Fetch a scene through the shared `fetchScene`, deriving the actors from the title
 * for the 'lowartfilms' site.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry; `slug` is read
 * @returns {Promise<*>} the scraped release
 */
async function networkFetchScene(url, site) {
	const scene = await fetchScene(url, site);

	return site.slug === 'lowartfilms'
		? extractLowArtActors(scene)
		: scene;
}
|
||||
|
||||
/**
 * Fetch upcoming releases for a site, using the API scraper when the site has a
 * truthy `parameters.api` and the regular scraper otherwise.
 *
 * @param {object} site - site entry; `parameters.api` is read
 * @param {number} [page=1] - page to fetch
 * @returns {Promise<*>} the scraped upcoming releases
 */
async function networkFetchUpcoming(site, page = 1) {
	const useApi = site.parameters?.api;

	if (useApi) {
		return fetchApiUpcoming(site, page, true);
	}

	return fetchUpcoming(site, page);
}
|
||||
|
||||
/**
 * Build the peternorth.com listing URL for an actor's videos, latest first.
 *
 * @param {string} actorPath - actor path, appended directly after the category ID
 * @param {number} [page=1] - page number placed at the end of the URL
 * @returns {string} the listing URL
 */
function getActorReleasesUrl(actorPath, page = 1) {
	const categoryBase = 'https://www.peternorth.com/en/videos/All-Categories/0';
	const listingSuffix = `/All-Dvds/0/latest/${page}`;

	return `${categoryBase}${actorPath}${listingSuffix}`;
}
|
||||
|
||||
/**
 * Find an actor on a site's `/en/pornstars` page and scrape the releases listed on
 * the actor's own page.
 *
 * The actor is matched by comparing the slugified text of each
 * `option[value*="/pornstar"]` element with the slugified actor name.
 *
 * @param {string} actorName - actor to look up
 * @param {string} siteSlug - site slug, used as the `.com` domain name
 * @returns {Promise<{releases: *}|null>} the scraped releases, or null when either
 *   request fails or no matching option is found
 */
async function fetchClassicProfile(actorName, siteSlug) {
	const wantedSlug = slugify(actorName);

	const pornstarsRes = await get(`https://${siteSlug}.com/en/pornstars`);

	if (!pornstarsRes.ok) {
		return null;
	}

	const actorOption = pornstarsRes.item
		.qa('option[value*="/pornstar"]')
		.find(option => slugify(option.textContent) === wantedSlug);

	const actorPath = actorOption?.value;

	if (!actorPath) {
		return null;
	}

	const actorRes = await get(`https://${siteSlug}.com${actorPath}`);

	if (!actorRes.ok) {
		return null;
	}

	const releases = scrapeAll(actorRes.item, null, `https://www.${siteSlug}.com`, false);

	return { releases };
}
|
||||
|
||||
/**
 * Combine actor profiles from several Fame Digital sites into one profile.
 *
 * Devil's Film and Rocco Siffredi are always queried through the API scraper. When
 * `include.scenes` is truthy, Peter North, Silvia Saint and Silverstone DVD are queried
 * as well. Releases from every source that answered are concatenated; the remaining
 * fields are merged with Devil's Film taking precedence over Rocco Siffredi, and
 * Rocco Siffredi over Peter North.
 *
 * @param {string} actorName - actor to look up
 * @param {string} scraperSlug - unused here; kept for the shared scraper signature
 * @param {object} site - unused here; kept for the shared scraper signature
 * @param {object} [include] - options; `scenes` enables the release-only sources
 * @returns {Promise<object|null>} merged profile, or null when none of Devil's Film,
 *   Rocco Siffredi or Peter North returned a profile
 */
async function networkFetchProfile(actorName, scraperSlug, site, include) {
	const withScenes = Boolean(include?.scenes);

	// not all Fame Digital sites offer Gamma actors
	// skipped sources resolve to null rather than a truthy placeholder, so they cannot
	// be mistaken for a found profile below
	const [devils, rocco, peter, silvia, silverstone] = await Promise.all([
		fetchApiProfile(actorName, 'devilsfilm', true),
		fetchApiProfile(actorName, 'roccosiffredi'),
		withScenes ? fetchProfile(actorName, 'peternorth', true, getActorReleasesUrl, include) : null,
		withScenes ? fetchClassicProfile(actorName, 'silviasaint') : null,
		withScenes ? fetchClassicProfile(actorName, 'silverstonedvd') : null,
	]);

	// the classic sources only contribute releases, so they do not count as a profile
	if (!devils && !rocco && !peter) {
		return null;
	}

	const releases = [devils, rocco, peter, silvia, silverstone]
		.flatMap(profile => profile?.releases || []);

	return {
		...peter,
		...rocco,
		...devils,
		releases,
	};
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: networkFetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming: networkFetchUpcoming,
|
||||
fetchLatest: networkFetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene: networkFetchScene,
|
||||
fetchUpcoming: networkFetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,7 +4,7 @@ const { fetchLatest, fetchUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
|
||||
@@ -5,89 +5,89 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape an actor profile page.
 *
 * Bio values are read from the `/babes` filter links on the page: the bracketed word in
 * each decoded link is the key and the text after the first '=' is the value.
 *
 * @param {string} html - profile page HTML
 * @param {string} actorName - name stored on the profile
 * @returns {object} profile with name, and where available birthdate, birthPlace, eyes,
 *   hair, ethnicity, bust, waist, hip, height, weight, social links and avatar
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// numeric part before the first comma, matching how the values are stored below
	const leadingNumber = value => Number(String(value).split(',')[0]);

	const bio = Array.from(document.querySelectorAll('a[href^="/babes"]'), el => decodeURI(el.href)).reduce((acc, item) => {
		const keyMatch = item.match(/\[\w+\]/);

		if (!keyMatch) {
			return acc;
		}

		const key = keyMatch[0].slice(1, -1);
		const [, value] = item.split('=');

		// both hip and waist link to 'waist', assume biggest value is hip
		if (key === 'waist' && acc.waist) {
			// compare numerically: as strings, '9' would sort above '10'
			if (leadingNumber(acc.waist) > leadingNumber(value)) {
				acc.hip = acc.waist;
				acc.waist = value;
			} else {
				acc.hip = value;
			}

			return acc;
		}

		acc[key] = value;

		return acc;
	}, {});

	if (bio.dateOfBirth) profile.birthdate = moment.utc(bio.dateOfBirth, 'YYYY-MM-DD').toDate();

	// join only the parts that are present, so a missing one never renders as 'undefined'
	const birthPlace = [bio.placeOfBirth, bio.country].filter(Boolean).join(', ');
	if (birthPlace) profile.birthPlace = birthPlace;

	profile.eyes = bio.eyeColor;
	profile.hair = bio.hairColor;
	profile.ethnicity = bio.ethnicity;

	profile.bust = bio.bra;
	if (bio.waist) profile.waist = leadingNumber(bio.waist);
	if (bio.hip) profile.hip = leadingNumber(bio.hip);

	if (bio.height) profile.height = leadingNumber(bio.height);
	if (bio.weight) profile.weight = leadingNumber(bio.weight);

	profile.social = Array.from(document.querySelectorAll('.profile-meta-item a.social-icons'), el => el.href);

	// the image element may be absent; placeholder images are not stored as avatars
	const avatar = document.querySelector('.profile-image-large img')?.src;
	if (avatar && !avatar.match('placeholder')) profile.avatar = { src: avatar, copyright: null };

	return profile;
}
|
||||
|
||||
/**
 * Extract the link of the first search result from a search page.
 *
 * @param {string} html - search page HTML
 * @returns {string|null} `href` of the first `a.image-link`, or null when there is none
 */
function scrapeSearch(html) {
	const dom = new JSDOM(html);
	const firstResult = dom.window.document.querySelector('a.image-link');

	return firstResult?.href || null;
}
|
||||
|
||||
/**
 * Fetch and scrape an actor profile from freeones.nl.
 *
 * The profile page is first requested directly, using the lowercased name with
 * whitespace runs replaced by dashes. When that does not answer with HTTP 200, the
 * site search is queried and the first result's profile page is requested instead.
 *
 * @param {string} actorName - actor to look up
 * @returns {Promise<object|null>} the scraped profile, or null when no profile page
 *   could be retrieved
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');

	const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	// encode the name so characters such as '&', '#' or '+' cannot alter the query string
	const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${encodeURIComponent(actorName)}`);
	const actorPath = scrapeSearch(searchRes.body.toString());

	if (!actorPath) {
		return null;
	}

	const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	return scrapeProfile(actorRes.body.toString(), actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Scrape the bio summary from an actor's front page.
 *
 * The `dt`/`dd` pairs of `.dashboard-bio-list` are read as key/value pairs. The values
 * 'Unknown' and 'Unknown (add)' are treated as missing for the birthdate and birth place.
 *
 * @param {string} html - front page HTML
 * @param {string} url - unused here; kept for the existing call signature
 * @param {string} name - actor name stored on the profile
 * @returns {Promise<{profile: object, url: string}>} the partial profile and the URL of
 *   the full bio page, taken from the `.seemore a` link
 */
async function scrapeProfileFrontpage(html, url, name) {
	const { document } = new JSDOM(html).window;
	const bioEl = document.querySelector('.dashboard-bio-list');

	const bioUrl = `https:${document.querySelector('.seemore a').href}`;

	const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
	const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());

	const bio = Object.fromEntries(keys.map((key, index) => [key, values[index]]));

	// true when the value is present and is not one of the site's 'unknown' markers
	const isKnown = value => Boolean(value) && value !== 'Unknown' && value !== 'Unknown (add)';

	const profile = {
		name,
		gender: 'female',
	};

	const birthdateString = bio['Date of Birth:'];
	const measurementsString = bio['Measurements:'];

	const birthCity = isKnown(bio['Place of Birth:']) ? bio['Place of Birth:'] : null;
	const birthCountry = isKnown(bio['Country of Origin:']) ? bio['Country of Origin:'] : null;

	const piercingsString = bio['Piercings:'];
	const tattoosString = bio['Tattoos:'];

	if (isKnown(birthdateString)) {
		// drop a trailing ' (...)' suffix when present; a date without one is kept whole
		profile.birthdate = moment.utc(birthdateString.split(' (')[0], 'MMMM D, YYYY').toDate();
	}

	if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));

	if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';

	// join only the known parts, so a city without a country leaves no trailing ', '
	profile.birthPlace = [birthCity, birthCountry].filter(Boolean).join(', ');

	// the rows may be missing from the list; leave the field undefined rather than throw
	profile.hair = bio['Hair Color:']?.toLowerCase();
	profile.eyes = bio['Eye Color:']?.toLowerCase();

	if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
	if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');

	// 'various' confirms their existence without describing them
	if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
	if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;

	profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);

	return {
		profile,
		url: bioUrl,
	};
}
|
||||
|
||||
async function scrapeProfileBio(html, frontpageProfile, url, name) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('#biographyTable');
|
||||
const { document } = new JSDOM(html).window;
|
||||
const bioEl = document.querySelector('#biographyTable');
|
||||
|
||||
const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
|
||||
const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());
|
||||
const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
|
||||
const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
||||
|
||||
const profile = {
|
||||
...frontpageProfile,
|
||||
name,
|
||||
gender: 'female',
|
||||
};
|
||||
const profile = {
|
||||
...frontpageProfile,
|
||||
name,
|
||||
gender: 'female',
|
||||
};
|
||||
|
||||
const birthdateString = bio['Date of Birth:'];
|
||||
const measurementsString = bio['Measurements:'];
|
||||
const birthdateString = bio['Date of Birth:'];
|
||||
const measurementsString = bio['Measurements:'];
|
||||
|
||||
const birthCityString = bio['Place of Birth:'];
|
||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
||||
const birthCityString = bio['Place of Birth:'];
|
||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
||||
|
||||
const birthCountryString = bio['Country of Origin:'];
|
||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
||||
const birthCountryString = bio['Country of Origin:'];
|
||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
||||
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const tattoosString = bio['Tattoos:'];
|
||||
const piercingsString = bio['Piercings:'];
|
||||
const tattoosString = bio['Tattoos:'];
|
||||
|
||||
if (birthdateString && birthdateString !== 'Unknown') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
if (birthdateString && birthdateString !== 'Unknown') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
||||
|
||||
if (bio['Fake boobs']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
|
||||
profile.ethnicity = bio['Ethnicity:'];
|
||||
if (bio['Fake boobs']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
|
||||
profile.ethnicity = bio['Ethnicity:'];
|
||||
|
||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
||||
|
||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
||||
profile.height = Number(bio['Height:'].match(/\d+/)[0]);
|
||||
profile.weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
||||
profile.height = Number(bio['Height:'].match(/\d+/)[0]);
|
||||
profile.weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
||||
|
||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
||||
|
||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
||||
|
||||
profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
|
||||
profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Look up an actor's profile, first under the `v_links` listing and, when that
 * does not answer with HTTP 200, under the `s_links` listing with `_Babe`
 * appended to the slug.
 *
 * @param {string} actorName - Actor name; every space becomes an underscore in the URL slug.
 * @returns {Promise<*>} The result of scrapeProfileBio for the first listing that
 *   responds with status 200, or null when neither listing does.
 */
async function fetchProfile(actorName) {
	// a string pattern only replaces the first occurrence, which left names of
	// three or more words with raw spaces in the URL; the regex replaces them all
	const slug = actorName.replace(/ /g, '_');
	const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;

	const resFrontpage = await bhttp.get(frontpageUrl);

	if (resFrontpage.statusCode === 200) {
		// NOTE(review): this branch reads `bio` from the helper's result while the
		// fallback branch below reads `profile`; confirm which key
		// scrapeProfileFrontpage actually returns
		const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
	}

	// apparently some actors are appended 'Babe' as their surname...
	const fallbackSlug = `${slug}_Babe`;
	const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
	const resFallback = await bhttp.get(fallbackUrl);

	if (resFallback.statusCode === 200) {
		const { url, profile } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
		const resBio = await bhttp.get(url);

		return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -4,93 +4,93 @@ const { get, geta, ctxa } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Turn a list of scene tiles into release objects.
 *
 * @param {Array<{el: Object, qu: Object}>} scenes - Query contexts, one per tile;
 *   `el.dataset.setid` is preferred for the entry ID, otherwise the ID is parsed
 *   from the `id` attribute of `.update_thumb`.
 * @returns {Object[]} One release per scene with entryId, url, title, description,
 *   date, duration, actors and a two-entry poster list (2x first, then 1x).
 */
function scrapeAll(scenes) {
	const thumbIdPattern = /\w+-\w+-(\d+)-\d+/;

	return scenes.map(({ el, qu }) => {
		const release = {
			entryId: el.dataset.setid || qu.q('.update_thumb', 'id').match(thumbIdPattern)[1],
			url: qu.url('.title'),
			title: qu.q('.title', true),
			description: qu.q('.title', 'title'),
			date: qu.date('.video-data > span:last-child', 'YYYY-MM-DD'),
			duration: qu.dur('.video-data > span'),
			actors: qu.all('.update_models a', true),
		};

		// the 2x variant is derived from the 1x path and listed first
		const thumb = qu.q('.update_thumb', 'src0_1x');
		release.poster = [thumb.replace('-1x', '-2x'), thumb];

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {{q: Function, qa: Function, qd: Function, qtx: Function}} context - Query helpers bound to the page.
 * @param {string} url - Scene URL, copied onto the release.
 * @param {*} _site - Unused.
 * @returns {Object} Release with url, entryId, title, description, date, actors,
 *   tags and a two-entry poster list (2x first, then 1x).
 */
function scrapeScene({ q, qa, qd, qtx }, url, _site) {
	const imageSelector = '#image_parent img';

	const release = {
		url,
		// the entry ID is the third dash-separated segment of the image's id attribute
		entryId: q(imageSelector, 'id').match(/\w+-\w+-(\d+)-\d+/)[1],
		title: q('.trailer_title', true),
		description: qtx('.text p'),
		date: qd('span[data-dateadded]', 'YYYY-MM-DD', null, 'data-dateadded'),
		actors: qa('.update_models a', true),
		tags: qa('.video-info a[href*="/categories"]', true),
	};

	const image = q(imageSelector, 'src0_1x');
	release.poster = [image.replace('-1x', '-2x'), image];

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {{el: Object, q: Function, qtx: Function}} context - Query helpers and the root element of the page.
 * @returns {Object} Profile with an optional description, a two-entry avatar list
 *   (2x, then 1x) and the releases found in the page's `.update` tiles.
 */
function scrapeProfile({ el, q, qtx }) {
	const bioText = qtx('.model-bio');

	// description is only set when the bio text is non-empty
	const profile = bioText ? { description: bioText } : {};

	profile.avatar = ['src0_2x', 'src0_1x'].map(attribute => q('.model-image img', attribute));
	profile.releases = scrapeAll(ctxa(el, '.update'));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the site's movie listing and scrape its tiles.
 *
 * @param {{url: string}} site - Site whose `url` is the base of the listing URL.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<*>} The scraped releases, or the response status when the request was not ok.
 */
async function fetchLatest(site, page = 1) {
	const res = await geta(`${site.url}/categories/movies_${page}_d.html`, '.latest-updates .update');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Passed through to scrapeScene.
 * @returns {Promise<*>} The scraped release, or the response status when the request was not ok.
 */
async function fetchScene(url, site) {
	const res = await get(url, '.content-wrapper');

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, site);
}
|
||||
|
||||
/**
 * Fetch and scrape an actor's model page.
 *
 * @param {string} actorName - Actor name; slugified with an empty delimiter for the URL.
 * @param {string} scraperSlug - Scraper slug, used as the `.com` host name except for `povperverts`.
 * @returns {Promise<*>} The scraped profile, or the response status when the request was not ok.
 */
async function fetchProfile(actorName, scraperSlug) {
	const actorSlug = slugify(actorName, '');

	// povperverts is served from .net; every other slug maps to <slug>.com
	const host = scraperSlug === 'povperverts' ? 'povperverts.net' : `${scraperSlug}.com`;
	const res = await get(`https://${host}/models/${actorSlug}.html`);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.item, actorName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -8,404 +8,403 @@ const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
/**
 * Build a case-insensitive pattern that matches any channel of the site's
 * network, for the networks that need tag-based channel detection.
 *
 * @param {{network: {slug: string, id: *}}} site - Site whose network is inspected.
 * @returns {Promise<RegExp|null>} Null for networks other than hushpass and
 *   interracialpass, or when the network has no usable channels; otherwise an
 *   alternation of each channel's `parameters.match` or, failing that, its name.
 */
async function getChannelRegExp(site) {
	if (!['hushpass', 'interracialpass'].includes(site.network.slug)) return null;

	const sites = await knex('sites').where('network_id', site.network.id);

	const alternatives = sites
		.map((channel) => {
			// parameters.match is used verbatim — presumably regex source; verify against the site seeds
			if (channel.parameters?.match) return channel.parameters.match;

			// names are literal text, so regex metacharacters in them must not be interpreted
			return channel.name ? String(channel.name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&') : null;
		})
		.filter(Boolean);

	// an empty alternation compiles to a pattern that matches every string,
	// which would make callers treat the first tag as a channel
	if (alternatives.length === 0) return null;

	return new RegExp(alternatives.join('|'), 'i');
}
|
||||
|
||||
/**
 * Derive an entry ID from a release's date and title.
 *
 * @param {{date: *, title: string}} release - Release being identified.
 * @returns {string|null} `<slugified YYYY-MM-DD date>-<slugified title>`, or null
 *   when either the date or the title is missing.
 */
function deriveEntryId(release) {
	if (!release.date || !release.title) {
		return null;
	}

	const datePart = slugify(fd(release.date, 'YYYY-MM-DD'));
	const titlePart = slugify(release.title);

	return `${datePart}-${titlePart}`;
}
|
||||
|
||||
/**
 * Resolve the poster sources and photo list for a scene.
 *
 * @param {string|null|undefined} posterPath - Poster path found on the scene page.
 * @param {{url: string, parameters?: {media?: string}}} site - `parameters.media`
 *   (or `url` when absent) is prefixed to the path.
 * @param {{poster?: *}} [baseRelease] - Release scraped earlier, if any.
 * @returns {Array} `[poster, photos]`. With a usable path, `poster` is the list of
 *   1x/2x/3x sources and `photos` holds the base release's poster when there is
 *   one. Without a usable path, `poster` is the base release's poster (or null)
 *   and `photos` is empty.
 */
function extractPoster(posterPath, site, baseRelease) {
	const basePoster = baseRelease?.poster;

	// paths containing "400.jpg" are skipped — presumably a placeholder image;
	// the dot is escaped so only a literal "400.jpg" is rejected
	if (!posterPath || /400\.jpg/.test(posterPath)) {
		return [basePoster || null, []];
	}

	const poster = `${site.parameters?.media || site.url}${posterPath}`;
	const posterSources = [
		poster,
		// upscaled
		poster.replace('-1x', '-2x'),
		poster.replace('-1x', '-3x'),
	];

	return [posterSources, basePoster ? [basePoster] : []];
}
|
||||
|
||||
/**
 * Collect the 3x, 2x and 1x sources of an image, largest first, as absolute URLs.
 *
 * @param {Function} q - Query helper; called as `q(el, selector, attribute)` when
 *   `el` is given and as `q(selector, attribute)` otherwise.
 * @param {string} selector - Selector of the image element.
 * @param {{url: string, parameters?: {media?: string}}} site - `parameters.media`
 *   (or `url` when absent) is prefixed to every source found.
 * @param {*} [el] - Optional element to scope the query to.
 * @returns {string[]} Prefixed sources; attributes that yield nothing are skipped.
 */
function getImageWithFallbacks(q, selector, site, el) {
	const readSource = el
		? attribute => q(el, selector, attribute)
		: attribute => q(selector, attribute);

	return ['src0_3x', 'src0_2x', 'src0_1x']
		.map(attribute => readSource(attribute))
		.filter(Boolean)
		.map(src => `${site.parameters?.media || site.url}${src}`);
}
|
||||
|
||||
/**
 * Turn a list of scene tiles into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - Query contexts, one per tile.
 * @param {Object} site - Passed to getImageWithFallbacks to resolve poster URLs.
 * @returns {Object[]} One release per scene with title, url, date, duration,
 *   poster and entryId; `tags` is set to `['behind the scenes']` when the title
 *   mentions "bts" or "behind the scenes".
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ qu }) => {
		// prefer the link's title attribute over its text
		const title = qu.q('h3 a', 'title') || qu.q('h3 a', true);

		const release = {
			title,
			url: qu.url('h3 a'),
			date: qu.date('.modeldata p', 'YYYY-MM-DD', /\d{4}-\d{2}-\d{2}/),
			duration: qu.dur('.modeldata p'),
		};

		if (/bts|behind the scenes/i.test(title)) {
			release.tags = ['behind the scenes'];
		}

		release.poster = getImageWithFallbacks(qu.q, '.modelimg img', site);

		// the set-target ID on the thumbnail is not used; the ID comes from deriveEntryId
		release.entryId = deriveEntryId(release);

		return release;
	});
}
|
||||
|
||||
/**
 * Turn a list of T1-layout scene tiles into release objects, optionally dropping
 * releases that were already accumulated from another site.
 *
 * @param {Array<{qu: Object}>} scenes - Query contexts, one per tile.
 * @param {{url: string, parameters?: {media?: string, accFilter?: *}}} site -
 *   `parameters.media` (or `url`) prefixes relative poster paths; a truthy
 *   `parameters.accFilter` enables the duplicate filter.
 * @param {Array<{entryId: *}>} [accSiteReleases] - Releases scraped earlier.
 * @returns {Object[]} Releases with title, url, date, duration, optional tags,
 *   optional poster list (3x, 2x, 1x) and entryId.
 */
function scrapeAllT1(scenes, site, accSiteReleases) {
	// built once instead of re-mapping and linearly scanning the accumulated
	// releases for every scene; null when filtering is off or nothing was accumulated
	const accumulatedEntryIds = site?.parameters?.accFilter && accSiteReleases
		? new Set(accSiteReleases.map(accRelease => accRelease.entryId))
		: null;

	return scenes.map(({ qu }) => {
		const release = {};

		release.title = qu.q('h4 a', 'title') || qu.q('h4 a', true);
		release.url = qu.url('h4 a');

		release.date = qu.date('.more-info-div', 'MMM D, YYYY');
		release.duration = qu.dur('.more-info-div');

		if (/bts|behind the scenes/i.test(release.title)) release.tags = ['behind the scenes'];

		const posterPath = qu.q('.img-div img', 'src0_1x') || qu.img('img.video_placeholder');

		if (posterPath) {
			// absolute poster URLs are used as-is, relative ones get the media base prefixed
			const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`;

			release.poster = [
				poster.replace('-1x', '-3x'),
				poster.replace('-1x', '-2x'),
				poster,
			];
		}

		// the set-target ID on the thumbnail is not used; the ID comes from deriveEntryId
		release.entryId = deriveEntryId(release);

		if (accumulatedEntryIds?.has(release.entryId)) {
			// filter out releases that were already scraped from a categorized site
			return null;
		}

		return release;
	}).filter(Boolean);
}
|
||||
|
||||
/**
 * Turn a list of tour-layout scene tiles into release objects.
 *
 * @param {Array<{qu: Object}>} scenes - Query contexts, one per tile.
 * @returns {Object[]} One release per scene with title, url, date, actors,
 *   poster and an entryId derived from the release.
 */
function scrapeAllTour(scenes) {
	return scenes.map(({ qu }) => {
		const release = {
			title: qu.q('h4 a', true),
			url: qu.url('a'),
			date: qu.date('.tour_update_models + span', 'YYYY-MM-DD'),
			actors: qu.all('.tour_update_models a', true),
			poster: qu.img('a img'),
		};

		release.entryId = deriveEntryId(release);

		return release;
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {{html: string, qu: Object}} context - Raw page HTML and query helpers.
 * @param {{url: string, parameters?: {media?: string}}} site - `parameters.media`
 *   (or `url`) prefixes the trailer path; also passed to extractPoster.
 * @param {string} url - Scene URL, copied onto the release.
 * @param {Object} [baseRelease] - Release scraped earlier, passed to extractPoster.
 * @returns {Object} Release with url, title, description, date, duration, actors,
 *   poster, photos, entryId and, when found, trailer and stars.
 */
function scrapeScene({ html, qu }, site, url, baseRelease) {
	const release = { url };

	release.title = qu.q('.centerwrap h2', true);
	release.description = qu.q('.videocontent p', true);

	release.date = qu.date('.videodetails .date', 'MM/DD/YYYY');
	release.duration = qu.dur('.videodetails .date');

	release.actors = qu.all('.modelname a', true);

	const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1];
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	// the dot before "mp4" is escaped so only a real .mp4 extension ends the match
	const trailerPath = html.match(/\/trailers\/.*\.mp4/)?.[0];
	if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` };

	// the rating element may be absent, so the text lookup must not be dereferenced
	// blindly; the decimal point is escaped so "105" cannot pass for a rating
	const stars = qu.q('.modelrates + p', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	// the set-target ID in the HTML is not used; the ID comes from deriveEntryId
	release.entryId = deriveEntryId(release);

	return release;
}
|
||||
|
||||
/**
 * Build a release object from a T1-layout scene page.
 *
 * @param {{html: string, qu: Object}} context - Raw page HTML and query helpers.
 * @param {{url: string, parameters?: {media?: string}}} site - `parameters.media`
 *   (or `url`) prefixes relative trailer paths; also passed to the image helpers.
 * @param {string} url - Scene URL; copied onto the release and used as the trailer referer.
 * @param {Object} [baseRelease] - Release scraped earlier, passed to extractPoster.
 * @param {RegExp} [channelRegExp] - When given, the first tag it matches becomes the forced channel.
 * @returns {Object} Release with url, title, description, date, duration, actors
 *   (name and avatar sources), tags, poster, photos, entryId and, when found,
 *   trailer, stars and channel.
 */
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
	const release = { url };

	release.title = qu.q('.trailer-section-head .section-title', true);
	release.description = qu.text('.row .update-info-block');

	release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/);
	release.duration = qu.dur('.update-info-row:nth-child(2)');

	release.actors = qu.all('.models-list-thumbs a').map(el => ({
		name: qu.q(el, 'span', true),
		avatar: getImageWithFallbacks(qu.q, 'img', site, el),
	}));

	release.tags = qu.all('.tags a', true);

	// the poster comes from the player thumbnail rather than the <video> poster attribute
	const posterPath = qu.q('.player-thumb img', 'src0_1x');
	[release.poster, release.photos] = extractPoster(posterPath, site, baseRelease);

	const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1];
	if (trailer) {
		// absolute trailer URLs are used as-is, relative ones get the media base prefixed
		const src = /^http/.test(trailer) ? trailer : `${site.parameters?.media || site.url}${trailer}`;
		release.trailer = { src, referer: url };
	}

	// the rating element may be absent, so the text lookup must not be dereferenced
	// blindly; the decimal point is escaped so "105" cannot pass for a rating
	const stars = qu.q('.update-rating', true)?.match(/\d\.\d/)?.[0];
	if (stars) release.stars = Number(stars);

	if (channelRegExp) {
		const channel = release.tags.find(tag => channelRegExp.test(tag));

		if (channel) {
			release.channel = {
				force: true,
				slug: slugify(channel, ''),
			};
		}
	}

	// the set-target ID on the thumbnail is not used; the ID comes from deriveEntryId
	release.entryId = deriveEntryId(release);

	return release;
}
|
||||
|
||||
/**
 * Build a release object from a tour-layout scene page or tile.
 *
 * @param {{html: string, qu: Object}} context - Raw HTML and query helpers.
 * @param {{url: string, parameters?: {media?: string}}} site - `parameters.media`
 *   (or `url`) prefixes relative trailer paths.
 * @param {string} [url] - Scene URL; only set on the release when given.
 * @returns {Object} Release with title, description, actors, tags and photos,
 *   plus url, date, poster, entryId and trailer when available.
 */
function scrapeSceneTour({ html, qu }, site, url) {
	const release = {};

	if (url) {
		release.url = url;
	}

	release.title = qu.q('.update_title, .video-title', true);
	release.description = qu.q('.latest_update_description, .video-summary', true);

	const releaseDate = qu.date('.availdate, .update_date', 'YYYY-MM-DD');

	if (releaseDate) {
		release.date = releaseDate;
	}

	release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true);
	release.tags = qu.all('.update_tags a, .tour_update_tags a', true);

	// the second image is the preferred poster; the first is the fallback
	const [firstImage, secondImage, ...remainingImages] = qu.imgs('.update_image img:not(.play_icon_overlay)');
	const posterImage = secondImage || firstImage;

	if (posterImage) {
		release.poster = posterImage;
	}

	// the rest element is always an array, so photos is always assigned (possibly
	// empty); don't use first photo when already used as fallback poster
	release.photos = secondImage ? [firstImage, ...remainingImages] : remainingImages;

	if (release.date) {
		release.entryId = deriveEntryId(release);
	}

	// the trailer is taken from the tload('...') onclick handler, else from the first /trailer/…mp4 path in the HTML
	const onclickCode = qu.q('.update_image a', 'onclick');
	const trailerPath = onclickCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0];

	if (trailerPath) {
		const src = /^http/.test(trailerPath) ? trailerPath : `${site.parameters?.media || site.url}${trailerPath}`;
		release.trailer = { src };
	}

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * @param {{el: Object, qu: Object}} context - Root element and query helpers of the page.
 * @param {Object} site - Passed to the image and release helpers.
 * @returns {Object} Profile with avatar and releases, plus bust, waist, hip, age
 *   and height when the corresponding stats are present.
 */
function scrapeProfile({ el, qu }, site) {
	const profile = {};

	// each stats line is expected to read "Key: value"
	const bio = qu.texts('.stats p').reduce((acc, info) => {
		const [key, value] = info.split(':');

		// lines without a colon (or without a value) carry no stat; skipping them
		// avoids calling trim() on undefined
		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// bust stays a string, waist and hip are converted to numbers
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.age) profile.age = Number(bio.age);
	// NOTE(review): feetInchesToCm receives the raw height string here, but two
	// numbers in scrapeProfileT1 — confirm the helper accepts both forms
	if (bio.height) profile.height = feetInchesToCm(bio.height);

	profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site);

	const qReleases = ctxa(el, '.modelFeatures .modelfeature');
	profile.releases = scrapeAll(qReleases, site);

	return profile;
}
|
||||
|
||||
/**
 * Build an actor profile from a T1-layout model page.
 *
 * @param {{el: Object, qu: Object}} context - Root element and query helpers of the page.
 * @param {Object} site - Passed to the image and release helpers.
 * @returns {Object} Profile with avatar and releases, plus bust, waist, hip,
 *   description (from the "fun fact" entry), age and height when present.
 */
function scrapeProfileT1({ el, qu }, site) {
	const profile = {};

	// each detail line is expected to read "Key: value"; lines without a value are skipped
	const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => {
		const [key, value] = info.split(':');

		if (!value) return acc;

		return {
			...acc,
			[slugify(key, '_')]: value.trim(),
		};
	}, {});

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// bust stays a string, waist and hip are converted to numbers
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (bio.fun_fact) profile.description = bio.fun_fact;
	if (bio.age) profile.age = Number(bio.age);

	const heightMetric = bio.height?.match(/(\d{3})(\b|c)/);
	const heightImperial = bio.height?.match(/\d{1}(\.\d)?/g);

	// the imperial pattern matches any digit, so it also matches metric heights
	// such as "170cm"; the metric value wins and imperial is only the fallback
	if (heightMetric) profile.height = Number(heightMetric[1]);
	else if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1]));

	profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site);

	const qReleases = ctxa(el, '.item-video');
	profile.releases = scrapeAllT1(qReleases, site);

	return profile;
}
|
||||
|
||||
function scrapeProfileTour({ el, qu }, site) {
|
||||
const profile = {};
|
||||
const profile = {};
|
||||
|
||||
const bio = qu.texts('.model_bio').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
const bio = qu.texts('.model_bio').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
|
||||
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
if (bio.fun_fact) profile.description = bio.fun_fact;
|
||||
if (bio.date_of_birth) profile.birthdate = ed(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
if (bio.birthplace) profile.birthPlace = bio.birthplace;
|
||||
if (bio.fun_fact) profile.description = bio.fun_fact;
|
||||
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.height) profile.height = Number(bio.height.match(/^\d{2,3}/)?.[0]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/^\d{2,3}/)?.[0]);
|
||||
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
if (bio.measurements) {
|
||||
const [bust, waist, hip] = bio.measurements.split('-');
|
||||
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
if (bust) profile.bust = bust;
|
||||
if (waist) profile.waist = Number(waist);
|
||||
if (hip) profile.hip = Number(hip);
|
||||
}
|
||||
|
||||
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||
if (bio.natural_breasts && /yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||
if (bio.natural_breasts && /no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||
|
||||
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
if (bio.tattoos && /yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (bio.tattoos && /no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
if (bio.piercings && /yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (bio.piercings && /no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
||||
if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim());
|
||||
|
||||
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
|
||||
profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site);
|
||||
|
||||
const qReleases = ctxa(el, '.update_block');
|
||||
profile.releases = qReleases.map((qRelease) => {
|
||||
const url = qRelease.qu.url('.update_image a[href]');
|
||||
const release = scrapeSceneTour(qRelease, site);
|
||||
const qReleases = ctxa(el, '.update_block');
|
||||
profile.releases = qReleases.map((qRelease) => {
|
||||
const url = qRelease.qu.url('.update_image a[href]');
|
||||
const release = scrapeSceneTour(qRelease, site);
|
||||
|
||||
if (!/\/(signup|join)/i.test(url)) release.url = url;
|
||||
release.entryId = deriveEntryId(release);
|
||||
release.site = site;
|
||||
if (!/\/(signup|join)/i.test(url)) release.url = url;
|
||||
release.entryId = deriveEntryId(release);
|
||||
release.site = site;
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
|
||||
return profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, _beforeFetchLatest, accSiteReleases) {
|
||||
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|
||||
const url = (site.parameters?.latest && util.format(site.parameters.latest, page))
|
||||
|| (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`)
|
||||
|| `${site.url}/categories/movies_${page}_d.html`;
|
||||
|
||||
const res = await geta(url, '.modelfeature, .item-video, .updateItem');
|
||||
const res = await geta(url, '.modelfeature, .item-video, .updateItem');
|
||||
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
|
||||
if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeAllT1(res.items, site, accSiteReleases);
|
||||
if (site.parameters?.tour) return scrapeAllTour(res.items, site, accSiteReleases);
|
||||
|
||||
return scrapeAll(res.items, site, accSiteReleases);
|
||||
return scrapeAll(res.items, site, accSiteReleases);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
|
||||
const res = await get(url);
|
||||
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
|
||||
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
|
||||
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
|
||||
|
||||
return scrapeScene(res.item, site, url, baseRelease);
|
||||
return scrapeScene(res.item, site, url, baseRelease);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const t1 = site.parameters?.t1 ? 't1/' : '';
|
||||
const t1 = site.parameters?.t1 ? 't1/' : '';
|
||||
|
||||
const res1 = site.parameters?.profile
|
||||
? await get(util.format(site.parameters.profile, actorSlugA))
|
||||
: await get(`${site.url}/${t1}models/${actorSlugA}.html`);
|
||||
const res1 = site.parameters?.profile
|
||||
? await get(util.format(site.parameters.profile, actorSlugA))
|
||||
: await get(`${site.url}/${t1}models/${actorSlugA}.html`);
|
||||
|
||||
const res = (res1.ok && res1)
|
||||
|| (site.parameters?.profile
|
||||
? await get(util.format(site.parameters.profile, actorSlugB))
|
||||
: await get(`${site.url}/${t1}models/${actorSlugB}.html`));
|
||||
const res = (res1.ok && res1)
|
||||
|| (site.parameters?.profile && await get(util.format(site.parameters.profile, actorSlugB)))
|
||||
|| await get(`${site.url}/${t1}models/${actorSlugB}.html`);
|
||||
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
|
||||
if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
|
||||
if (!res.ok) return res.status;
|
||||
if (site.parameters?.t1) return scrapeProfileT1(res.item, site);
|
||||
if (site.parameters?.tour) return scrapeProfileTour(res.item, site);
|
||||
|
||||
return scrapeProfile(res.item, site);
|
||||
return scrapeProfile(res.item, site);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
beforeFetchLatest: getChannelRegExp,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
beforeFetchLatest: getChannelRegExp,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
async function networkFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'iconmale');
|
||||
return fetchProfile(actorName, 'iconmale');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -4,104 +4,104 @@ const bhttp = require('bhttp');
|
||||
const { get, exa, ed } = require('../utils/q');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const scenes = site.slug === 'paintoy'
|
||||
? exa(html, '#articleTable table[cellspacing="2"]')
|
||||
: exa(html, 'body > table');
|
||||
const scenes = site.slug === 'paintoy'
|
||||
? exa(html, '#articleTable table[cellspacing="2"]')
|
||||
: exa(html, 'body > table');
|
||||
|
||||
return scenes.map(({ qu }) => {
|
||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||
const release = {};
|
||||
return scenes.map(({ qu }) => {
|
||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||
const release = {};
|
||||
|
||||
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
|
||||
const titleEl = qu.q('.galleryTitleText, .articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const date = qu.date('.articlePostDateText td', 'MMM D, YYYY');
|
||||
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
|
||||
if (date) {
|
||||
release.title = title.trim();
|
||||
release.date = date;
|
||||
} else {
|
||||
// title should contain date instead, not applicable in brief mode
|
||||
release.title = title.slice(title.indexOf(':') + 1).trim();
|
||||
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
|
||||
}
|
||||
if (date) {
|
||||
release.title = title.trim();
|
||||
release.date = date;
|
||||
} else {
|
||||
// title should contain date instead, not applicable in brief mode
|
||||
release.title = title.slice(title.indexOf(':') + 1).trim();
|
||||
release.date = ed(title.slice(0, title.indexOf(':')), 'MMM D, YYYY');
|
||||
}
|
||||
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
|
||||
const description = qu.q('.articleCopyText', true);
|
||||
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
||||
const description = qu.q('.articleCopyText', true);
|
||||
if (description) release.description = description.slice(0, description.lastIndexOf('('));
|
||||
|
||||
const duration = qu.dur('.articleCopyText a:nth-child(2)');
|
||||
if (duration) release.duration = duration;
|
||||
const duration = qu.dur('.articleCopyText a:nth-child(2)');
|
||||
if (duration) release.duration = duration;
|
||||
|
||||
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
|
||||
release.likes = parseInt(qu.q('.articlePostDateText td:nth-child(3)', true), 10);
|
||||
|
||||
const cover = qu.img('a img');
|
||||
release.covers = [[
|
||||
cover.replace('_thumbnail', ''),
|
||||
cover,
|
||||
]];
|
||||
const cover = qu.img('a img');
|
||||
release.covers = [[
|
||||
cover.replace('_thumbnail', ''),
|
||||
cover,
|
||||
]];
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ qu }, site) {
|
||||
const release = {};
|
||||
const release = {};
|
||||
|
||||
const titleEl = qu.q('.articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
const titleEl = qu.q('.articleTitleText');
|
||||
const [title, ...actors] = titleEl.textContent.split('|');
|
||||
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
const url = qu.url(titleEl, 'a');
|
||||
[release.entryId] = url.split('/').slice(-2);
|
||||
release.url = `${site.url}${url}`;
|
||||
|
||||
release.title = title.trim();
|
||||
release.description = qu.q('.articleCopyText', true);
|
||||
release.title = title.trim();
|
||||
release.description = qu.q('.articleCopyText', true);
|
||||
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
|
||||
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
|
||||
release.actors = actors.map(actor => actor.trim());
|
||||
release.date = qu.date('.articlePostDateText', 'MMMM D, YYYY');
|
||||
release.duration = qu.dur('.articlePostDateText a:nth-child(2)');
|
||||
|
||||
const [cover, ...photos] = qu.imgs('img[src*="images"]');
|
||||
release.covers = [cover];
|
||||
release.photos = photos;
|
||||
const [cover, ...photos] = qu.imgs('img[src*="images"]');
|
||||
release.covers = [cover];
|
||||
release.photos = photos;
|
||||
|
||||
release.poster = qu.poster();
|
||||
release.poster = qu.poster();
|
||||
|
||||
const trailer = qu.trailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
const trailer = qu.trailer();
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
|
||||
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
|
||||
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||
const url = site.slug === 'paintoy' // paintoy's site is partially broken, use front page
|
||||
? `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`
|
||||
: `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||
|
||||
const res = await bhttp.get(url, {
|
||||
type: 'brief',
|
||||
page,
|
||||
});
|
||||
const res = await bhttp.get(url, {
|
||||
type: 'brief',
|
||||
page,
|
||||
});
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(site.slug === 'paintoy' ? res.body.toString() : res.body.html, site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, site) : res.status;
|
||||
return res.ok ? scrapeScene(res.item, site) : res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -9,116 +9,116 @@ const slugify = require('../utils/slugify');
|
||||
const { fetchApiLatest, fetchScene } = require('./gamma');
|
||||
|
||||
async function fetchToken(site) {
|
||||
const res = await bhttp.get(site.url);
|
||||
const html = res.body.toString();
|
||||
const res = await bhttp.get(site.url);
|
||||
const html = res.body.toString();
|
||||
|
||||
const time = html.match(/"aet":\d+/)[0].split(':')[1];
|
||||
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
|
||||
const token = ah.split('').reverse().join('');
|
||||
const time = html.match(/"aet":\d+/)[0].split(':')[1];
|
||||
const ah = html.match(/"ah":"[\w-]+"/)[0].split(':')[1].slice(1, -1);
|
||||
const token = ah.split('').reverse().join('');
|
||||
|
||||
return { time, token };
|
||||
return { time, token };
|
||||
}
|
||||
|
||||
async function fetchActors(entryId, site, { token, time }) {
|
||||
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `${site.url}/sapi/${token}/${time}/model.getModelContent?_method=model.getModelContent&tz=1&fields[0]=modelId.stageName&fields[1]=_last&fields[2]=modelId.upsellLink&fields[3]=modelId.upsellText&limit=25&transitParameters[contentId]=${entryId}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status === true) {
|
||||
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status === true) {
|
||||
return Object.values(res.body.response.collection).map(actor => Object.values(actor.modelId.collection)[0].stageName);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchTrailerLocation(entryId, site) {
|
||||
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
|
||||
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
|
||||
|
||||
try {
|
||||
const res = await bhttp.get(url, {
|
||||
followRedirects: false,
|
||||
});
|
||||
try {
|
||||
const res = await bhttp.get(url, {
|
||||
followRedirects: false,
|
||||
});
|
||||
|
||||
if (res.statusCode === 302) {
|
||||
return res.headers.location;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
|
||||
}
|
||||
if (res.statusCode === 302) {
|
||||
return res.headers.location;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function scrapeScene(scene, site, tokens) {
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.title,
|
||||
duration: scene.length,
|
||||
site,
|
||||
meta: {
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
},
|
||||
};
|
||||
const release = {
|
||||
entryId: scene.id,
|
||||
title: scene.title,
|
||||
duration: scene.length,
|
||||
site,
|
||||
meta: {
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
},
|
||||
};
|
||||
|
||||
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
|
||||
release.date = new Date(scene.sites.collection[scene.id].publishDate);
|
||||
release.poster = scene._resources.primary[0].url;
|
||||
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, { encode: true })}`;
|
||||
release.date = new Date(scene.sites.collection[scene.id].publishDate);
|
||||
release.poster = scene._resources.primary[0].url;
|
||||
|
||||
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
|
||||
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
|
||||
if (scene.tags) release.tags = Object.values(scene.tags.collection).map(tag => tag.alias);
|
||||
if (scene._resources.base) release.photos = scene._resources.base.map(resource => resource.url);
|
||||
|
||||
const [actors, trailer] = await Promise.all([
|
||||
fetchActors(release.entryId, site, tokens),
|
||||
fetchTrailerLocation(release.entryId, site),
|
||||
]);
|
||||
const [actors, trailer] = await Promise.all([
|
||||
fetchActors(release.entryId, site, tokens),
|
||||
fetchTrailerLocation(release.entryId, site),
|
||||
]);
|
||||
|
||||
release.actors = actors;
|
||||
if (trailer) release.trailer = { src: trailer, quality: 1080 };
|
||||
release.actors = actors;
|
||||
if (trailer) release.trailer = { src: trailer, quality: 1080 };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeLatest(scenes, site, tokens) {
|
||||
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
|
||||
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiLatest(site, page);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchApiLatest(site, page);
|
||||
}
|
||||
|
||||
const { time, token } = await fetchToken(site);
|
||||
const { time, token } = await fetchToken(site);
|
||||
|
||||
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
|
||||
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
|
||||
const res = await bhttp.get(url);
|
||||
// transParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
|
||||
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeLatest(res.body.response.collection, site, { time, token });
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeLatest(res.body.response.collection, site, { time, token });
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchNetworkScene(url, site, release) {
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
if (site.parameters?.useGamma) {
|
||||
return fetchScene(url, site, release);
|
||||
}
|
||||
|
||||
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { pathname } = new URL(url);
|
||||
const entryId = pathname.split('/')[2];
|
||||
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { pathname } = new URL(url);
|
||||
const entryId = pathname.split('/')[2];
|
||||
|
||||
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
|
||||
const res = await bhttp.get(apiUrl);
|
||||
const apiUrl = `${site.url}/sapi/${token}/${time}/content.load?filter[id][fields][0]=id&filter[id][values][0]=${entryId}&transitParameters[v1]=ykYa8ALmUD&transitParameters[preset]=scene`;
|
||||
const res = await bhttp.get(apiUrl);
|
||||
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeScene(res.body.response.collection[0], site, { time, token });
|
||||
}
|
||||
if (res.statusCode === 200 && res.body.status) {
|
||||
return scrapeScene(res.body.response.collection[0], site, { time, token });
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene: fetchNetworkScene,
|
||||
fetchLatest,
|
||||
fetchScene: fetchNetworkScene,
|
||||
};
|
||||
|
||||
@@ -3,83 +3,83 @@
|
||||
const { get, initAll } = require('../utils/qu');
|
||||
|
||||
function scrapeLatest(scenes, dates, site) {
|
||||
return scenes.map(({ qu }, index) => {
|
||||
const release = {};
|
||||
return scenes.map(({ qu }, index) => {
|
||||
const release = {};
|
||||
|
||||
const path = qu.url('a');
|
||||
release.url = `${site.url}/visitors/${path}`;
|
||||
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
|
||||
const path = qu.url('a');
|
||||
release.url = `${site.url}/visitors/${path}`;
|
||||
release.entryId = path.match(/videos\/([a-zA-Z0-9]+)(?:_hd)?_trailer/)?.[1];
|
||||
|
||||
if (dates && dates[index]) {
|
||||
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
|
||||
}
|
||||
if (dates && dates[index]) {
|
||||
release.date = dates[index].qu.date(null, 'MM/DD/YYYY');
|
||||
}
|
||||
|
||||
release.description = qu.q('tbody tr:nth-child(3) font', true);
|
||||
release.description = qu.q('tbody tr:nth-child(3) font', true);
|
||||
|
||||
const infoLine = qu.q('font[color="#663366"]', true);
|
||||
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
|
||||
const infoLine = qu.q('font[color="#663366"]', true);
|
||||
if (infoLine) release.duration = Number(infoLine.match(/(\d+) min/)[1]) * 60;
|
||||
|
||||
const poster = qu.img('img[src*="photos/"][width="400"]');
|
||||
release.poster = `${site.url}/visitors/${poster}`;
|
||||
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
|
||||
const poster = qu.img('img[src*="photos/"][width="400"]');
|
||||
release.poster = `${site.url}/visitors/${poster}`;
|
||||
release.photos = qu.imgs('img[src*="photos/"]:not([width="400"])').map(source => `${site.url}/visitors/${source}`);
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ qu }, url, site) {
|
||||
const release = { url };
|
||||
const release = { url };
|
||||
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
|
||||
const { pathname } = new URL(url);
|
||||
release.entryId = pathname.match(/videos\/(\w+)_hd_trailer/)[1];
|
||||
|
||||
const actor = qu.q('font[color="#990033"] strong', true);
|
||||
release.actors = [actor];
|
||||
const actor = qu.q('font[color="#990033"] strong', true);
|
||||
release.actors = [actor];
|
||||
|
||||
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
|
||||
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
|
||||
const hdTrailer = qu.url('a[href*="hd_trailer.mp4"]');
|
||||
const sdTrailer = qu.url('a[href*="hd_trailer_mobile.mp4"]');
|
||||
|
||||
release.trailer = [
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${hdTrailer}`,
|
||||
quality: 1080,
|
||||
},
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${sdTrailer}`,
|
||||
quality: 270,
|
||||
},
|
||||
];
|
||||
release.trailer = [
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${hdTrailer}`,
|
||||
quality: 1080,
|
||||
},
|
||||
{
|
||||
src: `${site.url}/visitors/videos/${sdTrailer}`,
|
||||
quality: 270,
|
||||
},
|
||||
];
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
|
||||
const res = await get(url);
|
||||
const url = `https://jesseloadsmonsterfacials.com/visitors/tour_${page.toString().padStart(2, '0')}.html`;
|
||||
const res = await get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
}
|
||||
|
||||
const { el } = res.item;
|
||||
const { el } = res.item;
|
||||
|
||||
const scenes = initAll(el, 'table[width="880"]');
|
||||
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
|
||||
const scenes = initAll(el, 'table[width="880"]');
|
||||
const dates = initAll(el, 'font[color="#000000"] strong:not(:empty)');
|
||||
|
||||
return scrapeLatest(scenes, dates, site);
|
||||
return scrapeLatest(scenes, dates, site);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, url, site);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -13,406 +13,406 @@ const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
async function fetchPhotos(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return res.body.toString();
|
||||
return res.body.toString();
|
||||
}
|
||||
|
||||
function scrapePhotos(html, type) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
const photos = $('.photo_gallery_thumbnail_wrapper .thumbs')
|
||||
.toArray()
|
||||
.map((photoElement) => {
|
||||
const src = $(photoElement).attr('src');
|
||||
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
if (type === 'caps') {
|
||||
return [
|
||||
src.replace('capthumbs/', 'caps/'),
|
||||
src,
|
||||
];
|
||||
}
|
||||
// high res often available in alternative directories, but not always, provide original as fallback
|
||||
if (type === 'caps') {
|
||||
return [
|
||||
src.replace('capthumbs/', 'caps/'),
|
||||
src,
|
||||
];
|
||||
}
|
||||
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1600watermarked/'),
|
||||
src.replace('thumbs/', '1280watermarked/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
return [
|
||||
src.replace('thumbs/', 'photos/'),
|
||||
src.replace('thumbs/', '1600watermarked/'),
|
||||
src.replace('thumbs/', '1280watermarked/'),
|
||||
src.replace('thumbs/', '1024watermarked/'),
|
||||
src,
|
||||
];
|
||||
});
|
||||
|
||||
return photos;
|
||||
return photos;
|
||||
}
|
||||
|
||||
async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
|
||||
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
const albumUrl = `${site.url}/trial/gallery.php?id=${entryId}&type=${type}&page=${page}`;
|
||||
|
||||
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
|
||||
logger.warn(`Jules Jordan is using legacy photo scraper for ${albumUrl} (page ${page})`);
|
||||
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
|
||||
// don't add first URL to pages to prevent unnecessary duplicate request
|
||||
const photos = scrapePhotos(html, type);
|
||||
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
|
||||
// don't add first URL to pages to prevent unnecessary duplicate request
|
||||
const photos = scrapePhotos(html, type);
|
||||
const pages = Array.from(new Set($('.page_numbers a').toArray().map(el => $(el).attr('href'))));
|
||||
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(pages, async (pageX) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
const otherPhotos = pages
|
||||
? await Promise.map(pages, async (pageX) => {
|
||||
const pageUrl = `https://www.julesjordan.com/trial/${pageX}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return scrapePhotos(pageHtml, type);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
return scrapePhotos(pageHtml, type);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
})
|
||||
: [];
|
||||
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
const allPhotos = photos.concat(otherPhotos.flat());
|
||||
|
||||
if (allPhotos.length === 0 && type === 'highres') {
|
||||
// photos not available, try for screencaps instead
|
||||
return getPhotosLegacy(entryId, site, 'caps', 1);
|
||||
}
|
||||
if (allPhotos.length === 0 && type === 'highres') {
|
||||
// photos not available, try for screencaps instead
|
||||
return getPhotosLegacy(entryId, site, 'caps', 1);
|
||||
}
|
||||
|
||||
return allPhotos;
|
||||
return allPhotos;
|
||||
}
|
||||
|
||||
/**
 * Fetch a scene's photo album page and return the best available list of photo URLs.
 *
 * The album URL is `site.parameters.photos` when set, otherwise `${site.url}/gallery.php`,
 * with `id`, `type` and `page` query parameters. Image paths are read from lines containing
 * `ptx["<quality>"]` and grouped by that quality label.
 *
 * Fallback chain: a 'highres' request without usable photos retries as 'caps'; a 'caps'
 * request without screencaps defers to getPhotosLegacy.
 *
 * @param {string|number} entryId - scene ID sent as the `id` query parameter
 * @param {Object} site - site entry; `url` and optional `parameters.photos` are read
 * @param {string} [type='highres'] - album type, 'highres' or 'caps'
 * @param {number} [page=1] - album page sent as the `page` query parameter
 * @returns {Promise<string[]>} photo URLs prefixed with `site.url`, or the result of getPhotosLegacy
 */
async function getPhotos(entryId, site, type = 'highres', page = 1) {
	const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;

	const res = await bhttp.get(albumUrl);
	const html = res.body.toString();

	const sources = html.split(/\n/).reduce((acc, sourceLine) => {
		// quality labels may contain spaces ("Video Cap Thumbs"), which \w alone never matches
		const quality = sourceLine.match(/ptx\["([\w ]+)"\]/)?.[1];
		if (!quality) return acc;

		const sourceStart = sourceLine.match(/\/trial|\/tour|\/content/);
		if (!sourceStart) return acc;

		// look for the extension after the path start, so an earlier '.jpg' on the line is ignored
		const sourceEnd = sourceLine.indexOf('.jpg', sourceStart.index);
		if (sourceEnd === -1) return acc;

		const source = sourceLine.slice(sourceStart.index, sourceEnd + 4);

		if (!acc[quality]) acc[quality] = [];
		acc[quality].push(`${site.url}${source}`);

		return acc;
	}, {});

	// qualities in order of preference for the requested album type
	const preferredQualities = type === 'highres'
		? ['1600', '1280', '1024', 'Thumbs']
		: ['jpg', 'Video Cap Thumbs'];

	const bestQuality = preferredQualities.find(quality => sources[quality]?.length > 0);

	if (bestQuality) return sources[bestQuality];

	if (type === 'highres') {
		// no photos available, try for screencaps instead
		return getPhotos(entryId, site, 'caps', 1);
	}

	// no screencaps available either, try legacy scraper just in case
	return getPhotosLegacy(entryId, site, 'highres', 1);
}
|
||||
|
||||
/**
 * Extract the scene's entry ID from raw page HTML.
 *
 * Tries `showtagform(<digits>)` first, then the first run of digits between
 * `setid:"` and the next comma (or the end of the string when there is no comma).
 *
 * @param {string} html - raw page source
 * @returns {string|null} the entry ID as a string of digits, or null when neither marker yields one
 */
function getEntryId(html) {
	const tagFormMatch = html.match(/showtagform\((\d+)\)/);

	if (tagFormMatch) {
		return tagFormMatch[1];
	}

	const setIdIndex = html.indexOf('setid:"');

	// indexOf returns -1 when the marker is absent, and 0 is a valid position
	if (setIdIndex !== -1) {
		const commaIndex = html.indexOf(',', setIdIndex);
		const setIdSnippet = commaIndex === -1
			? html.slice(setIdIndex)
			: html.slice(setIdIndex, commaIndex);

		return setIdSnippet.match(/\d+/)?.[0] ?? null;
	}

	return null;
}
|
||||
|
||||
/**
 * Convert a list of scene tiles into release objects.
 *
 * @param {Array<{el: Object, qu: Object}>} scenes - one entry per tile; `el` is the tile element
 *   (its `dataset.setid` is read) and `qu` is the query helper for that tile
 *   (methods used: q, url, date, all, imgs, img, html)
 * @param {Object} site - site entry; `url` is used to resolve relative image paths and as referer
 * @returns {Object[]} releases with entryId, url, title, date, actors, poster, photos and,
 *   when a teaser URL is found in the tile's script, teaser
 */
function scrapeAll(scenes, site) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;

		release.url = qu.url('.update_title, .dvd_info > a, a ~ a');
		release.title = qu.q('.update_title, .dvd_info > a, a ~ a', true);
		release.date = qu.date('.update_date', 'MM/DD/YYYY');

		release.actors = qu.all('.update_models a', true);

		const dvdPhotos = qu.imgs('.dvd_preview_thumb');
		const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;

		// DVD preview thumbs take precedence; otherwise read the numbered src attributes of the thumbnail
		[release.poster, ...release.photos] = dvdPhotos.length
			? dvdPhotos
			: Array.from({ length: photoCount }, (value, index) => {
				const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');

				return src ? {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					referer: site.url,
				} : null;
			}).filter(Boolean);

		const teaserScript = qu.html('script');

		if (teaserScript) {
			// only accept a teaser when 'http' is present and followed by '.mp4'
			const teaserStart = teaserScript.indexOf('http');
			const teaserEnd = teaserStart === -1 ? -1 : teaserScript.indexOf('.mp4', teaserStart);

			if (teaserEnd !== -1) {
				release.teaser = { src: teaserScript.slice(teaserStart, teaserEnd + 4) };
			}
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape the "coming soon" carousel into upcoming release objects.
 *
 * @param {string} html - page source containing `#coming_soon_carousel`
 * @param {Object} site - site entry; `url` resolves relative poster paths, and the object is attached to each release
 * @returns {Object[]} one release per `.table` element in the carousel; `poster` and `teaser`
 *   are null when the tile has no thumbnail source or no teaser script
 */
function scrapeUpcoming(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('#coming_soon_carousel').find('.table').toArray();

	return scenesElements.map((element) => {
		const entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];

		// `details` is the set of child elements removed from the second details container;
		// order matters: the title is read from the container after its children are gone
		const details = $(element).find('.update_details_comingsoon')
			.eq(1)
			.children()
			.remove();

		// .end() steps back to the container, whose remaining text is the title
		const title = details
			.end()
			.text()
			.trim();

		// the text of the removed children is the comma-separated actor list
		const actors = details
			.text()
			.trim()
			.split(', ');

		// the first 7 characters of the date text are skipped before parsing
		const date = moment
			.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
			.toDate();

		const posterPath = $(element).find('a img.thumbs').attr('src');
		const absolutePoster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`;
		const poster = posterPath ? absolutePoster : null;

		const videoClass = $(element).find('.update_thumbnail div').attr('class');
		const videoScript = $(element).find(`script:contains(${videoClass})`).html();

		// media is optional: a tile without a teaser script must not abort the whole page
		const teaserStart = videoScript ? videoScript.indexOf('https://') : -1;
		const teaserEnd = teaserStart === -1 ? -1 : videoScript.indexOf('.mp4', teaserStart);
		const teaser = teaserEnd === -1 ? null : { src: videoScript.slice(teaserStart, teaserEnd + 4) };

		return {
			url: null,
			entryId,
			title,
			date,
			actors,
			poster,
			teaser,
			rating: null,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object.
 *
 * @param {{html: string, qu: Object}} item - raw page source and the query helper for the page
 *   (methods used: q, date, all, exists, url)
 * @param {string} url - scene URL, stored on the release
 * @param {Object} site - site entry; `url` resolves relative media paths and `slug` gates trailer scraping
 * @param {Object} [include] - `trailer` and `photos` flags selecting optional media
 * @returns {Promise<Object>} release with entryId, title, description, date, actors, tags and,
 *   when available, poster, trailer, photos, movie and stars
 */
async function scrapeScene({ html, qu }, url, site, include) {
	const release = { url, site };

	release.entryId = getEntryId(html);
	release.title = qu.q('.title_bar_hilite', true);
	release.description = qu.q('.update_description', true);

	release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');

	release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
	release.tags = qu.all('.update_tags a', true);

	const posterPath = html.match(/useimage = "(.*)"/)?.[1];

	if (posterPath) {
		release.poster = {
			src: /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`,
			referer: site.url,
		};
	}

	if (include?.trailer && site.slug !== 'manuelferrara') {
		const trailerLines = html.split('\n').filter(line => /movie\["trailer\w*"\]\[/i.test(line));

		if (trailerLines.length) {
			release.trailer = trailerLines.map((trailerLine) => {
				const src = trailerLine.match(/path:"([\w:/.&=?%]+)"/)?.[1];
				const quality = trailerLine.match(/movie_height:'(\d+)/)?.[1];

				return src && {
					src: /^http/.test(src) ? src : `${site.url}${src}`,
					// a reported height of 558 is recorded as 540
					quality: quality && Number(quality.replace('558', '540')),
				};
			}).filter(Boolean);
		}
	}

	// without an entry ID the album request cannot identify the scene
	if (include?.photos && release.entryId) release.photos = await getPhotos(release.entryId, site);

	if (qu.exists('.update_dvds a')) {
		release.movie = {
			url: qu.url('.update_dvds a'),
			title: qu.q('.update_dvds a', true),
		};
	}

	// the character class strips whitespace, '|' and every letter of "Avg Rating:", leaving the number
	const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
	if (stars) release.stars = stars;

	return release;
}
|
||||
|
||||
/**
 * Scrape a DVD page into a movie object with its scenes.
 *
 * @param {{el: Object, qu: Object}} item - page element and its query helper (methods used: q, urls)
 * @param {string} url - movie URL, stored on the movie
 * @param {Object} site - site entry, stored on the movie and passed to scrapeAll
 * @returns {Object} movie with entryId, title, covers, channel, date (date of the earliest
 *   scene, undefined when there are no scenes) and scenes sorted by ascending date
 */
function scrapeMovie({ el, qu }, url, site) {
	const movie = { url, site };

	movie.entryId = qu.q('.dvd_details_overview .rating_box').dataset.id;
	movie.title = qu.q('.title_bar span', true);
	movie.covers = qu.urls('#dvd-cover-flip > a');
	movie.channel = slugify(qu.q('.update_date a', true), '');

	const sceneQus = ctxa(el, '.dvd_details');
	const scenes = scrapeAll(sceneQus, site);

	// every scene points at the same `movie` object, so it also sees the date assigned below
	const curatedScenes = scenes
		?.map(scene => ({ ...scene, movie }))
		.sort((sceneA, sceneB) => sceneA.date - sceneB.date);

	// a movie page without scene tiles yields an empty list; do not dereference a missing first scene
	movie.date = curatedScenes?.[0]?.date;

	return {
		...movie,
		...(curatedScenes && { scenes: curatedScenes }),
	};
}
|
||||
|
||||
/**
 * Scrape a model page into an actor profile.
 *
 * @param {string} html - model page source
 * @param {string} url - model page URL (accepted for the caller's signature; not read here)
 * @param {string} actorName - name stored on the profile
 * @returns {Object} profile with name, releases and, when found in the bio text or page,
 *   birthdate, age, height, bust, waist, hip and avatar
 */
function scrapeProfile(html, url, actorName) {
	const { document } = new JSDOM(html).window;

	// a page without a bio block still yields a profile with the fields that can be found
	const bio = document.querySelector('.model_bio')?.textContent || '';
	const avatarEl = document.querySelector('.model_bio_pic img');

	const profile = {
		name: actorName,
	};

	const heightString = bio.match(/\d+ feet \d+ inches/);
	// the "Age:" label is followed by either a two-digit age or a full birth date
	const ageString = bio.match(/Age:\s*(\d{2})/);
	const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
	const measurementsString = bio.match(/\w+-\d+-\d+/);

	if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
	if (ageString) profile.age = Number(ageString[1]);

	if (heightString) profile.height = heightToCm(heightString[0]);

	if (measurementsString) {
		const [bust, waist, hip] = measurementsString[0].split('-');

		// bust stays a string because the pattern allows non-digit characters in it
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	if (avatarEl) {
		// the 3x/2x/1x variants are listed before the plain attributes
		const avatarSources = [
			avatarEl.getAttribute('src0_3x'),
			avatarEl.getAttribute('src0_2x'),
			avatarEl.getAttribute('src0_1x'),
			avatarEl.getAttribute('src0'),
			avatarEl.getAttribute('src'),
		].filter(Boolean);

		if (avatarSources.length) profile.avatar = avatarSources;
	}

	profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), el => el.href);

	return profile;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest scenes.
 *
 * @param {Object} site - site entry; `parameters.latest` is used as a format string for the page
 *   number when set, otherwise the URL is built from `site.url`
 * @param {number} [page=1] - overview page number
 * @returns {Promise<Object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(site, page = 1) {
	const url = site.parameters?.latest
		? util.format(site.parameters.latest, page)
		: `${site.url}/trial/categories/movies_${page}_d.html`;

	const res = await geta(url, '.update_details');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape the upcoming scenes.
 *
 * @param {Object} site - site entry; `parameters.upcoming === false` disables the lookup, a
 *   truthy value overrides the URL, otherwise `${site.url}/trial/index.php` is requested
 * @returns {Promise<Object[]|number|null>} upcoming releases, the HTTP status code when the
 *   response is not 200, or null when upcoming scenes are disabled for the site
 */
async function fetchUpcoming(site) {
	if (site.parameters?.upcoming === false) return null;

	const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeUpcoming(res.body.toString(), site);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - scene URL
 * @param {Object} site - site entry passed on to scrapeScene
 * @param {Object} baseRelease - not read here; part of the caller's signature
 * @param {*} preflight - not read here; part of the caller's signature
 * @param {Object} include - optional media flags passed on to scrapeScene
 * @returns {Promise<Object|number>} the scraped release, or the response status when the request failed
 */
async function fetchScene(url, site, baseRelease, preflight, include) {
	const res = await get(url);

	return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape a single movie (DVD) page.
 *
 * @param {string} url - movie URL
 * @param {Object} site - site entry passed on to scrapeMovie
 * @returns {Promise<Object|number>} the scraped movie, or the response status when the request failed
 */
async function fetchMovie(url, site) {
	const res = await get(url);

	return res.ok ? scrapeMovie(res.item, url, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch an actor profile, trying the hyphenated slug first and the unseparated slug second.
 *
 * @param {string} actorName - actor name used to build the model page slugs
 * @returns {Promise<Object|null>} the scraped profile, or null when neither URL responds with 200
 */
async function fetchProfile(actorName) {
	const actorSlugA = slugify(actorName, '-');
	const actorSlugB = slugify(actorName, '');

	const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
	const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;

	const resA = await bhttp.get(urlA);

	if (resA.statusCode === 200) {
		return scrapeProfile(resA.body.toString(), urlA, actorName);
	}

	// when both slugs are identical the second URL is the one that just failed
	if (urlB === urlA) {
		return null;
	}

	const resB = await bhttp.get(urlB);

	if (resB.statusCode === 200) {
		return scrapeProfile(resB.body.toString(), urlB, actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,184 +7,184 @@ const moment = require('moment');
|
||||
const { feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
// two-letter shoot ID prefix -> site slug
const siteMapByKey = {
	PF: 'pornfidelity',
	TF: 'teenfidelity',
	KM: 'kellymadison',
};

// inverse lookup: site slug -> two-letter shoot ID prefix
const siteMapBySlug = Object.fromEntries(Object.entries(siteMapByKey).map(([key, value]) => [value, key]));
|
||||
|
||||
/**
 * Concatenate the trimmed text of the direct text nodes in a node list, ignoring all other node types.
 *
 * @param {Iterable<{nodeType: number, textContent: string}>} parentEl - node list to read; the callers pass an element's childNodes
 * @returns {string} trimmed text of every node with nodeType 3, joined without a separator
 */
function extractTextNode(parentEl) {
	// nodeType 3 is a text node
	return Array.from(parentEl).reduce((acc, el) => (el.nodeType === 3 ? `${acc}${el.textContent.trim()}` : acc), '');
}
|
||||
|
||||
/**
 * Scrape the network-wide episode overview, keeping only episodes that belong to `site`.
 *
 * @param {string} html - overview markup containing `.episode` cards
 * @param {Object} site - site entry; `slug` selects the episodes to keep and `url` builds release URLs
 * @returns {Object[]} releases for the requested site; episodes of other sites are dropped
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	return Array.from(document.querySelectorAll('.episode'), (scene) => {
		const release = { site };

		release.shootId = scene.querySelector('.card-meta .text-right').textContent.trim();

		// the first two word characters of the shoot ID identify the site
		const siteId = release.shootId.match(/\w{2}/)?.[0];
		const siteSlug = siteMapByKey[siteId];

		if (site.slug !== siteSlug) {
			// using generic network overview, scene is not from the site we want
			return null;
		}

		const durationEl = scene.querySelector('.content a');

		[release.entryId] = durationEl.href.match(/\d+$/);
		release.url = `${site.url}/episodes/${release.entryId}`;

		release.title = scene.querySelector('h5 a').textContent.trim();

		const dateEl = scene.querySelector('.card-meta .text-left').childNodes;
		const dateString = extractTextNode(dateEl);

		release.date = moment.utc(dateString, ['MMM D', 'MMM D, YYYY']).toDate();
		release.actors = Array.from(scene.querySelectorAll('.models a'), el => el.textContent);

		const durationString = durationEl.textContent.match(/\d+ min/);
		if (durationString) release.duration = Number(durationString[0].match(/\d+/)[0]) * 60;

		// media is optional: a card without an image or video must not abort the whole page
		const poster = scene.querySelector('.card-img-top')?.dataset.src;
		if (poster) release.poster = poster;

		const videoEl = scene.querySelector('video');
		if (videoEl) release.teaser = { src: videoEl.src };

		return release;
	}).filter(scene => scene);
}
|
||||
|
||||
/**
 * Scrape an episode page into a release object.
 *
 * The title, channel, shoot ID, description, date, actors and duration are required: a page
 * that lacks them throws. Trailer and poster are optional and only set when found.
 *
 * @param {string} html - episode page source
 * @param {string} url - episode URL; its first run of digits becomes the entry ID when no base release is given
 * @param {Object} site - site entry, stored on the release
 * @param {Object} [baseRelease] - release known from the overview; when it already has a poster,
 *   the poster found here is stored as a photo instead
 * @returns {Object} the scraped release
 */
function scrapeScene(html, url, site, baseRelease) {
	const { document } = new JSDOM(html).window;
	const release = { url, site };

	const titleEl = document.querySelector('.card-header.row h4').childNodes;
	const titleString = extractTextNode(titleEl);

	if (!baseRelease) [release.entryId] = url.match(/\d+/);

	// the raw title has the shape "[Trailer: ]<title> - <channel> #<episode>"
	release.title = titleString
		.replace('Trailer: ', '')
		.replace(/- \w+ #\d+$/, '')
		.trim();

	release.channel = titleString.match(/\w+ #\d+$/)[0].match(/\w+/)[0].toLowerCase();

	const episode = titleString.match(/#\d+$/)[0];
	const siteKey = siteMapBySlug[release.channel];

	release.shootId = `${siteKey} ${episode}`;
	release.description = document.querySelector('p.card-text').textContent.trim();

	const dateEl = document.querySelector('.card-body h4.card-title:nth-child(3)').childNodes;
	const dateString = extractTextNode(dateEl);

	release.date = moment.utc(dateString, 'YYYY-MM-DD').toDate();
	release.actors = Array.from(document.querySelectorAll('.card-body h4.card-title:nth-child(4) a'), el => el.textContent);

	const durationRaw = document.querySelector('.card-body h4.card-title:nth-child(1)').textContent;
	const durationString = durationRaw.match(/\d+:\d+/)[0];

	release.duration = moment.duration(`00:${durationString}`).asSeconds();

	// serialise the body once; the trailer sources are in the player.updateSrc(...) call
	const bodyHtml = document.body.innerHTML;
	const trailerStart = bodyHtml.indexOf('player.updateSrc');

	if (trailerStart !== -1) {
		const trailerEnd = bodyHtml.indexOf(');', trailerStart);
		const trailerString = trailerEnd === -1
			? bodyHtml.slice(trailerStart)
			: bodyHtml.slice(trailerStart, trailerEnd);

		const trailers = trailerString.match(/https:\/\/.*\.mp4/g) || [];
		const resolutions = (trailerString.match(/res: '\d+'/g) || []).map((res) => {
			const resolution = Number(res.match(/\d+/)[0]);

			return resolution === 4000 ? 2160 : resolution; // 4k is not 4000 pixels high
		});

		if (trailers.length > 0) {
			// sources and resolutions are paired by their order of appearance
			release.trailer = trailers.map((trailer, index) => ({
				src: trailer,
				quality: resolutions[index],
			}));
		}
	}

	// only accept a poster when the 'poster:' marker is followed by a URL ending in .jpg
	const posterPrefix = html.indexOf('poster:');
	const posterStart = posterPrefix === -1 ? -1 : html.indexOf('http', posterPrefix);
	const posterEnd = posterStart === -1 ? -1 : html.indexOf('.jpg', posterStart);

	if (posterEnd !== -1) {
		const poster = html.slice(posterStart, posterEnd + 4);

		if (baseRelease?.poster) release.photos = [poster];
		else release.poster = poster;
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape a model page into an actor profile.
 *
 * @param {string} html - model page source
 * @param {string} actorName - name stored on the profile
 * @returns {Object} profile with name and, when present in the bio table or page,
 *   bust, waist, hip, birthPlace, height, ethnicity and avatar
 */
function scrapeProfile(html, actorName) {
	const { document } = new JSDOM(html).window;
	const profile = { name: actorName };

	// first column holds the labels (last character dropped), second column the values
	const bioKeys = Array.from(document.querySelectorAll('table.table td:nth-child(1)'), el => el.textContent.slice(0, -1));
	const bioValues = Array.from(document.querySelectorAll('table.table td:nth-child(2)'), el => el.textContent);
	const bio = Object.fromEntries(bioKeys.map((key, index) => [key, bioValues[index]]));

	if (bio.Measurements) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio.Birthplace) profile.birthPlace = bio.Birthplace;

	if (bio.Height) {
		// a height cell without any digits is skipped rather than crashing the profile
		const heightParts = bio.Height.match(/\d+/g);

		if (heightParts) {
			const [feet, inches] = heightParts;
			profile.height = feetInchesToCm(feet, inches);
		}
	}

	if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity;

	// the avatar is the first image whose source contains 'model'
	const avatarEl = Array.from(document.querySelectorAll('img')).find(photo => photo.src.includes('model'));

	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the network-wide episode overview and scrape the episodes belonging to `site`.
 *
 * @param {Object} site - site entry passed on to scrapeLatest
 * @param {number} [page=1] - overview page number
 * @returns {Promise<Object[]|null>} scraped releases, or null when the request did not succeed
 */
async function fetchLatest(site, page = 1) {
	const url = `https://kellymadison.com/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
	const res = await bhttp.get(url, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	// the response body is read as an object carrying `status` and the overview markup in `html`
	if (res.statusCode === 200 && res.body.status === 'success') {
		return scrapeLatest(res.body.html, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * Only the path of `url` is used for the request; the host is always www.kellymadison.com.
 * The original `url` is what gets passed on to scrapeScene.
 *
 * @param {string} url - absolute scene URL
 * @param {object} site - site entry, forwarded to scrapeScene
 * @param {object} [baseRelease] - forwarded to scrapeScene unchanged
 * @returns {Promise<*>} whatever scrapeScene returns
 */
async function fetchScene(url, site, baseRelease) {
	const { pathname } = new URL(url);

	const res = await bhttp.get(`https://www.kellymadison.com${pathname}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	return scrapeScene(res.body.toString(), url, site, baseRelease);
}
|
||||
|
||||
/**
 * Fetch a model page by slugified actor name and scrape it.
 *
 * The slug is the lowercased name with every whitespace run replaced by a single dash.
 *
 * @param {string} actorName - actor name as known to the caller
 * @returns {Promise<*>} whatever scrapeProfile returns, or null when the response status is not 200
 */
async function fetchProfile(actorName) {
	const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
	const res = await bhttp.get(`https://www.kellymadison.com/models/${actorSlug}`, {
		headers: {
			'X-Requested-With': 'XMLHttpRequest',
		},
	});

	if (res.statusCode === 200) {
		return scrapeProfile(res.body.toString(), actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,116 +5,116 @@ const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Turn an overview page into a list of release objects, one per `.shoot-list .shoot` element.
 *
 * @param {string} html - overview page markup
 * @param {object} site - site entry, attached to every release as `site`
 * @returns {object[]} releases with url, shootId/entryId, title, actors, date, photos, poster, rating, duration and site
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.shoot-list .shoot').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('.shoot-thumb-title a');
		const href = sceneLinkElement.attr('href');
		const url = `https://kink.com${href}`;
		const shootId = href.split('/')[2]; // third segment of the link path
		const title = sceneLinkElement.text().trim();

		const poster = $(element).find('.adimage').attr('src');
		const photos = $(element).find('.rollover .roll-image').map((photoIndex, photoElement) => $(photoElement).attr('data-imagesrc')).toArray();

		const date = moment.utc($(element).find('.date').text(), 'MMM DD, YYYY').toDate();
		const actors = $(element).find('.shoot-thumb-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		const stars = $(element).find('.average-rating').attr('data-rating') / 10;

		const timestamp = $(element).find('.video span').text();
		const timestampComponents = timestamp.split(':'); // fix mixed hh:mm:ss and mm:ss format
		const duration = moment.duration(timestampComponents.length > 2 ? timestamp : `0:${timestamp}`).asSeconds();

		return {
			url,
			shootId,
			entryId: shootId,
			title,
			actors,
			date,
			photos,
			poster,
			rating: {
				stars,
			},
			duration,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page and its separately fetched rating response.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL, copied into the release
 * @param {string} shootId - used as both shootId and entryId
 * @param {object} ratingRes - rating response; `ratingRes.body.average` becomes the star rating
 * @param {object} site - site entry, attached to the release as `site`
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, shootId, ratingRes, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// const title = $('h1.shoot-title').text().replace(/\ue800/, ''); // fallback, special character is 'like'-heart
	const title = $('h1.shoot-title span.favorite-button').attr('data-title');
	const actorsRaw = $('.shoot-info p.starring');

	const photos = $('.gallery .thumb img').map((photoIndex, photoElement) => $(photoElement).attr('data-image-file')).toArray();
	const trailerVideo = $('.player span[data-type="trailer-src"]').attr('data-url');
	const trailerPoster = $('.player video#kink-player').attr('poster');

	// the date is read from the element immediately preceding the 'starring' paragraph
	const date = moment.utc($(actorsRaw)
		.prev()
		.text()
		.trim()
		.replace('Date: ', ''),
	'MMMM DD, YYYY')
		.toDate();

	const actors = $(actorsRaw).find('span.names a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
	const description = $('.shoot-info .description').text().trim();

	const { average: stars } = ratingRes.body;

	// channel is taken from the third path segment of the logo link
	const siteName = $('.shoot-logo a').attr('href').split('/')[2];
	const siteSlug = siteName.replace(/\s+/g, '').toLowerCase();

	const tags = $('.tag-list > a[href*="/tag"]').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	const channel = siteSlug;

	return {
		url,
		shootId,
		entryId: shootId,
		title,
		date,
		actors,
		description,
		photos,
		poster: trailerPoster,
		trailer: {
			src: trailerVideo,
			quality: 480,
		},
		rating: {
			stars,
		},
		tags,
		site,
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch one page of the site's latest listing and scrape it.
 *
 * @param {object} site - site entry; `site.url` is the base of the request URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} whatever scrapeLatest returns
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/latest/page/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and its rating in parallel, then scrape them together.
 *
 * The shoot ID is the third segment of the URL path and is used to build the ratings request.
 *
 * @param {string} url - absolute scene URL
 * @param {object} site - site entry, forwarded to scrapeScene
 * @returns {Promise<*>} whatever scrapeScene returns
 */
async function fetchScene(url, site) {
	const shootId = new URL(url).pathname.split('/')[2];

	const [res, ratingRes] = await Promise.all([
		bhttp.get(url),
		bhttp.get(`https://kink.com/api/ratings/${shootId}`),
	]);

	return scrapeScene(res.body.toString(), url, shootId, ratingRes, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,201 +8,201 @@ const moment = require('moment');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Split a trailing studio scene code off a title.
 *
 * The last space-separated word is checked for a known studio prefix followed by digits.
 * When it matches, the matched code becomes `shootId` and that last word is dropped from the title;
 * otherwise the title is returned unchanged with a null `shootId`.
 *
 * @param {string} originalTitle - title as shown on the page
 * @returns {{ shootId: (string|null), title: string }}
 */
function extractTitle(originalTitle) {
	const titleComponents = originalTitle.split(' ');
	const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OT)\d+/); // detect studio prefixes
	const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
	const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

	return { shootId, title };
}
|
||||
|
||||
/**
 * Work out a poster URL for a thumbnail element.
 *
 * An inline `style` attribute wins: everything between the first '(' and the final character is returned.
 * Otherwise the `data-casting` attribute is parsed as a JSON array and one entry is picked at random;
 * a number is used directly, a 'a-b' string yields a random integer derived from both bounds.
 *
 * @param {object} posterElement - element wrapper exposing `attr(name)`
 * @param {string} sceneId - scene identifier placed in the casting URL
 * @returns {(string|null)} poster URL, or null when neither attribute yields one
 */
function getPoster(posterElement, sceneId) {
	const posterStyle = posterElement.attr('style');

	if (posterStyle) {
		return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
	}

	const posterRange = posterElement.attr('data-casting');
	const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

	// a missing or non-array data-casting attribute used to throw on the index access below
	if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
		return null;
	}

	const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

	if (!posterTimeRange) {
		return null;
	}

	if (typeof posterTimeRange === 'number') {
		// poster time is already a single time value
		return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
	}

	// the first component is treated as the upper bound, the second as the lower bound
	const [max, min] = posterTimeRange.split('-');
	const posterTime = Math.floor(Math.random() * (Number(max) - Number(min) + 1) + Number(min));

	return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
|
||||
|
||||
/**
 * Turn a listing page into release objects, one per direct child of `.thumbnails`.
 *
 * @param {string} html - listing page markup
 * @param {object} site - site entry, attached to every release as `site`
 * @returns {object[]} releases with url, shootId, entryId, title, date, poster and site
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const scenesElements = $('.thumbnails > div').toArray();

	return scenesElements.map((element) => {
		const sceneLinkElement = $(element).find('.thumbnail-title a');
		const url = sceneLinkElement.attr('href');

		const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
		const { shootId, title } = extractTitle(originalTitle);
		const entryId = new URL(url).pathname.split('/')[2];

		const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();

		const sceneId = $(element).attr('data-content');
		const posterElement = $(element).find('.thumbnail-avatar');

		const poster = getPoster(posterElement, sceneId);

		return {
			url,
			shootId,
			entryId,
			title,
			date,
			poster,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Build a release object from a scene page.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; its third path segment becomes `entryId`
 * @param {object} site - not read by this function, kept for the shared scraper signature
 * @param {boolean} useGallery - read photos from `.gallery a img` when true, from `.screenshots img` otherwise
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site, useGallery) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	// player configuration is embedded as an object literal inside the WatchPage script
	const playerObject = $('script:contains("new WatchPage")').html();
	const playerData = playerObject && playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.lastIndexOf('},') + 1);
	const data = playerData && JSON.parse(playerData);

	const release = { url };

	const originalTitle = $('h1.watchpage-title').text().trim();
	const { shootId, title } = extractTitle(originalTitle);

	release.shootId = shootId;
	release.entryId = new URL(url).pathname.split('/')[2];

	release.title = title;
	release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();

	const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();

	release.description = $('meta[name="description"]')?.attr('content')?.trim()
		|| (descriptionElement && $(descriptionElement).find('dd').text().trim());

	release.actors = $(actorsElement)
		.find('a[href*="com/model"]')
		.map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
	release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	const photos = useGallery
		? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
		: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();

	release.photos = photos.map((source) => {
		// source without parameters sometimes serves larger preview photo
		// (the thumbnail itself is not kept as a fallback, usually enough high res photos available)
		const { origin, pathname } = new URL(source);

		return `${origin}${pathname}`;
	});

	// #player may lack a style attribute; without the guard the fallback below was unreachable
	const posterStyle = $('#player').attr('style');
	const poster = posterStyle && posterStyle.slice(posterStyle.indexOf('(') + 1, -1);

	release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback

	if (data) {
		const qualityMap = {
			web: 240,
			vga: 480,
			hd: 720,
			'1080p': 1080,
		};

		release.trailer = data.clip.qualities.map(trailer => ({
			src: trailer.src,
			type: trailer.type,
			quality: qualityMap[trailer.quality] || trailer.quality,
		}));
	}

	const studioName = $('.watchpage-studioname').first().text().trim();
	release.studio = slugify(studioName, '');

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * Each `.model--description tr` row is split on ':'; only rows that split into exactly
 * two parts are used as key/value pairs.
 *
 * @param {string} html - model page markup
 * @param {string} _url - unused
 * @param {string} actorName - copied into the profile as `name`
 * @returns {Promise<object>} profile with name, birthPlace and, when available, age and avatar
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const profile = {
		name: actorName,
	};

	const avatarEl = document.querySelector('.model--avatar img[src^="http"]');
	const entries = Array.from(document.querySelectorAll('.model--description tr'), el => el.textContent.replace(/\n/g, '').split(':'));

	const bio = entries
		.filter(entry => entry.length === 2) // ignore entries without ':' (About section, see Blanche Bradburry)
		.reduce((acc, [key, value]) => ({ ...acc, [key.trim()]: value.trim() }), {});

	profile.birthPlace = bio.Nationality;

	if (bio.Age) profile.age = bio.Age;
	if (avatarEl) profile.avatar = avatarEl.src;

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the site's new videos listing and scrape it.
 *
 * @param {object} site - site entry; `site.url` is the base of the request URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} whatever scrapeLatest returns
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/new-videos/${page}`);

	return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene's gallery view and scrape it.
 *
 * `useGallery` is fixed to true here, so the screenshots branch is currently never taken.
 *
 * @param {string} url - scene URL, used as the prefix of the request URL
 * @param {object} site - site entry, forwarded to scrapeScene
 * @returns {Promise<*>} whatever scrapeScene returns
 */
async function fetchScene(url, site) {
	const useGallery = true;

	// TODO: fall back on screenshots when gallery is not available
	const res = useGallery
		? await bhttp.get(`${url}/gallery#gallery`)
		: await bhttp.get(`${url}/screenshots#screenshots`);

	return scrapeScene(res.body.toString(), url, site, useGallery);
}
|
||||
|
||||
/**
 * Look an actor up through the autocomplete search and scrape the first model hit.
 *
 * @param {string} actorName - actor name used as the search query and passed on to scrapeProfile
 * @returns {Promise<*>} whatever scrapeProfile returns, or null when the search yields no entry of type 'model'
 */
async function fetchProfile(actorName) {
	// URLSearchParams encodes every space as '+' (replace(' ', '+') only handled the first one) and escapes the rest
	const query = new URLSearchParams({ q: actorName });
	const res = await bhttp.get(`https://www.legalporno.com/api/autocomplete/search?${query}`);
	const data = res.body;

	// tolerate a response body without a terms list instead of throwing
	const terms = data && Array.isArray(data.terms) ? data.terms : [];
	const result = terms.find(item => item.type === 'model');

	if (result) {
		const bioRes = await bhttp.get(result.url);
		const html = bioRes.body.toString();

		return scrapeProfile(html, result.url, actorName);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Network-specific wrapper that calls the shared fetchProfile with the fixed arguments 'men' and 'modelprofile'.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} whatever fetchProfile returns
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'men', 'modelprofile');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Network-specific wrapper that calls the shared fetchProfile with the fixed argument 'devianthardcore'.
 *
 * @param {string} actorName - actor name to look up
 * @returns {Promise<*>} whatever fetchProfile returns
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'devianthardcore');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,232 +8,232 @@ const moment = require('moment');
|
||||
const { get } = require('../utils/http');
|
||||
|
||||
const descriptionTags = {
|
||||
'anal cream pie': 'anal creampie',
|
||||
'ass to mouth': 'ass to mouth',
|
||||
'cream pie in her ass': 'anal creampie',
|
||||
'eats ass': 'ass eating',
|
||||
facial: 'facial',
|
||||
gaped: 'gaping',
|
||||
gapes: 'gaping',
|
||||
gape: 'gaping',
|
||||
'rectal cream pie': 'anal creampie',
|
||||
rimming: 'ass eating',
|
||||
'anal cream pie': 'anal creampie',
|
||||
'ass to mouth': 'ass to mouth',
|
||||
'cream pie in her ass': 'anal creampie',
|
||||
'eats ass': 'ass eating',
|
||||
facial: 'facial',
|
||||
gaped: 'gaping',
|
||||
gapes: 'gaping',
|
||||
gape: 'gaping',
|
||||
'rectal cream pie': 'anal creampie',
|
||||
rimming: 'ass eating',
|
||||
};
|
||||
|
||||
/**
 * Derive tags from free-text by looking for the keywords of the descriptionTags table.
 *
 * The description is lowercased before matching. Each distinct tag is returned once,
 * in order of first appearance.
 *
 * @param {(string|null|undefined)} description - scene description; falsy values are treated as an empty string
 * @returns {string[]} tags mapped from the matched keywords, empty when nothing matches
 */
function deriveTagsFromDescription(description) {
	// longest keywords first, so a short keyword cannot pre-empt a longer one that starts with it
	const keywords = Object.keys(descriptionTags).sort((keyA, keyB) => keyB.length - keyA.length);
	const matches = (description || '').toLowerCase().match(new RegExp(keywords.join('|'), 'g'));

	return matches
		? [...new Set(matches.map(match => descriptionTags[match]))] // repeated keywords and synonyms yield one tag
		: [];
}
|
||||
|
||||
/**
 * Turn a listing page into scene objects, one per `.content-item-large` / `.content-item` element.
 *
 * Tags are not read from the page; they are derived from the description text.
 *
 * @param {string} html - listing page markup
 * @param {object} site - site entry, attached to every scene as `site`
 * @returns {Promise<object[]>} scraped scenes
 */
async function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-item-large, .content-item');

	return Promise.all(Array.from(sceneElements, async (element) => {
		// cheerio is only used for reading inline CSS of the hover thumbnails
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });

		const titleElement = element.querySelector('h3.title a');
		const title = titleElement.textContent;
		const url = titleElement.href;
		const entryId = url.split('/').slice(-2)[0]; // second to last URL segment

		const descriptionElement = element.querySelector('.desc');
		const description = descriptionElement && descriptionElement.textContent.trim();
		const date = moment(element.querySelector('.date, time').textContent, 'Do MMM YYYY').toDate();

		const actors = Array.from(element.querySelectorAll('h4.models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		const duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		const ratingElement = element.querySelector('.rating');
		const stars = ratingElement && ratingElement.dataset.rating;

		// the first image is the poster, any further images count as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-top, .thumb-bottom')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		const photos = [...primaryPhotos, ...secondaryPhotos];
		const tags = deriveTagsFromDescription(description);

		return {
			url,
			entryId,
			title,
			description,
			actors,
			director: 'Mike Adriano',
			date,
			duration,
			tags,
			poster,
			photos,
			rating: {
				stars,
			},
			site,
		};
	}));
}
|
||||
|
||||
/**
 * Scrape the releases from an overview page that lists scenes in '.content-border' elements.
 *
 * @param {string} html - Markup of the overview page.
 * @returns {Promise<Object[]>} One release object per '.content-border' element.
 */
async function scrapeLatestB(html) {
	const { document } = new JSDOM(html).window;
	const sceneElements = document.querySelectorAll('.content-border');

	return Promise.all(Array.from(sceneElements, async (element) => {
		const $ = cheerio.load(element.innerHTML, { normalizeWhitespace: true });
		const release = {
			director: 'Mike Adriano',
		};

		const titleElement = element.querySelector('.content-title-wrap a');
		release.title = titleElement.title || titleElement.textContent.trim();
		release.url = titleElement.href;
		// the entry ID is the second to last path segment of the scene URL
		release.entryId = release.url.split('/').slice(-2)[0];

		release.description = element.querySelector('.content-description').textContent.trim();

		// a moment object is always truthy, so `moment(a) || moment(b)` never reaches the second format;
		// parse whichever date elements are present and keep the first result that is actually valid
		const mobileDateElement = element.querySelector('.mobile-date');
		const fullDateElement = element.querySelector('.date');
		const parsedDate = [
			mobileDateElement && moment(mobileDateElement.textContent, 'MM/DD/YYYY'),
			fullDateElement && moment(fullDateElement.textContent, 'Do MMM YYYY'),
		].find(candidate => candidate && candidate.isValid());

		// the date is left unset when neither element yields a valid date
		if (parsedDate) release.date = parsedDate.toDate();

		release.actors = Array.from(element.querySelectorAll('.content-models a'), actorElement => actorElement.textContent);

		const durationString = element.querySelector('.total-time').textContent.trim();
		// timestamp is sometimes 00:00, sometimes 0:00:00
		release.duration = durationString.split(':').length === 3
			? moment.duration(durationString).asSeconds()
			: moment.duration(`00:${durationString}`).asSeconds();

		// the first linked image serves as the poster, the remaining ones as photos
		const [poster, ...primaryPhotos] = Array.from(element.querySelectorAll('a img'), imageElement => imageElement.src);
		const secondaryPhotos = $('.thumb-mouseover')
			.map((photoIndex, photoElement) => $(photoElement).css()['background-image'])
			.toArray()
			// cut the URL out of the CSS value, from 'http' up to and including '.jpg'
			.map(photoUrl => photoUrl.slice(photoUrl.indexOf('http'), photoUrl.indexOf('.jpg') + 4));

		release.poster = poster;
		release.photos = [...primaryPhotos, ...secondaryPhotos];

		release.tags = deriveTagsFromDescription(release.description);

		return release;
	}));
}
|
||||
|
||||
/**
 * Build a release from a scene page that keeps its details in '.content-page-info'
 * and its video in '.content-page-header'.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Address of the scene page; its second to last path segment becomes the entry ID.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeSceneA(html, url) {
	const { document } = new JSDOM(html).window;
	const info = document.querySelector('.content-page-info');
	const readText = selector => info.querySelector(selector).textContent.trim();

	const entryId = url.split('/').slice(-2)[0];
	const title = readText('.title');
	const description = readText('.desc');
	const date = moment(readText('.post-date'), 'Do MMM YYYY').toDate();

	const actors = Array.from(info.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	// the timestamp has either two or three colon-separated fields; the short form gets a leading 00 field
	const timestamp = readText('.total-time');
	const hasThreeFields = timestamp.split(':').length === 3;
	const duration = moment.duration(hasThreeFields ? timestamp : `00:${timestamp}`).asSeconds();

	const { poster } = document.querySelector('.content-page-header video');
	const { src, type } = document.querySelector('.content-page-header source');

	return {
		url,
		director: 'Mike Adriano',
		entryId,
		title,
		description,
		date,
		actors,
		duration,
		poster,
		trailer: { src, type },
		tags: deriveTagsFromDescription(description),
	};
}
|
||||
|
||||
/**
 * Build a release from a scene page that keeps its details in '.content-page-info'
 * and its video in '.content-page-header-inner'.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Address of the scene page; its second to last path segment becomes the entry ID.
 * @param {Object} site - Site the scene belongs to, passed through onto the release.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeSceneB(html, url, site) {
	const { document } = new JSDOM(html).window;
	const info = document.querySelector('.content-page-info');
	const readText = selector => info.querySelector(selector).textContent.trim();

	const entryId = url.split('/').slice(-2)[0];
	const title = readText('.title');
	const description = readText('.desc');
	const date = moment(readText('.date'), 'Do MMM YYYY').toDate();

	const actors = Array.from(info.querySelectorAll('.models a'), actorElement => actorElement.textContent);

	// the timestamp has either two or three colon-separated fields; the short form gets a leading 00 field
	const timestamp = readText('.total-time');
	const hasThreeFields = timestamp.split(':').length === 3;
	const duration = moment.duration(hasThreeFields ? timestamp : `00:${timestamp}`).asSeconds();

	const { poster } = document.querySelector('.content-page-header-inner video');
	const { src, type } = document.querySelector('.content-page-header-inner source');

	const tags = deriveTagsFromDescription(description);

	return {
		url,
		entryId,
		title,
		description,
		actors,
		director: 'Mike Adriano',
		date,
		duration,
		tags,
		poster,
		trailer: { src, type },
		site,
	};
}
|
||||
|
||||
/**
 * Fetch one page of the videos overview from the site's tour subdomain and scrape it.
 *
 * @param {Object} site - Site to fetch; only `site.url` is read here.
 * @param {number} [page=1] - Overview page number.
 * @returns {Promise<Object[]|number>} The scraped releases, or the response code when it is not 200.
 */
async function fetchLatest(site, page = 1) {
	const { host } = new URL(site.url);
	const res = await get(`https://tour.${host}/videos?page=${page}`);

	if (res.code !== 200) {
		return res.code;
	}

	// these two hosts use the A layout, every other host the B layout
	const scrape = ['trueanal.com', 'swallowed.com'].includes(host) ? scrapeLatestA : scrapeLatestB;

	return scrape(res.html, site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the scraper matching the site's host.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site the scene belongs to; `site.url` decides which scraper is used.
 * @returns {Promise<Object|number>} The scraped release, or the response code when it is not 200.
 */
async function fetchScene(url, site) {
	const { host } = new URL(site.url);
	const res = await get(url);

	if (res.code !== 200) {
		return res.code;
	}

	// these two hosts use the A layout, every other host the B layout
	const scrape = ['trueanal.com', 'swallowed.com'].includes(host) ? scrapeSceneA : scrapeSceneB;

	return scrape(res.body.toString(), url, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Look up an actor profile through the shared fetchProfile, pinned to the 'milehighmedia' network.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	const networkName = 'milehighmedia';

	return fetchProfile(actorName, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -13,257 +13,257 @@ const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||
const { cookieToData } = require('../utils/cookies');
|
||||
|
||||
/**
 * Collect the image URLs of a scene.
 *
 * Uses `scene.images.poster` when present; otherwise `card_main_rect` followed by
 * `card_secondary_rect`, with '.thumb' removed from each URL. For every image the
 * largest available rendition (xl, lg, md, sm, xs) is used, so an image without an
 * xl rendition no longer throws; images without any URL are left out.
 *
 * @param {Object} scene - Scene data with an `images` object.
 * @returns {string[]} Image URLs, or an empty array when neither image set exists.
 */
function getThumbs(scene) {
	const getLargestUrl = image => ['xl', 'lg', 'md', 'sm', 'xs']
		.map(size => image?.[size]?.url)
		.find(Boolean);

	if (scene.images.poster) {
		return scene.images.poster
			.map(getLargestUrl)
			.filter(Boolean);
	}

	if (scene.images.card_main_rect) {
		return scene.images.card_main_rect
			.concat(scene.images.card_secondary_rect || [])
			.map(getLargestUrl)
			.filter(Boolean)
			.map(url => url.replace('.thumb', ''));
	}

	return [];
}
|
||||
|
||||
/**
 * Turn one release item from the API into a release object, or reject it when it
 * does not match the site's `extract` parameter.
 *
 * @param {Object} data - Release item from the API.
 * @param {Object} site - Site being scraped; `parameters.extract`, `parameters.native`, `url` and `network.url` are read.
 * @returns {Object|null} The release, or null when the item is filtered out.
 */
function scrapeLatestX(data, site) {
	const { extract, native } = site.parameters || {};

	// extract === true: release should not belong to any channel
	if (extract === true && data.collections.length > 0) {
		return null;
	}

	// extract as a string: release should belong to that specific channel
	if (typeof extract === 'string' && !data.collections.some(({ shortName }) => shortName === extract)) {
		return null;
	}

	const entryId = data.id;
	const baseUrl = native ? site.url : site.network.url;

	const release = {
		entryId,
		title: data.title,
		description: data.description,
		url: `${baseUrl}/scene/${entryId}/`,
		date: new Date(data.dateReleased),
		actors: data.actors.map(({ name, gender }) => ({ name, gender })),
		tags: data.tags.map(({ name }) => name),
		duration: data.videos.mediabook?.length,
	};

	// first thumb is the poster, the rest are photos
	const [poster, ...photos] = getThumbs(data);
	release.poster = poster;
	release.photos = photos;

	const teaserFiles = data.videos.mediabook?.files;

	if (teaserFiles) {
		release.teaser = Object.values(teaserFiles).map(({ urls, format }) => ({
			src: urls.view,
			quality: parseInt(format, 10),
		}));
	}

	return release;
}
|
||||
|
||||
/**
 * Scrape every release item and drop the ones scrapeLatestX rejected.
 *
 * @param {Object[]} items - Release items from the API.
 * @param {Object} site - Site being scraped, passed on to scrapeLatestX.
 * @returns {Promise<Object[]>} The releases that were not filtered out.
 */
async function scrapeLatest(items, site) {
	const pendingReleases = items.map(async item => scrapeLatestX(item, site));
	const scrapedReleases = await Promise.all(pendingReleases);

	return scrapedReleases.filter(release => Boolean(release));
}
|
||||
|
||||
/**
 * Turn one release item from the API into a full release object.
 *
 * @param {Object} data - Release item from the API.
 * @param {string|null} url - Scene URL; when falsy, one is built from the network name or brand and the entry ID.
 * @param {Object} _site - Unused.
 * @param {string} [networkName] - Network name for the generated URL; falls back to `data.brand`.
 * @returns {Object} The release.
 */
function scrapeScene(data, url, _site, networkName) {
	const { id: entryId, brand } = data;

	const release = {
		entryId,
		title: data.title,
		description: data.description,
		date: new Date(data.dateReleased),
		actors: data.actors.map(({ name, gender }) => ({ name, gender })),
		tags: data.tags.map(({ name }) => name),
	};

	// first thumb is the poster, the rest are photos
	const [poster, ...photos] = getThumbs(data);
	release.poster = poster;
	release.photos = photos;

	const teaserFiles = data.videos.mediabook?.files;

	if (teaserFiles) {
		release.teaser = Object.values(teaserFiles).map(({ urls, format }) => ({
			src: urls.view,
			quality: parseInt(format, 10),
		}));
	}

	// the channel comes from the first collection's name, or from the brand when there is none
	release.channel = slugify(data.collections[0]?.name || brand, '');
	release.url = url || `https://www.${networkName || brand}.com/scene/${entryId}/`;

	return release;
}
|
||||
|
||||
/**
 * Work out the scenes page URL for a site.
 *
 * @param {Object} site - Site with `url`, and optionally `parameters` and `network.url`.
 * @returns {string} The scenes URL.
 * @throws {Error} When the site URL has no `?site=` query and none of the `native`, `extract` or `siteId` parameters is set.
 */
function getUrl(site) {
	const hasSiteQuery = /\?site=\d+/.test(new URL(site.url).search);

	// the site URL already points at a filtered scenes page
	if (hasSiteQuery) {
		return site.url;
	}

	const { native, extract, siteId } = site.parameters || {};

	if (native || extract) {
		return `${site.url}/scenes`;
	}

	if (siteId) {
		return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Mind Geek site '${site.name}' (${site.url}) not supported`);
}
|
||||
|
||||
/**
 * Open a cookie-backed HTTP session on the given URL and read the instance token from its cookies.
 *
 * @param {string} url - Address to request, and to read the cookies for.
 * @returns {Promise<{session: Object, instanceToken: string}>} The session and the value of the `instance_token` cookie.
 */
async function getSession(url) {
	const jar = new CookieJar();
	const session = bhttp.session({ cookieJar: jar });

	// the response itself is discarded; the cookies are read from the jar afterwards
	await session.get(url);

	const cookies = cookieToData(await jar.getCookieStringAsync(url));

	return { session, instanceToken: cookies.instance_token };
}
|
||||
|
||||
/**
 * Build an actor profile from the API's actor data, the actor page markup and the actor's releases.
 *
 * @param {Object} data - Actor data from the API.
 * @param {string} html - Markup of the actor page, searched for the date of birth.
 * @param {Object[]|null} [releases=[]] - Release items for the actor; null is treated as no releases.
 * @param {string} [networkName] - Network name, passed on to scrapeScene.
 * @returns {Object} The profile.
 */
function scrapeProfile(data, html, releases = [], networkName) {
	const { qa, qd } = ex(html);

	const profile = {
		description: data.bio,
		aliases: data.aliases,
	};

	// measurements may be absent; an empty string leaves bust, waist and hip unset
	const [bust, waist, hip] = (data.measurements || '').split('-');

	profile.gender = data.gender === 'other' ? 'transsexual' : data.gender;

	if (profile.gender === 'female') {
		if (bust) profile.bust = bust.toUpperCase();
		if (waist) profile.waist = waist;
		if (hip) profile.hip = hip;
	}

	if (data.birthPlace) profile.birthPlace = data.birthPlace;
	if (data.height) profile.height = inchesToCm(data.height);
	if (data.weight) profile.weight = lbsToKg(data.weight);

	if (data.images.card_main_rect?.[0]) {
		// use the largest rendition that is available
		profile.avatar = data.images.card_main_rect[0].xl?.url
			|| data.images.card_main_rect[0].lg?.url
			|| data.images.card_main_rect[0].md?.url
			|| data.images.card_main_rect[0].sm?.url
			|| data.images.card_main_rect[0].xs?.url;
	}

	const birthdate = qa('li').find(el => /Date of Birth/.test(el.textContent));
	if (birthdate) profile.birthdate = qd(birthdate, 'span', 'MMMM Do, YYYY');

	// the default parameter only covers undefined, while callers may pass null explicitly
	profile.releases = (releases || []).map(release => scrapeScene(release, null, null, networkName));

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the latest scene releases for a site from the releases API.
 *
 * @param {Object} site - Site to fetch releases for.
 * @param {number} [page=1] - Page number; each page holds 10 releases.
 * @returns {Promise<Object[]|null>} The scraped releases, or null when the response is not a 200 with a result.
 */
async function fetchLatest(site, page = 1) {
	const url = getUrl(site);
	const siteId = new URL(url).searchParams.get('site');

	const { session, instanceToken } = await getSession(url);

	const limit = 10;
	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');

	// by default the releases are filtered on the site's collection
	let apiUrl = `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	if (site.parameters?.native || site.parameters?.extract) {
		// native and extract sites request the releases without a collection filter
		apiUrl = `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
	}

	const headers = {
		Instance: instanceToken,
		Origin: site.url,
		Referer: url,
	};

	const res = await session.get(apiUrl, { headers });

	if (res.statusCode !== 200 || !res.body.result) {
		return null;
	}

	return scrapeLatest(res.body.result, site);
}
|
||||
|
||||
/**
 * Fetch a single release from the releases API by the entry ID found in the scene URL.
 *
 * @param {string} url - Address of the scene page.
 * @param {Object} site - Site the scene belongs to, passed on to scrapeScene.
 * @returns {Promise<Object|null>} The scraped release, or null when the URL holds no ID or the response is not a 200 with a result.
 */
async function fetchScene(url, site) {
	// prefer the number following /scene/, so a digit elsewhere in the URL (e.g. in the hostname)
	// is not mistaken for the entry ID; fall back to the first number in the URL
	const idMatch = url.match(/\/scene\/(\d+)/) || url.match(/(\d+)/);

	if (!idMatch) {
		return null;
	}

	const entryId = idMatch[1];
	const { session, instanceToken } = await getSession(url);

	const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
		headers: {
			Instance: instanceToken,
		},
	});

	if (res.statusCode === 200 && res.body.result) {
		return scrapeScene(res.body.result, url, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Search the actors API for an actor by name, then fetch their page and releases to build a profile.
 *
 * @param {string} actorName - Name to search for; matched case-insensitively against the results.
 * @param {string} networkName - Network name, used as the domain of the session and actor page URLs.
 * @param {string} [actorPath='model'] - Path segment of the actor page on the network site.
 * @returns {Promise<Object|null>} The profile, or null when the search fails, the actor is not found or the actor page is not a 200.
 */
async function fetchProfile(actorName, networkName, actorPath = 'model') {
	const url = `https://www.${networkName}.com`;
	const { session, instanceToken } = await getSession(url);
	const headers = { Instance: instanceToken };

	// encodeURIComponent rather than encodeURI, so reserved characters such as & or # in a name
	// are escaped instead of being read as part of the query string
	const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURIComponent(actorName)}`, { headers });

	if (res.statusCode !== 200) {
		return null;
	}

	const actorData = res.body.result.find(actor => actor.name.toLowerCase() === actorName.toLowerCase());

	if (!actorData) {
		return null;
	}

	const actorUrl = `https://www.${networkName}.com/${actorPath}/${actorData.id}/`;
	const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;

	const [actorRes, actorReleasesRes] = await Promise.all([
		bhttp.get(actorUrl),
		session.get(actorReleasesUrl, { headers }),
	]);

	if (actorRes.statusCode !== 200) {
		return null;
	}

	// without usable releases the profile is still scraped; an empty list is passed rather than null,
	// because scrapeProfile's default parameter does not apply to null
	const releases = actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result
		? actorReleasesRes.body.result
		: [];

	return scrapeProfile(actorData, actorRes.body.toString(), releases, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Look up an actor profile through the shared fetchProfile, pinned to the 'mofos' network.
 *
 * @param {string} actorName - Name of the actor to look up.
 * @returns {Promise<*>} Whatever fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	const networkName = 'mofos';

	return fetchProfile(actorName, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -9,149 +9,149 @@ const slugify = require('../utils/slugify');
|
||||
const { ex, get } = require('../utils/q');
|
||||
|
||||
/**
 * Derive a title and entry ID from the slug in a scene path.
 *
 * The slug is the path component at index 2 after splitting on '/'. Its last
 * hyphen-separated part is the entry ID; the parts before it are capitalized
 * and joined with spaces to form the title.
 *
 * @param {string} pathname - Path of the scene URL.
 * @returns {{title: string, entryId: string}} The derived title and entry ID.
 */
function titleExtractor(pathname) {
	const slugParts = pathname.split('/')[2].split('-');
	const words = slugParts.slice(0, -1);
	const entryId = slugParts[slugParts.length - 1];

	const title = words
		.map(word => `${word.slice(0, 1).toUpperCase()}${word.slice(1)}`)
		.join(' ');

	return { title, entryId };
}
|
||||
|
||||
/**
 * Scrape the releases from an overview page that lists scenes as '.site-list .scene-item' elements.
 *
 * @param {string} html - Markup of the overview page.
 * @param {Object} site - Site being scraped, passed through onto every release.
 * @returns {Object[]} One release per scene item.
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	return $('.site-list .scene-item').toArray().map((item) => {
		const sceneEl = $(item);
		const linkEl = sceneEl.find('a').first();

		// rebuild the URL from protocol, hostname and path only
		const link = new URL(linkEl.attr('href'));
		const url = `${link.protocol}//${link.hostname}${link.pathname}`;
		const { title, entryId } = titleExtractor(link.pathname);

		const date = moment.utc(sceneEl.find('.entry-date').text(), 'MMM D, YYYY').toDate();
		const actors = sceneEl.find('.contain-actors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

		// the runtime text minus its last four characters is read as a number and multiplied by 60
		const runtime = sceneEl.find('.scene-runtime').text();
		const duration = Number(runtime.slice(0, -4)) * 60;

		// the poster is the trailing URL-like part of data-srcset, or of data-src when there is no data-srcset
		const posterSource = linkEl.find('img[data-srcset]').attr('data-srcset') || linkEl.find('img[data-src]').attr('data-src');
		const [posterPath] = posterSource.match(/[\w/.]+$/);
		const poster = `https:${posterPath}`;

		return {
			url,
			entryId,
			title,
			actors,
			date,
			duration,
			poster,
			rating: null,
			site,
		};
	});
}
|
||||
|
||||
/**
 * Parse a scene page into a release object.
 *
 * @param {string} html - Raw HTML of the scene page.
 * @param {string} url - URL the page was fetched from; returned unchanged on the release.
 * @param {Object} site - Site record, passed through unchanged on the release.
 * @returns {Object} Release with url, entryId, title, description, actors, date, duration,
 *   tags, photos, poster (null when the player has none), trailer, rating, site and channel.
 */
function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElement = $('.scene-info');

	// the entry ID is the last dash-separated segment of the URL without query string or hash
	const { protocol, hostname, pathname } = new URL(url);
	const originalUrl = `${protocol}//${hostname}${pathname}`;
	const entryId = originalUrl.split('-').slice(-1)[0];

	const title = sceneElement.find('h1.scene-title.grey-text').text();
	const description = sceneElement.find('.synopsis').contents().slice(2).text().replace(/[\s\n]+/g, ' ').trim();

	const date = moment.utc(sceneElement.find('span.entry-date').text(), 'MMM D, YYYY').toDate();
	const actors = $('a.scene-title.grey-text.link').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	// NOTE(review): drops a 10-character prefix and 4-character suffix around the minutes figure; confirm against the page markup
	const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60;

	// only prefix the scheme when the player actually has a poster, instead of emitting 'https:undefined'
	const posterPath = $('video, dl8-video').attr('poster');
	const poster = posterPath ? `https:${posterPath}` : null;
	const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray();

	const trailerEl = $('source');
	const trailerSrc = trailerEl.attr('src');
	const trailerType = trailerEl.attr('type');

	// channel slug is the site title without whitespace and apostrophes, lowercased
	const siteName = sceneElement.find('a.site-title').text();
	const channel = siteName.replace(/[\s']+/g, '').toLowerCase();

	const tags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

	return {
		url,
		entryId,
		title,
		description,
		actors,
		date,
		duration,
		tags,
		photos,
		poster,
		trailer: {
			src: trailerSrc,
			type: trailerType,
		},
		rating: null,
		site,
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch one page of an actor's scene listing and return the scene URLs on it.
 *
 * @param {string} url - Listing page URL.
 * @returns {Promise<string[]>} Scene URLs, or an empty array when the request failed.
 */
async function fetchActorReleases(url) {
	const res = await get(url);

	if (!res.ok) {
		return [];
	}

	// live scenes repeat on all pages
	return res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child');
}
|
||||
|
||||
/**
 * Build an actor profile from a profile page, following pagination for older releases.
 *
 * @param {string} html - Raw HTML of the first profile page.
 * @returns {Promise<Object>} Profile with description, optional avatar and release URLs.
 */
async function scrapeProfile(html) {
	const { qu } = ex(html);

	const profile = {
		description: qu.q('.bio_about_text', true),
	};

	const avatarPath = qu.q('img.performer-pic', 'src');

	if (avatarPath) {
		profile.avatar = `https:${avatarPath}`;
	}

	const firstPageReleases = qu.urls('.scene-item > a:first-child');

	// every pagination link except the relative next/previous shortcuts
	const otherPageUrls = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
	const otherPageReleases = await Promise.all(otherPageUrls.map(pageUrl => fetchActorReleases(pageUrl)));

	profile.releases = firstPageReleases.concat(otherPageReleases.flat());

	return profile;
}
|
||||
|
||||
/**
 * Fetch a page of the site's latest scenes and scrape it.
 *
 * @param {Object} site - Site record; its url is used as the listing base.
 * @param {number} [page=1] - Page number passed as the `page` query parameter.
 * @returns {Promise<*>} Whatever scrapeLatest returns for the fetched HTML.
 */
async function fetchLatest(site, page = 1) {
	const pageUrl = `${site.url}?page=${page}`;
	const { body } = await bhttp.get(pageUrl);

	return scrapeLatest(body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site record handed on to scrapeScene.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
	const { body } = await bhttp.get(url);

	return scrapeScene(body.toString(), url, site);
}
|
||||
|
||||
/**
 * Fetch and scrape the profile page for an actor.
 *
 * @param {string} actorName - Actor name; slugified to build the profile URL.
 * @returns {Promise<Object|null>} The scraped profile, or null when the page did not answer with 200.
 */
async function fetchProfile(actorName) {
	const profileUrl = `https://www.naughtyamerica.com/pornstar/${slugify(actorName)}`;
	const res = await bhttp.get(profileUrl);

	return res.statusCode === 200
		? scrapeProfile(res.body.toString())
		: null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,75 +3,75 @@
|
||||
const { geta, ed } = require('../utils/q');
|
||||
|
||||
/**
 * Scrape releases from block-style listing items.
 *
 * @param {Array<{html: string, qu: Object}>} scenes - Query contexts, one per listing item.
 * @returns {Object[]} Releases with entryId, title, url, date, actors, poster, photos and teaser.
 * @throws {TypeError} When an item carries no videothumb ID in either a class or an id attribute.
 */
function scrapeBlockLatest(scenes) {
	const thumbIdPattern = /videothumb_(\d+)/;

	return scenes.map(({ html, qu }) => {
		const release = {};

		// ?. keeps the id fallback reachable when no element carries a videothumb class;
		// without it a null attribute throws before the || alternative is evaluated
		const entryId = qu.q('div[class*="videothumb"]', 'class')?.match(thumbIdPattern)
			|| qu.q('div[id*="videothumb"]', 'id')?.match(thumbIdPattern);

		release.entryId = entryId[1];

		release.title = qu.q('h4 a', true);
		release.url = qu.url('h4 a');
		release.date = ed(html, 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

		release.actors = qu.all('.tour_update_models a', true);

		release.poster = qu.q('div img').dataset.src;

		// prefer the largest available thumbnail variant; leave the list empty when there is none
		release.photos = [
			qu.q('div img', 'src0_4x') || qu.q('div img', 'src0_3x') || qu.q('div img', 'src0_2x'),
		].filter(Boolean);

		release.teaser = qu.video();

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape releases from classic-style listing items.
 *
 * @param {Array<{el: Element, qu: Object}>} scenes - Query contexts, one per listing item.
 * @returns {Object[]} Releases with entryId, url, title, actors, poster and photos, plus
 *   description, date and duration when the item provides them.
 */
function scrapeClassicLatest(scenes) {
	return scenes.map(({ el, qu }) => {
		const release = {};

		release.entryId = el.dataset.setid;
		release.url = qu.url('a');

		release.title = qu.q('.update_title_small', true) || qu.q('a:nth-child(2)', true);

		const description = qu.q('a', 'title');
		if (description) release.description = description;

		const date = qu.date('.date_small, .update_date', 'MM/DD/YYYY');
		if (date) release.date = date;

		// skip the duration instead of throwing when the counts line has no 'N min' figure
		const durationMatch = qu.q('.update_counts', true)?.match(/(\d+) min/i);
		if (durationMatch) release.duration = Number(durationMatch[1]) * 60;

		// prefer linked model names; fall back to the comma-separated text, if there is any
		const linkedActors = qu.all('.update_models a', true);

		if (linkedActors.length > 0) {
			release.actors = linkedActors;
		} else {
			const actorLine = qu.q('.update_models', true);
			release.actors = actorLine ? actorLine.split(/,\s*/) : [];
		}

		// the cnt attribute gives the number of thumbnails; the first is the poster, the rest are photos
		const photoCount = Number(qu.q('.update_thumb', 'cnt')) || 0;

		[release.poster, ...release.photos] = Array.from({ length: photoCount }, (value, index) => qu.q('.update_thumb', `src${index}_3x`)
			|| qu.q('.update_thumb', `src${index}_2x`)
			|| qu.q('.update_thumb', `src${index}_1x`));

		return release;
	});
}
|
||||
|
||||
/**
 * Fetch a page of latest updates and scrape it with the layout-specific scraper.
 *
 * @param {Object} site - Site record; parameters.siteId selects the tour and parameters.block the layout.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|number|null>} Releases, the response status on failure, or null
 *   when the site has no parameters.
 */
async function fetchLatest(site, page = 1) {
	const { parameters } = site;

	if (!parameters) {
		return null;
	}

	const url = `${site.url}/tour_${parameters.siteId}/categories/movies_${page}_d.html`;
	const res = await geta(url, '.updatesBlock .movieBlock, .updatesBlock .videoBlock, .latest_updates_block .update_details, .category_listing_block .update_details');

	if (!res.ok) {
		return res.status;
	}

	return parameters.block
		? scrapeBlockLatest(res.items, site)
		: scrapeClassicLatest(res.items, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchLatest,
|
||||
};
|
||||
|
||||
@@ -5,161 +5,161 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
|
||||
// explicit origins keyed by site slug
const slugUrlMap = {
	nubiles: 'https://www.nubiles.net',
	nubilesporn: 'https://www.nubiles-porn.com',
};
|
||||
|
||||
/**
 * Fetch an album page and collect the srcset of each photo thumbnail.
 *
 * @param {string} albumUrl - URL of the photo album.
 * @returns {Promise<string[]>} One srcset per thumbnail, or an empty array when the request failed.
 */
async function getPhotos(albumUrl) {
	const res = await geta(albumUrl, '.photo-thumb');

	if (!res.ok) {
		return [];
	}

	return res.items.map(({ q: query }) => query('source').srcset);
}
|
||||
|
||||
/**
 * Scrape releases from a grid of scene items.
 *
 * @param {Array<{qu: Object}>} scenes - Query contexts, one per grid item.
 * @param {Object|null} [site] - Site record; its url is the base for relative links.
 * @param {string} [origin] - Fallback base for relative links when neither a channel link nor site.url exists.
 * @returns {Object[]} Releases with title, entryId, date, actors, poster, stars and likes, plus url
 *   unless the item links to a join page.
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map(({ qu }) => {
		const release = { title: qu.q('.title a', true) };

		const [sceneUrl] = qu.url('.title a').split('?');
		const channelUrl = qu.url('.site-link');

		const isAbsolute = /^http/.test(sceneUrl);
		const isJoinLink = !isAbsolute && /\/join/.test(sceneUrl);

		if (isAbsolute) {
			const { pathname } = new URL(sceneUrl);

			release.entryId = pathname.split('/')[3];
			release.url = channelUrl ? `${channelUrl}${pathname}` : sceneUrl;
		} else if (isJoinLink) {
			// join links carry no scene path, so the ID comes from the thumbnail attribute
			release.entryId = qu.q('a img', 'tube_tour_thumb_id');
		} else {
			release.entryId = sceneUrl.split('/')[3];

			// relative link: resolve against the channel, then the site, then the given origin
			const base = channelUrl || site?.url || origin;
			if (base) release.url = `${base}${sceneUrl}`;
		}

		release.date = qu.date('.date', 'MMM D, YYYY');
		release.actors = qu.all('.models a.model', true);

		// offer the _1280 variant first, with the original as fallback
		const originalPoster = qu.q('img').dataset.original;
		release.poster = [originalPoster.replace('_640', '_1280'), originalPoster];

		release.stars = Number(qu.q('.rating', true));
		release.likes = Number(qu.q('.likes', true));

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object.
 *
 * @param {{qu: Object}} context - Query context for the scene page.
 * @param {string} url - Scene URL; the release URL is this URL without query string or hash.
 * @param {Object} site - Site record; its url is the base for the photo album link.
 * @returns {Promise<Object>} Release with url, entryId, title, description, date, actors, tags,
 *   poster, trailer, stars and likes, plus photos when the page links to a gallery.
 */
async function scrapeScene({ qu }, url, site) {
	const release = {};

	const { origin, pathname } = new URL(url);

	release.url = `${origin}${pathname}`;
	release.entryId = pathname.split('/')[3];

	release.title = qu.q('.content-pane-title h2', true);
	release.description = qu.q('.content-pane-column div', true);

	// qu.date parses the text with the given format, as scrapeAll does for the same markup;
	// qu.q with a string second argument would look up an attribute of that name instead
	release.date = qu.date('.date', 'MMM D, YYYY');

	release.actors = qu.all('.content-pane-performers .model', true);
	release.tags = qu.all('.categories a', true);

	release.poster = qu.poster() || qu.img('.fake-video-player img');

	// one trailer entry per <source>, with the quality taken from its res attribute
	release.trailer = qu.all('source').map(source => ({
		src: source.src,
		quality: Number(source.getAttribute('res')),
	}));

	release.stars = Number(qu.q('.score', true));
	release.likes = Number(qu.q('#likecount', true));

	const albumLink = qu.url('.content-pane-related-links a[href*="gallery"]');
	if (albumLink) release.photos = await getPhotos(`${site.url}${albumLink}`);

	return release;
}
|
||||
|
||||
/**
 * Scrape a model page into a profile object.
 *
 * @param {{qu: Object}} context - Query context for the model page.
 * @param {string} _actorName - Unused.
 * @param {string} origin - Base URL handed to scrapeAll for resolving relative release links.
 * @returns {Object} Profile with description, residencePlace, height, avatar and releases, plus
 *   age and bust/waist/hip when the page lists them.
 */
function scrapeProfile({ qu }, _actorName, origin) {
	const profile = {};

	// each <h5> heading is paired by position with the <p> that directly follows a heading
	const keys = qu.all('.model-profile h5', true);
	const values = qu.all('.model-profile h5 + p', true);

	const bio = Object.fromEntries(keys.map((key, index) => [slugify(key, '_'), values[index]]));

	// leave the age off rather than storing NaN when the page does not list one
	if (bio.age) profile.age = Number(bio.age);

	profile.description = qu.q('.model-bio', true);
	profile.residencePlace = bio.location;
	profile.height = heightToCm(bio.height);

	// not every profile lists a figure; numeric parts become numbers, others stay strings
	if (bio.figure) {
		[profile.bust, profile.waist, profile.hip] = bio.figure.split('-').map(v => Number(v) || v);
	}

	profile.avatar = qu.img('.model-profile img');

	const releases = qu.all('.content-grid-item').filter(el => /video\//.test(qu.url(el, '.img-wrapper a'))); // filter out photos
	profile.releases = scrapeAll(ctxa(releases), null, origin);

	return profile;
}
|
||||
|
||||
/**
 * Fetch a page of the video gallery and scrape its grid items.
 *
 * @param {Object} site - Site record; its url is the gallery base.
 * @param {number} [page=1] - Page number, translated to an offset in steps of 12.
 * @returns {Promise<Object[]|number>} Releases, or the response status on failure.
 */
async function fetchLatest(site, page = 1) {
	const offset = (page - 1) * 12;
	const res = await geta(`${site.url}/video/gallery/${offset}`, '.content-grid-item');

	if (res.ok) {
		return scrapeAll(res.items, site);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch and scrape the upcoming videos page for sites flagged as having one.
 *
 * @param {Object} site - Site record; parameters.upcoming enables the lookup.
 * @returns {Promise<Object[]|number>} Releases, the response status on failure, or an empty
 *   array when the site has no upcoming page.
 */
async function fetchUpcoming(site) {
	if (!site.parameters?.upcoming) {
		return [];
	}

	const res = await geta(`${site.url}/video/upcoming`, '.content-grid-item');

	return res.ok ? scrapeAll(res.items, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site record handed on to scrapeScene.
 * @returns {Promise<Object|number>} The scraped release, or the response status on failure.
 */
async function fetchScene(url, site) {
	const res = await get(url);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, site);
}
|
||||
|
||||
/**
 * Look an actor up in the alphabetical model index and scrape the matching profile page.
 *
 * @param {string} actorName - Actor name; its first letter selects the index page.
 * @param {string} siteSlug - Site slug; mapped through slugUrlMap, else https://www.<slug>.com.
 * @returns {Promise<Object|number|null>} The profile, a response status when a request failed,
 *   or null when the index lists no model with a matching name.
 */
async function fetchProfile(actorName, siteSlug) {
	const origin = slugUrlMap[siteSlug] || `https://www.${siteSlug}.com`;
	const indexUrl = `${origin}/model/alpha/${actorName.charAt(0).toLowerCase()}`;

	const resModels = await get(indexUrl);

	if (!resModels.ok) {
		return resModels.status;
	}

	// names are compared by slug
	const actorSlug = slugify(actorName);
	const modelLink = resModels.item.qu.all('.content-grid-item a.title').find(el => slugify(el.textContent) === actorSlug);

	if (!modelLink) {
		return null;
	}

	// NOTE(review): the link element itself is interpolated, relying on its string conversion to yield a path; confirm
	const resModel = await get(`${origin}${modelLink}`);

	return resModel.ok
		? scrapeProfile(resModel.item, actorName, origin)
		: resModel.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -7,143 +7,143 @@ const knex = require('../knex');
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
|
||||
/**
 * Load the slugs of all sites that belong to the 'perfectgonzo' network.
 *
 * @returns {Promise<string[]>} Site slugs.
 */
async function getSiteSlugs() {
	const slugs = await knex('sites')
		.pluck('sites.slug')
		.join('networks', 'networks.id', 'sites.network_id')
		.where('networks.slug', 'perfectgonzo');

	return slugs;
}
|
||||
|
||||
/**
 * Derive a short hex digest from an identifier string.
 *
 * @param {string} identifier - Text to hash.
 * @returns {string} Hex encoding of an 8-byte blake2b digest.
 */
function getHash(identifier) {
	const hasher = blake2.createHash('blake2b', { digestLength: 8 });
	const input = Buffer.from(identifier);

	hasher.update(input);

	return hasher.digest('hex');
}
|
||||
|
||||
/**
 * Collect the names listed under the 'Male Models' label of a tag container.
 *
 * The container's non-empty child nodes are scanned in order. Names are the entries between the
 * 'Male Models' entry and the next text node (nodeType 3), or the end of the container when no
 * further text node follows.
 *
 * @param {Node|null} tagContainer - Element whose childNodes hold the labels and tag links.
 * @returns {string[]} Trimmed names, or an empty array when the container or label is missing.
 */
function extractMaleModelsFromTags(tagContainer) {
	if (!tagContainer) {
		return [];
	}

	const entries = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() }))
		.filter(entry => entry.text.length > 0);

	const labelIndex = entries.findIndex(entry => entry.text === 'Male Models');

	if (labelIndex === -1) {
		return [];
	}

	const nextLabelIndex = entries.findIndex((entry, index) => index > labelIndex && entry.type === 3);

	// findIndex yields -1 when 'Male Models' is the last label; handing that to slice() would drop the final name
	const endIndex = nextLabelIndex === -1 ? entries.length : nextLabelIndex;

	return entries.slice(labelIndex + 1, endIndex).map(entry => entry.text);
}
|
||||
|
||||
/**
 * Find which site slug occurs in a photo URL.
 *
 * @param {string} photo - Photo URL to search.
 * @param {string[]} [metaSiteSlugs] - Known site slugs; loaded through getSiteSlugs() when omitted.
 * @returns {Promise<string|null>} The slug found in the URL, or null when none occurs, the slug
 *   list is empty, or photo is not a string.
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
	const siteSlugs = metaSiteSlugs || await getSiteSlugs();

	// an empty alternation compiles to a pattern that matches '' everywhere, which would yield an empty channel
	if (typeof photo !== 'string' || siteSlugs.length === 0) {
		return null;
	}

	// escape the slugs so they are matched literally
	const pattern = new RegExp(siteSlugs.map(slug => slug.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|'));
	const channelMatch = photo.match(pattern);

	return channelMatch ? channelMatch[0] : null;
}
|
||||
|
||||
/**
 * Scrape the releases listed on a movies overview page.
 *
 * @param {string} html - Raw HTML of the overview page.
 * @param {Object} site - Site record; its url prefixes scene links and its slug feeds the entry ID.
 * @returns {Promise<Object[]>} Releases; each carries the network's site slugs under meta.siteSlugs.
 */
async function scrapeLatest(html, site) {
	const siteSlugs = await getSiteSlugs();
	const { element } = ex(html);
	const items = ctxa(element, '#content-main .itemm');

	return items.map((item) => {
		const {
			q, qa, qlength, qdate, qimages,
		} = item;

		const { title, href } = q('a');
		const url = `${site.url}${href}`;
		const date = qdate('.nm-date', 'MM/DD/YYYY');

		// the entry ID is a hash over site slug, URL slug and release date
		const slug = new URL(url).pathname.split('/')[2];
		const [poster, ...photos] = qimages('.bloc-link img');

		return {
			site,
			meta: {
				siteSlugs,
			},
			title,
			url,
			date,
			entryId: getHash(`${site.slug}${slug}${date.toISOString()}`),
			// the link title lists the performers separated by '&'
			actors: title.split('&').map(actor => actor.trim()),
			poster,
			photos,
			tags: qa('.dropdown ul a', true).slice(1),
			duration: qlength('.dropdown p:first-child'),
		};
	});
}
|
||||
|
||||
/**
 * Scrape a movie page into a release object.
 *
 * @param {string} html - Raw HTML of the movie page.
 * @param {Object} site - Site record, passed through on the release.
 * @param {string} url - URL the page was fetched from.
 * @param {string[]} [metaSiteSlugs] - Known site slugs, handed on to extractChannelFromPhoto.
 * @returns {Promise<Object>} The release; url and entryId are rewritten for the channel when one
 *   can be derived from the first photo.
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
	const {
		q, qa, qlength, qdate, qposter, qtrailer,
	} = ex(html);

	const release = { url, site };

	release.title = q('#movie-header h2', true);
	release.date = qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	release.description = q('.container .mg-md', true);
	release.duration = qlength('#video-ribbon .container > div > span:nth-child(3)');

	const maleModels = extractMaleModelsFromTags(q('.tag-container'));
	release.actors = qa('#video-info a', true).concat(maleModels);

	// a 4K marker in the ribbon is recorded as an additional tag
	const tags = qa('.tag-container a', true);
	const resolution = q('#video-ribbon .container > div > span:nth-child(2)', true);
	release.tags = /4K/.test(resolution) ? tags.concat('4k') : tags;

	release.photos = qa('.bxslider_pics img').map(el => el.dataset.original || el.src);
	release.poster = qposter();

	const trailer = qtrailer();

	if (trailer) {
		release.trailer = { src: trailer };
	}

	if (release.photos.length > 0) {
		release.channel = await extractChannelFromPhoto(release.photos[0], metaSiteSlugs);
	}

	if (release.channel) {
		// point the release at the channel's own domain and derive the entry ID from it
		const { pathname } = new URL(url);
		const slug = pathname.split('/')[2];

		release.url = `https://${release.channel}.com${pathname}`;
		release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);
	}

	return release;
}
|
||||
|
||||
/**
 * Fetch a page of the movies overview and scrape it.
 *
 * @param {Object} site - Site record; its url is the overview base.
 * @param {number} [page=1] - Overview page number.
 * @returns {Promise<Object[]>} Releases, or an empty array when the page did not answer with 200.
 */
async function fetchLatest(site, page = 1) {
	const res = await bhttp.get(`${site.url}/movies/page-${page}`);

	return res.statusCode === 200
		? scrapeLatest(res.body.toString(), site)
		: [];
}
|
||||
|
||||
/**
 * Fetch a movie page and scrape it.
 *
 * @param {string} url - Movie page URL.
 * @param {Object} site - Site record handed on to scrapeScene.
 * @param {Object} [release] - Previously scraped release; its meta.siteSlugs, when present, are
 *   handed on to scrapeScene.
 * @returns {Promise<Object|Array>} The scraped release, or an empty array when the page did not
 *   answer with 200.
 */
async function fetchScene(url, site, release) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		// a release may come without meta; chain through both levels instead of throwing
		return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
	}

	return [];
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -6,135 +6,135 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Request the trailer and poster locations for a set from the token endpoint.
 *
 * @param {string} entryId - Set ID, posted as setId.
 * @returns {Promise<{poster: *, trailer: *}|null>} Poster and trailer taken from the response
 *   body, or null when the endpoint did not answer with 200.
 */
async function getTrailer(entryId) {
	const { statusCode, body } = await bhttp.post('https://www.pervcity.com/gettoken.php', {
		setId: entryId,
	});

	if (statusCode !== 200) {
		return null;
	}

	return {
		poster: body.TrailerImg,
		trailer: body.TrailerPath || body.Trailerfallback,
	};
}
|
||||
|
||||
/**
 * Scrape a single scene from a latest-scene HTML fragment.
 *
 * @param {string} html - HTML of one scene entry.
 * @param {Object} site - Site record; its url prefixes the scene link.
 * @returns {Object} Release with url, entryId, title, actors, date, poster, photos, rating and site.
 */
function scrapeLatestScene(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneLink = $('#scene_title_border a');

	const actors = $('.home_model_name a')
		.toArray()
		.map(actorEl => $(actorEl).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas

	const photos = $('.sample-picker img')
		.map((index, photoEl) => $(photoEl).attr('src').replace('tourpics', 'trailer'))
		.toArray();

	return {
		url: `${site.url}/${sceneLink.attr('href')}`,
		entryId: $('li').attr('id'),
		title: sceneLink.attr('title').replace(/\u00E2\u0080\u0099/g, '\''), // replace weird apostrophes
		actors,
		date: moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate(),
		poster: $('a:nth-child(2) > img').attr('src'),
		photos,
		rating: {
			// the rating is the number of star images shown
			stars: $('img[src*="/star.png"]').length,
		},
		site,
	};
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object.
 *
 * Reads the set ID, title, description, duration and keyword tags from the
 * page, then resolves poster and trailer through getTrailer().
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL, copied onto the release
 * @param {object} site - site entry, copied onto the release
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const { document: page } = new JSDOM(html).window;
	const textOf = selector => page.querySelector(selector).textContent;

	const entryId = page.querySelector('input#set_ID').value;

	const title = textOf('title');
	const description = textOf('.player_data').trim();

	// the first two numbers in the duration label are read as minutes and seconds
	const [minutes, seconds] = textOf('.tag_lineR div:nth-child(2) span').match(/\d+/g);
	const duration = Number(minutes) * 60 + Number(seconds);

	const tags = page.querySelector('meta[name="keywords"]').content.split(',');

	const media = await getTrailer(entryId);

	return {
		url,
		site,
		entryId,
		title,
		description,
		duration,
		tags,
		poster: media.poster,
		trailer: { src: media.trailer },
	};
}
|
||||
|
||||
/**
 * Extract the set ID from a landing page.
 *
 * @param {string} html - landing page markup
 * @returns {string} value of the `input#set_ID` field
 */
function scrapeFallbackLanding(html) {
	const dom = new JSDOM(html);
	const setIdInput = dom.window.document.querySelector('input#set_ID');

	return setIdInput.value;
}
|
||||
|
||||
/**
 * Scrape the popup markup returned for a set into a release object.
 *
 * @param {string} html - popup markup
 * @param {string} entryId - set ID, also used to look up poster and trailer
 * @param {string} url - scene URL, copied onto the release
 * @param {object} site - site entry, copied onto the release
 * @returns {Promise<object>} the scraped release
 */
async function scrapeFallbackScene(html, entryId, url, site) {
	const popup = new JSDOM(html).window.document;
	const find = selector => popup.querySelector(selector);

	const title = find('.popup_data_set_head label').textContent;
	const description = find('.popup_data_set_des p').textContent.trim();
	const date = moment.utc(find('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
	const actors = Array.from(popup.querySelectorAll('.popup_data_set_models a'), actorEl => actorEl.textContent);

	const media = await getTrailer(entryId);

	// the channel is taken from the alt text of the image in the popup header
	const channel = find('.popup_left_top div img').alt;

	return {
		url,
		entryId,
		site,
		title,
		description,
		date,
		actors,
		poster: media.poster,
		trailer: { src: media.trailer },
		channel,
	};
}
|
||||
|
||||
/**
 * Fetch a page of latest updates and scrape every returned scene fragment.
 *
 * The first page and subsequent pages are served by different endpoints.
 *
 * @param {object} site - site entry; `url` and `parameters.tourId` are used
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	let endpoint;

	if (page === 1) {
		endpoint = `${site.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`;
	} else {
		endpoint = `${site.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&websiteid=0&deviceview=browser&tourId=${site.parameters.tourId}`;
	}

	const res = await bhttp.get(endpoint);
	const payload = JSON.parse(res.body.toString());

	// total_arr is a key-value object for final_load_latestupdate_grid_view.php
	const fragments = Object.values(payload.total_arr);

	return fragments.map(fragment => scrapeLatestScene(fragment, site));
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene.
 *
 * When `site.isNetwork` is set, the fetched page is only used to find the set
 * ID; the scene details then come from the set popup endpoint.
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry
 * @returns {Promise<object|null>} the scraped release, or null on a non-200 response
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode !== 200) {
		return null;
	}

	if (!site.isNetwork) {
		return scrapeScene(res.body.toString(), url, site);
	}

	const entryId = scrapeFallbackLanding(res.body.toString(), url);
	const popupPayload = { setId: entryId };
	const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', popupPayload);

	return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,56 +5,56 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const ethnicityMap = {
|
||||
White: 'Caucasian',
|
||||
White: 'Caucasian',
|
||||
};
|
||||
|
||||
const hairMap = {
|
||||
Brunette: 'brown',
|
||||
Brunette: 'brown',
|
||||
};
|
||||
|
||||
/**
 * Build an actor profile from a profile page.
 *
 * Every `.infoPiece` element is read as a "Key: value" pair; the resulting bio
 * keeps the page's own capitalisation for its keys (`Gender`, `Ethnicity`, ...).
 *
 * @param {string} html - profile page markup
 * @param {string} _url - profile URL (unused)
 * @param {string} actorName - name stored on the profile
 * @returns {Promise<object>} the scraped profile
 */
async function scrapeProfile(html, _url, actorName) {
	const { document } = new JSDOM(html).window;

	const bio = Array.from(document.querySelectorAll('.infoPiece')).reduce((acc, el) => {
		const [key, value] = el.textContent.replace(/\n|\t/g, '').split(':');

		// an entry without a label or without a colon has nothing usable; skip it rather than throw on value.trim()
		if (!key || value === undefined) return acc;

		return { ...acc, [key.trim()]: value.trim() };
	}, {});

	// reads the number that directly follows an opening parenthesis, e.g. "(170"; null when absent
	const parenthesizedNumber = (text) => {
		const match = text && text.match(/\((\d+)/);

		return match ? Number(match[1]) : null;
	};

	const profile = {
		name: actorName,
	};

	const descriptionEl = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
	const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');

	if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
	// bio keys are capitalised, so a lowercase `ethnicity` check would never match
	if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;

	if (descriptionEl) profile.description = descriptionEl.textContent;

	if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
	if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();

	profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
	profile.residencePlace = bio['City and Country'];

	if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
	if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';

	// NOTE(review): the parenthesised figure is presumably the metric value - confirm against a live page
	const height = parenthesizedNumber(bio.Height);
	const weight = parenthesizedNumber(bio.Weight);

	if (height !== null) profile.height = height;
	if (weight !== null) profile.weight = weight;

	if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
	if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
	if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';

	// avatars whose source contains "default/" are not stored
	if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
	profile.social = Array.from(document.querySelectorAll('.socialList a'), el => el.href).filter(link => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason

	return profile;
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||
|
||||
/* Model pages are not reliably associated with actual porn stars
|
||||
/* Model pages are not reliably associated with actual porn stars
|
||||
const modelUrl = `https://pornhub.com/model/${actorSlug}`;
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
|
||||
@@ -74,12 +74,12 @@ async function fetchProfile(actorName) {
|
||||
}
|
||||
*/
|
||||
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
const pornstarRes = await bhttp.get(pornstarUrl);
|
||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||
const pornstarRes = await bhttp.get(pornstarUrl);
|
||||
|
||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -9,193 +9,193 @@ const { get, geta } = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Fetch the high-resolution gallery for a scene and collect its photo URLs.
 *
 * @param {string} entryId - scene ID used in the gallery query
 * @param {object} site - site entry; only the hostname of `site.url` is used
 * @returns {Promise<string[]>} photo URLs, preferring `data-src` over `href`
 */
async function getPhotos(entryId, site) {
	const galleryHost = new URL(site.url).hostname;
	const galleryRes = await bhttp.get(`https://${galleryHost}/gallery.php?type=highres&id=${entryId}`);

	const $ = cheerio.load(galleryRes.body.toString(), { normalizeWhitespace: true });

	return $('a.fakethumb')
		.map((photoIndex, photoElement) => {
			const thumb = $(photoElement);

			return thumb.attr('data-src') || thumb.attr('href');
		})
		.toArray();
}
|
||||
|
||||
/**
 * Scrape every `.scene` element of a listing page into a release object.
 *
 * @param {string} html - listing page markup
 * @param {object} [site] - site entry, copied onto each release as-is
 * @returns {object[]} scraped releases
 */
function scrapeLatest(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.content-wrapper .scene').toArray();

	return sceneElements.map((element) => {
		const sceneLinkElement = $(element).find('h3 a');
		const thumbnailElement = $(element).find('a img');

		const url = sceneLinkElement.attr('href');
		const entryId = url.split('/').slice(-1)[0];

		// the title is whatever follows the first colon of the thumbnail's alt text
		const titleText = thumbnailElement.attr('alt');
		const title = titleText.slice(titleText.indexOf(':') + 1).trim();

		// moment must be given the element's text; the cheerio selection itself stringifies to
		// "[object Object]" and always parses to an invalid date
		const dateText = $(element).find('.scene-date').text().trim();
		const date = moment.utc(dateText, ['MM/DD/YYYY', 'YYYY-MM-DD']).toDate();

		const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
		const likes = Number($(element).find('.scene-votes').text());

		// additional thumbnails are exposed as src1..srcN attributes, with N in thumbs_num
		const photoCount = Number(thumbnailElement.attr('thumbs_num'));
		const poster = thumbnailElement.attr('src');
		const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`));

		return {
			url,
			entryId,
			title,
			actors,
			date,
			poster,
			photos,
			rating: {
				likes,
			},
			site,
		};
	});
}
|
||||
|
||||
/**
 * Scrape a scene page into a release object and fetch its photo gallery.
 *
 * @param {string} html - scene page markup
 * @param {string} url - scene URL; its last path segment becomes the entry ID
 * @param {object} site - site entry, forwarded to getPhotos()
 * @returns {Promise<object>} the scraped release
 */
async function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = { url };

	[release.entryId] = url.split('/').slice(-1);
	release.title = $('.video-wrapper meta[itemprop="name"]').attr('content');
	release.description = $('.video-wrapper meta[itemprop="description"]').attr('content');

	release.date = moment.utc($('.video-wrapper meta[itemprop="uploadDate"]').attr('content'), 'MM/DD/YYYY').toDate();
	release.actors = $('.content-wrapper .scene-models-list a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();

	const timestamp = $('.video-wrapper meta[itemprop="duration"]').attr('content');
	// match() returns null when the attribute holds no digits; guard before destructuring
	const durationParts = timestamp ? timestamp.match(/\d+/g) : null;

	if (durationParts) {
		const [minutes, seconds] = durationParts;
		release.duration = Number(minutes) * 60 + Number(seconds);
	}

	release.tags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
	release.likes = Number($('.content-desc #social-actions #likes').text());

	// poster fallbacks: og:image meta, then the first https://….jpg span in a script mentioning "poster", then the player image
	const posterScript = $('script:contains(poster)').html();
	const posterLink = posterScript?.slice(posterScript.indexOf('https://'), posterScript.indexOf('.jpg') + 4);
	release.poster = $('meta[property="og:image"]').attr('content') || posterLink || $('#trailer_player_finished img').attr('src');

	const trailer = $('meta[property="og:video"]').attr('content') || $('#videojs-trailer source').attr('src');

	if (trailer) release.trailer = { src: trailer };

	release.photos = await getPhotos(release.entryId, site);
	release.movie = $('a[data-track="FULL MOVIE"]').attr('href');

	// a cheerio selection is always truthy, even when empty; check its length instead
	const siteElement = $('.content-wrapper .logos-sites a');
	if (siteElement.length > 0) release.channel = slugify(siteElement.text(), '');

	return release;
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * The short `.model-facts` items are read as "Key: value" pairs and keyed by
 * the underscore-slugified label (e.g. `birth_place`, `hair_color`).
 *
 * @param {object} context - query context for the model page
 * @param {string} context.html - page markup, re-used to scrape the listed releases
 * @param {Function} context.q - single-element query helper
 * @param {Function} context.qa - multi-element query helper
 * @param {Function} context.qtx - text query helper
 * @returns {object} the scraped profile
 */
function scrapeProfile({ html, q, qa, qtx }) {
	const profile = {};

	const bio = qa('.model-facts li:not(.model-facts-long)', true).reduce((acc, fact) => {
		const [key, value] = fact.split(':');
		// a fact without a colon has no value; treat it like an empty one instead of throwing
		const trimmedValue = value === undefined ? '' : value.trim();

		if (trimmedValue.length === 0 || trimmedValue === '-') return acc;

		return { ...acc, [slugify(key, '_')]: trimmedValue };
	}, {});

	// leading integer of a fact value, or null when the value does not start with digits
	const leadingNumber = (text) => {
		const match = text.match(/^\d+/);

		return match ? Number(match[0]) : null;
	};

	const description = q('.model-facts-long', true);
	if (description) profile.description = description;

	const aliases = qtx('.aka')?.split(/,\s*/);
	if (aliases) profile.aliases = aliases;

	if (bio.birth_place) profile.birthPlace = bio.birth_place;
	if (bio.nationality) profile.nationality = bio.nationality;

	if (bio.measurements) {
		const [bust, waist, hip] = bio.measurements.split('-');

		// bust stays a string; waist and hip are stored as numbers
		if (bust) profile.bust = bust;
		if (waist) profile.waist = Number(waist);
		if (hip) profile.hip = Number(hip);
	}

	const weight = bio.weight ? leadingNumber(bio.weight) : null;
	const height = bio.height ? leadingNumber(bio.height) : null;

	if (weight !== null) profile.weight = weight;
	if (height !== null) profile.height = height;

	if (bio.hair_color) profile.hair = bio.hair_color;
	if (bio.eye_color) profile.eye = bio.eye_color;

	if (bio.tattoos) {
		profile.hasTattoos = true;
		profile.tattoos = bio.tattoos;
	}

	if (bio.piercings) {
		profile.hasPiercings = true;
		profile.piercings = bio.piercings;
	}

	// the avatar URL is read from the image's data-src attribute; skip it when the image is missing
	const avatarEl = q('.img-pornstar img');
	if (avatarEl) profile.avatar = avatarEl.dataset.src;

	profile.releases = scrapeLatest(html);

	return profile;
}
|
||||
|
||||
/**
 * Fetch and scrape a page of latest scenes.
 *
 * Hosts containing "private.com" list scenes at `/<page>/`; every other host
 * uses `/scenes/<page>/`.
 *
 * @param {object} site - site entry; `url` is used as the base
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]>} scraped releases
 */
async function fetchLatest(site, page = 1) {
	const { hostname } = new URL(site.url);

	// includes() does a literal comparison; match() with a string would compile it to a regex where "." is a wildcard
	const listingPath = hostname.includes('private.com') ? `${page}/` : `scenes/${page}/`;
	const res = await bhttp.get(`${site.url}/${listingPath}`);

	return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and hand its markup to scrapeScene().
 *
 * @param {string} url - scene URL
 * @param {object} site - site entry
 * @returns {Promise<object>} the scraped release
 */
async function fetchScene(url, site) {
	const { body } = await bhttp.get(url);
	const html = body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
/**
 * Look an actor up through the site search and scrape the matching model page.
 *
 * A search result only counts when its slugified link text equals the
 * slugified actor name.
 *
 * @param {string} actorName - actor to search for
 * @returns {Promise<object|number|null>} the scraped profile, the response
 *   status when the model page request fails, or null when the search fails
 *   or no result matches
 */
async function fetchProfile(actorName) {
	const searchUrl = `https://www.private.com/search.php?query=${slugify(actorName, '+')}`;
	const searchRes = await geta(searchUrl, '.model h3 a');

	if (!searchRes.ok) {
		return null;
	}

	const wantedSlug = slugify(actorName);
	const match = searchRes.items.find(({ text }) => slugify(text) === wantedSlug);

	if (!match) {
		return null;
	}

	const profileRes = await get(match.el.href);

	if (profileRes.ok) {
		return scrapeProfile(profileRes.item);
	}

	return profileRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -4,49 +4,49 @@ const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
const {
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
scrapeLatestX,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
} = require('./mindgeek');
|
||||
|
||||
/**
 * Scrape releases from a classic listing page by parsing the `initialState`
 * object embedded in one of its script tags.
 *
 * @param {string} html - listing page markup
 * @param {object} site - site entry, forwarded to scrapeLatestX()
 * @returns {object[]} scraped releases
 */
function scrapeLatestClassic(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const script = $('script:contains("initialState")').html();

	const opening = 'initialState = {';
	const closing = '};';

	// keep the opening brace of the prefix and the closing brace of the suffix, drop the rest
	const openingIndex = script.indexOf(opening);
	const jsonStart = openingIndex + opening.length - 1;
	const jsonEnd = script.indexOf(closing, openingIndex) + closing.length - 1;

	const state = JSON.parse(script.slice(jsonStart, jsonEnd));
	const scenes = Object.values(state.entities.releases);

	return scenes.map(scene => scrapeLatestX(scene, site));
}
|
||||
|
||||
/**
 * Fetch a classic listing page and scrape its releases.
 *
 * @param {object} site - site entry; `url` is used as the base
 * @param {number} page - page number
 * @returns {Promise<object[]|null>} scraped releases, or null on a non-200 response
 */
async function fetchClassic(site, page) {
	const listingRes = await bhttp.get(`${site.url}/scenes?page=${page}`);

	if (listingRes.statusCode !== 200) {
		return null;
	}

	return scrapeLatestClassic(listingRes.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch latest releases, using the classic scraper for sites flagged with
 * `parameters.classic` and the shared fetchLatest() otherwise.
 *
 * @param {object} site - site entry
 * @param {number} [page=1] - page number
 * @returns {Promise<object[]|null>} whatever the selected fetcher resolves to
 */
async function fetchLatestWrap(site, page = 1) {
	const useClassic = site.parameters?.classic;

	return useClassic
		? fetchClassic(site, page)
		: fetchLatest(site, page);
}
|
||||
|
||||
/**
 * Fetch an actor profile through the shared fetchProfile(), passing
 * 'realitykings' as its second argument.
 *
 * @param {string} actorName - actor to look up
 * @returns {Promise<*>} whatever fetchProfile() resolves to
 */
async function networkFetchProfile(actorName) {
	const networkSlug = 'realitykings';

	return fetchProfile(actorName, networkSlug);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest: fetchLatestWrap,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -7,255 +7,255 @@ const slugify = require('../utils/slugify');
|
||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Collect photo sources from a photos page.
 *
 * Each photo becomes a two-item list: a rewritten source (`x_800` replaced by
 * `x_xl`, `_tn` removed) followed by the source exactly as found on the page.
 *
 * @param {string} html - photos page markup
 * @returns {string[][]} one [rewritten, original] pair per image
 */
function scrapePhotos(html) {
	const { qis } = ex(html, '#photos-page');
	const sources = qis('img');

	return sources.map((source) => {
		const rewritten = source.replace('x_800', 'x_xl').replace('_tn', '');

		return [rewritten, source];
	});
}
|
||||
|
||||
/**
 * Fetch a photos page and scrape its photo sources.
 *
 * @param {string} url - photos page URL
 * @returns {Promise<string[][]>} scraped photo pairs, or an empty list on a non-200 response
 */
async function fetchPhotos(url) {
	const photosRes = await bhttp.get(url);

	if (photosRes.statusCode !== 200) {
		return [];
	}

	return scrapePhotos(photosRes.body.toString(), url);
}
|
||||
|
||||
/**
 * Scrape every video item of a listing page into a release object.
 *
 * Items that link to a photo album are dropped from the result.
 *
 * @param {string} html - listing page markup
 * @param {object} [site] - site entry; `parameters.actors`, when set, overrides the scraped actors
 * @returns {object[]} scraped releases
 */
function scrapeAll(html, site) {
	const scrapeItem = ({ q, qa, qd, ql }) => {
		const title = q('.title, .i-title', true);

		// query string and hash are stripped from the stored URL
		const link = new URL(q('a').href);
		const itemUrl = `${link.origin}${link.pathname}`;

		// this is a photo album, not a scene (used for profiles)
		if (/photos\//.test(link)) {
			return null;
		}

		const release = { title, url: itemUrl };

		// of the last two path segments, the first one is the entry ID
		[release.entryId] = link.pathname.split('/').slice(-2);

		release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/) || qd('.dt-box', 'MMM.DD YYYY');
		release.actors = site?.parameters?.actors || qa('.model, .i-model', true);
		release.duration = ql('.i-amount, .amount');

		const posterEl = q('.item-img img');

		if (posterEl) {
			release.poster = `https:${posterEl.src}`;
		}

		if (posterEl?.dataset.gifPreview) {
			release.teaser = { src: `https:${posterEl.dataset.gifPreview}` };
		}

		return release;
	};

	return exa(html, '.container .video, .container-fluid .video')
		.map(scrapeItem)
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * `ex`, `qu.*` and `fetchPhotos` are project helpers defined elsewhere in the file.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - URL of the scene page; the entry ID is read from its path.
 * @param {Object} site - Site entry; `site.parameters.actors` is the last-resort cast.
 * @returns {Promise<Object>} Release with entryId, title, description, actors, tags,
 *   date, duration, poster, photos, trailer and, when a score is present, rating.
 */
async function scrapeScene(html, url, site) {
	const { qu } = ex(html, '#videos-page, #content');
	const release = {};

	[release.entryId] = new URL(url).pathname.split('/').slice(-2);

	release.title = qu.q('h2.text-uppercase, h2.title, #breadcrumb-top + h1', true)
		|| qu.q('h1.m-title', true)?.split(/»|\//).slice(-1)[0].trim();
	release.description = qu.text('.p-desc, .desc');

	release.actors = qu.all('.value a[href*=models], .value a[href*=performer], .value a[href*=teen-babes]', true);

	if (release.actors.length === 0) {
		// no linked performers; fall back to the free-text "Featuring" stat
		const actorEl = qu.all('.stat').find(stat => /Featuring/.test(stat.textContent));
		const actorString = qu.text(actorEl);

		release.actors = actorString?.split(/,\band\b|,/g).map(actor => actor.trim()) || [];
	}

	if (release.actors.length === 0 && site.parameters?.actors) release.actors = site.parameters.actors;

	release.tags = qu.all('a[href*=tag]', true);

	const dateEl = qu.all('.value').find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
	release.date = qu.date(dateEl, null, 'MMMM Do, YYYY')
		|| qu.date('.date', 'MMMM Do, YYYY', /\w+ \d{1,2}\w+, \d{4}/)
		|| qu.date('.info .holder', 'MM/DD/YYYY', /\d{2}\/\d{2}\/\d{4}/);

	// class selector, consistent with the date lookup above; the bare tag selector 'value' matched no stat cells
	const durationEl = qu.all('.value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
	release.duration = qu.dur(durationEl);

	release.poster = qu.poster('video') || qu.img('.flowplayer img') || qu.img('img'); // _800.jpg is larger than _xl.jpg in landscape
	const photosUrl = qu.url('.stat a[href*=photos]');

	if (photosUrl) {
		release.photos = await fetchPhotos(photosUrl);
	} else {
		// pair each thumbnail with its presumed full-size variant (same path without '_tn')
		release.photos = qu.imgs('img[src*=ThumbNails], .p-photos .tn img').map(photo => [
			photo.replace('_tn', ''),
			photo,
		]);
	}

	const trailers = qu.all('a[href*=Trailers]');

	if (trailers) {
		release.trailer = trailers.map((trailer) => {
			const label = trailer.textContent.trim();
			const src = `https:${trailer.href}`;
			const format = label.match(/^\w+/)?.[0].toLowerCase();
			const qualityMatch = label.match(/\d+([a-zA-Z]+)?$/);

			// skip links whose label does not parse instead of throwing on a null match
			if (format !== 'mp4' || !qualityMatch) return null;

			return { src, quality: parseInt(qualityMatch[0], 10) };
		}).filter(Boolean);
	}

	// the rating box is not guaranteed to exist on every layout
	const stars = qu.q('.rate-box')?.dataset.score;
	if (stars) release.rating = { stars };

	return release;
}
|
||||
|
||||
/**
 * Find the profile URL of an actor on a model index page.
 *
 * @param {string} html - Markup of the model browse page.
 * @param {string} actorName - Name to match exactly against each link's `title`.
 * @returns {?string} The matching link's href, or null when no link matches.
 */
function scrapeModels(html, actorName) {
	const { qa } = ex(html);
	const model = qa('.model a').find(link => link.title === actorName);

	return model?.href || null;
}
|
||||
|
||||
/**
 * Collect an actor's releases by following the paginated scene listing.
 *
 * @param {string} url - Listing page to fetch.
 * @param {Object[]} [accReleases=[]] - Releases gathered from previous pages.
 * @returns {Promise<?Object[]>} All releases found, or null when a page request fails.
 */
async function fetchActorReleases(url, accReleases = []) {
	const res = await get(url);

	if (res.ok) {
		const releases = accReleases.concat(scrapeAll(res.item.document.body.outerHTML));
		const nextPage = res.item.qu.url('.next-pg');

		if (nextPage && new URL(nextPage).searchParams.has('page')) { // last page has 'next' button linking to join page
			return fetchActorReleases(nextPage, releases);
		}

		return releases;
	}

	return null;
}
|
||||
|
||||
/**
 * Scrape an actor profile page.
 *
 * @param {string} html - Markup of the profile page.
 * @param {string} actorUrl - URL of the profile page, used to build the scenes URL.
 * @param {boolean} withReleases - When truthy, also fetch the actor's releases.
 * @returns {Promise<Object>} Profile; `gender` is always 'female', other fields
 *   are set only when the corresponding stat is present.
 */
async function scrapeProfile(html, actorUrl, withReleases) {
	const { q, qa, qi } = ex(html, '#model-page');
	const profile = { gender: 'female' };

	// turn the label/value stat rows into a { slugified_label: value } map
	const bio = qa('.stat').reduce((acc, el) => {
		const label = q(el, '.label', true);

		// a stat row without a label cannot be keyed; skip it rather than throw
		if (!label) return acc;

		const key = slugify(label.slice(0, -1), '_'); // drop the label's last character (presumably a trailing colon)
		const value = q(el, '.value', true);

		return {
			...acc,
			[key]: value,
		};
	}, {});

	if (bio.location) profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic'); // see Laura Lion

	if (bio.birthday) {
		const birthMonth = bio.birthday.match(/^\w+/)?.[0].toLowerCase();
		const birthDay = bio.birthday.match(/\d+/)?.[0];

		// only record the birthday when both parts could be parsed
		if (birthMonth && birthDay) profile.birthday = [birthMonth, birthDay]; // currently unused, not to be confused with birthdate
	}

	if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
	if (bio.hair_color) profile.hair = bio.hair_color;

	if (bio.height) profile.height = heightToCm(bio.height);
	if (bio.weight) profile.weight = lbsToKg(bio.weight);

	if (bio.bra_size) profile.bust = bio.bra_size;
	if (bio.measurements) [, profile.waist, profile.hip] = bio.measurements.split('-');

	if (bio.occupation) profile.occupation = bio.occupation;

	const avatar = qi('img');
	if (avatar) profile.avatar = avatar;

	if (withReleases) {
		const { origin, pathname } = new URL(actorUrl);
		profile.releases = await fetchActorReleases(`${origin}${pathname}/scenes?page=1`);
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of a site's latest releases.
 *
 * @param {Object} site - Site entry; `site.parameters.path` overrides the default listing path.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|number>} Scraped releases on HTTP 200, otherwise the HTTP status code.
 */
async function fetchLatest(site, page = 1) {
	const latestPath = site.parameters?.path || '/big-boob-videos';
	const url = `${site.url}${latestPath}?page=${page}`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeAll(res.body.toString(), site);
	}

	return res.statusCode;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entry, passed through to the scraper.
 * @returns {Promise<?Object>} Scraped release, or null when the response is not HTTP 200.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeScene(res.body.toString(), url, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Look up an actor by browsing the alphabetical model indexes of the known sources.
 *
 * Each index is walked page by page until the actor is found or the index stops
 * answering with HTTP 200, after which the next source is tried from page 1.
 *
 * @param {string} actorName - Actor name; its first letter selects the index.
 * @param {string} scraperSlug - Passed through on recursion; not otherwise used here.
 * @param {Object} site - Passed through on recursion; not otherwise used here.
 * @param {Object} [include] - Options; `include.scenes` enables release fetching.
 * @param {number} [page=1] - Index page to request.
 * @param {number} [source=0] - Position in the list of sources.
 * @returns {Promise<?Object>} Scraped profile, or null when the actor cannot be found or fetched.
 */
async function fetchProfile(actorName, scraperSlug, site, include, page = 1, source = 0) {
	const letter = actorName.charAt(0).toUpperCase();

	const sources = [
		`https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`,
		`https://www.50plusmilfs.com/xxx-milf-models/browse/${letter}/?page=${page}`,
	];

	const url = sources[source];

	// redirects are not followed, so a redirecting page shows up as a non-200 status
	const res = await bhttp.get(url, {
		followRedirects: false,
	});

	if (res.statusCode === 200) {
		const actorUrl = scrapeModels(res.body.toString(), actorName);

		if (actorUrl) {
			const actorRes = await bhttp.get(actorUrl);

			if (actorRes.statusCode === 200) {
				// `include` may be omitted by callers; treat that as "no releases"
				return scrapeProfile(actorRes.body.toString(), actorUrl, include?.scenes);
			}

			return null;
		}

		// not on this page, try the next page of the same source
		return fetchProfile(actorName, scraperSlug, site, include, page + 1, source);
	}

	if (sources[source + 1]) {
		return fetchProfile(actorName, scraperSlug, site, include, 1, source + 1);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -65,143 +65,143 @@ const freeones = require('./freeones');
|
||||
// const freeoneslegacy = require('./freeones_legacy');
|
||||
|
||||
module.exports = {
|
||||
releases: {
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
digitalplayground,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
hussiepass: hush,
|
||||
hushpass: hush,
|
||||
insex,
|
||||
interracialpass: hush,
|
||||
jayrock,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
kink,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
naughtyamerica,
|
||||
newsensations,
|
||||
nubiles,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pimpxxx: cherrypimps,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
realitykings,
|
||||
score,
|
||||
sexyhub: mindgeek,
|
||||
swallowsalon: julesjordan,
|
||||
teamskeet,
|
||||
twistys,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
whalemember,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
aziani,
|
||||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
blowpass,
|
||||
boobpedia,
|
||||
brattysis: nubiles,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
digitalplayground,
|
||||
dtfsluts: fullpornnetwork,
|
||||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
famedigital,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
girlfaction: fullpornnetwork,
|
||||
gloryholesecrets: aziani,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
hotcrazymess: nubiles,
|
||||
hushpass: hush,
|
||||
hussiepass: hush,
|
||||
iconmale,
|
||||
interracialpass: hush,
|
||||
interracialpovs: hush,
|
||||
jamesdeen: fullpornnetwork,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
score,
|
||||
seehimfuck: hush,
|
||||
sexyhub: mindgeek,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vixen,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
releases: {
|
||||
'21naturals': naturals,
|
||||
'21sextreme': sextreme,
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
assylum,
|
||||
aziani,
|
||||
babes,
|
||||
bamvisions,
|
||||
bang,
|
||||
bangbros,
|
||||
blowpass,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
digitalplayground,
|
||||
dogfart,
|
||||
dogfartnetwork: dogfart,
|
||||
evilangel,
|
||||
fakehub,
|
||||
famedigital,
|
||||
fantasymassage,
|
||||
fullpornnetwork,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
hussiepass: hush,
|
||||
hushpass: hush,
|
||||
insex,
|
||||
interracialpass: hush,
|
||||
jayrock,
|
||||
jesseloadsmonsterfacials,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
kink,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
naughtyamerica,
|
||||
newsensations,
|
||||
nubiles,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pimpxxx: cherrypimps,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
realitykings,
|
||||
score,
|
||||
sexyhub: mindgeek,
|
||||
swallowsalon: julesjordan,
|
||||
teamskeet,
|
||||
twistys,
|
||||
vivid,
|
||||
vixen,
|
||||
vogov,
|
||||
whalemember,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
'21sextury': sextury,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
anilos: nubiles,
|
||||
aziani,
|
||||
babes,
|
||||
baddaddypov: fullpornnetwork,
|
||||
bamvisions,
|
||||
bangbros,
|
||||
blacked: vixen,
|
||||
blackedraw: vixen,
|
||||
blowpass,
|
||||
boobpedia,
|
||||
brattysis: nubiles,
|
||||
brazzers,
|
||||
burningangel,
|
||||
cherrypimps,
|
||||
ddfnetwork,
|
||||
deeper: vixen,
|
||||
deeplush: nubiles,
|
||||
digitalplayground,
|
||||
dtfsluts: fullpornnetwork,
|
||||
evilangel,
|
||||
eyeontheguy: hush,
|
||||
fakehub,
|
||||
famedigital,
|
||||
freeones,
|
||||
gangbangcreampie: aziani,
|
||||
girlfaction: fullpornnetwork,
|
||||
gloryholesecrets: aziani,
|
||||
hergape: fullpornnetwork,
|
||||
homemadeanalwhores: fullpornnetwork,
|
||||
hotcrazymess: nubiles,
|
||||
hushpass: hush,
|
||||
hussiepass: hush,
|
||||
iconmale,
|
||||
interracialpass: hush,
|
||||
interracialpovs: hush,
|
||||
jamesdeen: fullpornnetwork,
|
||||
julesjordan,
|
||||
kellymadison,
|
||||
legalporno,
|
||||
men,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
mofos,
|
||||
mugfucked: fullpornnetwork,
|
||||
naughtyamerica,
|
||||
nfbusty: nubiles,
|
||||
nubilefilms: nubiles,
|
||||
nubiles,
|
||||
nubilesporn: nubiles,
|
||||
onlyprince: fullpornnetwork,
|
||||
pervertgallery: fullpornnetwork,
|
||||
pimpxxx: cherrypimps,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
private: privateNetwork,
|
||||
realitykings,
|
||||
score,
|
||||
seehimfuck: hush,
|
||||
sexyhub: mindgeek,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
tushy: vixen,
|
||||
tushyraw: vixen,
|
||||
twistys,
|
||||
vixen,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -5,176 +5,176 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Derive a title from a scene URL path.
 *
 * Takes the second-to-last path segment, splits it on underscores and
 * capitalizes the first letter of every word.
 *
 * @param {string} pathname - URL path such as '/a/b/some_title/123'.
 * @returns {string} Title such as 'Some Title'.
 */
function extractTitle(pathname) {
	return pathname
		.split('/')
		.slice(-2)[0]
		.split('_')
		.map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`)
		.join(' ');
}
|
||||
|
||||
/**
 * Split a cast string into individual actor names.
 *
 * Names are separated by commas or the standalone word "and" (case-insensitive).
 * Segments containing an ellipsis are dropped, as are empty segments.
 *
 * @param {string} str - Cast string such as 'Alice, Bob and Carol'.
 * @returns {string[]} Trimmed, non-empty actor names.
 */
function extractActors(str) {
	return str
		.split(/,|\band\b/ig)
		.filter(actor => !/\.{3}/.test(actor))
		.map(actor => actor.trim())
		.filter(actor => actor.length > 0);
}
|
||||
|
||||
/**
 * Scrape the releases from an updates listing.
 *
 * @param {string} html - Markup of the updates listing.
 * @param {Object} site - Site entry, attached to every release.
 * @returns {Object[]} One release per list item, with entryId, url, title, date,
 *   poster, photos and actors.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

	return scenes.map((scene) => {
		const release = { site };

		const link = scene.querySelector('.info a');
		const poster = scene.querySelector('img');
		// pass the href explicitly instead of relying on the anchor's implicit string conversion
		const { pathname } = new URL(link.href);

		[release.entryId] = poster.id.match(/\d+/);

		release.url = `https://www.teamskeet.com${pathname}`;
		release.title = extractTitle(pathname);

		release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

		// build five image URLs by swapping in 01.jpg through 05.jpg; the dot is escaped so only a real '.jpg' suffix matches
		const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
		[release.poster] = photos;
		release.photos = photos.slice(1);

		const actors = scene.querySelector('div span[rel="test"]').textContent;
		release.actors = extractActors(actors);

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a scene page.
 *
 * The page title is expected to have the form 'actors | title | channel'.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site entry, attached to the release.
 * @param {string} url - Scene URL, stored on the release as-is.
 * @returns {Object} Release with entryId, description, url, title, actors, channel,
 *   tags, date, photos and, when available, poster and trailer.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
	release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;
	const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

	release.url = url;
	release.title = title;
	release.actors = extractActors(actors);
	release.channel = channel.toLowerCase();
	release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

	const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
	release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

	// poster and trailer are optional, so a page without them must not abort the scrape
	const poster = document.querySelector('video')?.poster;
	if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

	// dot escaped so only a real '.jpg' suffix is matched
	const siteId = document.querySelector('#story-and-tags img').src.match(/\w+\.jpg/)[0].replace('.jpg', '');
	const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

	release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

	const trailer = document.querySelector('div.right.gray a')?.href;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
|
||||
|
||||
/**
 * Scrape a scene for sites using the 'A' layout, from either a listing item or a full page.
 *
 * @param {?string} html - Markup of a scene page; only parsed when `sceneX` is not given.
 * @param {Object} site - Site entry, attached to the release.
 * @param {?Element} sceneX - Listing element to scrape instead of parsing `html`.
 * @param {string} [url] - Scene URL; used when scraping a full page.
 * @returns {Object} Release with description, date, actors, duration, url, title,
 *   poster, photos and entryId.
 */
function scrapeSceneA(html, site, sceneX, url) {
	const scene = sceneX || new JSDOM(html).window.document;
	const release = { site };

	release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();

	release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
	release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();

	const durationString = scene.querySelector('.time').textContent.trim();
	const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
	release.duration = moment.duration(duration).asSeconds();

	if (sceneX) {
		// listing item: details come from the item's direct link and thumbnails
		const titleEl = scene.querySelector(':scope > a');

		release.url = titleEl.href;
		release.entryId = titleEl.id; // replaced by the URL-derived ID below
		release.title = titleEl.title;

		const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
		release.poster = [poster.replace('bio_big', 'video'), poster];
		release.photos = photos;
	}

	if (!sceneX) {
		// full page: details come from the page heading and the video element
		release.title = scene.querySelector('.title span').textContent;
		release.url = url;

		release.poster = scene.querySelector('video').poster;
		release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
	}

	// the ID is the first path segment, or the second when the path starts with /scenes/
	const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
	release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;

	return release;
}
|
||||
|
||||
/**
 * Scrape every scene on an 'A' layout listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Site entry, attached to every release.
 * @returns {Object[]} One release per `.scenewrapper` element.
 */
function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;

	const scenes = Array.from(document.querySelectorAll('.scenewrapper'));

	return scenes.map(scene => scrapeSceneA(null, site, scene));
}
|
||||
|
||||
/**
 * Fetch one page of the newest updates for a site identified by `site.parameters.id`.
 *
 * @param {Object} site - Site entry; `site.parameters.id` selects the site filter.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<?Object[]>} Scraped releases, or null when the response is not HTTP 200.
 */
async function fetchLatestTeamSkeet(site, page = 1) {
	const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch the scenes listing of an 'A' layout site.
 *
 * @param {Object} site - Site entry; `${site.url}/scenes` is requested.
 * @returns {Promise<?Object[]>} Scraped releases, or null when the response is not HTTP 200.
 */
async function fetchLatestA(site) {
	const url = `${site.url}/scenes`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatestA(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch the latest releases with whichever scraper the site's parameters select.
 *
 * @param {Object} site - Site entry; `parameters.id` selects the network listing,
 *   `parameters.scraper === 'A'` selects the 'A' layout scraper.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<?Object[]>} Scraped releases, or null when no scraper applies.
 */
async function fetchLatest(site, page = 1) {
	// parameters are optional, matching how fetchScene reads them
	if (site.parameters?.id) {
		return fetchLatestTeamSkeet(site, page);
	}

	if (site.parameters?.scraper === 'A') {
		return fetchLatestA(site, page);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the scraper matching the site's layout.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entry; `parameters.scraper === 'A'` selects the 'A' layout scraper.
 * @returns {Promise<Object>} Scraped release.
 */
async function fetchScene(url, site) {
	const session = bhttp.session(); // resolve redirects
	const res = await session.get(url);

	if (site.parameters?.scraper === 'A') {
		return scrapeSceneA(res.body.toString(), site, null, url);
	}

	return scrapeScene(res.body.toString(), site, url);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared network scraper, fixed to the 'transangels' slug.
 *
 * @param {string} actorName - Actor to look up.
 * @returns {Promise<*>} Whatever the shared `fetchProfile` resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'transangels');
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared fetchProfile, fixed to the 'twistys' network slug.
 *
 * @param {string} actorName - Actor name to look up.
 * @returns {Promise<*>} Whatever fetchProfile resolves to.
 */
async function networkFetchProfile(actorName) {
	return fetchProfile(actorName, 'twistys');
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -8,128 +8,128 @@ const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = requir
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Map scene objects from the native videos API onto release objects.
 *
 * @param {Array<object>} scenes - Scene entries (`res.body.responseData` in fetchLatestNative).
 * @param {object} site - Site entry; `site.url` is prefixed to each scene URL.
 * @returns {Array<object>} One release per scene.
 */
function scrapeLatestNative(scenes, site) {
	return scenes.map((scene) => {
		const release = {};

		release.entryId = scene.id;
		release.url = `${site.url}${scene.url}`;

		release.title = scene.name;
		release.date = ed(scene.release_date, 'YYYY-MM-DD');
		release.duration = parseInt(scene.runtime, 10) * 60; // runtime is multiplied by 60 to get seconds

		release.actors = scene.cast?.map(actor => ({
			name: actor.stagename,
			// a cast entry without gender must not abort the whole page
			gender: actor.gender?.toLowerCase(),
			avatar: actor.placard,
		})) || [];

		release.stars = Number(scene.rating);
		release.poster = scene.placard_800 || scene.placard;

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a release from a native scene page.
 *
 * @param {object} item - Query context: raw `html` plus the `q` (single) and `qa` (all) query helpers.
 * @param {string} url - Scene URL; the third path segment is used as entry ID.
 * @param {object} _site - Unused.
 * @returns {object} The scraped release. Fields whose source element is missing are left unset.
 */
function scrapeSceneNative({ html, q, qa }, url, _site) {
	const release = { url };

	release.entryId = new URL(url).pathname.split('/')[2]; // eslint-disable-line prefer-destructuring

	release.title = q('.scene-h2-heading', true);
	release.description = q('.indie-model-p', true);

	const headings = qa('h5');

	const dateString = headings.find(el => /Released/.test(el.textContent))?.textContent;
	if (dateString) release.date = ed(dateString, 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

	const duration = headings.find(el => /Runtime/.test(el.textContent))?.textContent;
	const [hours, minutes] = duration?.match(/\d+/g) || [];

	if (minutes) release.duration = (hours * 3600) + (minutes * 60);
	else if (hours) release.duration = hours * 60; // scene shorter than 1hr, the 'hours' match holds the minutes

	release.actors = qa('h4 a[href*="/stars"], h4 a[href*="/celebs"]', true);
	release.tags = qa('h5 a[href*="/categories"]', true);

	// first media URL in the page is taken as poster, the second as trailer
	const [poster, trailer] = html.match(/https:\/\/content\.vivid\.com(.*)(\.jpg|\.mp4)/g) || [];
	release.poster = poster;

	if (trailer) {
		release.trailer = {
			src: trailer,
		};
	}

	const channel = q('h5 a[href*="/sites"]', true);
	if (channel) release.channel = channel.replace(/\.\w+/, '');

	return release;
}
|
||||
|
||||
/**
 * Fetch the latest releases, from the Gamma API when `site.parameters.useGamma` is set,
 * otherwise from the site's own videos API (50 per page, newest first).
 *
 * @param {object} site - Site entry.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<Array|null>} Releases, or null when the API does not answer with code 200.
 */
async function fetchLatestNative(site, page = 1) {
	if (site.parameters?.useGamma) {
		return fetchApiLatest(site, page);
	}

	const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
	const res = await bhttp.get(apiUrl, {
		decodeJSON: true,
	});

	// both the HTTP status and the code inside the JSON body must signal success
	if (res.statusCode === 200 && res.body.code === 200) {
		return scrapeLatestNative(res.body.responseData, site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch upcoming releases; only available through the Gamma API.
 *
 * @param {object} site - Site entry.
 * @returns {Promise<Array|null>} Result of fetchApiUpcoming, or null when `useGamma` is not set.
 */
async function fetchUpcomingNative(site) {
	if (site.parameters?.useGamma) {
		return fetchApiUpcoming(site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a scene, through the Gamma scraper when `site.parameters.useGamma` is set,
 * otherwise by scraping the native page.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site entry.
 * @param {object} [release] - Base release, forwarded to the Gamma scraper only.
 * @returns {Promise<object|number>} The scraped release, or the response status when the request failed.
 */
async function fetchSceneNative(url, site, release) {
	if (site.parameters?.useGamma) {
		return fetchScene(url, site, release);
	}

	const res = await get(url);

	return res.ok ? scrapeSceneNative(res.item, url, site) : res.status;
}
|
||||
|
||||
/**
 * Fetch a scene through the Gamma scraper and, for scenes dated on or before
 * `site.parameters.lastNative`, replace URL and date with the matching entry from the native search API.
 *
 * @param {string} url - Scene URL.
 * @param {object} site - Site entry.
 * @param {object} [release] - Base release forwarded to fetchScene.
 * @returns {Promise<*>} The (possibly amended) scene, or fetchScene's result unchanged when it is not a release object.
 */
async function fetchSceneWrapper(url, site, release) {
	const scene = await fetchScene(url, site, release);

	// nothing to refine when the scraper did not return a release object
	if (!scene || typeof scene !== 'object') {
		return scene;
	}

	if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
		// scene is probably still available on Vivid site, use search API to get URL and original date
		// encodeURIComponent, as the title is a query value and may contain '&', '=' or '#'
		const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURIComponent(scene.title)}`;
		const searchRes = await bhttp.get(searchUrl, {
			decodeJSON: true,
		});

		if (searchRes.statusCode === 200 && searchRes.body.code === 200) {
			const sceneSlug = slugify(scene.title);
			const sceneMatch = searchRes.body.responseData.find(item => slugify(item.name) === sceneSlug);

			if (sceneMatch) {
				return {
					...scene,
					url: `${site.url}${sceneMatch.url}`,
					date: ed(sceneMatch.release_date, 'YYYY-MM-DD'),
				};
			}
		}
	}

	return scene;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchScene: fetchSceneWrapper,
|
||||
};
|
||||
|
||||
@@ -8,246 +8,246 @@ const { get, post } = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
// maps the API's single-letter `sex` field onto gender names
const genderMap = {
	F: 'female',
	M: 'male',
	T: 'transsexual', // not yet observed
};
|
||||
|
||||
/**
 * Build an ordered list of poster source URLs from the landscape images, tallest image first.
 *
 * @param {Array<object>} poster - Image entries with `name`, `height`, `src` and optional `highdpi` variants.
 * @returns {Array<string>} Source URLs in order of preference; missing variants are left out.
 */
function getPosterFallbacks(poster) {
	return (poster || [])
		.filter(image => /landscape/i.test(image.name))
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.map((image) => {
			const sources = [image.src, image.highdpi?.['2x'], image.highdpi?.['3x']];

			// high DPI images for full HD source are huge, only prefer for smaller fallback sources
			return image.height === 1080 ? sources : sources.reverse();
		})
		.flat()
		.filter(Boolean); // drop high DPI variants the image does not provide
}
|
||||
|
||||
/**
 * Build teaser video sources from the landscape previews.
 *
 * @param {Array<object>} teaser - Video entries with `name`, `src`, `type` and `height`.
 * @returns {Array<{src: string, type: string, quality: number}>} One source per landscape video.
 */
function getTeaserFallbacks(teaser) {
	return (teaser || [])
		.filter(video => /landscape/i.test(video.name))
		.map((video) => {
			const height = Number(video.height);

			return {
				src: video.src,
				type: video.type,
				// a height of 353 is reported as 360; compared numerically so other heights stay untouched
				quality: height === 353 ? 360 : height,
			};
		});
}
|
||||
|
||||
/**
 * Build an ordered list of avatar source URLs, tallest image first and, per image,
 * highest DPI variant first.
 *
 * @param {Array<object>} avatar - Image entries with `height`, `src` and optional `highdpi` variants.
 * @returns {Array<string>} Source URLs in order of preference; missing variants are left out.
 */
function getAvatarFallbacks(avatar) {
	// copy before sorting so the caller's array keeps its order
	return [...(avatar || [])]
		.sort((imageA, imageB) => imageB.height - imageA.height)
		.map(image => [image.highdpi?.['3x'], image.highdpi?.['2x'], image.src])
		.flat()
		.filter(Boolean);
}
|
||||
|
||||
/**
 * Resolve the trailer sources of a scene: request a token for the 1080p preview file,
 * then post to the URL from the token response and collect one source per available quality.
 *
 * @param {object} scene - Scene data; `previewVideoUrl1080P` identifies the trailer file.
 * @param {object} site - Site entry; `site.url` is the API origin.
 * @param {string} url - Scene URL, sent as referer.
 * @returns {Promise<Array<{src: string, quality: number}>|null>} Trailer sources, or null when either request fails.
 */
async function getTrailer(scene, site, url) {
	const qualities = [360, 480, 720, 1080, 2160];

	const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
		file: scene.previewVideoUrl1080P,
		sizes: qualities.join('+'),
		type: 'trailer',
	}, { referer: url });

	const tokenPath = tokenRes.ok ? tokenRes.body?.data?.url : null;

	// without a token URL there is nothing to request
	if (!tokenPath) {
		return null;
	}

	const trailerUrl = `${site.url}/api${tokenPath}`;
	const trailersRes = await post(trailerUrl, null, { referer: url });

	if (trailersRes.ok) {
		return qualities
			.filter(quality => trailersRes.body[quality])
			.map(quality => ({
				src: trailersRes.body[quality].token,
				quality,
			}));
	}

	return null;
}
|
||||
|
||||
/**
 * Map scene objects from the videos API onto release objects.
 *
 * @param {Array<object>} scenes - Scene entries from the API.
 * @param {object|null} site - Site entry; when it has no URL, `origin` is used instead.
 * @param {string} [origin] - Fallback origin for building release URLs.
 * @returns {Array<object>} One release per scene.
 */
function scrapeAll(scenes, site, origin) {
	return scenes.map((scene) => {
		const release = {};

		release.title = scene.title;

		release.entryId = String(scene.newId);
		release.url = `${site?.url || origin}${scene.targetUrl}`;

		release.date = moment.utc(scene.releaseDate).toDate();
		release.shootDate = moment.utc(scene.shootDate).toDate();

		release.actors = scene.models;
		release.stars = Number(scene.textRating) / 2; // textRating is halved to get the stars value

		release.poster = getPosterFallbacks(scene.images.poster);
		release.teaser = getTeaserFallbacks(scene.previews.poster);

		return release;
	});
}
|
||||
|
||||
/**
 * Build the upcoming release from the API's `nextScene` entry.
 *
 * @param {object|null} scene - The next scene; ignored while `isPreReleasePeriod` is set.
 * @param {object} site - Site entry; `site.url` is prefixed to the scene's target URL.
 * @returns {Array<object>|null} A single-release array, or null when there is no usable scene.
 */
function scrapeUpcoming(scene, site) {
	if (!scene || scene.isPreReleasePeriod) return null;

	const release = {};

	// no title field is read here; derive one from the URL slug, e.g. '/some-title' -> 'Some Title'
	release.title = scene.targetUrl
		.slice(1)
		.split('-')
		.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
		.join(' ');

	release.url = `${site.url}${scene.targetUrl}`;

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.actors = scene.models;

	release.poster = getPosterFallbacks(scene.images.poster);
	release.teaser = getTeaserFallbacks(scene.previews.poster);

	// posters are URL strings, teasers are objects; take the ID from the first URL available
	const idSource = release.poster.find(Boolean) || release.teaser[0]?.src;
	release.entryId = idSource?.match(/\/(\d+)/)?.[1];

	return [release];
}
|
||||
|
||||
/**
 * Build a full release from the scene API response, including trailer sources.
 *
 * @param {object} data - API payload with `video` and `pictureset`.
 * @param {string} url - Scene URL.
 * @param {object} site - Site entry, forwarded to getTrailer.
 * @param {object} [baseRelease] - Previously scraped release; its actors take precedence when present.
 * @returns {Promise<object>} The scraped release.
 */
async function scrapeScene(data, url, site, baseRelease) {
	const scene = data.video;

	const release = {
		url,
		title: scene.title,
		description: scene.description,
		actors: baseRelease?.actors || scene.models,
		director: scene.directorNames,
		duration: scene.runLength,
		stars: scene.totalRateVal,
		tags: scene.tags,
	};

	// stringified like in scrapeAll, so both entry points yield the same ID type
	release.entryId = String(scene.newId);

	release.date = moment.utc(scene.releaseDate).toDate();
	release.shootDate = moment.utc(scene.shootDate).toDate();

	release.poster = getPosterFallbacks(scene.images.poster);
	release.photos = data.pictureset?.map(photo => photo.main[0].src) || [];

	release.teaser = getTeaserFallbacks(scene.previews.poster);

	const trailer = await getTrailer(scene, site, url);
	if (trailer) release.trailer = trailer;

	return release;
}
|
||||
|
||||
/**
 * Fetch the given pages of an actor's videos, three requests at a time, and scrape their releases.
 *
 * @param {Array<number>} pages - Page numbers to fetch.
 * @param {object} model - Actor data; `model.targetUrl` is the actor's API path.
 * @param {string} origin - Site origin.
 * @returns {Promise<Array<object>>} Releases from all pages; pages that fail contribute nothing.
 */
async function fetchActorReleases(pages, model, origin) {
	const releasesPerPage = await Promise.map(pages, async (page) => {
		const url = `${origin}/api${model.targetUrl}?page=${page}`;
		const res = await get(url);

		if (res.code === 200) {
			return scrapeAll(res.body.data.videos.videos, null, origin);
		}

		return [];
	}, { concurrency: 3 });

	return releasesPerPage.flat();
}
|
||||
|
||||
/**
 * Build an actor profile from the actor API response.
 *
 * @param {object} data - API payload with `model` and the first page of `videos`.
 * @param {string} origin - Site origin, used for release URLs and further pages.
 * @param {boolean} [withReleases] - When truthy, the remaining video pages are fetched as well.
 * @returns {Promise<object>} The scraped profile, including `releases`.
 */
async function scrapeProfile(data, origin, withReleases) {
	const releasesPerPage = 6; // page size used to derive the page count from the video total

	const model = data.model;
	const profile = {};

	// new Date(null) would yield 1970-01-01, so only set a birthdate when one is given
	if (model.dateOfBirth) profile.birthdate = new Date(model.dateOfBirth);
	profile.gender = genderMap[model.sex];

	profile.hair = model.hairColour;
	profile.nationality = model.nationality;

	if (model.biography?.trim().length > 0) profile.description = model.biography;

	// property names are spelled as read from the API object ('Measurment')
	if (model.cupSize && model.bustMeasurment) profile.bust = `${model.bustMeasurment}${model.cupSize}`;
	if (model.waistMeasurment) profile.waist = model.waistMeasurment;
	if (model.hipMeasurment) profile.hip = model.hipMeasurment;

	profile.avatar = getAvatarFallbacks(model.images.listing);
	profile.poster = getAvatarFallbacks(model.images.profile);
	profile.banner = getAvatarFallbacks(model.images.poster);

	const releases = scrapeAll(data.videos.videos, null, origin);

	if (withReleases) {
		const pageCount = Math.ceil(data.videos.count / releasesPerPage);
		// the first page is already in `data`, so request pages 2..pageCount
		const otherPages = Array.from({ length: pageCount - 1 }, (value, index) => index + 2);
		const otherReleases = await fetchActorReleases(otherPages, model, origin);

		profile.releases = [...releases, ...otherReleases];
	} else {
		profile.releases = releases;
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch a page of the latest releases from the site's videos API.
 *
 * @param {object} site - Site entry; `site.url` is the API origin.
 * @param {number} [page=1] - Page number.
 * @returns {Promise<Array<object>|number>} Releases, or the response code when it is not 200.
 */
async function fetchLatest(site, page = 1) {
	const url = `${site.url}/api/videos?page=${page}`;
	const res = await get(url);

	if (res.code === 200) {
		return scrapeAll(res.body.data.videos, site);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch the upcoming release from the `nextScene` entry of the site's API root.
 *
 * @param {object} site - Site entry; `site.url` is the API origin.
 * @returns {Promise<Array<object>|null|number>} Result of scrapeUpcoming, or the response code when it is not 200.
 */
async function fetchUpcoming(site) {
	const apiUrl = `${site.url}/api`;
	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeUpcoming(res.body.data.nextScene, site);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch a scene through the API endpoint mirroring its page URL (`/api` + pathname).
 *
 * @param {string} url - Scene page URL.
 * @param {object} site - Site entry.
 * @param {object} [baseRelease] - Previously scraped release, forwarded to scrapeScene.
 * @returns {Promise<object|number>} The scraped release, or the response code when it is not 200.
 */
async function fetchScene(url, site, baseRelease) {
	const { origin, pathname } = new URL(url);
	const apiUrl = `${origin}/api${pathname}`;

	const res = await get(apiUrl);

	if (res.code === 200) {
		return scrapeScene(res.body.data, url, site, baseRelease);
	}

	return res.code;
}
|
||||
|
||||
/**
 * Fetch an actor profile from `https://www.<scraperSlug>.com/api/<actor-slug>`.
 *
 * @param {string} actorName - Actor name; slugified for the API path.
 * @param {string} scraperSlug - Slug used as the site's domain name.
 * @param {object} [site] - Unused.
 * @param {object} [include] - Options; `include.scenes` requests all of the actor's releases.
 * @returns {Promise<object|null>} The scraped profile, or null when the response code is not 200.
 */
async function fetchProfile(actorName, scraperSlug, site, include) {
	const origin = `https://www.${scraperSlug}.com`;
	const actorSlug = slugify(actorName);
	const url = `${origin}/api/${actorSlug}`;
	const res = await get(url);

	if (res.code === 200) {
		// include is optional; treat a missing value as 'no extra releases'
		return scrapeProfile(res.body.data, origin, include?.scenes);
	}

	return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
fetchLatest,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
@@ -5,199 +5,199 @@ const { ex, ctxa } = require('../utils/q');
|
||||
// const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Derive the numeric key string from the `license_code: '...'` value embedded in the page.
 * decodeTrailerUrl uses the result to unscramble trailer URLs.
 *
 * @param {string} html - Page HTML containing the `license_code` entry.
 * @returns {string} The derived key, a string of decimal digits.
 */
function getLicenseCode(html) {
	const licensePrefix = 'license_code: \'';
	const licenseStart = html.indexOf(licensePrefix);
	const valueStart = licenseStart + licensePrefix.length;
	const licenseCode = html.slice(valueStart, html.indexOf('\'', valueStart));

	// every character after the first becomes a digit; zeros and non-digits count as 1
	let digits = '';

	for (let index = 1; index < licenseCode.length; index += 1) {
		digits += parseInt(licenseCode[index], 10) || 1;
	}

	// the two halves overlap by one digit; their absolute difference, counted twice and doubled, is the mask
	const half = Math.floor(digits.length / 2);
	const head = parseInt(digits.substring(0, half + 1), 10);
	const tail = parseInt(digits.substring(half), 10);
	const mask = String((Math.abs(tail - head) + Math.abs(head - tail)) * 2);

	const modulus = (parseInt('16px', 10) / 2) + 2; // 10
	let key = '';

	for (let position = 0; position < half + 1; position += 1) {
		for (let offset = 1; offset <= 4; offset += 1) {
			let digit = parseInt(licenseCode[position + offset], 10) + parseInt(mask[position], 10);

			if (digit >= modulus) digit -= modulus;
			key += digit;
		}
	}

	return key;
}
|
||||
|
||||
/**
 * Unscramble an encoded trailer URL with the key derived from the page's license code.
 * The first two path components are dropped, and the first 32 characters of the sixth
 * remaining component are permuted by swapping characters at key-derived positions.
 *
 * @param {string} html - Page HTML, passed to getLicenseCode.
 * @param {string} encodedTrailerUrl - Encoded URL as found in the page's flashvars.
 * @returns {string|null} The decoded URL, or null when the input is not a string or lacks the scrambled component.
 */
function decodeTrailerUrl(html, encodedTrailerUrl) {
	if (typeof encodedTrailerUrl !== 'string') return null;

	const segments = encodedTrailerUrl.split('/').slice(2);

	if (typeof segments[5] !== 'string') return null;

	const key = getLicenseCode(html);

	const scrambled = segments[5].substring(0, 2 * parseInt('16px', 10));
	let hash = scrambled;

	// walk backwards, swapping each position with one derived from the remaining key digits
	for (let position = hash.length - 1; position >= 0; position -= 1) {
		let target = position;

		for (let keyIndex = position; keyIndex < key.length; keyIndex += 1) {
			target += parseInt(key[keyIndex], 10);
		}

		while (target >= hash.length) {
			target -= hash.length;
		}

		let swapped = '';

		for (let index = 0; index < hash.length; index += 1) {
			if (index === position) {
				swapped += hash[target];
			} else {
				swapped += (index === target ? hash[position] : hash[index]);
			}
		}

		hash = swapped;
	}

	segments[5] = segments[5].replace(scrambled, hash);

	return segments.join('/');
}
|
||||
|
||||
/**
 * Scrape releases from the latest videos overview page, one per `.video-post` element.
 *
 * @param {string} html - Overview page HTML.
 * @returns {Array<object>} Scraped releases.
 */
function scrapeLatest(html) {
	const { document } = ex(html);

	return ctxa(document, '.video-post').map(({ q, qa, qd }) => {
		const release = {};

		// release.entryId = slugify(release.title);
		release.entryId = q('.ico-fav-0').dataset.favVideoId;

		const titleEl = q('.video-title-title');
		release.title = titleEl.title;
		release.url = titleEl.href;

		release.date = qd('.video-data em', 'MMM DD, YYYY');
		release.actors = qa('.video-model-list a', true);

		// the thumbnail's data attributes carry both the poster and the preview video
		const posterData = q('img.thumb').dataset;
		release.poster = posterData.src;
		release.trailer = posterData.preview;

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a release from a scene page, including trailer sources decoded from the player's flashvars.
 *
 * @param {string} html - Scene page HTML.
 * @param {string} url - Scene URL.
 * @returns {object} The scraped release.
 */
function scrapeScene(html, url) {
	const { qu } = ex(html);
	const release = { url };

	// release.entryId = slugify(release.title);
	release.entryId = qu.q('link[rel="canonical"]')?.href.match(/\d+/)?.[0];

	release.title = qu.meta('meta[property="og:title"]') || qu.q('.video-page-header h1', true);
	release.description = qu.meta('meta[property="og:description"]') || qu.q('.info-video-description', true);

	release.date = qu.date('.info-video-details li:first-child span', 'MMM DD, YYYY');
	release.duration = qu.dur('.info-video-details li:nth-child(2) span');

	release.actors = qu.all('.info-video-models a', true);
	release.tags = qu.all('.info-video-category a', true);

	release.photos = qu.urls('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
	release.poster = qu.meta('meta[property="og:image"]');

	if (!release.poster) {
		// fall back to the first .jpg URL following the player's preview_url entry
		const previewStart = html.indexOf('preview_url');
		release.poster = html.slice(html.indexOf('http', previewStart), html.indexOf('.jpg', previewStart) + 4);
	}

	const varsPrefix = 'flashvars = {';
	const varsStart = html.indexOf(varsPrefix);
	const varsString = html.slice(varsStart + varsPrefix.length, html.indexOf('};', varsStart));

	const vars = varsString.split(',').reduce((acc, item) => {
		const [prop, value] = item.split(': ');

		// skip fragments that are not `key: value` pairs
		if (value !== undefined) acc[prop.trim()] = value.trim().replace(/'/g, '');

		return acc;
	}, {});

	// one trailer source per video URL the player defines; `<key>_text` holds its quality label
	release.trailer = ['video_url', 'video_alt_url', 'video_alt_url2', 'video_alt_url3', 'video_alt_url4']
		.filter(key => vars[key])
		.map(key => ({
			src: decodeTrailerUrl(html, vars[key]),
			quality: parseInt(vars[`${key}_text`], 10),
		}));

	return release;
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), url);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -5,86 +5,86 @@ const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.url);
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.url);
|
||||
|
||||
const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
|
||||
const videos = Array.from(document.querySelectorAll('.video-releases-list')).slice(-1)[0];
|
||||
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
|
||||
release.url = `${origin}${scene.querySelector(':scope > a').href}`;
|
||||
release.entryId = scene.dataset.videoId;
|
||||
release.title = scene.querySelector('.card-title').textContent;
|
||||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
|
||||
release.url = `${origin}${scene.querySelector(':scope > a').href}`;
|
||||
release.entryId = scene.dataset.videoId;
|
||||
release.title = scene.querySelector('.card-title').textContent;
|
||||
release.date = moment.utc(scene.dataset.date, 'MMMM DD, YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.actors a'), el => el.textContent);
|
||||
|
||||
release.poster = `https:${scene.querySelector('.single-image').src}`;
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
|
||||
release.poster = `https:${scene.querySelector('.single-image').src}`;
|
||||
release.photos = Array.from(scene.querySelectorAll('.rollover-thumbs img'), el => `https:${el.dataset.src}`);
|
||||
|
||||
const trailerEl = scene.querySelector('source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
|
||||
const trailerEl = scene.querySelector('source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.dataset.src };
|
||||
|
||||
return release;
|
||||
});
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene(html, site, url) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { site };
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { site };
|
||||
|
||||
const scene = document.querySelector('#t2019-2col');
|
||||
const scene = document.querySelector('#t2019-2col');
|
||||
|
||||
release.url = url;
|
||||
release.title = scene.querySelector('.t2019-stitle').textContent.trim();
|
||||
release.description = scene.querySelector('#t2019-description').textContent.trim();
|
||||
release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);
|
||||
release.url = url;
|
||||
release.title = scene.querySelector('.t2019-stitle').textContent.trim();
|
||||
release.description = scene.querySelector('#t2019-description').textContent.trim();
|
||||
release.actors = Array.from(scene.querySelectorAll('#t2019-models a'), el => el.textContent);
|
||||
|
||||
const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));
|
||||
const durationEls = Array.from(scene.querySelectorAll('#t2019-stime span'));
|
||||
|
||||
if (durationEls.length > 1) {
|
||||
release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
|
||||
release.duration = Number(durationEls[1].textContent.match(/\d+/)[0]) * 60;
|
||||
} else {
|
||||
release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
|
||||
}
|
||||
if (durationEls.length > 1) {
|
||||
release.date = moment.utc(durationEls[0].textContent, 'MMMM DD, YYYY').toDate();
|
||||
release.duration = Number(durationEls[1].textContent.match(/\d+/)[0]) * 60;
|
||||
} else {
|
||||
release.duration = Number(durationEls[0].textContent.match(/\d+/)[0]) * 60;
|
||||
}
|
||||
|
||||
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);
|
||||
release.photos = Array.from(scene.querySelectorAll('#t2019-main .t2019-thumbs img'), el => `https:${el.src}`);
|
||||
|
||||
const posterEl = scene.querySelector('#no-player-image');
|
||||
const videoEl = scene.querySelector('video');
|
||||
const posterEl = scene.querySelector('#no-player-image');
|
||||
const videoEl = scene.querySelector('video');
|
||||
|
||||
if (posterEl) release.poster = `https:${posterEl.src}`;
|
||||
else if (videoEl) release.poster = `https:${videoEl.poster}`;
|
||||
if (posterEl) release.poster = `https:${posterEl.src}`;
|
||||
else if (videoEl) release.poster = `https:${videoEl.poster}`;
|
||||
|
||||
const trailerEl = scene.querySelector('#t2019-video source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.src };
|
||||
const trailerEl = scene.querySelector('#t2019-video source');
|
||||
if (trailerEl) release.trailer = { src: trailerEl.src };
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}?page=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
const url = `${site.url}?page=${page}`;
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
}
|
||||
|
||||
return [];
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), site, url);
|
||||
}
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeScene(res.body.toString(), site, url);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
||||
@@ -5,31 +5,31 @@ const bhttp = require('bhttp');
|
||||
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
const release = await scrapeScene(res.body.toString(), url, site);
|
||||
const release = await scrapeScene(res.body.toString(), url, site);
|
||||
|
||||
const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
|
||||
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
|
||||
// const siteUrl = siteDomain && `https://www.${siteDomain}`;
|
||||
const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
|
||||
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
|
||||
// const siteUrl = siteDomain && `https://www.${siteDomain}`;
|
||||
|
||||
release.channel = siteSlug;
|
||||
release.director = 'Mason';
|
||||
release.channel = siteSlug;
|
||||
release.director = 'Mason';
|
||||
|
||||
return release;
|
||||
return release;
|
||||
}
|
||||
|
||||
function getActorReleasesUrl(actorPath, page = 1) {
|
||||
return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
|
||||
return `https://www.xempire.com/en/videos/xempire/latest/${page}/All-Categories/0${actorPath}`;
|
||||
}
|
||||
|
||||
async function networkFetchProfile(actorName, scraperSlug, site, include) {
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
return fetchProfile(actorName, scraperSlug, null, getActorReleasesUrl, include);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
fetchLatest,
|
||||
fetchProfile: networkFetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user