Using new HTTP module with a dynamic rate limiter.
|
@ -197,6 +197,12 @@ module.exports = {
|
|||
'www.deeper.com',
|
||||
],
|
||||
},
|
||||
limits: {
|
||||
default: {
|
||||
interval: 50,
|
||||
concurrency: 20,
|
||||
},
|
||||
},
|
||||
fetchAfter: [1, 'week'],
|
||||
missingDateLimit: 3,
|
||||
media: {
|
||||
|
|
|
@ -2208,6 +2208,11 @@
|
|||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
|
||||
},
|
||||
"bottleneck": {
|
||||
"version": "2.19.5",
|
||||
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
|
||||
"integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="
|
||||
},
|
||||
"brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
"blake2": "^4.0.0",
|
||||
"bluebird": "^3.7.2",
|
||||
"body-parser": "^1.19.0",
|
||||
"bottleneck": "^2.19.5",
|
||||
"canvas": "^2.6.1",
|
||||
"casual": "^1.6.2",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
|
|
After Width: | Height: | Size: 601 KiB |
After Width: | Height: | Size: 377 KiB |
After Width: | Height: | Size: 8.4 KiB |
After Width: | Height: | Size: 7.9 KiB |
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 32 KiB |
|
@ -643,7 +643,7 @@ const tagPosters = [
|
|||
['mff', 1, 'Anikka Albrite, Kelsi Monroe and Mick Blue for HardX'],
|
||||
['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways" for Jules Jordan'],
|
||||
['natural-boobs', 4, 'Miela (Marry Queen) in "Pure" for FemJoy'],
|
||||
['nurse', 0, 'Sarah Vandella in "Cum For Nurse Sarah" for Brazzers'],
|
||||
['nurse', 1, 'Mia Malkova in "Always Think Happy Thoughts" for Brazzers'],
|
||||
['oil', 2, 'Jade Kush for Passion HD'],
|
||||
['oral-creampie', 1, 'Valentina Nappi for Her Limit'],
|
||||
['orgy', 1, 'Megan Rain (DP), Morgan Lee (anal), Jessa Rhodes, Melissa Moore and Kimmy Granger in "Orgy Masters 8" for Jules Jordan'],
|
||||
|
@ -825,6 +825,7 @@ const tagPhotos = [
|
|||
['natural-boobs', 3, 'Violet Starr in "Violet Starr 1st Lesbian Anal" for LesbianX'],
|
||||
['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
|
||||
['natural-boobs', 2, 'Kylie Page for All Girl Massage'],
|
||||
['nurse', 0, 'Sarah Vandella in "Cum For Nurse Sarah" for Brazzers'],
|
||||
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
||||
['oil', 3, 'Vina Sky for Lubed'],
|
||||
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
||||
|
|
|
@ -420,15 +420,18 @@ async function storeFile(media) {
|
|||
} catch (error) {
|
||||
logger.warn(`Failed to store ${media.src}: ${error.message}`);
|
||||
|
||||
await fsPromises.unlink(media.file.path);
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
const res = await http.get(source.src, {
|
||||
headers: {
|
||||
...(source.referer && { referer: source.referer }),
|
||||
...(source.host && { host: source.host }),
|
||||
}, {
|
||||
},
|
||||
stream: true, // sources are fetched in parallel, don't gobble up memory
|
||||
transforms: [hashStream],
|
||||
destination: tempFileTarget,
|
||||
|
@ -642,7 +645,7 @@ async function storeMedias(baseMedias) {
|
|||
);
|
||||
}
|
||||
|
||||
const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
|
||||
const newMediaWithEntries = savedMedias.filter(Boolean).map((media, index) => curateMediaEntry(media, index));
|
||||
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
||||
|
||||
await bulkInsert('media', newMediaEntries);
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('@thependulum/bhttp');
|
||||
|
||||
const { post } = require('../utils/http');
|
||||
const http = require('../utils/http');
|
||||
const { extractDate } = require('../utils/qu');
|
||||
const { inchesToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
@ -84,7 +82,7 @@ function scrapeAll(scenes) {
|
|||
}
|
||||
|
||||
async function fetchActorReleases(actor) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
query: {
|
||||
bool: {
|
||||
|
@ -179,7 +177,7 @@ async function scrapeProfile(actor, include) {
|
|||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||
size: 50,
|
||||
from: (page - 1) * 50,
|
||||
query: {
|
||||
|
@ -269,7 +267,7 @@ async function fetchScene(url) {
|
|||
const encodedId = new URL(url).pathname.split('/')[2];
|
||||
const entryId = decodeId(encodedId);
|
||||
|
||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
},
|
||||
|
@ -279,7 +277,7 @@ async function fetchScene(url) {
|
|||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, context, include) {
|
||||
const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||
size: 5,
|
||||
sort: [{
|
||||
_score: {
|
||||
|
@ -306,8 +304,11 @@ async function fetchProfile({ name: actorName }, context, include) {
|
|||
},
|
||||
},
|
||||
}, {
|
||||
headers: {
|
||||
Authorization: `Basic ${authKey}`,
|
||||
}, { encodeJSON: true });
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
|
|
@ -41,7 +41,9 @@ function scrapeScene({ query }, url, channel) {
|
|||
}));
|
||||
|
||||
release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];
|
||||
release.poster = query.sourceSet('.player img', 'data-srcset');
|
||||
|
||||
const fallbackPoster = query.img('.player img');
|
||||
release.poster = query.sourceSet('.player img', 'data-srcset') || [fallbackPoster.replace('_crop', ''), fallbackPoster];
|
||||
|
||||
release.movie = {
|
||||
title: query.cnt('.movie a'),
|
||||
|
|
|
@ -7,7 +7,7 @@ const cheerio = require('cheerio');
|
|||
const moment = require('moment');
|
||||
|
||||
const logger = require('../logger')(__filename);
|
||||
const { ex, get } = require('../utils/q');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
|
@ -318,7 +318,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
|
|||
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
|
||||
|
||||
const url = getActorReleasesUrl(profilePath, page);
|
||||
const res = await get(url);
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (!res.ok) return [];
|
||||
|
||||
|
@ -333,14 +333,14 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
|
|||
}
|
||||
|
||||
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
|
||||
const { q } = ex(html);
|
||||
const { query } = qu.extract(html);
|
||||
|
||||
const avatar = q('img.actorPicture');
|
||||
const hair = q('.actorProfile .attribute_hair_color', true);
|
||||
const height = q('.actorProfile .attribute_height', true);
|
||||
const weight = q('.actorProfile .attribute_weight', true);
|
||||
const alias = q('.actorProfile .attribute_alternate_names', true);
|
||||
const nationality = q('.actorProfile .attribute_home', true);
|
||||
const avatar = query.el('img.actorPicture');
|
||||
const hair = query.cnt('.actorProfile .attribute_hair_color');
|
||||
const height = query.cnt('.actorProfile .attribute_height');
|
||||
const weight = query.cnt('.actorProfile .attribute_weight');
|
||||
const alias = query.cnt('.actorProfile .attribute_alternate_names');
|
||||
const nationality = query.cnt('.actorProfile .attribute_home');
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
|
@ -358,7 +358,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
|
|||
profile.avatar = avatars;
|
||||
}
|
||||
|
||||
profile.description = q('.actorBio p:not(.bioTitle)', true);
|
||||
profile.description = query.cnt('.actorBio p:not(.bioTitle)');
|
||||
|
||||
if (hair) profile.hair = hair.split(':')[1].trim();
|
||||
if (height) profile.height = Number(height.match(/\d+/)[0]);
|
||||
|
|
|
@ -129,7 +129,9 @@ async function fetchProfile(baseActor, entity, include) {
|
|||
const searchRes = await http.post('https://tour.hitzefrei.com/search-preview', {
|
||||
q: baseActor.name,
|
||||
}, {
|
||||
headers: {
|
||||
'Accept-Language': 'en-US',
|
||||
},
|
||||
});
|
||||
|
||||
if (searchRes.ok) {
|
||||
|
|
|
@ -115,7 +115,7 @@ async function scrapeSceneAlt({ query }, url, channel, session) {
|
|||
release.trailer = query.video();
|
||||
|
||||
if (!release.trailer) {
|
||||
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, null, { useSession: session });
|
||||
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
||||
|
||||
if (trailerRes.ok) {
|
||||
release.trailer = trailerRes.body;
|
||||
|
@ -153,7 +153,7 @@ async function fetchLatest(site, page = 1) {
|
|||
|
||||
async function fetchScene(url, site) {
|
||||
const session = http.session();
|
||||
const res = await qu.get(url, null, null, { useSession: session });
|
||||
const res = await qu.get(url, null, null, { session });
|
||||
|
||||
if (res.ok) {
|
||||
if (site.parameters?.scraper === 'alt') {
|
||||
|
|
|
@ -23,7 +23,7 @@ async function fetchTrailerLocation(entryId, channel) {
|
|||
const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
|
||||
|
||||
try {
|
||||
const res = await http.get(url, null, {
|
||||
const res = await http.get(url, {
|
||||
followRedirects: false,
|
||||
});
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ const cheerio = require('cheerio');
|
|||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
@ -82,7 +82,7 @@ async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
|
|||
async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
|
||||
|
||||
const res = await bhttp.get(albumUrl);
|
||||
const res = await http.get(albumUrl);
|
||||
const html = res.body.toString();
|
||||
|
||||
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
|
||||
|
@ -135,25 +135,25 @@ function getEntryId(html) {
|
|||
}
|
||||
|
||||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
return scenes.map(({ el, qu }) => {
|
||||
return scenes.map(({ el, query }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
|
||||
release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
|
||||
release.date = qu.date('.update_date', 'MM/DD/YYYY');
|
||||
release.url = query.url('.update_title a, .dvd_info > a, a ~ a');
|
||||
release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true);
|
||||
release.date = query.date('.update_date', 'MM/DD/YYYY');
|
||||
|
||||
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
||||
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
|
||||
|
||||
release.actors = qu.all('.update_models a', true);
|
||||
release.actors = query.all('.update_models a', true);
|
||||
|
||||
const dvdPhotos = qu.imgs('.dvd_preview_thumb');
|
||||
const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;
|
||||
const dvdPhotos = query.imgs('.dvd_preview_thumb');
|
||||
const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
|
||||
|
||||
[release.poster, ...release.photos] = dvdPhotos.length
|
||||
? dvdPhotos
|
||||
: Array.from({ length: photoCount }).map((value, index) => {
|
||||
const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
|
||||
const prefixedSrc = prefixUrl(src, site.url);
|
||||
const src = query.img('a img.thumbs', `src${index}_1x`) || query.img('a img.thumbs', `src${index}`) || query.img('a img.thumbs');
|
||||
const prefixedSrc = qu.prefixUrl(src, site.url);
|
||||
|
||||
if (src) {
|
||||
return [
|
||||
|
@ -183,7 +183,7 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
|||
return null;
|
||||
}).filter(Boolean);
|
||||
|
||||
const teaserScript = qu.html('script');
|
||||
const teaserScript = query.html('script');
|
||||
if (teaserScript) {
|
||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||
if (src) release.teaser = { src };
|
||||
|
@ -236,17 +236,17 @@ function scrapeUpcoming(html, site) {
|
|||
});
|
||||
}
|
||||
|
||||
async function scrapeScene({ html, qu }, url, site, include) {
|
||||
async function scrapeScene({ html, query }, url, site, include) {
|
||||
const release = { url, site };
|
||||
|
||||
release.entryId = getEntryId(html);
|
||||
release.title = qu.q('.title_bar_hilite', true);
|
||||
release.description = qu.q('.update_description', true);
|
||||
release.title = query.q('.title_bar_hilite', true);
|
||||
release.description = query.q('.update_description', true);
|
||||
|
||||
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||
release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||
|
||||
release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
||||
release.tags = qu.all('.update_tags a', true);
|
||||
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
||||
release.tags = query.all('.update_tags a', true);
|
||||
|
||||
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
||||
|
||||
|
@ -280,14 +280,14 @@ async function scrapeScene({ html, qu }, url, site, include) {
|
|||
|
||||
if (include.photos) release.photos = await getPhotos(release.entryId, site);
|
||||
|
||||
if (qu.exists('.update_dvds a')) {
|
||||
if (query.exists('.update_dvds a')) {
|
||||
release.movie = {
|
||||
url: qu.url('.update_dvds a'),
|
||||
title: qu.q('.update_dvds a', true),
|
||||
url: query.url('.update_dvds a'),
|
||||
title: query.q('.update_dvds a', true),
|
||||
};
|
||||
}
|
||||
|
||||
const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
||||
const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
||||
if (stars) release.stars = stars;
|
||||
|
||||
return release;
|
||||
|
@ -302,7 +302,7 @@ function scrapeMovie({ el, query }, url, site) {
|
|||
movie.channel = slugify(query.q('.update_date a', true), '');
|
||||
|
||||
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
||||
const sceneQus = ctxa(el, '.dvd_details');
|
||||
const sceneQus = qu.initAll(el, '.dvd_details');
|
||||
const scenes = scrapeAll(sceneQus, site);
|
||||
|
||||
const curatedScenes = scenes
|
||||
|
@ -332,7 +332,7 @@ function scrapeProfile(html, url, actorName, entity) {
|
|||
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
|
||||
const measurementsString = bio.match(/\w+-\d+-\d+/);
|
||||
|
||||
if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
|
||||
if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
|
||||
if (ageString) profile.age = Number(ageString[1]);
|
||||
|
||||
if (heightString) profile.height = heightToCm(heightString[0]);
|
||||
|
@ -354,7 +354,7 @@ function scrapeProfile(html, url, actorName, entity) {
|
|||
avatarEl.getAttribute('src'),
|
||||
]
|
||||
.filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
|
||||
.map(avatar => prefixUrl(avatar, entity.url));
|
||||
.map(avatar => qu.prefixUrl(avatar, entity.url));
|
||||
|
||||
if (avatarSources.length) profile.avatar = avatarSources;
|
||||
}
|
||||
|
@ -370,7 +370,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
|
|||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||
|
||||
// const res = await bhttp.get(url);
|
||||
const res = await geta(url, '.update_details');
|
||||
const res = await qu.getAll(url, '.update_details');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
||||
}
|
||||
|
@ -389,13 +389,13 @@ async function fetchUpcoming(site) {
|
|||
}
|
||||
|
||||
async function fetchScene(url, site, baseRelease, include) {
|
||||
const res = await get(url);
|
||||
const res = await qu.get(url);
|
||||
|
||||
return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
|
||||
}
|
||||
|
||||
async function fetchMovie(url, site) {
|
||||
const res = await get(url);
|
||||
const res = await qu.get(url);
|
||||
|
||||
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
|
||||
}
|
||||
|
|
|
@ -97,8 +97,10 @@ async function scrapeScene({ query, html }, url, baseRelease) {
|
|||
const token = query.meta('name=_token');
|
||||
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
||||
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
|
||||
headers: {
|
||||
'X-CSRF-Token': token,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
|
||||
|
@ -136,7 +138,9 @@ function scrapeProfile({ query }) {
|
|||
async function fetchLatest(channel, page = 1) {
|
||||
const url = `${channel.url}/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||
const res = await http.get(url, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok && res.body.status === 'success') {
|
||||
|
@ -157,7 +161,9 @@ async function fetchScene(url, channel, baseRelease) {
|
|||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
|
||||
headers: {
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
|
|
|
@ -6,7 +6,7 @@ const moment = require('moment');
|
|||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { ex, get } = require('../utils/q');
|
||||
const qu = require('../utils/q');
|
||||
|
||||
function titleExtractor(pathname) {
|
||||
const components = pathname.split('/')[2].split('-');
|
||||
|
@ -102,24 +102,24 @@ function scrapeScene(html, url, site) {
|
|||
}
|
||||
|
||||
async function fetchActorReleases(url) {
|
||||
const res = await get(url);
|
||||
const res = await qu.get(url);
|
||||
|
||||
return res.ok
|
||||
? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
||||
? res.item.query.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
||||
: [];
|
||||
}
|
||||
|
||||
async function scrapeProfile(html) {
|
||||
const { qu } = ex(html);
|
||||
const { query } = qu.extract(html);
|
||||
const profile = {};
|
||||
|
||||
profile.description = qu.q('.bio_about_text', true);
|
||||
profile.description = query.q('.bio_about_text', true);
|
||||
|
||||
const avatar = qu.q('img.performer-pic', 'src');
|
||||
const avatar = query.q('img.performer-pic', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
const releases = qu.urls('.scene-item > a:first-child');
|
||||
const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
||||
const releases = query.urls('.scene-item > a:first-child');
|
||||
const otherPages = query.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
||||
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
||||
|
||||
profile.releases = releases.concat(olderReleases.flat());
|
||||
|
|
|
@ -71,10 +71,10 @@ async function fetchLatest(channel, page = 1) {
|
|||
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
|
||||
|
||||
for (let i = 0; i < page - 1; i += 1) {
|
||||
await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
|
||||
await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
|
||||
}
|
||||
|
||||
const res = await http.get(url, headers, { useSession: session });
|
||||
const res = await http.get(url, { headers, session });
|
||||
|
||||
if (res.ok) {
|
||||
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
|
||||
|
|
|
@ -74,9 +74,14 @@ async function scrapeScene({ query }, url) {
|
|||
release.photos = query.imgs('.detail-grabs img');
|
||||
|
||||
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
|
||||
headers: {
|
||||
host,
|
||||
referer: url,
|
||||
}, { queueMethod: '5s' });
|
||||
},
|
||||
}, {
|
||||
interval: 5000,
|
||||
concurrency: 1,
|
||||
});
|
||||
|
||||
if (streamData.ok && streamData.body.status === 'success') {
|
||||
release.trailer = {
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
const Promise = require('bluebird');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, post } = require('../utils/http');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
const genderMap = {
|
||||
|
@ -45,13 +45,15 @@ function getAvatarFallbacks(avatar) {
|
|||
async function getTrailer(scene, site, url) {
|
||||
const qualities = [360, 480, 720, 1080, 2160];
|
||||
|
||||
const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
|
||||
const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, {
|
||||
file: scene.previewVideoUrl1080P,
|
||||
sizes: qualities.join('+'),
|
||||
type: 'trailer',
|
||||
}, {
|
||||
headers: {
|
||||
referer: url,
|
||||
origin: site.url,
|
||||
},
|
||||
});
|
||||
|
||||
if (!tokenRes.ok) {
|
||||
|
@ -59,7 +61,7 @@ async function getTrailer(scene, site, url) {
|
|||
}
|
||||
|
||||
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
||||
const trailersRes = await post(trailerUrl, null, { referer: url });
|
||||
const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } });
|
||||
|
||||
if (trailersRes.ok) {
|
||||
return qualities.map(quality => (trailersRes.body[quality] ? {
|
||||
|
@ -155,7 +157,7 @@ async function scrapeScene(data, url, site, baseRelease) {
|
|||
async function fetchActorReleases(pages, model, origin) {
|
||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||
const res = await get(url);
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||
|
@ -203,7 +205,7 @@ async function scrapeProfile(data, origin, withReleases) {
|
|||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
const url = `${site.url}/api/videos?page=${page}`;
|
||||
const res = await get(url);
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeAll(res.body.data.videos, site);
|
||||
|
@ -214,7 +216,7 @@ async function fetchLatest(site, page = 1) {
|
|||
|
||||
async function fetchUpcoming(site) {
|
||||
const apiUrl = `${site.url}/api`;
|
||||
const res = await get(apiUrl);
|
||||
const res = await http.get(apiUrl);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||
|
@ -227,7 +229,7 @@ async function fetchScene(url, site, baseRelease) {
|
|||
const { origin, pathname } = new URL(url);
|
||||
const apiUrl = `${origin}/api${pathname}`;
|
||||
|
||||
const res = await get(apiUrl);
|
||||
const res = await http.get(apiUrl);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||
|
@ -240,7 +242,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
|
|||
const origin = site.url;
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `${origin}/api/${actorSlug}`;
|
||||
const res = await get(url);
|
||||
const res = await http.get(url);
|
||||
|
||||
if (res.code === 200) {
|
||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
const config = require('config');
|
||||
const tunnel = require('tunnel');
|
||||
const bhttp = require('@thependulum/bhttp');
|
||||
const taskQueue = require('promise-task-queue');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
const defaultHeaders = {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
};
|
||||
|
||||
const defaultOptions = {
|
||||
responseTimeout: 30000,
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
proxy: {
|
||||
host: config.proxy.host,
|
||||
port: config.proxy.port,
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Decide whether a request to `url` should go through the configured proxy.
 *
 * @param {string} url - Absolute URL of the outgoing request.
 * @returns {boolean} `true` when proxying is enabled in the config and the
 *   URL's hostname is listed in `config.proxy.hostnames`, `false` otherwise.
 */
function useProxy(url) {
	if (config.proxy.enable) {
		// only the hostname is compared, protocol, port and path are ignored
		const target = new URL(url);

		return config.proxy.hostnames.includes(target.hostname);
	}

	return false;
}
|
||||
|
||||
const queue = taskQueue();
|
||||
const defaultQueueMethod = '20p';
|
||||
|
||||
/**
 * Perform a single HTTP request through bhttp (or a caller-supplied session)
 * and return the response decorated with convenience fields.
 *
 * @param {Object} task
 * @param {string} task.url - Request URL.
 * @param {string} [task.method='GET'] - HTTP verb, case-insensitive.
 * @param {*} [task.body] - Payload, only sent for POST, PUT and PATCH.
 * @param {Object} [task.headers] - Headers merged over the default headers.
 * @param {Object} [task.options] - Request options. Read here: `queueMethod`
 *   (logging only), `defaultHeaders` (`false` disables the default headers),
 *   `timeout`, `useSession`, `stream`, `destination` and `transforms`. The
 *   whole object is also spread into the options handed to the HTTP client.
 * @returns {Promise<Object>} The response's own enumerable properties plus
 *   `originalRes`, `html`, `json`, `pipe`, `ok`, `code` and `status`.
 */
async function handler({
	url,
	method = 'GET',
	body,
	headers = {},
	options = {},
}) {
	// default parameters only replace `undefined`; guard against an explicit `null`
	const opts = options ?? {};
	const verb = method.toUpperCase();
	const queueMethod = opts.queueMethod || defaultQueueMethod;

	if (body) {
		logger.silly(`${verb} ${url} with ${JSON.stringify(body)} ${queueMethod}`);
	} else {
		logger.silly(`${verb} ${url} ${queueMethod}`);
	}

	const reqOptions = {
		headers: {
			...(opts.defaultHeaders !== false && defaultHeaders),
			...headers,
		},
		...defaultOptions,
		...opts,
		// `timeout` is accepted as an alias for `responseTimeout`
		...(opts.timeout && { responseTimeout: opts.timeout }),
	};

	if (useProxy(url)) {
		reqOptions.agent = proxyAgent;
	}

	const client = opts.useSession || bhttp;

	const res = ['POST', 'PUT', 'PATCH'].includes(verb)
		? await client[method.toLowerCase()](url, body, reqOptions)
		: await client[method.toLowerCase()](url, reqOptions);

	if (opts.stream && opts.destination) {
		await pipeline(res, ...(opts.transforms || []), opts.destination);
	}

	const bodyIsBuffer = Buffer.isBuffer(res.body);

	return {
		...res,
		originalRes: res,
		html: bodyIsBuffer ? res.body.toString() : null,
		json: bodyIsBuffer ? null : res.body,
		// bind to the response so `result.pipe(dest)` does not lose its `this`
		pipe: typeof res.pipe === 'function' ? res.pipe.bind(res) : res.pipe,
		ok: res.statusCode >= 200 && res.statusCode <= 299,
		code: res.statusCode,
		status: res.statusCode,
	};
}
|
||||
|
||||
// Limits for each named queue method; callers pick one via `options.queueMethod`.
// NOTE(review): `interval` is presumably in seconds per promise-task-queue - confirm.
const queueMethods = {
	'20p': { concurrency: 20 },
	'1s': { interval: 1 },
	'5s': { interval: 5 },
};

Object.entries(queueMethods).forEach(([queueMethod, limits]) => {
	queue.define(queueMethod, handler, limits);

	// Events are namespaced by task type. The previous listener was registered for
	// 'http', a type that is never defined, so it could not fire.
	queue.on(`concurrencyReached:${queueMethod}`, () => {
		logger.silly('Queueing requests');
	});
});
|
||||
|
||||
/**
 * Queue a GET request.
 *
 * @param {string} url - Request URL.
 * @param {Object} [headers] - Request headers.
 * @param {Object} [options] - Request options; `queueMethod` selects the queue,
 *   falling back to the default queue method.
 * @returns {Promise<*>} Whatever the queue resolves with for the pushed task.
 */
async function get(url, headers, options) {
	const queueMethod = options?.queueMethod || defaultQueueMethod;
	const task = {
		method: 'GET',
		url,
		headers,
		options,
	};

	return queue.push(queueMethod, task);
}
|
||||
|
||||
/**
 * Queue a HEAD request.
 *
 * @param {string} url - Request URL.
 * @param {Object} [headers] - Request headers.
 * @param {Object} [options] - Request options; `queueMethod` selects the queue,
 *   falling back to the default queue method.
 * @returns {Promise<*>} Whatever the queue resolves with for the pushed task.
 */
async function head(url, headers, options) {
	const queueMethod = options?.queueMethod || defaultQueueMethod;
	const task = {
		method: 'HEAD',
		url,
		headers,
		options,
	};

	return queue.push(queueMethod, task);
}
|
||||
|
||||
/**
 * Queue a POST request.
 *
 * @param {string} url - Request URL.
 * @param {*} [body] - Request payload.
 * @param {Object} [headers] - Request headers.
 * @param {Object} [options] - Request options; `queueMethod` selects the queue,
 *   falling back to the default queue method.
 * @returns {Promise<*>} Whatever the queue resolves with for the pushed task.
 */
async function post(url, body, headers, options) {
	const queueMethod = options?.queueMethod || defaultQueueMethod;
	const task = {
		method: 'POST',
		url,
		body,
		headers,
		options,
	};

	return queue.push(queueMethod, task);
}
|
||||
|
||||
/**
 * Create a bhttp session whose requests share the given defaults.
 *
 * `bhttp.session` takes the default request options directly, so `options` is
 * spread at the top level rather than nested under an `options` key, where it
 * would be ignored.
 *
 * @param {Object} [headers] - Default headers for requests made in the session.
 * @param {Object} [options] - Default request options for the session.
 * @returns {Object} The session returned by `bhttp.session`.
 */
function session(headers, options) {
	return bhttp.session({
		...options,
		// only set `headers` when supplied, so defaults in `options` are not clobbered
		...(headers && { headers }),
	});
}
|
||||
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
head,
|
||||
session,
|
||||
};
|
|
@ -1,21 +1,23 @@
|
|||
'use strict';
|
||||
|
||||
const config = require('config');
|
||||
const bhttp = require('bhttp');
|
||||
const util = require('util');
|
||||
const stream = require('stream');
|
||||
const config = require('config');
|
||||
const tunnel = require('tunnel');
|
||||
const bhttp = require('@thependulum/bhttp');
|
||||
const taskQueue = require('promise-task-queue');
|
||||
const Bottleneck = require('bottleneck');
|
||||
const { JSDOM } = require('jsdom');
|
||||
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
const logger = require('../logger')(__filename);
|
||||
|
||||
const defaultHeaders = {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
};
|
||||
const pipeline = util.promisify(stream.pipeline);
|
||||
const limiters = {};
|
||||
|
||||
const defaultOptions = {
|
||||
responseTimeout: 30000,
|
||||
encodeJSON: true,
|
||||
headers: {
|
||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||
},
|
||||
};
|
||||
|
||||
const proxyAgent = tunnel.httpsOverHttp({
|
||||
|
@ -34,113 +36,114 @@ function useProxy(url) {
|
|||
return config.proxy.hostnames.includes(hostname);
|
||||
}
|
||||
|
||||
const queue = taskQueue();
|
||||
const defaultQueueMethod = '20p';
|
||||
function getLimiter(limit = {}) {
|
||||
const interval = limit.interval === undefined ? config.limits.default.interval : limit.interval;
|
||||
const concurrency = limit.concurrency === undefined ? config.limits.default.concurrency : limit.concurrency;
|
||||
|
||||
async function handler({
|
||||
url,
|
||||
method = 'GET',
|
||||
body,
|
||||
headers = {},
|
||||
options = {},
|
||||
}) {
|
||||
if (body) {
|
||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)} ${options.queueMethod || defaultQueueMethod}`);
|
||||
} else {
|
||||
logger.silly(`${method.toUpperCase()} ${url} ${options.queueMethod || defaultQueueMethod}`);
|
||||
if (!limiters[interval]?.[concurrency]) {
|
||||
limiters[interval] = limiters[interval] || {};
|
||||
|
||||
limiters[interval][concurrency] = new Bottleneck({
|
||||
minTime: interval,
|
||||
maxConcurrent: concurrency,
|
||||
});
|
||||
}
|
||||
|
||||
const reqOptions = {
|
||||
headers: {
|
||||
...(options?.defaultHeaders !== false && defaultHeaders),
|
||||
...headers,
|
||||
},
|
||||
return limiters[interval][concurrency];
|
||||
}
|
||||
|
||||
async function request(method = 'get', url, body, requestOptions = {}) {
|
||||
const http = requestOptions.session || bhttp;
|
||||
|
||||
const options = {
|
||||
...defaultOptions,
|
||||
...options,
|
||||
...(options?.timeout && { responseTimeout: options?.timeout }),
|
||||
...requestOptions,
|
||||
responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
|
||||
stream: !!requestOptions.destination,
|
||||
interval: requestOptions.interval || config.limits.default.interval,
|
||||
concurrency: requestOptions.concurrency || config.limits.default.concurrency,
|
||||
session: null,
|
||||
};
|
||||
|
||||
if (useProxy(url)) {
|
||||
reqOptions.agent = proxyAgent;
|
||||
options.agent = proxyAgent;
|
||||
}
|
||||
|
||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
|
||||
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
|
||||
logger.debug(`GET (${options.interval}ms/${options.concurrency}p) ${url}`);
|
||||
|
||||
if (options?.stream && options?.destination) {
|
||||
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
||||
const res = await (body
|
||||
? http[method](url, body, options)
|
||||
: http[method](url, options));
|
||||
|
||||
const resIsOk = res.statusCode >= 200 && res.statusCode <= 299;
|
||||
|
||||
if (options.destination) {
|
||||
// res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`));
|
||||
|
||||
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||
}
|
||||
|
||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||
if (Buffer.isBuffer(res.body)) {
|
||||
const html = res.body.toString();
|
||||
const window = new JSDOM(html).window;
|
||||
|
||||
return {
|
||||
...res,
|
||||
originalRes: res,
|
||||
body: html,
|
||||
html,
|
||||
json,
|
||||
pipe: res.pipe,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
code: res.statusCode,
|
||||
status: res.statusCode,
|
||||
document: window.document,
|
||||
window,
|
||||
ok: resIsOk,
|
||||
};
|
||||
}
|
||||
|
||||
queue.on('concurrencyReached:http', () => {
|
||||
logger.silly('Queueing requests');
|
||||
});
|
||||
|
||||
queue.define('20p', handler, {
|
||||
concurrency: 20,
|
||||
});
|
||||
|
||||
queue.define('1s', handler, {
|
||||
interval: 1,
|
||||
});
|
||||
|
||||
queue.define('5s', handler, {
|
||||
interval: 5,
|
||||
});
|
||||
|
||||
async function get(url, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'GET',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
return {
|
||||
...res,
|
||||
body: res.body,
|
||||
status: res.statusCode,
|
||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||
};
|
||||
}
|
||||
|
||||
async function head(url, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'HEAD',
|
||||
url,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
/**
 * Queue an HTTP request on the rate limiter that matches its options.
 *
 * @param {string} [method='get'] - HTTP verb, forwarded unchanged to `request`.
 * @param {string} url - Target URL.
 * @param {*} body - Request payload, or a falsy value for requests without one.
 * @param {Object} [options] - Request options; `getLimiter` reads `interval` and `concurrency` from it.
 * @returns {Promise<*>} Settles with whatever the scheduled `request` call settles with.
 */
async function scheduleRequest(method = 'get', url, body, options) {
	// Normalise once so the limiter lookup and the request see the same object.
	// A `null` argument does not trigger `request`'s default parameter, so passing
	// it through untouched would throw on the first property read.
	const requestOptions = options || {};

	return getLimiter(requestOptions).schedule(() => request(method, url, body, requestOptions));
}
|
||||
|
||||
async function post(url, body, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'POST',
|
||||
url,
|
||||
body,
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
/**
 * Send a rate-limited GET request.
 *
 * @param {string} url - Target URL.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function get(url, options) {
	// No payload for this verb; the body slot is filled with null.
	const payload = null;

	return scheduleRequest('get', url, payload, options);
}
|
||||
|
||||
function session(headers, options) {
|
||||
return bhttp.session({
|
||||
headers,
|
||||
options,
|
||||
});
|
||||
/**
 * Send a rate-limited POST request.
 *
 * @param {string} url - Target URL.
 * @param {*} body - Request payload.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function post(url, body, options) {
	const verb = 'post';

	return scheduleRequest(verb, url, body, options);
}
|
||||
|
||||
/**
 * Send a rate-limited PUT request.
 *
 * @param {string} url - Target URL.
 * @param {*} body - Request payload.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function put(url, body, options) {
	const verb = 'put';

	return scheduleRequest(verb, url, body, options);
}
|
||||
|
||||
/**
 * Send a rate-limited PATCH request.
 *
 * @param {string} url - Target URL.
 * @param {*} body - Request payload.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function patch(url, body, options) {
	const verb = 'patch';

	return scheduleRequest(verb, url, body, options);
}
|
||||
|
||||
/**
 * Send a rate-limited DELETE request.
 *
 * @param {string} url - Target URL.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function del(url, options) {
	// No payload for this verb; the body slot is filled with null.
	const payload = null;

	return scheduleRequest('delete', url, payload, options);
}
|
||||
|
||||
/**
 * Send a rate-limited HEAD request.
 *
 * @param {string} url - Target URL.
 * @param {Object} [options] - Request options, passed through to `scheduleRequest`.
 * @returns {Promise<*>} The result of `scheduleRequest`.
 */
async function head(url, options) {
	// No payload for this verb; the body slot is filled with null.
	const payload = null;

	return scheduleRequest('head', url, payload, options);
}
|
||||
|
||||
/**
 * Create a bhttp session.
 *
 * @param {Object} [options] - Handed to `bhttp.session` unchanged.
 * @returns {*} Whatever `bhttp.session` returns.
 */
function getSession(options) {
	const httpSession = bhttp.session(options);

	return httpSession;
}
|
||||
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
head,
|
||||
session,
|
||||
post,
|
||||
delete: del,
|
||||
put,
|
||||
patch,
|
||||
session: getSession,
|
||||
};
|
||||
|
|
|
@ -457,8 +457,8 @@ function extractAll(htmlValue, selector) {
|
|||
|
||||
async function request(method = 'get', urlValue, body, selector, headers, options, queryAll = false) {
|
||||
const res = await (method === 'post'
|
||||
? http.post(urlValue, body, headers, options)
|
||||
: http[method](urlValue, headers, options));
|
||||
? http.post(urlValue, body, { ...options, headers })
|
||||
: http[method](urlValue, { ...options, headers }));
|
||||
|
||||
if (res.ok) {
|
||||
const item = queryAll
|
||||
|
@ -494,7 +494,7 @@ async function post(urlValue, body, selector, headers, options) {
|
|||
}
|
||||
|
||||
/**
 * Issue a GET request through `request` with `queryAll` enabled.
 *
 * @param {string} urlValue - URL to fetch.
 * @param {*} selector - Selector forwarded to `request`.
 * @param {Object} [headers] - Request headers forwarded to `request`.
 * @param {Object} [options] - Request options forwarded to `request`.
 * @returns {Promise<*>} The result of `request`.
 */
async function getAll(urlValue, selector, headers, options) {
	// `request` takes (method, urlValue, body, selector, headers, options, queryAll):
	// GET has no payload, so `null` must occupy the body slot to keep the
	// remaining arguments aligned with their parameters.
	return request('get', urlValue, null, selector, headers, options, true);
}
|
||||
|
||||
async function postAll(urlValue, body, selector, headers, options) {
|
||||
|
|