Using new HTTP module with a dynamic rate limiter.
|
@ -197,6 +197,12 @@ module.exports = {
|
||||||
'www.deeper.com',
|
'www.deeper.com',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
limits: {
|
||||||
|
default: {
|
||||||
|
interval: 50,
|
||||||
|
concurrency: 20,
|
||||||
|
},
|
||||||
|
},
|
||||||
fetchAfter: [1, 'week'],
|
fetchAfter: [1, 'week'],
|
||||||
missingDateLimit: 3,
|
missingDateLimit: 3,
|
||||||
media: {
|
media: {
|
||||||
|
|
|
@ -2208,6 +2208,11 @@
|
||||||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||||
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
|
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
|
||||||
},
|
},
|
||||||
|
"bottleneck": {
|
||||||
|
"version": "2.19.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
|
||||||
|
"integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="
|
||||||
|
},
|
||||||
"brace-expansion": {
|
"brace-expansion": {
|
||||||
"version": "1.1.11",
|
"version": "1.1.11",
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||||
|
|
|
@ -78,6 +78,7 @@
|
||||||
"blake2": "^4.0.0",
|
"blake2": "^4.0.0",
|
||||||
"bluebird": "^3.7.2",
|
"bluebird": "^3.7.2",
|
||||||
"body-parser": "^1.19.0",
|
"body-parser": "^1.19.0",
|
||||||
|
"bottleneck": "^2.19.5",
|
||||||
"canvas": "^2.6.1",
|
"canvas": "^2.6.1",
|
||||||
"casual": "^1.6.2",
|
"casual": "^1.6.2",
|
||||||
"cheerio": "^1.0.0-rc.3",
|
"cheerio": "^1.0.0-rc.3",
|
||||||
|
|
After Width: | Height: | Size: 601 KiB |
After Width: | Height: | Size: 377 KiB |
After Width: | Height: | Size: 8.4 KiB |
After Width: | Height: | Size: 7.9 KiB |
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 32 KiB |
|
@ -643,7 +643,7 @@ const tagPosters = [
|
||||||
['mff', 1, 'Anikka Albrite, Kelsi Monroe and Mick Blue for HardX'],
|
['mff', 1, 'Anikka Albrite, Kelsi Monroe and Mick Blue for HardX'],
|
||||||
['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways" for Jules Jordan'],
|
['mfm', 0, 'Vina Sky in "Jules Jordan\'s Three Ways" for Jules Jordan'],
|
||||||
['natural-boobs', 4, 'Miela (Marry Queen) in "Pure" for FemJoy'],
|
['natural-boobs', 4, 'Miela (Marry Queen) in "Pure" for FemJoy'],
|
||||||
['nurse', 0, 'Sarah Vandella in "Cum For Nurse Sarah" for Brazzers'],
|
['nurse', 1, 'Mia Malkova in "Always Think Happy Thoughts" for Brazzers'],
|
||||||
['oil', 2, 'Jade Kush for Passion HD'],
|
['oil', 2, 'Jade Kush for Passion HD'],
|
||||||
['oral-creampie', 1, 'Valentina Nappi for Her Limit'],
|
['oral-creampie', 1, 'Valentina Nappi for Her Limit'],
|
||||||
['orgy', 1, 'Megan Rain (DP), Morgan Lee (anal), Jessa Rhodes, Melissa Moore and Kimmy Granger in "Orgy Masters 8" for Jules Jordan'],
|
['orgy', 1, 'Megan Rain (DP), Morgan Lee (anal), Jessa Rhodes, Melissa Moore and Kimmy Granger in "Orgy Masters 8" for Jules Jordan'],
|
||||||
|
@ -825,6 +825,7 @@ const tagPhotos = [
|
||||||
['natural-boobs', 3, 'Violet Starr in "Violet Starr 1st Lesbian Anal" for LesbianX'],
|
['natural-boobs', 3, 'Violet Starr in "Violet Starr 1st Lesbian Anal" for LesbianX'],
|
||||||
['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
|
['natural-boobs', 0, 'Valentina Nappi in "Hypnotic Curves" for LesbianX'],
|
||||||
['natural-boobs', 2, 'Kylie Page for All Girl Massage'],
|
['natural-boobs', 2, 'Kylie Page for All Girl Massage'],
|
||||||
|
['nurse', 0, 'Sarah Vandella in "Cum For Nurse Sarah" for Brazzers'],
|
||||||
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
['oil', 1, 'Kissa Sins in "Oil Overload 14" for JulesJordan'],
|
||||||
['oil', 3, 'Vina Sky for Lubed'],
|
['oil', 3, 'Vina Sky for Lubed'],
|
||||||
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
['oil', 0, 'Jada Stevens in "Jada Stevens Anal Ass Oiled Up For James Deen\'s Cock" for Jules Jordan'],
|
||||||
|
|
11
src/media.js
|
@ -420,15 +420,18 @@ async function storeFile(media) {
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warn(`Failed to store ${media.src}: ${error.message}`);
|
logger.warn(`Failed to store ${media.src}: ${error.message}`);
|
||||||
|
|
||||||
|
await fsPromises.unlink(media.file.path);
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||||
const res = await http.get(source.src, {
|
const res = await http.get(source.src, {
|
||||||
...(source.referer && { referer: source.referer }),
|
headers: {
|
||||||
...(source.host && { host: source.host }),
|
...(source.referer && { referer: source.referer }),
|
||||||
}, {
|
...(source.host && { host: source.host }),
|
||||||
|
},
|
||||||
stream: true, // sources are fetched in parallel, don't gobble up memory
|
stream: true, // sources are fetched in parallel, don't gobble up memory
|
||||||
transforms: [hashStream],
|
transforms: [hashStream],
|
||||||
destination: tempFileTarget,
|
destination: tempFileTarget,
|
||||||
|
@ -642,7 +645,7 @@ async function storeMedias(baseMedias) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const newMediaWithEntries = savedMedias.map((media, index) => curateMediaEntry(media, index));
|
const newMediaWithEntries = savedMedias.filter(Boolean).map((media, index) => curateMediaEntry(media, index));
|
||||||
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
const newMediaEntries = newMediaWithEntries.filter(media => media.newEntry).map(media => media.entry);
|
||||||
|
|
||||||
await bulkInsert('media', newMediaEntries);
|
await bulkInsert('media', newMediaEntries);
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('@thependulum/bhttp');
|
const http = require('../utils/http');
|
||||||
|
|
||||||
const { post } = require('../utils/http');
|
|
||||||
const { extractDate } = require('../utils/qu');
|
const { extractDate } = require('../utils/qu');
|
||||||
const { inchesToCm } = require('../utils/convert');
|
const { inchesToCm } = require('../utils/convert');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
@ -84,7 +82,7 @@ function scrapeAll(scenes) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchActorReleases(actor) {
|
async function fetchActorReleases(actor) {
|
||||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||||
size: 50,
|
size: 50,
|
||||||
query: {
|
query: {
|
||||||
bool: {
|
bool: {
|
||||||
|
@ -179,7 +177,7 @@ async function scrapeProfile(actor, include) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const res = await bhttp.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/videos/video/_search`, {
|
||||||
size: 50,
|
size: 50,
|
||||||
from: (page - 1) * 50,
|
from: (page - 1) * 50,
|
||||||
query: {
|
query: {
|
||||||
|
@ -269,7 +267,7 @@ async function fetchScene(url) {
|
||||||
const encodedId = new URL(url).pathname.split('/')[2];
|
const encodedId = new URL(url).pathname.split('/')[2];
|
||||||
const entryId = decodeId(encodedId);
|
const entryId = decodeId(encodedId);
|
||||||
|
|
||||||
const res = await bhttp.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
const res = await http.get(`https://${clusterId}.us-east-1.aws.found.io/videos/video/${entryId}`, {
|
||||||
headers: {
|
headers: {
|
||||||
Authorization: `Basic ${authKey}`,
|
Authorization: `Basic ${authKey}`,
|
||||||
},
|
},
|
||||||
|
@ -279,7 +277,7 @@ async function fetchScene(url) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }, context, include) {
|
async function fetchProfile({ name: actorName }, context, include) {
|
||||||
const res = await post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
const res = await http.post(`https://${clusterId}.us-east-1.aws.found.io/actors/actor/_search`, {
|
||||||
size: 5,
|
size: 5,
|
||||||
sort: [{
|
sort: [{
|
||||||
_score: {
|
_score: {
|
||||||
|
@ -306,8 +304,11 @@ async function fetchProfile({ name: actorName }, context, include) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
Authorization: `Basic ${authKey}`,
|
headers: {
|
||||||
}, { encodeJSON: true });
|
Authorization: `Basic ${authKey}`,
|
||||||
|
},
|
||||||
|
encodeJSON: true,
|
||||||
|
});
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
const actor = res.body.hits.hits.find(hit => hit._source.name.toLowerCase() === actorName.toLowerCase());
|
||||||
|
|
|
@ -41,7 +41,9 @@ function scrapeScene({ query }, url, channel) {
|
||||||
}));
|
}));
|
||||||
|
|
||||||
release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];
|
release.director = query.cnt('.director')?.split(/\s*:\s*/)[1];
|
||||||
release.poster = query.sourceSet('.player img', 'data-srcset');
|
|
||||||
|
const fallbackPoster = query.img('.player img');
|
||||||
|
release.poster = query.sourceSet('.player img', 'data-srcset') || [fallbackPoster.replace('_crop', ''), fallbackPoster];
|
||||||
|
|
||||||
release.movie = {
|
release.movie = {
|
||||||
title: query.cnt('.movie a'),
|
title: query.cnt('.movie a'),
|
||||||
|
|
|
@ -7,7 +7,7 @@ const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
const { ex, get } = require('../utils/q');
|
const qu = require('../utils/qu');
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
|
||||||
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
|
const profilePath = `/${pathname.split('/').slice(-2).join('/')}`;
|
||||||
|
|
||||||
const url = getActorReleasesUrl(profilePath, page);
|
const url = getActorReleasesUrl(profilePath, page);
|
||||||
const res = await get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
if (!res.ok) return [];
|
if (!res.ok) return [];
|
||||||
|
|
||||||
|
@ -333,14 +333,14 @@ async function fetchActorReleases(profileUrl, getActorReleasesUrl, page = 1, acc
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
|
async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUrl, withReleases) {
|
||||||
const { q } = ex(html);
|
const { query } = qu.extract(html);
|
||||||
|
|
||||||
const avatar = q('img.actorPicture');
|
const avatar = query.el('img.actorPicture');
|
||||||
const hair = q('.actorProfile .attribute_hair_color', true);
|
const hair = query.cnt('.actorProfile .attribute_hair_color');
|
||||||
const height = q('.actorProfile .attribute_height', true);
|
const height = query.cnt('.actorProfile .attribute_height');
|
||||||
const weight = q('.actorProfile .attribute_weight', true);
|
const weight = query.cnt('.actorProfile .attribute_weight');
|
||||||
const alias = q('.actorProfile .attribute_alternate_names', true);
|
const alias = query.cnt('.actorProfile .attribute_alternate_names');
|
||||||
const nationality = q('.actorProfile .attribute_home', true);
|
const nationality = query.cnt('.actorProfile .attribute_home');
|
||||||
|
|
||||||
const profile = {
|
const profile = {
|
||||||
name: actorName,
|
name: actorName,
|
||||||
|
@ -358,7 +358,7 @@ async function scrapeProfile(html, url, actorName, _siteSlug, getActorReleasesUr
|
||||||
profile.avatar = avatars;
|
profile.avatar = avatars;
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.description = q('.actorBio p:not(.bioTitle)', true);
|
profile.description = query.cnt('.actorBio p:not(.bioTitle)');
|
||||||
|
|
||||||
if (hair) profile.hair = hair.split(':')[1].trim();
|
if (hair) profile.hair = hair.split(':')[1].trim();
|
||||||
if (height) profile.height = Number(height.match(/\d+/)[0]);
|
if (height) profile.height = Number(height.match(/\d+/)[0]);
|
||||||
|
|
|
@ -129,7 +129,9 @@ async function fetchProfile(baseActor, entity, include) {
|
||||||
const searchRes = await http.post('https://tour.hitzefrei.com/search-preview', {
|
const searchRes = await http.post('https://tour.hitzefrei.com/search-preview', {
|
||||||
q: baseActor.name,
|
q: baseActor.name,
|
||||||
}, {
|
}, {
|
||||||
'Accept-Language': 'en-US',
|
headers: {
|
||||||
|
'Accept-Language': 'en-US',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (searchRes.ok) {
|
if (searchRes.ok) {
|
||||||
|
|
|
@ -115,7 +115,7 @@ async function scrapeSceneAlt({ query }, url, channel, session) {
|
||||||
release.trailer = query.video();
|
release.trailer = query.video();
|
||||||
|
|
||||||
if (!release.trailer) {
|
if (!release.trailer) {
|
||||||
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, null, { useSession: session });
|
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
||||||
|
|
||||||
if (trailerRes.ok) {
|
if (trailerRes.ok) {
|
||||||
release.trailer = trailerRes.body;
|
release.trailer = trailerRes.body;
|
||||||
|
@ -153,7 +153,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const session = http.session();
|
const session = http.session();
|
||||||
const res = await qu.get(url, null, null, { useSession: session });
|
const res = await qu.get(url, null, null, { session });
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (site.parameters?.scraper === 'alt') {
|
if (site.parameters?.scraper === 'alt') {
|
||||||
|
|
|
@ -23,7 +23,7 @@ async function fetchTrailerLocation(entryId, channel) {
|
||||||
const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
|
const url = `${channel.url}/api/download/${entryId}/hd1080/stream`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await http.get(url, null, {
|
const res = await http.get(url, {
|
||||||
followRedirects: false,
|
followRedirects: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ const cheerio = require('cheerio');
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
|
const qu = require('../utils/qu');
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const { heightToCm } = require('../utils/convert');
|
const { heightToCm } = require('../utils/convert');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
@ -82,7 +82,7 @@ async function getPhotosLegacy(entryId, site, type = 'highres', page = 1) {
|
||||||
async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
async function getPhotos(entryId, site, type = 'highres', page = 1) {
|
||||||
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
|
const albumUrl = `${site.parameters?.photos || `${site.url}/gallery.php`}?id=${entryId}&type=${type}&page=${page}`;
|
||||||
|
|
||||||
const res = await bhttp.get(albumUrl);
|
const res = await http.get(albumUrl);
|
||||||
const html = res.body.toString();
|
const html = res.body.toString();
|
||||||
|
|
||||||
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
|
const sourceLines = html.split(/\n/).filter(line => line.match(/ptx\["\w+"\]/));
|
||||||
|
@ -135,25 +135,25 @@ function getEntryId(html) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||||
return scenes.map(({ el, qu }) => {
|
return scenes.map(({ el, query }) => {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
|
release.url = query.url('.update_title a, .dvd_info > a, a ~ a');
|
||||||
release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
|
release.title = query.q('.update_title a, .dvd_info > a, a ~ a', true);
|
||||||
release.date = qu.date('.update_date', 'MM/DD/YYYY');
|
release.date = query.date('.update_date', 'MM/DD/YYYY');
|
||||||
|
|
||||||
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || query.q('.rating_box')?.dataset.id;
|
||||||
|
|
||||||
release.actors = qu.all('.update_models a', true);
|
release.actors = query.all('.update_models a', true);
|
||||||
|
|
||||||
const dvdPhotos = qu.imgs('.dvd_preview_thumb');
|
const dvdPhotos = query.imgs('.dvd_preview_thumb');
|
||||||
const photoCount = Number(qu.q('a img.thumbs', 'cnt')) || 1;
|
const photoCount = Number(query.q('a img.thumbs', 'cnt')) || 1;
|
||||||
|
|
||||||
[release.poster, ...release.photos] = dvdPhotos.length
|
[release.poster, ...release.photos] = dvdPhotos.length
|
||||||
? dvdPhotos
|
? dvdPhotos
|
||||||
: Array.from({ length: photoCount }).map((value, index) => {
|
: Array.from({ length: photoCount }).map((value, index) => {
|
||||||
const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
|
const src = query.img('a img.thumbs', `src${index}_1x`) || query.img('a img.thumbs', `src${index}`) || query.img('a img.thumbs');
|
||||||
const prefixedSrc = prefixUrl(src, site.url);
|
const prefixedSrc = qu.prefixUrl(src, site.url);
|
||||||
|
|
||||||
if (src) {
|
if (src) {
|
||||||
return [
|
return [
|
||||||
|
@ -183,7 +183,7 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||||
return null;
|
return null;
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
const teaserScript = qu.html('script');
|
const teaserScript = query.html('script');
|
||||||
if (teaserScript) {
|
if (teaserScript) {
|
||||||
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
const src = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||||
if (src) release.teaser = { src };
|
if (src) release.teaser = { src };
|
||||||
|
@ -236,17 +236,17 @@ function scrapeUpcoming(html, site) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene({ html, qu }, url, site, include) {
|
async function scrapeScene({ html, query }, url, site, include) {
|
||||||
const release = { url, site };
|
const release = { url, site };
|
||||||
|
|
||||||
release.entryId = getEntryId(html);
|
release.entryId = getEntryId(html);
|
||||||
release.title = qu.q('.title_bar_hilite', true);
|
release.title = query.q('.title_bar_hilite', true);
|
||||||
release.description = qu.q('.update_description', true);
|
release.description = query.q('.update_description', true);
|
||||||
|
|
||||||
release.date = qu.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
release.date = query.date('.update_date', 'MM/DD/YYYY', null, 'innerHTML');
|
||||||
|
|
||||||
release.actors = qu.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a', true);
|
||||||
release.tags = qu.all('.update_tags a', true);
|
release.tags = query.all('.update_tags a', true);
|
||||||
|
|
||||||
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
const posterPath = html.match(/useimage = "(.*)"/)?.[1];
|
||||||
|
|
||||||
|
@ -280,14 +280,14 @@ async function scrapeScene({ html, qu }, url, site, include) {
|
||||||
|
|
||||||
if (include.photos) release.photos = await getPhotos(release.entryId, site);
|
if (include.photos) release.photos = await getPhotos(release.entryId, site);
|
||||||
|
|
||||||
if (qu.exists('.update_dvds a')) {
|
if (query.exists('.update_dvds a')) {
|
||||||
release.movie = {
|
release.movie = {
|
||||||
url: qu.url('.update_dvds a'),
|
url: query.url('.update_dvds a'),
|
||||||
title: qu.q('.update_dvds a', true),
|
title: query.q('.update_dvds a', true),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const stars = Number(qu.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
const stars = Number(query.q('.avg_rating', true)?.replace(/[\s|Avg Rating:]/g, ''));
|
||||||
if (stars) release.stars = stars;
|
if (stars) release.stars = stars;
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
|
@ -302,7 +302,7 @@ function scrapeMovie({ el, query }, url, site) {
|
||||||
movie.channel = slugify(query.q('.update_date a', true), '');
|
movie.channel = slugify(query.q('.update_date a', true), '');
|
||||||
|
|
||||||
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
// movie.releases = Array.from(document.querySelectorAll('.cell.dvd_info > a'), el => el.href);
|
||||||
const sceneQus = ctxa(el, '.dvd_details');
|
const sceneQus = qu.initAll(el, '.dvd_details');
|
||||||
const scenes = scrapeAll(sceneQus, site);
|
const scenes = scrapeAll(sceneQus, site);
|
||||||
|
|
||||||
const curatedScenes = scenes
|
const curatedScenes = scenes
|
||||||
|
@ -332,7 +332,7 @@ function scrapeProfile(html, url, actorName, entity) {
|
||||||
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
|
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
|
||||||
const measurementsString = bio.match(/\w+-\d+-\d+/);
|
const measurementsString = bio.match(/\w+-\d+-\d+/);
|
||||||
|
|
||||||
if (birthDateString) profile.birthdate = parseDate(birthDateString[1], 'MMMM D, YYYY');
|
if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
|
||||||
if (ageString) profile.age = Number(ageString[1]);
|
if (ageString) profile.age = Number(ageString[1]);
|
||||||
|
|
||||||
if (heightString) profile.height = heightToCm(heightString[0]);
|
if (heightString) profile.height = heightToCm(heightString[0]);
|
||||||
|
@ -354,7 +354,7 @@ function scrapeProfile(html, url, actorName, entity) {
|
||||||
avatarEl.getAttribute('src'),
|
avatarEl.getAttribute('src'),
|
||||||
]
|
]
|
||||||
.filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
|
.filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
|
||||||
.map(avatar => prefixUrl(avatar, entity.url));
|
.map(avatar => qu.prefixUrl(avatar, entity.url));
|
||||||
|
|
||||||
if (avatarSources.length) profile.avatar = avatarSources;
|
if (avatarSources.length) profile.avatar = avatarSources;
|
||||||
}
|
}
|
||||||
|
@ -370,7 +370,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
|
||||||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||||
|
|
||||||
// const res = await bhttp.get(url);
|
// const res = await bhttp.get(url);
|
||||||
const res = await geta(url, '.update_details');
|
const res = await qu.getAll(url, '.update_details');
|
||||||
|
|
||||||
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
||||||
}
|
}
|
||||||
|
@ -389,13 +389,13 @@ async function fetchUpcoming(site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, baseRelease, include) {
|
async function fetchScene(url, site, baseRelease, include) {
|
||||||
const res = await get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
|
return res.ok ? scrapeScene(res.item, url, site, include) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchMovie(url, site) {
|
async function fetchMovie(url, site) {
|
||||||
const res = await get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
|
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,8 +97,10 @@ async function scrapeScene({ query, html }, url, baseRelease) {
|
||||||
const token = query.meta('name=_token');
|
const token = query.meta('name=_token');
|
||||||
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
const trailerInfoUrl = `${origin}/episodes/trailer/sources/${release.entryId}?type=trailer`;
|
||||||
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
|
const trailerInfoRes = await http.post(trailerInfoUrl, null, {
|
||||||
'X-CSRF-Token': token,
|
headers: {
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-CSRF-Token': token,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
|
if (trailerInfoRes.ok && trailerInfoRes.body.sources.length > 0) {
|
||||||
|
@ -136,7 +138,9 @@ function scrapeProfile({ query }) {
|
||||||
async function fetchLatest(channel, page = 1) {
|
async function fetchLatest(channel, page = 1) {
|
||||||
const url = `${channel.url}/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
const url = `${channel.url}/episodes/search?page=${page}`; // TLS issues with teenfidelity.com, same overview on all sites
|
||||||
const res = await http.get(url, {
|
const res = await http.get(url, {
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
headers: {
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.ok && res.body.status === 'success') {
|
if (res.ok && res.body.status === 'success') {
|
||||||
|
@ -157,7 +161,9 @@ async function fetchScene(url, channel, baseRelease) {
|
||||||
async function fetchProfile({ name: actorName }) {
|
async function fetchProfile({ name: actorName }) {
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
|
const res = await qu.get(`https://www.kellymadison.com/models/${actorSlug}`, null, {
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
headers: {
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
|
|
|
@ -6,7 +6,7 @@ const moment = require('moment');
|
||||||
|
|
||||||
const http = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { ex, get } = require('../utils/q');
|
const qu = require('../utils/q');
|
||||||
|
|
||||||
function titleExtractor(pathname) {
|
function titleExtractor(pathname) {
|
||||||
const components = pathname.split('/')[2].split('-');
|
const components = pathname.split('/')[2].split('-');
|
||||||
|
@ -102,24 +102,24 @@ function scrapeScene(html, url, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchActorReleases(url) {
|
async function fetchActorReleases(url) {
|
||||||
const res = await get(url);
|
const res = await qu.get(url);
|
||||||
|
|
||||||
return res.ok
|
return res.ok
|
||||||
? res.item.qu.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
? res.item.query.urls('.contain-block:not(.live-scenes) .scene-item > a:first-child') // live scenes repeat on all pages
|
||||||
: [];
|
: [];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeProfile(html) {
|
async function scrapeProfile(html) {
|
||||||
const { qu } = ex(html);
|
const { query } = qu.extract(html);
|
||||||
const profile = {};
|
const profile = {};
|
||||||
|
|
||||||
profile.description = qu.q('.bio_about_text', true);
|
profile.description = query.q('.bio_about_text', true);
|
||||||
|
|
||||||
const avatar = qu.q('img.performer-pic', 'src');
|
const avatar = query.q('img.performer-pic', 'src');
|
||||||
if (avatar) profile.avatar = `https:${avatar}`;
|
if (avatar) profile.avatar = `https:${avatar}`;
|
||||||
|
|
||||||
const releases = qu.urls('.scene-item > a:first-child');
|
const releases = query.urls('.scene-item > a:first-child');
|
||||||
const otherPages = qu.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
const otherPages = query.urls('.pagination a:not([rel=next]):not([rel=prev])');
|
||||||
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
const olderReleases = await Promise.all(otherPages.map(async page => fetchActorReleases(page)));
|
||||||
|
|
||||||
profile.releases = releases.concat(olderReleases.flat());
|
profile.releases = releases.concat(olderReleases.flat());
|
||||||
|
|
|
@ -71,10 +71,10 @@ async function fetchLatest(channel, page = 1) {
|
||||||
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
|
const headers = { 'X-Requested-With': 'XMLHttpRequest' };
|
||||||
|
|
||||||
for (let i = 0; i < page - 1; i += 1) {
|
for (let i = 0; i < page - 1; i += 1) {
|
||||||
await http.get(url, headers, { useSession: session }); // eslint-disable-line no-await-in-loop
|
await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await http.get(url, headers, { useSession: session });
|
const res = await http.get(url, { headers, session });
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
|
const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');
|
||||||
|
|
|
@ -74,9 +74,14 @@ async function scrapeScene({ query }, url) {
|
||||||
release.photos = query.imgs('.detail-grabs img');
|
release.photos = query.imgs('.detail-grabs img');
|
||||||
|
|
||||||
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
|
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
|
||||||
host,
|
headers: {
|
||||||
referer: url,
|
host,
|
||||||
}, { queueMethod: '5s' });
|
referer: url,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
interval: 5000,
|
||||||
|
concurrency: 1,
|
||||||
|
});
|
||||||
|
|
||||||
if (streamData.ok && streamData.body.status === 'success') {
|
if (streamData.ok && streamData.body.status === 'success') {
|
||||||
release.trailer = {
|
release.trailer = {
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const { get, post } = require('../utils/http');
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
const genderMap = {
|
const genderMap = {
|
||||||
|
@ -45,13 +45,15 @@ function getAvatarFallbacks(avatar) {
|
||||||
async function getTrailer(scene, site, url) {
|
async function getTrailer(scene, site, url) {
|
||||||
const qualities = [360, 480, 720, 1080, 2160];
|
const qualities = [360, 480, 720, 1080, 2160];
|
||||||
|
|
||||||
const tokenRes = await post(`${site.url}/api/__record_tknreq`, {
|
const tokenRes = await http.post(`${site.url}/api/__record_tknreq`, {
|
||||||
file: scene.previewVideoUrl1080P,
|
file: scene.previewVideoUrl1080P,
|
||||||
sizes: qualities.join('+'),
|
sizes: qualities.join('+'),
|
||||||
type: 'trailer',
|
type: 'trailer',
|
||||||
}, {
|
}, {
|
||||||
referer: url,
|
headers: {
|
||||||
origin: site.url,
|
referer: url,
|
||||||
|
origin: site.url,
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!tokenRes.ok) {
|
if (!tokenRes.ok) {
|
||||||
|
@ -59,7 +61,7 @@ async function getTrailer(scene, site, url) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
const trailerUrl = `${site.url}/api${tokenRes.body.data.url}`;
|
||||||
const trailersRes = await post(trailerUrl, null, { referer: url });
|
const trailersRes = await http.post(trailerUrl, null, { headers: { referer: url } });
|
||||||
|
|
||||||
if (trailersRes.ok) {
|
if (trailersRes.ok) {
|
||||||
return qualities.map(quality => (trailersRes.body[quality] ? {
|
return qualities.map(quality => (trailersRes.body[quality] ? {
|
||||||
|
@ -155,7 +157,7 @@ async function scrapeScene(data, url, site, baseRelease) {
|
||||||
async function fetchActorReleases(pages, model, origin) {
|
async function fetchActorReleases(pages, model, origin) {
|
||||||
const releasesPerPage = await Promise.map(pages, async (page) => {
|
const releasesPerPage = await Promise.map(pages, async (page) => {
|
||||||
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
const url = `${origin}/api${model.targetUrl}?page=${page}`;
|
||||||
const res = await get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeAll(res.body.data.videos.videos, null, origin);
|
return scrapeAll(res.body.data.videos.videos, null, origin);
|
||||||
|
@ -203,7 +205,7 @@ async function scrapeProfile(data, origin, withReleases) {
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}/api/videos?page=${page}`;
|
const url = `${site.url}/api/videos?page=${page}`;
|
||||||
const res = await get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeAll(res.body.data.videos, site);
|
return scrapeAll(res.body.data.videos, site);
|
||||||
|
@ -214,7 +216,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
|
|
||||||
async function fetchUpcoming(site) {
|
async function fetchUpcoming(site) {
|
||||||
const apiUrl = `${site.url}/api`;
|
const apiUrl = `${site.url}/api`;
|
||||||
const res = await get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeUpcoming(res.body.data.nextScene, site);
|
return scrapeUpcoming(res.body.data.nextScene, site);
|
||||||
|
@ -227,7 +229,7 @@ async function fetchScene(url, site, baseRelease) {
|
||||||
const { origin, pathname } = new URL(url);
|
const { origin, pathname } = new URL(url);
|
||||||
const apiUrl = `${origin}/api${pathname}`;
|
const apiUrl = `${origin}/api${pathname}`;
|
||||||
|
|
||||||
const res = await get(apiUrl);
|
const res = await http.get(apiUrl);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeScene(res.body.data, url, site, baseRelease);
|
return scrapeScene(res.body.data, url, site, baseRelease);
|
||||||
|
@ -240,7 +242,7 @@ async function fetchProfile({ name: actorName }, { site }, include) {
|
||||||
const origin = site.url;
|
const origin = site.url;
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
const url = `${origin}/api/${actorSlug}`;
|
const url = `${origin}/api/${actorSlug}`;
|
||||||
const res = await get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.code === 200) {
|
if (res.code === 200) {
|
||||||
return scrapeProfile(res.body.data, origin, include.scenes);
|
return scrapeProfile(res.body.data, origin, include.scenes);
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const util = require('util');
|
||||||
|
const stream = require('stream');
|
||||||
|
const config = require('config');
|
||||||
|
const tunnel = require('tunnel');
|
||||||
|
const bhttp = require('@thependulum/bhttp');
|
||||||
|
const taskQueue = require('promise-task-queue');
|
||||||
|
|
||||||
|
const pipeline = util.promisify(stream.pipeline);
|
||||||
|
const logger = require('../logger')(__filename);
|
||||||
|
|
||||||
|
const defaultHeaders = {
|
||||||
|
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||||
|
};
|
||||||
|
|
||||||
|
const defaultOptions = {
|
||||||
|
responseTimeout: 30000,
|
||||||
|
};
|
||||||
|
|
||||||
|
const proxyAgent = tunnel.httpsOverHttp({
|
||||||
|
proxy: {
|
||||||
|
host: config.proxy.host,
|
||||||
|
port: config.proxy.port,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
function useProxy(url) {
|
||||||
|
if (!config.proxy.enable) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const { hostname } = new URL(url);
|
||||||
|
return config.proxy.hostnames.includes(hostname);
|
||||||
|
}
|
||||||
|
|
||||||
|
const queue = taskQueue();
|
||||||
|
const defaultQueueMethod = '20p';
|
||||||
|
|
||||||
|
async function handler({
|
||||||
|
url,
|
||||||
|
method = 'GET',
|
||||||
|
body,
|
||||||
|
headers = {},
|
||||||
|
options = {},
|
||||||
|
}) {
|
||||||
|
if (body) {
|
||||||
|
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)} ${options.queueMethod || defaultQueueMethod}`);
|
||||||
|
} else {
|
||||||
|
logger.silly(`${method.toUpperCase()} ${url} ${options.queueMethod || defaultQueueMethod}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const reqOptions = {
|
||||||
|
headers: {
|
||||||
|
...(options?.defaultHeaders !== false && defaultHeaders),
|
||||||
|
...headers,
|
||||||
|
},
|
||||||
|
...defaultOptions,
|
||||||
|
...options,
|
||||||
|
...(options?.timeout && { responseTimeout: options?.timeout }),
|
||||||
|
};
|
||||||
|
|
||||||
|
if (useProxy(url)) {
|
||||||
|
reqOptions.agent = proxyAgent;
|
||||||
|
}
|
||||||
|
|
||||||
|
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
||||||
|
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
|
||||||
|
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
|
||||||
|
|
||||||
|
if (options?.stream && options?.destination) {
|
||||||
|
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
||||||
|
}
|
||||||
|
|
||||||
|
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
||||||
|
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...res,
|
||||||
|
originalRes: res,
|
||||||
|
html,
|
||||||
|
json,
|
||||||
|
pipe: res.pipe,
|
||||||
|
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||||
|
code: res.statusCode,
|
||||||
|
status: res.statusCode,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
queue.on('concurrencyReached:http', () => {
|
||||||
|
logger.silly('Queueing requests');
|
||||||
|
});
|
||||||
|
|
||||||
|
queue.define('20p', handler, {
|
||||||
|
concurrency: 20,
|
||||||
|
});
|
||||||
|
|
||||||
|
queue.define('1s', handler, {
|
||||||
|
interval: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
queue.define('5s', handler, {
|
||||||
|
interval: 5,
|
||||||
|
});
|
||||||
|
|
||||||
|
async function get(url, headers, options) {
|
||||||
|
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||||
|
method: 'GET',
|
||||||
|
url,
|
||||||
|
headers,
|
||||||
|
options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async function head(url, headers, options) {
|
||||||
|
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||||
|
method: 'HEAD',
|
||||||
|
url,
|
||||||
|
headers,
|
||||||
|
options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async function post(url, body, headers, options) {
|
||||||
|
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||||
|
method: 'POST',
|
||||||
|
url,
|
||||||
|
body,
|
||||||
|
headers,
|
||||||
|
options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function session(headers, options) {
|
||||||
|
return bhttp.session({
|
||||||
|
headers,
|
||||||
|
options,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
get,
|
||||||
|
post,
|
||||||
|
head,
|
||||||
|
session,
|
||||||
|
};
|
|
@ -1,21 +1,23 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const config = require('config');
|
||||||
|
const bhttp = require('bhttp');
|
||||||
const util = require('util');
|
const util = require('util');
|
||||||
const stream = require('stream');
|
const stream = require('stream');
|
||||||
const config = require('config');
|
|
||||||
const tunnel = require('tunnel');
|
const tunnel = require('tunnel');
|
||||||
const bhttp = require('@thependulum/bhttp');
|
const Bottleneck = require('bottleneck');
|
||||||
const taskQueue = require('promise-task-queue');
|
const { JSDOM } = require('jsdom');
|
||||||
|
|
||||||
const pipeline = util.promisify(stream.pipeline);
|
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
|
|
||||||
const defaultHeaders = {
|
const pipeline = util.promisify(stream.pipeline);
|
||||||
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
const limiters = {};
|
||||||
};
|
|
||||||
|
|
||||||
const defaultOptions = {
|
const defaultOptions = {
|
||||||
responseTimeout: 30000,
|
encodeJSON: true,
|
||||||
|
headers: {
|
||||||
|
'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const proxyAgent = tunnel.httpsOverHttp({
|
const proxyAgent = tunnel.httpsOverHttp({
|
||||||
|
@ -34,113 +36,114 @@ function useProxy(url) {
|
||||||
return config.proxy.hostnames.includes(hostname);
|
return config.proxy.hostnames.includes(hostname);
|
||||||
}
|
}
|
||||||
|
|
||||||
const queue = taskQueue();
|
function getLimiter(limit = {}) {
|
||||||
const defaultQueueMethod = '20p';
|
const interval = limit.interval === undefined ? config.limits.default.interval : limit.interval;
|
||||||
|
const concurrency = limit.concurrency === undefined ? config.limits.default.concurrency : limit.concurrency;
|
||||||
|
|
||||||
async function handler({
|
if (!limiters[interval]?.[concurrency]) {
|
||||||
url,
|
limiters[interval] = limiters[interval] || {};
|
||||||
method = 'GET',
|
|
||||||
body,
|
limiters[interval][concurrency] = new Bottleneck({
|
||||||
headers = {},
|
minTime: interval,
|
||||||
options = {},
|
maxConcurrent: concurrency,
|
||||||
}) {
|
});
|
||||||
if (body) {
|
|
||||||
logger.silly(`${method.toUpperCase()} ${url} with ${JSON.stringify(body)} ${options.queueMethod || defaultQueueMethod}`);
|
|
||||||
} else {
|
|
||||||
logger.silly(`${method.toUpperCase()} ${url} ${options.queueMethod || defaultQueueMethod}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const reqOptions = {
|
return limiters[interval][concurrency];
|
||||||
headers: {
|
}
|
||||||
...(options?.defaultHeaders !== false && defaultHeaders),
|
|
||||||
...headers,
|
async function request(method = 'get', url, body, requestOptions = {}) {
|
||||||
},
|
const http = requestOptions.session || bhttp;
|
||||||
|
|
||||||
|
const options = {
|
||||||
...defaultOptions,
|
...defaultOptions,
|
||||||
...options,
|
...requestOptions,
|
||||||
...(options?.timeout && { responseTimeout: options?.timeout }),
|
responseTimeout: requestOptions.responseTimeout || requestOptions.timeout || 60000,
|
||||||
|
stream: !!requestOptions.destination,
|
||||||
|
interval: requestOptions.interval || config.limits.default.interval,
|
||||||
|
concurrency: requestOptions.concurrency || config.limits.default.concurrency,
|
||||||
|
session: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (useProxy(url)) {
|
if (useProxy(url)) {
|
||||||
reqOptions.agent = proxyAgent;
|
options.agent = proxyAgent;
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())
|
logger.debug(`GET (${options.interval}ms/${options.concurrency}p) ${url}`);
|
||||||
? await (options.useSession || bhttp)[method.toLowerCase()](url, body, reqOptions)
|
|
||||||
: await (options.useSession || bhttp)[method.toLowerCase()](url, reqOptions);
|
|
||||||
|
|
||||||
if (options?.stream && options?.destination) {
|
const res = await (body
|
||||||
await pipeline(res, ...(options?.transforms || []), options?.destination);
|
? http[method](url, body, options)
|
||||||
|
: http[method](url, options));
|
||||||
|
|
||||||
|
const resIsOk = res.statusCode >= 200 && res.statusCode <= 299;
|
||||||
|
|
||||||
|
if (options.destination) {
|
||||||
|
// res.on('progress', (bytes, totalBytes) => logger.silly(`Downloaded ${Math.round((bytes / totalBytes) * 100)}% of ${url}`));
|
||||||
|
|
||||||
|
await pipeline(res, ...(options.transforms || []), options.destination);
|
||||||
}
|
}
|
||||||
|
|
||||||
const html = Buffer.isBuffer(res.body) ? res.body.toString() : null;
|
if (Buffer.isBuffer(res.body)) {
|
||||||
const json = Buffer.isBuffer(res.body) ? null : res.body;
|
const html = res.body.toString();
|
||||||
|
const window = new JSDOM(html).window;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...res,
|
||||||
|
body: html,
|
||||||
|
html,
|
||||||
|
status: res.statusCode,
|
||||||
|
document: window.document,
|
||||||
|
window,
|
||||||
|
ok: resIsOk,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...res,
|
...res,
|
||||||
originalRes: res,
|
body: res.body,
|
||||||
html,
|
|
||||||
json,
|
|
||||||
pipe: res.pipe,
|
|
||||||
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
|
||||||
code: res.statusCode,
|
|
||||||
status: res.statusCode,
|
status: res.statusCode,
|
||||||
|
ok: res.statusCode >= 200 && res.statusCode <= 299,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
queue.on('concurrencyReached:http', () => {
|
async function scheduleRequest(method = 'get', url, body, options) {
|
||||||
logger.silly('Queueing requests');
|
return getLimiter(options || {}).schedule(() => request(method, url, body, options));
|
||||||
});
|
|
||||||
|
|
||||||
queue.define('20p', handler, {
|
|
||||||
concurrency: 20,
|
|
||||||
});
|
|
||||||
|
|
||||||
queue.define('1s', handler, {
|
|
||||||
interval: 1,
|
|
||||||
});
|
|
||||||
|
|
||||||
queue.define('5s', handler, {
|
|
||||||
interval: 5,
|
|
||||||
});
|
|
||||||
|
|
||||||
async function get(url, headers, options) {
|
|
||||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
|
||||||
method: 'GET',
|
|
||||||
url,
|
|
||||||
headers,
|
|
||||||
options,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function head(url, headers, options) {
|
async function get(url, options) {
|
||||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
return scheduleRequest('get', url, null, options);
|
||||||
method: 'HEAD',
|
|
||||||
url,
|
|
||||||
headers,
|
|
||||||
options,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function post(url, body, headers, options) {
|
async function post(url, body, options) {
|
||||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
return scheduleRequest('post', url, body, options);
|
||||||
method: 'POST',
|
|
||||||
url,
|
|
||||||
body,
|
|
||||||
headers,
|
|
||||||
options,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function session(headers, options) {
|
async function put(url, body, options) {
|
||||||
return bhttp.session({
|
return scheduleRequest('put', url, body, options);
|
||||||
headers,
|
}
|
||||||
options,
|
|
||||||
});
|
async function patch(url, body, options) {
|
||||||
|
return scheduleRequest('patch', url, body, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function del(url, options) {
|
||||||
|
return scheduleRequest('delete', url, null, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function head(url, options) {
|
||||||
|
return scheduleRequest('head', url, null, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
function getSession(options) {
|
||||||
|
return bhttp.session(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
get,
|
get,
|
||||||
post,
|
|
||||||
head,
|
head,
|
||||||
session,
|
post,
|
||||||
|
delete: del,
|
||||||
|
put,
|
||||||
|
patch,
|
||||||
|
session: getSession,
|
||||||
};
|
};
|
||||||
|
|
|
@ -457,8 +457,8 @@ function extractAll(htmlValue, selector) {
|
||||||
|
|
||||||
async function request(method = 'get', urlValue, body, selector, headers, options, queryAll = false) {
|
async function request(method = 'get', urlValue, body, selector, headers, options, queryAll = false) {
|
||||||
const res = await (method === 'post'
|
const res = await (method === 'post'
|
||||||
? http.post(urlValue, body, headers, options)
|
? http.post(urlValue, body, { ...options, headers })
|
||||||
: http[method](urlValue, headers, options));
|
: http[method](urlValue, { ...options, headers }));
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const item = queryAll
|
const item = queryAll
|
||||||
|
@ -494,7 +494,7 @@ async function post(urlValue, body, selector, headers, options) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getAll(urlValue, selector, headers, options) {
|
async function getAll(urlValue, selector, headers, options) {
|
||||||
return request('get,', urlValue, selector, headers, options, true);
|
return request('get', urlValue, null, selector, headers, options, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function postAll(urlValue, body, selector, headers, options) {
|
async function postAll(urlValue, body, selector, headers, options) {
|
||||||
|
|