Removed direct bhttp usage from scrapers in favor of local http module. Deleted legacy scrapers, as old code is available via git repo history.
This commit is contained in:
parent
3d427f7e1d
commit
0633197793
|
@ -1,12 +1,12 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const logger = require('../logger')(__filename);
|
const logger = require('../logger')(__filename);
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const http = require('../utils/http');
|
||||||
const { get, getAll, ex } = require('../utils/q');
|
const { get, getAll, ex } = require('../utils/q');
|
||||||
|
|
||||||
function scrape(html, site) {
|
function scrape(html, site) {
|
||||||
|
@ -192,7 +192,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
async function fetchUpcoming(site) {
|
async function fetchUpcoming(site) {
|
||||||
const res = await bhttp.get('https://www.bangbros.com');
|
const res = await http.get('https://www.bangbros.com');
|
||||||
|
|
||||||
return scrapeUpcoming(res.body.toString(), site);
|
return scrapeUpcoming(res.body.toString(), site);
|
||||||
}
|
}
|
||||||
|
@ -224,13 +224,13 @@ async function fetchScene(url, site, release) {
|
||||||
async function fetchProfile({ name: actorName }, scope) {
|
async function fetchProfile({ name: actorName }, scope) {
|
||||||
const actorSlug = slugify(actorName);
|
const actorSlug = slugify(actorName);
|
||||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
const actorUrl = scrapeProfileSearch(res.body.toString(), actorName);
|
||||||
|
|
||||||
if (actorUrl) {
|
if (actorUrl) {
|
||||||
const actorRes = await bhttp.get(actorUrl);
|
const actorRes = await http.get(actorUrl);
|
||||||
|
|
||||||
if (actorRes.statusCode === 200) {
|
if (actorRes.statusCode === 200) {
|
||||||
return scrapeProfile(actorRes.body.toString(), scope);
|
return scrapeProfile(actorRes.body.toString(), scope);
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
// const bhttp = require('bhttp');
|
|
||||||
|
|
||||||
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
|
const { fetchScene, fetchLatest, fetchUpcoming, fetchProfile } = require('./gamma');
|
||||||
|
|
||||||
async function fetchSceneWrapper(url, site, baseRelease) {
|
async function fetchSceneWrapper(url, site, baseRelease) {
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
|
|
||||||
const { ex } = require('../utils/q');
|
const { ex } = require('../utils/q');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
function scrapeProfile(html) {
|
function scrapeProfile(html) {
|
||||||
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
const { qu } = ex(html); /* eslint-disable-line object-curly-newline */
|
||||||
|
@ -80,7 +79,7 @@ function scrapeProfile(html) {
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }) {
|
async function fetchProfile({ name: actorName }) {
|
||||||
const actorSlug = actorName.replace(/\s+/, '_');
|
const actorSlug = actorName.replace(/\s+/, '_');
|
||||||
const res = await bhttp.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
const res = await http.get(`http://www.boobpedia.com/boobs/${actorSlug}`);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeProfile(res.body.toString());
|
return scrapeProfile(res.body.toString());
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
function scrapeAll(scenes, site, origin) {
|
function scrapeAll(scenes, site, origin) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
|
@ -150,14 +149,14 @@ async function fetchLatest(channel, page = 1) {
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
// DDF's main site moved to Porn World
|
// DDF's main site moved to Porn World
|
||||||
// const res = await bhttp.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
// const res = await http.get(`https://ddfnetwork.com${new URL(url).pathname}`);
|
||||||
const res = await qu.get(url, '.content, #content, .taspVideoPage');
|
const res = await qu.get(url, '.content, #content, .taspVideoPage');
|
||||||
|
|
||||||
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
return res.ok ? scrapeScene(res.item, url, site) : res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }) {
|
async function fetchProfile({ name: actorName }) {
|
||||||
const resSearch = await bhttp.post('https://ddfnetwork.com/search/ajax',
|
const resSearch = await http.post('https://ddfnetwork.com/search/ajax',
|
||||||
{
|
{
|
||||||
type: 'hints',
|
type: 'hints',
|
||||||
word: actorName,
|
word: actorName,
|
||||||
|
@ -180,7 +179,7 @@ async function fetchProfile({ name: actorName }) {
|
||||||
const [actor] = resSearch.body.list.pornstarsName;
|
const [actor] = resSearch.body.list.pornstarsName;
|
||||||
const url = `https://ddfnetwork.com${actor.href}`;
|
const url = `https://ddfnetwork.com${actor.href}`;
|
||||||
|
|
||||||
const resActor = await bhttp.get(url);
|
const resActor = await http.get(url);
|
||||||
|
|
||||||
if (resActor.statusCode !== 200) {
|
if (resActor.statusCode !== 200) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -2,12 +2,13 @@
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
// const Promise = require('bluebird');
|
// const Promise = require('bluebird');
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
async function getPhotos(albumUrl) {
|
async function getPhotos(albumUrl) {
|
||||||
const res = await bhttp.get(albumUrl);
|
const res = await http.get(albumUrl);
|
||||||
const html = res.body.toString();
|
const html = res.body.toString();
|
||||||
const { document } = new JSDOM(html).window;
|
const { document } = new JSDOM(html).window;
|
||||||
|
|
||||||
|
@ -125,13 +126,13 @@ async function scrapeScene(html, url, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const res = await bhttp.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`);
|
const res = await http.get(`https://dogfartnetwork.com/tour/scenes/?p=${page}`);
|
||||||
|
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
return scrapeScene(res.body.toString(), url, site);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
function scrapeProfile(html, actorName) {
|
function scrapeProfile(html, actorName) {
|
||||||
const { document } = new JSDOM(html).window;
|
const { document } = new JSDOM(html).window;
|
||||||
const profile = { name: actorName };
|
const profile = { name: actorName };
|
||||||
|
@ -68,17 +69,17 @@ function scrapeSearch(html) {
|
||||||
async function fetchProfile({ name: actorName }) {
|
async function fetchProfile({ name: actorName }) {
|
||||||
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
const actorSlug = actorName.toLowerCase().replace(/\s+/g, '-');
|
||||||
|
|
||||||
const res = await bhttp.get(`https://freeones.nl/${actorSlug}/profile`);
|
const res = await http.get(`https://freeones.nl/${actorSlug}/profile`);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeProfile(res.body.toString(), actorName);
|
return scrapeProfile(res.body.toString(), actorName);
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchRes = await bhttp.get(`https://freeones.nl/babes?q=${actorName}`);
|
const searchRes = await http.get(`https://freeones.nl/babes?q=${actorName}`);
|
||||||
const actorPath = scrapeSearch(searchRes.body.toString());
|
const actorPath = scrapeSearch(searchRes.body.toString());
|
||||||
|
|
||||||
if (actorPath) {
|
if (actorPath) {
|
||||||
const actorRes = await bhttp.get(`https://freeones.nl${actorPath}/profile`);
|
const actorRes = await http.get(`https://freeones.nl${actorPath}/profile`);
|
||||||
|
|
||||||
if (actorRes.statusCode === 200) {
|
if (actorRes.statusCode === 200) {
|
||||||
return scrapeProfile(actorRes.body.toString(), actorName);
|
return scrapeProfile(actorRes.body.toString(), actorName);
|
||||||
|
|
|
@ -1,140 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
|
||||||
const moment = require('moment');
|
|
||||||
|
|
||||||
async function scrapeProfileFrontpage(html, url, name) {
|
|
||||||
const { document } = new JSDOM(html).window;
|
|
||||||
const bioEl = document.querySelector('.dashboard-bio-list');
|
|
||||||
|
|
||||||
const bioUrl = `https:${document.querySelector('.seemore a').href}`;
|
|
||||||
|
|
||||||
const keys = Array.from(bioEl.querySelectorAll('dt'), el => el.textContent.trim());
|
|
||||||
const values = Array.from(bioEl.querySelectorAll('dd'), el => el.textContent.trim());
|
|
||||||
|
|
||||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
|
||||||
|
|
||||||
const profile = {
|
|
||||||
name,
|
|
||||||
gender: 'female',
|
|
||||||
};
|
|
||||||
|
|
||||||
const birthdateString = bio['Date of Birth:'];
|
|
||||||
const measurementsString = bio['Measurements:'];
|
|
||||||
|
|
||||||
const birthCityString = bio['Place of Birth:'];
|
|
||||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
|
||||||
|
|
||||||
const birthCountryString = bio['Country of Origin:'];
|
|
||||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
|
||||||
|
|
||||||
const piercingsString = bio['Piercings:'];
|
|
||||||
const tattoosString = bio['Tattoos:'];
|
|
||||||
|
|
||||||
if (birthdateString && birthdateString !== 'Unknown (add)') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
|
||||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
|
||||||
|
|
||||||
if (bio['Fake Boobs:']) profile.naturalBoobs = bio['Fake Boobs:'] === 'No';
|
|
||||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
|
||||||
|
|
||||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
|
||||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
|
||||||
|
|
||||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
|
||||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
|
||||||
|
|
||||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
|
||||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
|
||||||
|
|
||||||
profile.social = Array.from(bioEl.querySelectorAll('.dashboard-socialmedia a'), el => el.href);
|
|
||||||
|
|
||||||
return {
|
|
||||||
profile,
|
|
||||||
url: bioUrl,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scrapeProfileBio(html, frontpageProfile, url, name) {
|
|
||||||
const { document } = new JSDOM(html).window;
|
|
||||||
const bioEl = document.querySelector('#biographyTable');
|
|
||||||
|
|
||||||
const keys = Array.from(bioEl.querySelectorAll('td:nth-child(1)'), el => el.textContent.trim());
|
|
||||||
const values = Array.from(bioEl.querySelectorAll('td:nth-child(2)'), el => el.textContent.trim());
|
|
||||||
|
|
||||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [key]: values[index] }), {});
|
|
||||||
|
|
||||||
const profile = {
|
|
||||||
...frontpageProfile,
|
|
||||||
name,
|
|
||||||
gender: 'female',
|
|
||||||
};
|
|
||||||
|
|
||||||
const birthdateString = bio['Date of Birth:'];
|
|
||||||
const measurementsString = bio['Measurements:'];
|
|
||||||
|
|
||||||
const birthCityString = bio['Place of Birth:'];
|
|
||||||
const birthCity = birthCityString !== undefined && birthCityString !== 'Unknown' && birthCityString !== 'Unknown (add)' && birthCityString;
|
|
||||||
|
|
||||||
const birthCountryString = bio['Country of Origin:'];
|
|
||||||
const birthCountry = birthCountryString !== undefined && birthCountryString !== 'Unknown' && birthCountryString !== 'Unknown (add)' && birthCountryString;
|
|
||||||
|
|
||||||
const piercingsString = bio['Piercings:'];
|
|
||||||
const tattoosString = bio['Tattoos:'];
|
|
||||||
|
|
||||||
if (birthdateString && birthdateString !== 'Unknown') profile.birthdate = moment.utc(birthdateString.slice(0, birthdateString.indexOf(' (')), 'MMMM D, YYYY').toDate();
|
|
||||||
if (measurementsString) [profile.bust, profile.waist, profile.hip] = measurementsString.split('-').map(measurement => (measurement === '??' ? null : measurement));
|
|
||||||
|
|
||||||
if (bio['Fake boobs']) profile.naturalBoobs = bio['Fake boobs:'] === 'No';
|
|
||||||
profile.ethnicity = bio['Ethnicity:'];
|
|
||||||
|
|
||||||
profile.birthPlace = `${birthCity || ''}${birthCity ? ', ' : ''}${birthCountry || ''}`;
|
|
||||||
|
|
||||||
profile.hair = bio['Hair Color:'].toLowerCase();
|
|
||||||
profile.eyes = bio['Eye Color:'].toLowerCase();
|
|
||||||
profile.height = Number(bio['Height:'].match(/\d+/)[0]);
|
|
||||||
profile.weight = Number(bio['Weight:'].match(/\d+/)[0]);
|
|
||||||
|
|
||||||
if (piercingsString) profile.hasPiercings = !!(piercingsString !== 'Unknown (add)' && piercingsString !== 'None');
|
|
||||||
if (tattoosString) profile.hasTattoos = !!(tattoosString !== 'Unknown (add)' && tattoosString !== 'None');
|
|
||||||
|
|
||||||
if (profile.hasPiercings && piercingsString !== 'various') profile.piercings = piercingsString;
|
|
||||||
if (profile.hasTattoos && tattoosString !== 'various') profile.tattoos = tattoosString;
|
|
||||||
|
|
||||||
profile.social = Array.from(bioEl.querySelectorAll('#socialmedia a'), el => el.href);
|
|
||||||
|
|
||||||
return profile;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function fetchProfile({ name: actorName }) {
|
|
||||||
const slug = actorName.replace(' ', '_');
|
|
||||||
const frontpageUrl = `https://www.freeones.com/html/v_links/${slug}`;
|
|
||||||
|
|
||||||
const resFrontpage = await bhttp.get(frontpageUrl);
|
|
||||||
|
|
||||||
if (resFrontpage.statusCode === 200) {
|
|
||||||
const { url, bio } = await scrapeProfileFrontpage(resFrontpage.body.toString(), frontpageUrl, actorName);
|
|
||||||
const resBio = await bhttp.get(url);
|
|
||||||
|
|
||||||
return scrapeProfileBio(resBio.body.toString(), bio, url, actorName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// apparently some actors are appended 'Babe' as their surname...
|
|
||||||
const fallbackSlug = `${slug}_Babe`;
|
|
||||||
const fallbackUrl = `https://www.freeones.com/html/s_links/${fallbackSlug}`;
|
|
||||||
const resFallback = await bhttp.get(fallbackUrl);
|
|
||||||
|
|
||||||
if (resFallback.statusCode === 200) {
|
|
||||||
const { url, profile } = await scrapeProfileFrontpage(resFallback.body.toString(), fallbackUrl, actorName);
|
|
||||||
const resBio = await bhttp.get(url);
|
|
||||||
|
|
||||||
return scrapeProfileBio(resBio.body.toString(), profile, url, actorName);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = {
|
|
||||||
fetchProfile,
|
|
||||||
};
|
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
const util = require('util');
|
const util = require('util');
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
@ -13,7 +12,7 @@ const { heightToCm } = require('../utils/convert');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
async function fetchPhotos(url) {
|
async function fetchPhotos(url) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
return res.body.toString();
|
return res.body.toString();
|
||||||
}
|
}
|
||||||
|
@ -369,7 +368,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
|
||||||
? util.format(site.parameters.latest, page)
|
? util.format(site.parameters.latest, page)
|
||||||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||||
|
|
||||||
// const res = await bhttp.get(url);
|
// const res = await http.get(url);
|
||||||
const res = await qu.getAll(url, '.update_details');
|
const res = await qu.getAll(url, '.update_details');
|
||||||
|
|
||||||
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
||||||
|
|
|
@ -87,12 +87,13 @@ async function fetchScene(url, channel) {
|
||||||
|
|
||||||
/* API protected
|
/* API protected
|
||||||
async function fetchProfile({ name: actorName }, context , site) {
|
async function fetchProfile({ name: actorName }, context , site) {
|
||||||
const session = bhttp.session();
|
const session = http.session();
|
||||||
|
|
||||||
await session.get(`https://tour.${site.slug}.com`);
|
await http.get(`https://tour.${site.slug}.com`, { session });
|
||||||
|
|
||||||
const url = `https://tour.${site.slug}.com/search-preview`;
|
const url = `https://tour.${site.slug}.com/search-preview`;
|
||||||
const res = await session.post(url, { q: actorName }, {
|
const res = await http.post(url, { q: actorName }, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
|
||||||
origin: `https://tour.${site.slug}.com`,
|
origin: `https://tour.${site.slug}.com`,
|
||||||
|
|
|
@ -2,12 +2,12 @@
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
const Promise = require('bluebird');
|
const Promise = require('bluebird');
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const qu = require('../utils/qu');
|
const qu = require('../utils/qu');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const http = require('../utils/http');
|
||||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||||
const { cookieToData } = require('../utils/cookies');
|
const { cookieToData } = require('../utils/cookies');
|
||||||
|
|
||||||
|
@ -145,14 +145,14 @@ function getUrl(site) {
|
||||||
|
|
||||||
async function getSession(site) {
|
async function getSession(site) {
|
||||||
const cookieJar = new CookieJar();
|
const cookieJar = new CookieJar();
|
||||||
const session = bhttp.session({ cookieJar });
|
const session = http.session({ cookieJar });
|
||||||
|
|
||||||
// const res = await session.get(url);
|
// const res = await session.get(url);
|
||||||
const sessionUrl = site.parameters?.siteId && !(site.parameters?.childSession || site.parent?.parameters?.childSession)
|
const sessionUrl = site.parameters?.siteId && !(site.parameters?.childSession || site.parent?.parameters?.childSession)
|
||||||
? site.parent.url
|
? site.parent.url
|
||||||
: site.url;
|
: site.url;
|
||||||
|
|
||||||
const res = await session.get(sessionUrl);
|
const res = await http.get(sessionUrl, { session });
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
|
const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
|
||||||
|
@ -215,7 +215,8 @@ async function fetchLatest(site, page = 1) {
|
||||||
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
|
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
|
||||||
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
|
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
|
||||||
|
|
||||||
const res = await session.get(apiUrl, {
|
const res = await http.get(apiUrl, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
Origin: site.url,
|
Origin: site.url,
|
||||||
|
@ -236,7 +237,8 @@ async function fetchUpcoming(site) {
|
||||||
|
|
||||||
const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';
|
const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';
|
||||||
|
|
||||||
const res = await session.get(apiUrl, {
|
const res = await http.get(apiUrl, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
Origin: site.url,
|
Origin: site.url,
|
||||||
|
@ -260,7 +262,8 @@ async function fetchScene(url, site, baseScene) {
|
||||||
const entryId = url.match(/\d+/)[0];
|
const entryId = url.match(/\d+/)[0];
|
||||||
const { session, instanceToken } = await getSession(site);
|
const { session, instanceToken } = await getSession(site);
|
||||||
|
|
||||||
const res = await session.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
},
|
},
|
||||||
|
@ -277,7 +280,8 @@ async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath
|
||||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||||
const { session, instanceToken } = await getSession(networkOrNetworkSlug);
|
const { session, instanceToken } = await getSession(networkOrNetworkSlug);
|
||||||
|
|
||||||
const res = await session.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
},
|
},
|
||||||
|
@ -291,8 +295,9 @@ async function fetchProfile({ name: actorName }, networkOrNetworkSlug, actorPath
|
||||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||||
|
|
||||||
const [actorRes, actorReleasesRes] = await Promise.all([
|
const [actorRes, actorReleasesRes] = await Promise.all([
|
||||||
bhttp.get(actorUrl),
|
http.get(actorUrl),
|
||||||
session.get(actorReleasesUrl, {
|
http.get(actorReleasesUrl, {
|
||||||
|
session,
|
||||||
headers: {
|
headers: {
|
||||||
Instance: instanceToken,
|
Instance: instanceToken,
|
||||||
},
|
},
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const blake2 = require('blake2');
|
const blake2 = require('blake2');
|
||||||
const knex = require('../knex');
|
const knex = require('../knex');
|
||||||
|
|
||||||
const { ex, ctxa } = require('../utils/q');
|
const { ex, ctxa } = require('../utils/q');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
async function getSiteSlugs() {
|
async function getSiteSlugs() {
|
||||||
return knex('sites')
|
return knex('sites')
|
||||||
|
@ -124,7 +124,7 @@ async function scrapeScene(html, site, url, metaSiteSlugs) {
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}/movies/page-${page}`;
|
const url = `${site.url}/movies/page-${page}`;
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
|
@ -134,7 +134,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, release) {
|
async function fetchScene(url, site, release) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
|
return scrapeScene(res.body.toString(), site, url, release?.meta.siteSlugs);
|
||||||
|
|
|
@ -1,144 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const cheerio = require('cheerio');
|
|
||||||
const { JSDOM } = require('jsdom');
|
|
||||||
const moment = require('moment');
|
|
||||||
|
|
||||||
async function getTrailer(entryId) {
|
|
||||||
const trailerRes = await bhttp.post('https://www.pervcity.com/gettoken.php', {
|
|
||||||
setId: entryId,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (trailerRes.statusCode === 200) {
|
|
||||||
return {
|
|
||||||
poster: trailerRes.body.TrailerImg,
|
|
||||||
trailer: trailerRes.body.TrailerPath || trailerRes.body.Trailerfallback,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
function scrapeLatestScene(html, site) {
|
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
|
||||||
|
|
||||||
const entryId = $('li').attr('id');
|
|
||||||
const sceneLinkElement = $('#scene_title_border a');
|
|
||||||
const url = `${site.url}/${sceneLinkElement.attr('href')}`;
|
|
||||||
const title = sceneLinkElement.attr('title').replace(/\u00E2\u0080\u0099/g, '\''); // replace weird apostrophes
|
|
||||||
|
|
||||||
const actors = $('.home_model_name a').toArray().map(element => $(element).text().replace(/,[\u0020\u00A0\u202F]/, '')); // replace weird commas
|
|
||||||
const date = moment.utc($('.add_date').text(), 'DD-MM-YYYY').toDate();
|
|
||||||
|
|
||||||
const poster = $('a:nth-child(2) > img').attr('src');
|
|
||||||
const photos = $('.sample-picker img').map((index, element) => $(element).attr('src').replace('tourpics', 'trailer')).toArray();
|
|
||||||
|
|
||||||
const stars = $('img[src*="/star.png"]')
|
|
||||||
.toArray()
|
|
||||||
.map(element => $(element).attr('src'))
|
|
||||||
.length || 0;
|
|
||||||
|
|
||||||
return {
|
|
||||||
url,
|
|
||||||
entryId,
|
|
||||||
title,
|
|
||||||
actors,
|
|
||||||
date,
|
|
||||||
poster,
|
|
||||||
photos,
|
|
||||||
rating: {
|
|
||||||
stars,
|
|
||||||
},
|
|
||||||
site,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scrapeScene(html, url, site) {
|
|
||||||
const { document } = new JSDOM(html).window;
|
|
||||||
|
|
||||||
const release = { url, site };
|
|
||||||
|
|
||||||
release.entryId = document.querySelector('input#set_ID').value;
|
|
||||||
|
|
||||||
release.title = document.querySelector('title').textContent;
|
|
||||||
release.description = document.querySelector('.player_data').textContent.trim();
|
|
||||||
|
|
||||||
const durationString = document.querySelector('.tag_lineR div:nth-child(2) span').textContent;
|
|
||||||
const [minutes, seconds] = durationString.match(/\d+/g);
|
|
||||||
|
|
||||||
release.duration = Number(minutes) * 60 + Number(seconds);
|
|
||||||
release.tags = document.querySelector('meta[name="keywords"]').content.split(',');
|
|
||||||
|
|
||||||
const { poster, trailer } = await getTrailer(release.entryId);
|
|
||||||
|
|
||||||
release.poster = poster;
|
|
||||||
release.trailer = { src: trailer };
|
|
||||||
|
|
||||||
return release;
|
|
||||||
}
|
|
||||||
|
|
||||||
function scrapeFallbackLanding(html) {
|
|
||||||
const { document } = new JSDOM(html).window;
|
|
||||||
|
|
||||||
return document.querySelector('input#set_ID').value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Scrapes the popup markup returned for a set ID into a release object.
 *
 * @param {string} html - Markup of the popup.
 * @param {string} entryId - Set ID the popup was requested for.
 * @param {string} url - Scene URL; stored on the release.
 * @param {Object} site - Site entity; stored on the release as-is.
 * @returns {Promise<Object>} Release with title, description, date, actors,
 *   poster, trailer and channel.
 */
async function scrapeFallbackScene(html, entryId, url, site) {
  const { document } = new JSDOM(html).window;
  const query = selector => document.querySelector(selector);

  const title = query('.popup_data_set_head label').textContent;
  const description = query('.popup_data_set_des p').textContent.trim();
  const date = moment.utc(query('.popup_left_top div span').textContent, 'MM-DD-YYYY').toDate();
  const actors = [...document.querySelectorAll('.popup_data_set_models a')].map(el => el.textContent);

  const { poster, trailer } = await getTrailer(entryId);

  // the channel is read from the alt text of the logo image
  const channel = query('.popup_left_top div img').alt;

  return {
    url,
    entryId,
    site,
    title,
    description,
    date,
    actors,
    poster,
    trailer: { src: trailer },
    channel,
  };
}
|
|
||||||
|
|
||||||
/**
 * Fetches one page of the latest updates for a channel.
 *
 * @param {Object} channel - Channel entity; `url` and `parameters.tourId` are read.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<Array|null>} Scraped releases, or null when the request
 *   does not return status 200.
 */
async function fetchLatest(channel, page = 1) {
  const { tourId } = channel.parameters;

  // the first page is served by the list view endpoint, later pages by the grid view endpoint
  const url = page === 1
    ? `${channel.url}/final_latestupdateview.php?limitstart=${(page - 1) * 9}&limitend=9&webchannelid=0&deviceview=browser&tourId=${tourId}`
    : `${channel.url}/final_load_latestupdate_grid_view.php?limitstart=0&limitend=${(page - 1) * 8 + 1}&webchannelid=0&deviceview=browser&tourId=${tourId}`;

  const res = await bhttp.get(url);

  // an error page is not JSON; bail out like fetchScene does instead of letting JSON.parse throw
  if (res.statusCode !== 200) {
    return null;
  }

  const elements = JSON.parse(res.body.toString());

  // total_arr is a key-value object for final_load_latestupdate_grid_view.php
  return Object.values(elements.total_arr).map(html => scrapeLatestScene(html, channel));
}
|
|
||||||
|
|
||||||
/**
 * Fetches a scene page and scrapes it.
 *
 * For a network entity the page only provides the set ID, and the scene
 * details are requested from the popup endpoint; otherwise the page itself
 * is scraped.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entity; `isNetwork` selects the fallback path.
 * @returns {Promise<Object|null>} Release, or null when the page request does
 *   not return status 200.
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  if (res.statusCode !== 200) {
    return null;
  }

  const html = res.body.toString();

  if (!site.isNetwork) {
    return scrapeScene(html, url, site);
  }

  const entryId = scrapeFallbackLanding(html, url);
  const fallbackRes = await bhttp.post('https://www.pervcity.com/set_popupvideo.php', { setId: entryId });

  return scrapeFallbackScene(fallbackRes.body.toString(), entryId, url, site);
}
|
|
||||||
|
|
||||||
module.exports = {
|
|
||||||
fetchLatest,
|
|
||||||
fetchScene,
|
|
||||||
};
|
|
|
@ -1,9 +1,10 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
const ethnicityMap = {
|
const ethnicityMap = {
|
||||||
White: 'Caucasian',
|
White: 'Caucasian',
|
||||||
};
|
};
|
||||||
|
@ -59,8 +60,8 @@ async function fetchProfile({ name: actorName }) {
|
||||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||||
|
|
||||||
const [modelRes, pornstarRes] = await Promise.all([
|
const [modelRes, pornstarRes] = await Promise.all([
|
||||||
bhttp.get(modelUrl),
|
http.get(modelUrl),
|
||||||
bhttp.get(pornstarUrl),
|
http.get(pornstarUrl),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName);
|
const model = modelRes.statusCode === 200 && await scrapeProfile(modelRes.body.toString(), modelUrl, actorName);
|
||||||
|
@ -75,7 +76,7 @@ async function fetchProfile({ name: actorName }) {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
const pornstarUrl = `https://pornhub.com/pornstar/${actorSlug}`;
|
||||||
const pornstarRes = await bhttp.get(pornstarUrl);
|
const pornstarRes = await http.get(pornstarUrl);
|
||||||
|
|
||||||
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
return scrapeProfile(pornstarRes.body.toString(), pornstarUrl, actorName);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable newline-per-chained-call */
|
/* eslint-disable newline-per-chained-call */
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
const { get, geta } = require('../utils/q');
|
const { get, geta } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
async function getPhotos(entryId, site) {
|
async function getPhotos(entryId, site) {
|
||||||
const { hostname } = new URL(site.url);
|
const { hostname } = new URL(site.url);
|
||||||
|
|
||||||
const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
|
const res = await http.get(`https://${hostname}/gallery.php?type=highres&id=${entryId}`);
|
||||||
const html = res.body.toString();
|
const html = res.body.toString();
|
||||||
|
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
@ -159,18 +159,18 @@ async function fetchLatest(site, page = 1) {
|
||||||
const { hostname } = new URL(site.url);
|
const { hostname } = new URL(site.url);
|
||||||
|
|
||||||
if (hostname.match('private.com')) {
|
if (hostname.match('private.com')) {
|
||||||
const res = await bhttp.get(`${site.url}/${page}/`);
|
const res = await http.get(`${site.url}/${page}/`);
|
||||||
|
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await bhttp.get(`${site.url}/scenes/${page}/`);
|
const res = await http.get(`${site.url}/scenes/${page}/`);
|
||||||
|
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
return scrapeScene(res.body.toString(), url, site);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
|
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
const {
|
const {
|
||||||
scrapeLatestX,
|
scrapeLatestX,
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
|
@ -24,7 +25,7 @@ function scrapeLatestClassic(html, site) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchClassic(site, page) {
|
async function fetchClassic(site, page) {
|
||||||
const res = await bhttp.get(`${site.url}/scenes?page=${page}`);
|
const res = await http.get(`${site.url}/scenes?page=${page}`);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeLatestClassic(res.body.toString(), site);
|
return scrapeLatestClassic(res.body.toString(), site);
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
|
|
||||||
const { ex, exa, get } = require('../utils/q');
|
const { ex, exa, get } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
const http = require('../utils/http');
|
||||||
const { heightToCm, lbsToKg } = require('../utils/convert');
|
const { heightToCm, lbsToKg } = require('../utils/convert');
|
||||||
|
|
||||||
function scrapePhotos(html) {
|
function scrapePhotos(html) {
|
||||||
|
@ -19,7 +18,7 @@ function scrapePhotos(html) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchPhotos(url) {
|
async function fetchPhotos(url) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapePhotos(res.body.toString(), url);
|
return scrapePhotos(res.body.toString(), url);
|
||||||
|
@ -198,7 +197,7 @@ async function scrapeProfile(html, actorUrl, withReleases) {
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const latestPath = site.parameters?.path || '/big-boob-videos';
|
const latestPath = site.parameters?.path || '/big-boob-videos';
|
||||||
const url = `${site.url}${latestPath}?page=${page}`;
|
const url = `${site.url}${latestPath}?page=${page}`;
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeAll(res.body.toString(), site);
|
return scrapeAll(res.body.toString(), site);
|
||||||
|
@ -208,7 +207,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
return scrapeScene(res.body.toString(), url, site);
|
||||||
|
@ -227,7 +226,7 @@ async function fetchProfile({ name: actorName }, context, include, page = 1, sou
|
||||||
|
|
||||||
const url = sources[source];
|
const url = sources[source];
|
||||||
|
|
||||||
const res = await bhttp.get(url, {
|
const res = await http.get(url, {
|
||||||
followRedirects: false,
|
followRedirects: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -235,7 +234,7 @@ async function fetchProfile({ name: actorName }, context, include, page = 1, sou
|
||||||
const actorUrl = scrapeModels(res.body.toString(), actorName);
|
const actorUrl = scrapeModels(res.body.toString(), actorName);
|
||||||
|
|
||||||
if (actorUrl) {
|
if (actorUrl) {
|
||||||
const actorRes = await bhttp.get(actorUrl);
|
const actorRes = await http.get(actorUrl);
|
||||||
|
|
||||||
if (actorRes.statusCode === 200) {
|
if (actorRes.statusCode === 200) {
|
||||||
return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
|
return scrapeProfile(actorRes.body.toString(), actorUrl, include.scenes);
|
||||||
|
|
|
@ -1,180 +0,0 @@
|
||||||
'use strict';
|
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
|
||||||
const moment = require('moment');
|
|
||||||
|
|
||||||
/**
 * Builds a title from the second-to-last segment of a URL path by turning
 * underscores into spaces and upper-casing the first letter of each word.
 *
 * @param {string} pathname - URL path, e.g. `/view/123/some_scene_title/`.
 * @returns {string} Title derived from the path.
 */
function extractTitle(pathname) {
  const segments = pathname.split('/');
  // second-to-last segment, or the only segment when the path holds no slash
  const slug = segments[Math.max(segments.length - 2, 0)];

  const words = [];

  for (const word of slug.split('_')) {
    words.push(word.charAt(0).toUpperCase() + word.slice(1));
  }

  return words.join(' ');
}
|
|
||||||
|
|
||||||
/**
 * Splits an actor listing on commas and the word "and".
 *
 * Entries containing an ellipsis ("...") and entries that are empty after
 * trimming are left out.
 *
 * @param {string} str - Actor listing, e.g. `Jane Doe, Mary Sue and Ann Lee`.
 * @returns {string[]} Trimmed actor names.
 */
function extractActors(str) {
  const actors = [];

  for (const part of str.split(/,|\band\b/ig)) {
    const name = part.trim();

    if (name.length > 0 && !/\.{3}/.test(part)) {
      actors.push(name);
    }
  }

  return actors;
}
|
|
||||||
|
|
||||||
/**
 * Scrapes the updates list into release objects.
 *
 * @param {string} html - Markup containing the `#updatesList` items.
 * @param {Object} site - Site entity; stored on every release as-is.
 * @returns {Object[]} One release per list item, with entryId, url, title,
 *   date, poster, photos and actors.
 */
function scrapeLatest(html, site) {
  const { document } = new JSDOM(html).window;

  const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

  return scenes.map((scene) => {
    const release = { site };

    const link = scene.querySelector('.info a');
    const poster = scene.querySelector('img');
    // read the href explicitly instead of relying on the element being stringified by URL()
    const { pathname } = new URL(link.href);

    [release.entryId] = poster.id.match(/\d+/);

    release.url = `https://www.teamskeet.com${pathname}`;
    release.title = extractTitle(pathname);

    release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

    // derive 01.jpg through 05.jpg from the numbered file name of the lazy-loaded image;
    // the dot is escaped so only a literal ".jpg" extension is replaced
    const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
    [release.poster] = photos;
    release.photos = photos.slice(1);

    const actors = scene.querySelector('div span[rel="test"]').textContent;
    release.actors = extractActors(actors);

    return release;
  });
}
|
|
||||||
|
|
||||||
/**
 * Scrapes a scene page into a release object.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site entity; stored on the release as-is.
 * @param {string} url - Scene URL; stored on the release.
 * @returns {Object} Release with entryId, description, url, title, actors,
 *   channel, tags, date and photos, plus poster and trailer when present.
 */
function scrapeScene(html, site, url) {
  const { document } = new JSDOM(html).window;
  const release = { site };

  release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
  release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

  // the page title is split on "|" into actors, scene title and channel
  const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

  release.url = url;
  release.title = title;
  release.actors = extractActors(actors);
  release.channel = channel.toLowerCase();
  release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

  // the date follows the first colon of the matched element's text
  const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
  release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

  const { poster } = document.querySelector('video');
  // posters whose URL contains "gen" are skipped; the "hi" variant is listed first, the original second
  if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

  // site ID is the file name of the logo image without its .jpg extension
  const [, siteId] = document.querySelector('#story-and-tags img').src.match(/(\w+)\.jpg/);
  const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

  release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

  // the trailer link may be absent; optional chaining keeps the guard below reachable
  const trailer = document.querySelector('div.right.gray a')?.href;
  if (trailer) release.trailer = { src: trailer };

  return release;
}
|
|
||||||
|
|
||||||
/**
 * Scrapes a release either from a list item element or from a full scene page.
 *
 * @param {string|null} html - Scene page markup; only parsed when `sceneX` is not given.
 * @param {Object} site - Site entity; stored on the release as-is.
 * @param {Element} [sceneX] - List item element to scrape instead of `html`.
 * @param {string} [url] - Scene URL; used when scraping a full page.
 * @returns {Object} Release with description, date, actors, duration, url,
 *   entryId, title, poster and photos.
 */
function scrapeSceneA(html, site, sceneX, url) {
  const scene = sceneX || new JSDOM(html).window.document;
  const text = selector => scene.querySelector(selector).textContent;

  const release = { site };

  release.description = text('.scene-story').replace('...read more', '...').trim();

  release.date = moment.utc(text('.scene-date'), 'MM/DD/YYYY').toDate();
  release.actors = [...scene.querySelectorAll('.starring span')].flatMap(el => extractActors(el.textContent));

  // prepend an hours part and keep the last three parts, so the string is always hh:mm:ss
  const timeParts = ['00', ...text('.time').trim().split(':')];
  release.duration = moment.duration(timeParts.slice(-3).join(':')).asSeconds();

  if (sceneX) {
    const titleEl = scene.querySelector(':scope > a');

    release.url = titleEl.href;
    release.entryId = titleEl.id; // replaced further down by the ID taken from the URL path
    release.title = titleEl.title;

    const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
    release.poster = [poster.replace('bio_big', 'video'), poster];
    release.photos = photos;
  } else {
    release.title = text('.title span');
    release.url = url;

    release.poster = scene.querySelector('video').poster;
    release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
  }

  // the ID is the first path segment, or the second one when the path starts with /scenes
  const pathParts = new URL(release.url).pathname.split('/');
  release.entryId = pathParts[1] === 'scenes' ? pathParts[2] : pathParts[1];

  return release;
}
|
|
||||||
|
|
||||||
/**
 * Scrapes every `.scenewrapper` element of a scenes page into a release.
 *
 * @param {string} html - Markup of the scenes page.
 * @param {Object} site - Site entity passed on to scrapeSceneA.
 * @returns {Object[]} One release per scene wrapper.
 */
function scrapeLatestA(html, site) {
  const dom = new JSDOM(html);
  const wrappers = dom.window.document.querySelectorAll('.scenewrapper');

  return Array.from(wrappers, wrapper => scrapeSceneA(null, site, wrapper));
}
|
|
||||||
|
|
||||||
/**
 * Fetches a page of updates from the teamskeet.com updates endpoint,
 * filtered by the site ID in `site.parameters.id`.
 *
 * @param {Object} site - Site entity; `parameters.id` is used as the site filter.
 * @param {number} [page=1] - Page number passed to the endpoint.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the request
 *   does not return status 200.
 */
async function fetchLatestTeamSkeet(site, page = 1) {
  const res = await bhttp.get(`https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`);

  return res.statusCode === 200
    ? scrapeLatest(res.body.toString(), site)
    : null;
}
|
|
||||||
|
|
||||||
/**
 * Fetches the `/scenes` page of a site and scrapes it with scrapeLatestA.
 *
 * NOTE(review): fetchLatest calls this with a page argument, but no page
 * parameter is accepted here, so the same URL is requested for every page.
 *
 * @param {Object} site - Site entity; `url` is used as the base URL.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the request
 *   does not return status 200.
 */
async function fetchLatestA(site) {
  const res = await bhttp.get(`${site.url}/scenes`);

  if (res.statusCode !== 200) {
    return null;
  }

  return scrapeLatestA(res.body.toString(), site);
}
|
|
||||||
|
|
||||||
/**
 * Fetches the latest releases with the fetcher selected by the site parameters.
 *
 * A site with `parameters.id` uses the TeamSkeet updates endpoint; a site with
 * `parameters.scraper === 'A'` uses its own scenes page.
 *
 * @param {Object} site - Site entity; `parameters` may be absent.
 * @param {number} [page=1] - Page number passed on to the fetcher.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when no fetcher
 *   applies or the request fails.
 */
async function fetchLatest(site, page = 1) {
  // parameters are optional, as in fetchScene; without them no fetcher applies
  if (site.parameters?.id) {
    return fetchLatestTeamSkeet(site, page);
  }

  if (site.parameters?.scraper === 'A') {
    return fetchLatestA(site, page);
  }

  return null;
}
|
|
||||||
|
|
||||||
/**
 * Fetches a scene page and scrapes it with the scraper selected by
 * `site.parameters.scraper`.
 *
 * @param {string} url - Scene URL.
 * @param {Object} site - Site entity; `parameters` may be absent.
 * @returns {Promise<Object>} Scraped release.
 */
async function fetchScene(url, site) {
  // the request goes through a session to resolve redirects
  const res = await bhttp.session().get(url);

  const scrape = site.parameters?.scraper === 'A'
    ? html => scrapeSceneA(html, site, null, url)
    : html => scrapeScene(html, site, url);

  return scrape(res.body.toString());
}
|
|
||||||
|
|
||||||
module.exports = {
|
|
||||||
fetchLatest,
|
|
||||||
fetchScene,
|
|
||||||
};
|
|
|
@ -1,10 +1,9 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/* eslint-disable no-unused-vars */
|
/* eslint-disable no-unused-vars */
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
|
|
||||||
const { get, ed } = require('../utils/q');
|
const { get, ed } = require('../utils/q');
|
||||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||||
|
const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeLatestNative(scenes, site) {
|
function scrapeLatestNative(scenes, site) {
|
||||||
|
@ -72,7 +71,7 @@ async function fetchLatestNative(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
|
const apiUrl = `${site.url}/videos/api/?limit=50&offset=${(page - 1) * 50}&sort=datedesc`;
|
||||||
const res = await bhttp.get(apiUrl, {
|
const res = await http.get(apiUrl, {
|
||||||
decodeJSON: true,
|
decodeJSON: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -107,7 +106,7 @@ async function fetchSceneWrapper(url, site, release) {
|
||||||
if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
|
if (scene.date - new Date(site.parameters?.lastNative) <= 0) {
|
||||||
// scene is probably still available on Vivid site, use search API to get URL and original date
|
// scene is probably still available on Vivid site, use search API to get URL and original date
|
||||||
const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURI(scene.title)}`;
|
const searchUrl = `${site.url}/videos/api/?limit=10&sort=datedesc&search=${encodeURI(scene.title)}`;
|
||||||
const searchRes = await bhttp.get(searchUrl, {
|
const searchRes = await http.get(searchUrl, {
|
||||||
decodeJSON: true,
|
decodeJSON: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const { ex, ctxa } = require('../utils/q');
|
|
||||||
// const slugify = require('../utils/slugify');
|
// const slugify = require('../utils/slugify');
|
||||||
|
const { ex, ctxa } = require('../utils/q');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
function getLicenseCode(html) {
|
function getLicenseCode(html) {
|
||||||
const licensePrefix = 'license_code: \'';
|
const licensePrefix = 'license_code: \'';
|
||||||
|
@ -178,7 +178,7 @@ function scrapeScene(html, url) {
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
const url = `https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`;
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
|
@ -188,7 +188,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url) {
|
async function fetchScene(url) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeScene(res.body.toString(), url);
|
return scrapeScene(res.body.toString(), url);
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('bhttp');
|
|
||||||
const { JSDOM } = require('jsdom');
|
const { JSDOM } = require('jsdom');
|
||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
|
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
function scrapeLatest(html, site) {
|
function scrapeLatest(html, site) {
|
||||||
const { document } = new JSDOM(html).window;
|
const { document } = new JSDOM(html).window;
|
||||||
const { origin } = new URL(site.url);
|
const { origin } = new URL(site.url);
|
||||||
|
@ -112,7 +113,7 @@ function scrapeScene(html, site, url) {
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = `${site.url}?page=${page}`;
|
const url = `${site.url}?page=${page}`;
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeLatest(res.body.toString(), site);
|
return scrapeLatest(res.body.toString(), site);
|
||||||
|
@ -122,7 +123,7 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
if (res.statusCode === 200) {
|
if (res.statusCode === 200) {
|
||||||
return scrapeScene(res.body.toString(), site, url);
|
return scrapeScene(res.body.toString(), site, url);
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const bhttp = require('@thependulum/bhttp');
|
|
||||||
|
|
||||||
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
|
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
|
||||||
|
const http = require('../utils/http');
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const res = await http.get(url);
|
||||||
|
|
||||||
const release = await scrapeScene(res.body.toString(), url, site);
|
const release = await scrapeScene(res.body.toString(), url, site);
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const config = require('config');
|
const config = require('config');
|
||||||
const bhttp = require('bhttp');
|
const bhttp = require('@thependulum/bhttp');
|
||||||
const util = require('util');
|
const util = require('util');
|
||||||
const stream = require('stream');
|
const stream = require('stream');
|
||||||
const tunnel = require('tunnel');
|
const tunnel = require('tunnel');
|
||||||
|
@ -168,4 +168,5 @@ module.exports = {
|
||||||
put,
|
put,
|
||||||
patch,
|
patch,
|
||||||
session: getSession,
|
session: getSession,
|
||||||
|
getSession,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue