Added Men network and Icon Male to MindGeek. Added entropy filter to media module to help filter out generic avatars. Added Pure Taboo. Various logo updates.
This commit is contained in:
14
src/media.js
14
src/media.js
@@ -36,6 +36,12 @@ function pluckPhotos(photos, specifiedLimit) {
|
||||
return Array.from(new Set(plucked)).map(photoIndex => photos[photoIndex - 1]); // remove duplicates, may happen when photo total and photo limit are close
|
||||
}
|
||||
|
||||
/**
 * Measure the entropy of an image.
 *
 * Resolves with the `entropy` value that sharp's `stats()` reports for the
 * given image data.
 *
 * @param {Buffer} buffer - raw image data as accepted by sharp
 * @returns {Promise<number>} entropy reported by sharp
 */
async function getEntropy(buffer) {
  const stats = await sharp(buffer).stats();

  return stats.entropy;
}
|
||||
|
||||
async function createThumbnail(buffer) {
|
||||
try {
|
||||
const thumbnail = sharp(buffer)
|
||||
@@ -108,19 +114,20 @@ async function fetchPhoto(photoUrl, index, label, attempt = 1) {
|
||||
|
||||
try {
|
||||
const { pathname } = new URL(photoUrl);
|
||||
const mimetype = mime.getType(pathname);
|
||||
|
||||
const res = await bhttp.get(photoUrl);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const mimetype = mime.getType(pathname);
|
||||
const extension = mime.getExtension(mimetype);
|
||||
const hash = getHash(res.body);
|
||||
const entropy = await getEntropy(res.body);
|
||||
|
||||
return {
|
||||
photo: res.body,
|
||||
mimetype,
|
||||
extension,
|
||||
hash,
|
||||
entropy,
|
||||
source: photoUrl,
|
||||
};
|
||||
}
|
||||
@@ -181,6 +188,7 @@ async function storePhotos(photos, {
|
||||
targetId,
|
||||
subpath,
|
||||
primaryRole, // role to assign to first photo if not already in database, used mainly for avatars
|
||||
entropy = 2.5, // filter out fallback avatars and other generic clipart
|
||||
}, label) {
|
||||
if (!photos || photos.length === 0) {
|
||||
logger.info(`No ${role}s available for ${label}`);
|
||||
@@ -192,7 +200,7 @@ async function storePhotos(photos, {
|
||||
|
||||
const metaFiles = await Promise.map(sourceOriginals, async (photoUrl, index) => fetchPhoto(photoUrl, index, label), {
|
||||
concurrency: 10,
|
||||
}).filter(photo => photo);
|
||||
}).filter(photo => photo && photo.entropy > entropy);
|
||||
|
||||
const metaFilesByHash = metaFiles.reduce((acc, photo) => ({ ...acc, [photo.hash]: photo }), {}); // pre-filter hash duplicates within set; may occur through fallbacks
|
||||
const [hashDuplicates, hashOriginals] = await findDuplicates(Object.values(metaFilesByHash), 'hash', 'hash', label);
|
||||
|
||||
@@ -79,7 +79,7 @@ async function deepFetchReleases(baseReleases) {
|
||||
deep: true,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error(error.message);
|
||||
logger.error(error.stack);
|
||||
|
||||
return {
|
||||
...release,
|
||||
|
||||
@@ -139,8 +139,55 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Extract the Algolia API credentials embedded in a site page.
 *
 * The page is searched for the first line mentioning `apiKey`; the text
 * between the first `{` and the first `};` on that line is parsed as JSON and
 * is expected to hold `api.algolia.applicationID` and `api.algolia.apiKey`.
 *
 * @param {string} referer - URL of the page that embeds the credentials
 * @returns {Promise<{appId: string, apiKey: string, userAgent: string, apiUrl: string}>}
 *   the credentials plus a ready-to-use multi-query endpoint URL
 * @throws {Error} when the page does not contain a usable credentials object
 * @throws {SyntaxError} when the embedded object is not valid JSON
 */
async function fetchApiCredentials(referer) {
  const res = await bhttp.get(referer);
  const body = res.body.toString();

  const apiLine = body.split('\n').find(bodyLine => bodyLine.includes('apiKey'));

  if (!apiLine) {
    throw new Error(`Could not find API credentials on ${referer}`);
  }

  const serialStart = apiLine.indexOf('{');
  const serialEnd = apiLine.indexOf('};');

  if (serialStart === -1 || serialEnd < serialStart) {
    throw new Error(`Could not isolate API credentials on ${referer}`);
  }

  // keep the closing brace, drop the trailing semicolon
  const apiData = JSON.parse(apiLine.slice(serialStart, serialEnd + 1));
  const algolia = apiData.api && apiData.api.algolia;

  if (!algolia || !algolia.applicationID || !algolia.apiKey) {
    throw new Error(`Incomplete API credentials on ${referer}`);
  }

  const { applicationID: appId, apiKey } = algolia;
  const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';

  // the user agent contains spaces and semicolons, so query values must be percent-encoded
  const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${encodeURIComponent(userAgent)}&x-algolia-application-id=${encodeURIComponent(appId)}&x-algolia-api-key=${encodeURIComponent(apiKey)}`;

  return {
    appId,
    apiKey,
    userAgent,
    apiUrl,
  };
}
|
||||
|
||||
/**
 * Query the Algolia scene index behind a site's video listing.
 *
 * Work in progress: the referer and the raw query results are only logged
 * and nothing is returned yet.
 *
 * @param {Object} site - site entry; `site.url` is used to build the referer
 * @param {number} [page=1] - 1-based page number (sent to the API as page - 1)
 * @param {boolean} [upcoming=false] - query upcoming instead of released scenes
 * @returns {Promise<undefined>}
 */
async function fetchLatest(site, page = 1, upcoming = false) {
  const referer = `${site.url}/en/videos`;
  const credentials = await fetchApiCredentials(referer);

  console.log(referer);

  const sceneQuery = {
    indexName: 'all_scenes',
    params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
  };

  const requestOptions = {
    headers: {
      Referer: referer,
    },
    encodeJSON: true,
  };

  const res = await bhttp.post(credentials.apiUrl, { requests: [sceneQuery] }, requestOptions);

  console.log(res.body.results);

  // TODO: scraping of res.body.results[0].hits is not wired up yet
}
|
||||
|
||||
module.exports = {
|
||||
getPhotos,
|
||||
fetchProfile,
|
||||
scrapeProfile,
|
||||
fetchLatest,
|
||||
};
|
||||
|
||||
11
src/scrapers/iconmale.js
Normal file
11
src/scrapers/iconmale.js
Normal file
@@ -0,0 +1,11 @@
|
||||
'use strict';
|
||||
|
||||
const { fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared MindGeek scraper, using the
 * 'iconmale' network name.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the MindGeek `fetchProfile` resolves with
 */
async function networkFetchProfile(actorName) {
  const networkName = 'iconmale';

  return fetchProfile(actorName, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
13
src/scrapers/men.js
Normal file
13
src/scrapers/men.js
Normal file
@@ -0,0 +1,13 @@
|
||||
'use strict';
|
||||
|
||||
const { fetchScene, fetchLatest, fetchProfile } = require('./mindgeek');
|
||||
|
||||
/**
 * Fetch an actor profile through the shared MindGeek scraper, using the
 * 'men' network name.
 *
 * @param {string} actorName - name of the actor to look up
 * @returns {Promise<*>} whatever the MindGeek `fetchProfile` resolves with
 */
async function networkFetchProfile(actorName) {
  const networkName = 'men';

  return fetchProfile(actorName, networkName);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile: networkFetchProfile,
|
||||
};
|
||||
@@ -27,7 +27,7 @@ function getThumbs(scene) {
|
||||
|
||||
function scrapeLatestX(data, site) {
|
||||
const { id: entryId, title, description } = data;
|
||||
const hostname = site.parameters?.classic ? site.url : site.network.url;
|
||||
const hostname = site.parameters?.native ? site.url : site.network.url;
|
||||
const url = `${hostname}/scene/${entryId}/`;
|
||||
const date = new Date(data.dateReleased);
|
||||
const actors = data.actors.map(actor => actor.name);
|
||||
@@ -100,7 +100,12 @@ function getUrl(site) {
|
||||
return site.url;
|
||||
}
|
||||
|
||||
if (site.parameters && site.parameters.siteId) {
|
||||
if (site.parameters?.native) {
|
||||
return `${site.url}/scenes`;
|
||||
}
|
||||
|
||||
|
||||
if (site.parameters?.siteId) {
|
||||
return `${site.network.url}/scenes?site=${site.parameters.siteId}`;
|
||||
}
|
||||
|
||||
@@ -161,14 +166,23 @@ async function fetchLatest(site, page = 1) {
|
||||
|
||||
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
|
||||
const limit = 10;
|
||||
const apiUrl = `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
|
||||
const apiUrl = site.parameters?.native
|
||||
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
|
||||
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
|
||||
|
||||
const res = await session.get(apiUrl, {
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
Origin: site.url,
|
||||
Referer: url,
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeLatest(res.body.result, site);
|
||||
if (res.statusCode === 200 && res.body.result) {
|
||||
return scrapeLatest(res.body.result, site);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
@@ -181,7 +195,11 @@ async function fetchScene(url, site) {
|
||||
},
|
||||
});
|
||||
|
||||
return scrapeScene(res.body.result, url, site);
|
||||
if (res.statusCode === 200 && res.body.result) {
|
||||
return scrapeScene(res.body.result, url, site);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, networkName, actorPath = 'model') {
|
||||
|
||||
103
src/scrapers/puretaboo.js
Normal file
103
src/scrapers/puretaboo.js
Normal file
@@ -0,0 +1,103 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const moment = require('moment');
|
||||
|
||||
// const { fetchLatest } = require('./gamma');
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
const { getPhotos } = require('./gamma');
|
||||
|
||||
/**
 * Turn a scene listing page into a list of release stubs.
 *
 * Every `.sceneList .sceneContainer` element yields one release with its
 * entry ID, title, URL, actors, poster and trailer.
 *
 * @param {string} html - markup of the listing page
 * @returns {Object[]} one release object per scene container
 */
function scrape(html) {
  const { document } = ex(html);
  const sceneContexts = ctxa(document, '.sceneList .sceneContainer');

  return sceneContexts.map(({ q, qa }) => {
    const linkEl = q('a.imgLink');
    const entryId = linkEl.dataset.id;

    return {
      entryId,
      title: linkEl.title,
      url: `https://www.puretaboo.com${linkEl.href}`,
      actors: qa('.sceneActors a', true), // presumably the link texts; verify against utils/q
      poster: q('.imgLink img').dataset.original,
      // trailer location is derived from the entry ID
      trailer: `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`,
    };
  });
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * Three data sources embedded in the page are combined, with the DOM as a
 * last resort:
 *  - `data`: the `sceneDetails` entry of the inline `dataLayer = [...]` script
 *  - `data2`: the second entry of the `application/ld+json` script
 *  - `videoData`: the inline `window.ScenePlayerOptions = {...}` object
 *
 * @param {string} html - markup of the scene page
 * @param {string} url - URL of the scene page; its last path segment is the final entry ID fallback
 * @param {Object} site - site entry, passed through to `getPhotos`
 * @returns {Promise<Object>} the scraped release
 * @throws {SyntaxError} when one of the embedded JSON blobs cannot be parsed
 */
async function scrapeScene(html, url, site) {
  const release = {};

  const { q, qa, qd, qm } = ex(html);
  const { pathname } = new URL(url);

  const dataPrefix = 'dataLayer = [';
  const dataStart = html.indexOf(dataPrefix);
  const dataString = html.slice(dataStart + dataPrefix.length, html.indexOf('];', dataStart));
  const { sceneDetails: data } = JSON.parse(dataString);

  const dataString2 = q('script[type="application/ld+json"]', true);
  const data2 = JSON.parse(dataString2)[1];

  const videoPrefix = 'window.ScenePlayerOptions = ';
  const videoStart = html.indexOf(videoPrefix);
  // + 1 keeps the closing brace and drops the trailing semicolon
  const videoString = html.slice(videoStart + videoPrefix.length, html.indexOf('};', videoStart) + 1);
  const videoData = JSON.parse(videoString);

  release.entryId = data.sceneId || videoData.playerOptions.sceneInfos.sceneId || pathname.slice(pathname.lastIndexOf('/') + 1);
  release.url = url;
  release.title = data.sceneTitle || videoData.playerOptions.sceneInfos.sceneTitle || qm('meta[name="twitter:title"]') || q('.sceneTitle', true);
  release.description = data.sceneDescription || data2.description || q('.sceneDesc', true).replace('Video Description:', '');

  release.duration = moment.duration(data2.duration.slice(2)).asSeconds();

  // data2.dateCreated is the date the scene was first added to 'upcoming'
  release.date = videoData.playerOptions.sceneInfos.sceneReleaseDate
    ? moment.utc(videoData.playerOptions.sceneInfos.sceneReleaseDate, 'YYYY-MM-DD').toDate()
    : qd('.updatedDate', 'MM-DD-YYYY');

  release.actors = data.sceneActors?.map(actor => actor.actorName)
    || data2.actor?.map(actor => actor.name)
    || qa('.sceneColActors a', true);

  // split() always returns a truthy array, so guard the keywords themselves
  // to make the DOM fallback reachable
  release.tags = data2.keywords?.split(', ') || qa('.sceneColCategories a', 'title');

  release.poster = videoData.picPreview;
  release.trailer = {
    src: `${videoData.playerOptions.host}${videoData.url}`,
  };

  // not every scene links to a picture set; skip photos rather than fail the whole scene
  const photosHref = q('.picturesItem a')?.href;

  if (photosHref) {
    release.photos = await getPhotos(photosHref, 'puretaboo.com', site);
  }

  return release;
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes for a site.
 *
 * @param {Object} site - site entry; `site.url` is used as the base URL
 * @param {number} [page=1] - page number, inserted into the listing URL as-is
 * @returns {Promise<Object[]|null>} scraped releases, or null when the page
 *   did not come back with HTTP 200
 */
async function fetchLatest(site, page = 1) {
  const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`);

  if (res.statusCode !== 200) {
    return null;
  }

  const html = res.body.toString();

  return scrape(html, site);
}
|
||||
|
||||
/**
 * Fetch the first page of upcoming scenes for a site.
 *
 * @param {Object} site - site entry; `site.url` is used as the base URL
 * @returns {Promise<Object[]|null>} scraped releases, or null when the page
 *   did not come back with HTTP 200
 */
async function fetchUpcoming(site) {
  const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);

  // same guard as fetchLatest: never try to scrape an error page
  if (res.statusCode === 200) {
    return scrape(res.body.toString(), site);
  }

  return null;
}
|
||||
|
||||
/**
 * Fetch and scrape a single scene page.
 *
 * @param {string} url - URL of the scene page
 * @param {Object} site - site entry, passed through to `scrapeScene`
 * @returns {Promise<Object|null>} the scraped release, or null when the page
 *   did not come back with HTTP 200
 */
async function fetchScene(url, site) {
  const res = await bhttp.get(url);

  // an error page lacks the embedded scene data, which would make scrapeScene throw
  if (res.statusCode === 200) {
    return scrapeScene(res.body.toString(), url, site);
  }

  return null;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
};
|
||||
@@ -15,6 +15,8 @@ const perfectgonzo = require('./perfectgonzo');
|
||||
const pervcity = require('./pervcity');
|
||||
const pornpros = require('./pornpros');
|
||||
const privateNetwork = require('./private'); // reserved keyword
|
||||
const puretaboo = require('./puretaboo');
|
||||
const mindgeek = require('./mindgeek');
|
||||
const naughtyamerica = require('./naughtyamerica');
|
||||
const realitykings = require('./realitykings');
|
||||
const teamskeet = require('./teamskeet');
|
||||
@@ -25,11 +27,12 @@ const vogov = require('./vogov');
|
||||
const blowpass = require('./blowpass');
|
||||
const brazzers = require('./brazzers');
|
||||
const ddfnetwork = require('./ddfnetwork');
|
||||
const metrohd = require('./metrohd');
|
||||
const evilangel = require('./evilangel');
|
||||
const julesjordan = require('./julesjordan');
|
||||
const kellymadison = require('./kellymadison');
|
||||
const legalporno = require('./legalporno');
|
||||
const men = require('./men');
|
||||
const metrohd = require('./metrohd');
|
||||
const mofos = require('./mofos');
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const xempire = require('./xempire');
|
||||
@@ -38,6 +41,7 @@ const xempire = require('./xempire');
|
||||
const boobpedia = require('./boobpedia');
|
||||
const freeones = require('./freeones');
|
||||
const freeonesLegacy = require('./freeones_legacy');
|
||||
const iconmale = require('./iconmale');
|
||||
const pornhub = require('./pornhub');
|
||||
|
||||
module.exports = {
|
||||
@@ -60,13 +64,16 @@ module.exports = {
|
||||
kellymadison,
|
||||
kink,
|
||||
legalporno,
|
||||
men,
|
||||
mikeadriano,
|
||||
milehighmedia,
|
||||
mindgeek,
|
||||
mofos,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pornpros,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
naughtyamerica,
|
||||
realitykings,
|
||||
teamskeet,
|
||||
@@ -78,8 +85,6 @@ module.exports = {
|
||||
// ordered by data priority
|
||||
'21sextury': twentyonesextury,
|
||||
evilangel,
|
||||
xempire,
|
||||
blowpass,
|
||||
mofos,
|
||||
realitykings,
|
||||
digitalplayground,
|
||||
@@ -87,14 +92,18 @@ module.exports = {
|
||||
babes,
|
||||
metrohd,
|
||||
milehighmedia,
|
||||
iconmale,
|
||||
men,
|
||||
xempire,
|
||||
blowpass,
|
||||
julesjordan,
|
||||
brazzers,
|
||||
boobpedia,
|
||||
legalporno,
|
||||
kellymadison,
|
||||
pornhub,
|
||||
freeones,
|
||||
freeonesLegacy,
|
||||
kellymadison,
|
||||
ddfnetwork,
|
||||
},
|
||||
};
|
||||
|
||||
18
src/utils/img.js
Normal file
18
src/utils/img.js
Normal file
@@ -0,0 +1,18 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const sharp = require('sharp');
|
||||
const { argv } = require('yargs');
|
||||
|
||||
const url = argv.url || 'http://localhost:5000/media/actors/tommy-pistol/1580341442712.jpeg';
|
||||
|
||||
/**
 * Fetch the image at the module-level `url` and print the statistics sharp
 * computes for it.
 *
 * @returns {Promise<void>}
 */
async function scan() {
  console.log(url);

  const res = await bhttp.get(url);
  const stats = await sharp(res.body).stats();

  console.log(stats);
}

// report failures (unreachable URL, undecodable image) instead of leaving the rejection unhandled
scan().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user