forked from DebaucheryLibrarian/traxxx
Added Wicked network. Merged Evil Angel, XEmpire and Wicked into generic Gamma scraper.
This commit is contained in:
parent
37ab07356e
commit
94bf207397
|
@ -4,8 +4,6 @@ import { curateRelease } from '../curate';
|
|||
|
||||
function initReleasesActions(store, _router) {
|
||||
async function fetchReleases({ _commit }, { limit = 100 }) {
|
||||
console.log(store.state.ui.filter, store.getters.after, store.getters.before);
|
||||
|
||||
const { releases } = await graphql(`
|
||||
query Releases(
|
||||
$limit:Int = 1000,
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 1.0 KiB |
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
|
@ -191,6 +191,12 @@ const networks = [
|
|||
url: 'https://www.vogov.com',
|
||||
description: 'Fantastic collection of exclusive porn movies with the most beautiful porn models in leading roles saisfies the most picky visitor of the site.',
|
||||
},
|
||||
{
|
||||
slug: 'wicked',
|
||||
name: 'Wicked',
|
||||
url: 'https://www.wicked.com',
|
||||
description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos',
|
||||
},
|
||||
{
|
||||
slug: 'xempire',
|
||||
name: 'XEmpire',
|
||||
|
|
|
@ -1217,7 +1217,7 @@ function getSites(networksMap) {
|
|||
{
|
||||
slug: 'evilangel',
|
||||
name: 'Evil Angel',
|
||||
url: 'https://evilangel.com',
|
||||
url: 'https://www.evilangel.com',
|
||||
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
|
||||
parameters: JSON.stringify({ independent: true }),
|
||||
network_id: networksMap.evilangel,
|
||||
|
@ -3340,6 +3340,15 @@ function getSites(networksMap) {
|
|||
description: 'Top rated models. Graceful locations. Best gonzo scenes. 4K UHD 60 FPS. So, in general Vogov is a website that is worth visiting and exploring carefully. It gives a chance to spend a fantastic night with gorgeous girls ready to experiment and to full around with their lovers.',
|
||||
network_id: networksMap.vogov,
|
||||
},
|
||||
// WICKED
|
||||
{
|
||||
slug: 'wicked',
|
||||
name: 'Wicked',
|
||||
url: 'https://www.wicked.com',
|
||||
description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos',
|
||||
parameters: JSON.stringify({ independent: true }),
|
||||
network_id: networksMap.wicked,
|
||||
},
|
||||
// XEMPIRE
|
||||
{
|
||||
slug: 'hardx',
|
||||
|
|
|
@ -589,6 +589,7 @@ function getTags(groupsMap) {
|
|||
name: 'MILF',
|
||||
slug: 'milf',
|
||||
alias_for: null,
|
||||
priority: 7,
|
||||
group_id: groupsMap.age,
|
||||
},
|
||||
{
|
||||
|
@ -1578,6 +1579,10 @@ function getTagAliases(tagsMap) {
|
|||
name: 'trans',
|
||||
alias_for: tagsMap.transsexual,
|
||||
},
|
||||
{
|
||||
name: 'transgender',
|
||||
alias_for: tagsMap.transsexual,
|
||||
},
|
||||
{
|
||||
name: 'trimmed pussy',
|
||||
alias_for: tagsMap.trimmed,
|
||||
|
|
|
@ -342,7 +342,7 @@ async function scrapeActors(actorNames) {
|
|||
|
||||
const profiles = await Promise.map(sources, async ([scraperSlug, scraper]) => {
|
||||
try {
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName);
|
||||
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug);
|
||||
|
||||
return {
|
||||
...profile,
|
||||
|
|
16
src/argv.js
16
src/argv.js
|
@ -35,6 +35,12 @@ const { argv } = yargs
|
|||
alias: 'with-scenes',
|
||||
default: false,
|
||||
})
|
||||
.option('with-profiles', {
|
||||
describe: 'Scrape profiles for new actors after fetching scenes',
|
||||
type: 'boolean',
|
||||
alias: 'with-actors',
|
||||
default: true,
|
||||
})
|
||||
.option('scene', {
|
||||
describe: 'Scrape scene info from URL',
|
||||
type: 'array',
|
||||
|
@ -55,6 +61,16 @@ const { argv } = yargs
|
|||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('latest', {
|
||||
describe: 'Scrape latest releases if available',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('upcoming', {
|
||||
describe: 'Scrape upcoming releases if available',
|
||||
type: 'boolean',
|
||||
default: true,
|
||||
})
|
||||
.option('redownload', {
|
||||
describe: 'Don\'t ignore duplicates, update existing entries',
|
||||
type: 'boolean',
|
||||
|
|
|
@ -425,7 +425,9 @@ async function storeReleases(releases) {
|
|||
storeReleaseAssets(storedReleases),
|
||||
]);
|
||||
|
||||
await scrapeBasicActors();
|
||||
if (argv.withProfiles) {
|
||||
await scrapeBasicActors();
|
||||
}
|
||||
|
||||
return {
|
||||
releases: storedReleases,
|
||||
|
|
|
@ -65,8 +65,12 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
|
|||
: await scraper.fetchMovie(url, site, release);
|
||||
|
||||
return {
|
||||
url,
|
||||
...scrapedRelease,
|
||||
...release,
|
||||
...(scrapedRelease && release?.tags && {
|
||||
tags: release.tags.concat(scrapedRelease.tags),
|
||||
}),
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
|
|
@ -30,6 +30,10 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
|
|||
}
|
||||
|
||||
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
|
||||
if (!argv.latest || !scraper.fetchLatest) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const latestReleases = await scraper.fetchLatest(site, page);
|
||||
|
||||
if (latestReleases.length === 0) {
|
||||
|
@ -58,7 +62,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
|
|||
}
|
||||
|
||||
async function scrapeUpcomingReleases(scraper, site) {
|
||||
if (scraper.fetchUpcoming) {
|
||||
if (argv.upcoming && scraper.fetchUpcoming) {
|
||||
const upcomingReleases = await scraper.fetchUpcoming(site);
|
||||
|
||||
return upcomingReleases.map(release => ({ ...release, upcoming: true }));
|
||||
|
@ -100,7 +104,9 @@ async function scrapeSiteReleases(scraper, site) {
|
|||
scrapeUpcomingReleases(scraper, site), // fetch basic release info from upcoming overview
|
||||
]);
|
||||
|
||||
logger.info(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);
|
||||
if (argv.upcoming) {
|
||||
logger.info(`${site.name}: ${argv.latest ? 'Found' : 'Ignoring'} ${newReleases.length || ''}latest releases, ${argv.upcoming ? '' : 'ignoring '}${upcomingReleases.length || ''} upcoming releases`);
|
||||
}
|
||||
|
||||
const baseReleases = [...newReleases, ...upcomingReleases];
|
||||
|
||||
|
|
|
@ -1,235 +1,10 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const { getPhotos } = require('./gamma');
|
||||
|
||||
async function scrape(json, site) {
|
||||
return Promise.all(json.map(async (scene) => {
|
||||
const {
|
||||
title,
|
||||
description,
|
||||
length,
|
||||
master_categories: tags,
|
||||
ratings_up: likes,
|
||||
ratings_down: dislikes,
|
||||
} = scene;
|
||||
|
||||
const entryId = scene.clip_id;
|
||||
const url = `https://evilangel.com/en/video/${scene.url_title}/${entryId}`;
|
||||
const date = moment(scene.release_date, 'YYYY-MM-DD').toDate();
|
||||
const actors = scene.actors.map(({ name }) => name);
|
||||
const director = scene.directors[0].name;
|
||||
|
||||
const poster = `https://images-evilangel.gammacdn.com/movies${scene.pictures.resized}`;
|
||||
const movie = `https://evilangel.com/en/movie/${scene.url_movie_title}/${scene.movie_id}`;
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
description,
|
||||
length,
|
||||
actors,
|
||||
director,
|
||||
date,
|
||||
tags,
|
||||
poster,
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
movie,
|
||||
site,
|
||||
};
|
||||
}));
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const json = $('script[type="application/ld+json"]').html();
|
||||
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
|
||||
|
||||
const [data, data2] = JSON.parse(json);
|
||||
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
|
||||
const entryId = new URL(url).pathname.split('/').slice(-1)[0];
|
||||
|
||||
const {
|
||||
name: title,
|
||||
description,
|
||||
} = data;
|
||||
// date in data object is not the release date of the scene, but the date the entry was added
|
||||
const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate();
|
||||
|
||||
const actors = data.actor.map(actor => actor.name);
|
||||
const hasTrans = data.actor.some(actor => actor.gender === 'shemale');
|
||||
|
||||
const director = (data.director && data.director[0].name) || (data2.director && data2.director[0].name) || null;
|
||||
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
|
||||
|
||||
const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();
|
||||
|
||||
const rawTags = data.keywords.split(', ');
|
||||
const tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
|
||||
|
||||
const poster = videoData.picPreview;
|
||||
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
|
||||
|
||||
const photos = await getPhotos($('.picturesItem a').attr('href'), 'evilangel.com', site);
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
date,
|
||||
actors,
|
||||
director,
|
||||
description,
|
||||
duration,
|
||||
tags,
|
||||
poster,
|
||||
photos,
|
||||
trailer: {
|
||||
src: trailer,
|
||||
quality: parseInt(videoData.sizeOnLoad, 10),
|
||||
},
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site,
|
||||
};
|
||||
}
|
||||
|
||||
function scrapeActor(data, releases) {
|
||||
const actor = {};
|
||||
|
||||
if (data.male === 1) actor.gender = 'male';
|
||||
if (data.female === 1) actor.gender = 'female';
|
||||
if (data.shemale === 1 || data.trans === 1) actor.gender = 'transsexual';
|
||||
|
||||
if (data.description) actor.description = data.description.trim();
|
||||
|
||||
if (data.attributes.ethnicity) actor.ethnicity = data.attributes.ethnicity;
|
||||
if (data.attributes.eye_color) actor.eyes = data.attributes.eye_color;
|
||||
if (data.attributes.hair_color) actor.hair = data.attributes.hair_color;
|
||||
|
||||
const avatarPath = Object.values(data.pictures).reverse()[0];
|
||||
actor.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;
|
||||
|
||||
actor.releases = releases.map(release => `https://evilangel.com/en/video/${release.url_title}/${release.clip_id}`);
|
||||
|
||||
return actor;
|
||||
}
|
||||
|
||||
async function fetchApiCredentials() {
|
||||
const res = await bhttp.get('https://evilangel.com/en/videos');
|
||||
const body = res.body.toString();
|
||||
|
||||
const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey'));
|
||||
const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
|
||||
const apiData = JSON.parse(apiSerial);
|
||||
|
||||
const { applicationID: appId, apiKey } = apiData.api.algolia;
|
||||
const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
|
||||
|
||||
const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
|
||||
|
||||
return {
|
||||
appId,
|
||||
apiKey,
|
||||
userAgent,
|
||||
apiUrl,
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, upcoming = false) {
|
||||
const { apiUrl } = await fetchApiCredentials();
|
||||
|
||||
const res = await bhttp.post(apiUrl, {
|
||||
requests: [
|
||||
{
|
||||
indexName: 'all_scenes',
|
||||
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
|
||||
},
|
||||
],
|
||||
}, {
|
||||
headers: {
|
||||
Referer: 'https://www.evilangel.com/en/videos',
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
|
||||
return scrape(res.body.results[0].hits, site);
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
return fetchLatest(site, 1, true);
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
|
||||
async function fetchActorScenes(actorName, apiUrl) {
|
||||
const res = await bhttp.post(apiUrl, {
|
||||
requests: [
|
||||
{
|
||||
indexName: 'all_scenes',
|
||||
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
|
||||
},
|
||||
],
|
||||
}, {
|
||||
headers: {
|
||||
Referer: 'https://www.evilangel.com/en/videos',
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
|
||||
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
|
||||
return res.body.results[0].hits;
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const { apiUrl } = await fetchApiCredentials();
|
||||
const actorSlug = encodeURI(actorName);
|
||||
|
||||
const res = await bhttp.post(apiUrl, {
|
||||
requests: [
|
||||
{
|
||||
indexName: 'all_actors',
|
||||
params: `query=${actorSlug}`,
|
||||
},
|
||||
],
|
||||
}, {
|
||||
headers: {
|
||||
Referer: `https://www.evilangel.com/en/search?query=${actorSlug}&tab=actors`,
|
||||
},
|
||||
encodeJSON: true,
|
||||
});
|
||||
|
||||
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
|
||||
const actorData = res.body.results[0].hits.find(actor => actor.name === actorName);
|
||||
|
||||
if (actorData) {
|
||||
const actorScenes = await fetchActorScenes(actorName, apiUrl);
|
||||
|
||||
return scrapeActor(actorData, actorScenes);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
||||
|
|
|
@ -4,6 +4,7 @@ const Promise = require('bluebird');
|
|||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
async function fetchPhotos(url) {
|
||||
const res = await bhttp.get(url);
|
||||
|
@ -39,33 +40,191 @@ function scrapePhotos(html) {
|
|||
});
|
||||
}
|
||||
|
||||
async function getPhotos(albumPath, siteDomain) {
|
||||
const albumUrl = `https://${siteDomain}${albumPath}`;
|
||||
async function getPhotos(albumPath, site) {
|
||||
const albumUrl = `${site.url}${albumPath}`;
|
||||
|
||||
try {
|
||||
const html = await fetchPhotos(albumUrl);
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const photos = scrapePhotos(html);
|
||||
|
||||
const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray();
|
||||
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
|
||||
|
||||
const otherPhotos = await Promise.map(pages, async (page) => {
|
||||
const pageUrl = `https://${siteDomain}${page}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
if (lastPage) {
|
||||
const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1);
|
||||
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
const otherPhotos = await Promise.map(otherPages, async (page) => {
|
||||
const pageUrl = `${site.url}/${albumPath}/${page}`;
|
||||
const pageHtml = await fetchPhotos(pageUrl);
|
||||
|
||||
return photos.concat(otherPhotos.flat());
|
||||
return scrapePhotos(pageHtml);
|
||||
}, {
|
||||
concurrency: 2,
|
||||
});
|
||||
|
||||
return photos.concat(otherPhotos.flat());
|
||||
}
|
||||
|
||||
return photos;
|
||||
} catch (error) {
|
||||
console.error(`Failed to fetch ${siteDomain} photos from ${albumPath}: ${error.message}`);
|
||||
console.error(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`);
|
||||
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convert scene hits returned by the search API into release objects.
 *
 * @param {Object[]} json - Scene hits; each is read for clip_id, title, description, length,
 *   ratings_up/down, url_title, release_date, actors, directors, master_categories, categories,
 *   pictures, url_movie_title and movie_id.
 * @param {Object} site - Site entry; only `site.url` is read, to build the scene and movie URLs.
 * @returns {Promise<Object[]>} One release per hit (the function is async, so callers get a promise).
 */
async function scrapeApiReleases(json, site) {
	return json.map((scene) => {
		const release = {
			entryId: scene.clip_id,
			title: scene.title,
			description: scene.description,
			duration: scene.length,
			likes: scene.ratings_up,
			dislikes: scene.ratings_down,
		};

		release.url = `${site.url}/en/video/${scene.url_title}/${release.entryId}`;
		release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
		release.actors = scene.actors.map(({ name }) => name);

		// a scene without credited directors must not abort the whole batch; director stays undefined
		release.director = scene.directors?.[0]?.name;

		// categories is optional; concat(undefined) would append a literal undefined to the tags
		const categoryNames = scene.categories?.map(category => category.name) || [];
		release.tags = scene.master_categories.concat(categoryNames);

		// prefer the resized picture, fall back to the first entry of pictures.nsfw.top
		const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]);

		if (posterPath) {
			// NOTE(review): the first host is Evil Angel specific although this scraper is shared; the second entry looks like the generic fallback - confirm
			release.poster = [
				`https://images-evilangel.gammacdn.com/movies${posterPath}`,
				`https://transform.gammacdn.com/movies${posterPath}`,
			];
		}

		release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`;

		return release;
	});
}
|
||||
|
||||
/**
 * Parse a scene overview page into basic release objects.
 *
 * @param {string} html - Markup of the overview page.
 * @param {Object} site - Site entry; `site.url` prefixes each scene link and the object itself
 *   is attached to every release.
 * @returns {Object[]} One release per `li[data-itemtype=scene]` element.
 */
function scrapeAll(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	return $('li[data-itemtype=scene]').toArray().map((sceneElement) => {
		const $scene = $(sceneElement);
		const $titleLink = $scene.find('.sceneTitle a');

		const entryId = $scene.attr('data-itemid');
		const dateText = $scene.find('.sceneDate').text();

		// the first two .value elements are read as likes and dislikes, in that order
		const ratingValues = $scene.find('.value')
			.toArray()
			.map(valueElement => Number($(valueElement).text()));

		const [likes, dislikes] = ratingValues;

		return {
			url: `${site.url}${$titleLink.attr('href')}`,
			entryId,
			title: $titleLink.attr('title'),
			actors: $scene.find('.sceneActors a')
				.map((actorIndex, actorElement) => $(actorElement).attr('title'))
				.toArray(),
			director: 'Mason',
			date: moment.utc(dateText, 'MM-DD-YYYY').toDate(),
			poster: $scene.find('.imgLink img').attr('data-original'),
			trailer: {
				src: `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`,
				quality: 224,
			},
			rating: {
				likes,
				dislikes,
			},
			site,
		};
	});
}
|
||||
|
||||
/**
 * Extract a release from a scene page.
 *
 * Reads the page's JSON-LD block and the `window.ScenePlayerOptions` script for metadata, poster
 * and trailer, and fetches the photo album linked from `.picturesItem a` through getPhotos.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Scene URL; its last path segment becomes the entry ID.
 * @param {Object} site - Site entry, passed through to getPhotos.
 * @returns {Promise<Object>} The release; it also carries the cheerio instance under `$`.
 */
async function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const release = { $ };

	const json = $('script[type="application/ld+json"]').html();
	const videoJson = $('script:contains("window.ScenePlayerOptions")').html();

	const [data, data2] = JSON.parse(json);
	// the player options are assigned as an object literal terminated by '};'
	const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));

	[release.entryId] = new URL(url).pathname.split('/').slice(-1);

	release.title = data.name;
	release.description = data.description;

	// date in data object is not the release date of the scene, but the date the entry was added
	const dateString = $('.updatedDate').first().text().trim();
	const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
	release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();

	// an empty director list must not throw; fall through to the second JSON-LD entry instead
	release.director = data.director?.[0]?.name || data2?.director?.[0]?.name;
	release.actors = data.actor.map(actor => actor.name);
	const hasTrans = data.actor.some(actor => actor.gender === 'shemale');

	const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
	if (stars) release.rating = { stars };

	release.duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();

	// keywords may be absent; spreading undefined would throw when the trans tag is appended
	const rawTags = data.keywords?.split(', ');
	release.tags = hasTrans ? [...(rawTags || []), 'transsexual'] : rawTags;

	release.poster = videoData.picPreview;
	release.photos = await getPhotos($('.picturesItem a').attr('href'), site);

	const trailer = `${videoData.playerOptions.host}${videoData.url}`;

	// NOTE(review): the variants are derived by swapping the first 'hd' in the URL; presumably every variant exists on the CDN - confirm
	release.trailer = [
		{
			src: trailer.replace('hd', 'sm'),
			quality: 240,
		},
		{
			src: trailer.replace('hd', 'med'),
			quality: 360,
		},
		{
			src: trailer.replace('hd', 'big'),
			quality: 480,
		},
		{
			// probably 540p
			src: trailer,
			quality: parseInt(videoData.sizeOnLoad, 10),
		},
		{
			src: trailer.replace('hd', '720p'),
			quality: 720,
		},
		{
			src: trailer.replace('hd', '1080p'),
			quality: 1080,
		},
		{
			src: trailer.replace('hd', '4k'),
			quality: 2160,
		},
	];

	return release;
}
|
||||
|
||||
function scrapeActorSearch(html, url, actorName) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
|
||||
|
@ -112,6 +271,113 @@ function scrapeProfile(html, url, actorName, siteSlug) {
|
|||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from an API actor hit and the actor's scene hits.
 *
 * @param {Object} data - Actor hit; read for male/female/shemale/trans flags, description,
 *   attributes (ethnicity, eye_color, hair_color) and pictures.
 * @param {Object[]} releases - Scene hits; each contributes a scene URL built from url_title and clip_id.
 * @param {string} siteSlug - Used as the domain name (`https://<siteSlug>.com`) of the release URLs.
 * @returns {Object} Profile holding only the fields present in the data, plus `releases`.
 */
function scrapeApiProfile(data, releases, siteSlug) {
	const profile = {};

	// later flags win when several are set
	if (data.male === 1) profile.gender = 'male';
	if (data.female === 1) profile.gender = 'female';
	if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual';

	if (data.description) profile.description = data.description.trim();

	// sparse hits may lack the attributes object entirely
	const attributes = data.attributes || {};

	if (attributes.ethnicity) profile.ethnicity = attributes.ethnicity;
	if (attributes.eye_color) profile.eyes = attributes.eye_color;
	if (attributes.hair_color) profile.hair = attributes.hair_color;

	// the last picture entry is used as avatar; tolerate actors without any pictures
	const avatarPath = Object.values(data.pictures || {}).pop();

	// NOTE(review): the avatar host is Evil Angel specific although the scraper is shared - confirm it serves other sites
	if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;

	profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);

	return profile;
}
|
||||
|
||||
/**
 * Load a site page and extract the Algolia credentials embedded in it.
 *
 * The page is searched for the line containing 'apiKey'; the object literal on that line
 * (from the first '{' up to the '}' preceding '};') is parsed as JSON.
 *
 * @param {string} referer - URL of the page to load.
 * @returns {Promise<{appId: string, apiKey: string, userAgent: string, apiUrl: string}>}
 * @throws {Error} When the page contains no line with API credentials.
 */
async function fetchApiCredentials(referer) {
	const res = await bhttp.get(referer);
	const body = res.body.toString();

	const apiLine = body.split('\n').find(bodyLine => bodyLine.includes('apiKey'));

	if (!apiLine) {
		// previously surfaced as an opaque TypeError on apiLine.slice
		throw new Error(`Unable to find API credentials on ${referer}`);
	}

	const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
	const apiData = JSON.parse(apiSerial);

	const { applicationID: appId, apiKey } = apiData.api.algolia;
	const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';

	// NOTE(review): the agent string is interpolated unencoded; presumably the HTTP client or server tolerates the spaces - confirm
	const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;

	return {
		appId,
		apiKey,
		userAgent,
		apiUrl,
	};
}
|
||||
|
||||
/**
 * Query the scene index for one page of released or upcoming scenes.
 *
 * @param {Object} site - Site entry; `site.url` yields the videos page used as referer.
 * @param {number} [page=1] - 1-based page number; sent to the API zero-based.
 * @param {boolean} [upcoming=false] - Query upcoming instead of released scenes.
 * @returns {Promise<Object[]>} Releases produced by scrapeApiReleases from the first result set.
 */
async function fetchApiLatest(site, page = 1, upcoming = false) {
	const referer = `${site.url}/en/videos`;
	const credentials = await fetchApiCredentials(referer);

	const upcomingFlag = upcoming ? 1 : 0;

	const sceneQuery = {
		indexName: 'all_scenes',
		params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcomingFlag}"]]`,
	};

	const requestOptions = {
		headers: {
			Referer: referer,
		},
		encodeJSON: true,
	};

	const res = await bhttp.post(credentials.apiUrl, { requests: [sceneQuery] }, requestOptions);

	return scrapeApiReleases(res.body.results[0].hits, site);
}
|
||||
|
||||
/**
 * Fetch the first page of upcoming scenes through the API.
 *
 * @param {Object} site - Site entry, forwarded to fetchApiLatest.
 * @returns {Promise<Object[]>} Whatever fetchApiLatest resolves to with the upcoming flag set.
 */
async function fetchApiUpcoming(site) {
	const firstPage = 1;
	const upcomingOnly = true;

	return fetchApiLatest(site, firstPage, upcomingOnly);
}
|
||||
|
||||
/**
 * Fetch one page of the HTML scene overview and scrape it.
 *
 * @param {Object} site - Site entry; `site.url` is the base of the overview URL.
 * @param {number} [page=1] - Page number appended to the overview path.
 * @returns {Promise<Object[]>} Releases produced by scrapeAll.
 */
async function fetchLatest(site, page = 1) {
	const overviewUrl = `${site.url}/en/videos/AllCategories/0/${page}`;
	const res = await bhttp.get(overviewUrl);
	const html = res.body.toString();

	return scrapeAll(html, site);
}
|
||||
|
||||
/**
 * Fetch the HTML overview of upcoming scenes and scrape it.
 *
 * @param {Object} site - Site entry; `site.url` is the base of the overview URL.
 * @returns {Promise<Object[]>} Releases produced by scrapeAll.
 */
async function fetchUpcoming(site) {
	const upcomingUrl = `${site.url}/en/videos/AllCategories/0/1/upcoming`;
	const res = await bhttp.get(upcomingUrl);
	const html = res.body.toString();

	return scrapeAll(html, site);
}
|
||||
|
||||
/**
 * Download a scene page and scrape it.
 *
 * @param {string} url - Scene page URL.
 * @param {Object} site - Site entry, forwarded to scrapeScene.
 * @returns {Promise<Object>} The release produced by scrapeScene.
 */
async function fetchScene(url, site) {
	const res = await bhttp.get(url);
	const html = res.body.toString();

	return scrapeScene(html, url, site);
}
|
||||
|
||||
/**
 * Query the scene index for the first page of scenes featuring an actor.
 *
 * @param {string} actorName - Name matched against the `actors.name` facet.
 * @param {string} apiUrl - Query URL, as built by fetchApiCredentials.
 * @param {string} siteSlug - Used as the domain name of the Referer header.
 * @returns {Promise<Object[]>} Scene hits, or an empty array on a non-200 response or no hits.
 */
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
	const sceneQuery = {
		indexName: 'all_scenes',
		params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
	};

	const requestOptions = {
		headers: {
			Referer: `https://www.${siteSlug}.com/en/videos`,
		},
		encodeJSON: true,
	};

	const res = await bhttp.post(apiUrl, { requests: [sceneQuery] }, requestOptions);

	// the body is only inspected for successful responses
	if (res.statusCode !== 200) {
		return [];
	}

	const { hits } = res.body.results[0];

	return hits.length > 0 ? hits : [];
}
|
||||
|
||||
async function fetchProfile(actorName, siteSlug, altSearchUrl) {
|
||||
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
|
||||
const searchUrl = altSearchUrl
|
||||
|
@ -139,38 +405,17 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl) {
|
|||
return null;
|
||||
}
|
||||
|
||||
async function fetchApiCredentials(referer) {
|
||||
const res = await bhttp.get(referer);
|
||||
const body = res.body.toString();
|
||||
async function fetchApiProfile(actorName, siteSlug) {
|
||||
const actorSlug = encodeURI(actorName);
|
||||
const referer = `https://www.${siteSlug}.com/en/search?query=${actorSlug}&tab=actors`;
|
||||
|
||||
const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey'));
|
||||
const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
|
||||
const apiData = JSON.parse(apiSerial);
|
||||
|
||||
const { applicationID: appId, apiKey } = apiData.api.algolia;
|
||||
const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
|
||||
|
||||
const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
|
||||
|
||||
return {
|
||||
appId,
|
||||
apiKey,
|
||||
userAgent,
|
||||
apiUrl,
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, upcoming = false) {
|
||||
const referer = `${site.url}/en/videos`;
|
||||
const { apiUrl } = await fetchApiCredentials(referer);
|
||||
|
||||
console.log(referer);
|
||||
|
||||
const res = await bhttp.post(apiUrl, {
|
||||
requests: [
|
||||
{
|
||||
indexName: 'all_scenes',
|
||||
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
|
||||
indexName: 'all_actors',
|
||||
params: `query=${actorSlug}`,
|
||||
},
|
||||
],
|
||||
}, {
|
||||
|
@ -180,14 +425,31 @@ async function fetchLatest(site, page = 1, upcoming = false) {
|
|||
encodeJSON: true,
|
||||
});
|
||||
|
||||
console.log(res.body.results);
|
||||
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
|
||||
const actorData = res.body.results[0].hits.find(actor => actor.name === actorName);
|
||||
|
||||
// return scrape(res.body.results[0].hits, site);
|
||||
if (actorData) {
|
||||
const actorScenes = await fetchActorScenes(actorName, apiUrl, siteSlug);
|
||||
|
||||
return scrapeApiProfile(actorData, actorScenes, siteSlug);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
getPhotos,
|
||||
fetchProfile,
|
||||
scrapeProfile,
|
||||
fetchApiLatest,
|
||||
fetchApiProfile,
|
||||
fetchApiUpcoming,
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming,
|
||||
getPhotos,
|
||||
scrapeApiProfile,
|
||||
scrapeApiReleases,
|
||||
scrapeProfile,
|
||||
scrapeAll,
|
||||
scrapeScene,
|
||||
};
|
||||
|
|
|
@ -36,6 +36,7 @@ const mofos = require('./mofos');
|
|||
const naughtyamerica = require('./naughtyamerica');
|
||||
const twentyonesextury = require('./21sextury');
|
||||
const xempire = require('./xempire');
|
||||
const wicked = require('./wicked');
|
||||
|
||||
// profiles
|
||||
const boobpedia = require('./boobpedia');
|
||||
|
@ -80,12 +81,14 @@ module.exports = {
|
|||
teamskeet,
|
||||
vixen,
|
||||
vogov,
|
||||
wicked,
|
||||
xempire,
|
||||
},
|
||||
actors: {
|
||||
// ordered by data priority
|
||||
'21sextury': twentyonesextury,
|
||||
evilangel,
|
||||
wicked,
|
||||
mofos,
|
||||
realitykings,
|
||||
digitalplayground,
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
'use strict';
|
||||
|
||||
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchApiLatest,
|
||||
fetchProfile: fetchApiProfile,
|
||||
fetchScene,
|
||||
fetchUpcoming: fetchApiUpcoming,
|
||||
};
|
|
@ -1,168 +1,26 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const cheerio = require('cheerio');
|
||||
const moment = require('moment');
|
||||
|
||||
const { getPhotos, fetchProfile } = require('./gamma');
|
||||
|
||||
function scrape(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('li[data-itemtype=scene]').toArray();
|
||||
|
||||
return scenesElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.sceneTitle a');
|
||||
|
||||
const url = `${site.url}${sceneLinkElement.attr('href')}`;
|
||||
const title = sceneLinkElement.attr('title');
|
||||
|
||||
const entryId = $(element).attr('data-itemid');
|
||||
|
||||
const date = moment
|
||||
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
|
||||
.toDate();
|
||||
|
||||
const actors = $(element).find('.sceneActors a')
|
||||
.map((actorIndex, actorElement) => $(actorElement).attr('title'))
|
||||
.toArray();
|
||||
|
||||
const [likes, dislikes] = $(element).find('.value')
|
||||
.toArray()
|
||||
.map(value => Number($(value).text()));
|
||||
|
||||
const poster = $(element).find('.imgLink img').attr('data-original');
|
||||
const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`;
|
||||
|
||||
return {
|
||||
url,
|
||||
entryId,
|
||||
title,
|
||||
actors,
|
||||
director: 'Mason',
|
||||
date,
|
||||
poster,
|
||||
trailer: {
|
||||
src: trailer,
|
||||
quality: 224,
|
||||
},
|
||||
rating: {
|
||||
likes,
|
||||
dislikes,
|
||||
},
|
||||
site,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Extract the details of a single scene from its page markup.
 *
 * Three inline scripts are read: the `application/ld+json` block, the
 * `dataLayer = ` assignment and the `window.ScenePlayerOptions` assignment.
 * Values from the data layer are preferred, with the JSON-LD block and the
 * twitter meta tags as fallbacks.
 *
 * @param {string} html - Markup of the scene page.
 * @param {string} url - Address the page was fetched from; its path supplies the
 *   fallback entry ID and the tail of the returned URL.
 * @param {Object} site - Site record, passed to `getPhotos` and attached to the release.
 * @returns {Promise<Object>} The scraped release.
 */
async function scrapeScene(html, url, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });

	const ldJsonSource = $('script[type="application/ld+json"]').html();
	const dataLayerSource = $('script:contains("dataLayer = ")').html();
	const playerSource = $('script:contains("window.ScenePlayerOptions")').html();

	const data = JSON.parse(ldJsonSource)[0];
	// cut the array literal out of the assignment, dropping the final character of the script
	const dataLayerStart = dataLayerSource.indexOf('[{');
	const data2 = JSON.parse(dataLayerSource.slice(dataLayerStart, -1))[0];
	// cut the options object out of the assignment, up to and including its closing brace
	const playerStart = playerSource.indexOf('{"id":');
	const playerEnd = playerSource.indexOf('};') + 1;
	const videoData = JSON.parse(playerSource.slice(playerStart, playerEnd));

	const { sceneDetails } = data2;

	const entryId = sceneDetails.sceneId || new URL(url).pathname.split('/').slice(-1)[0];

	const title = sceneDetails.sceneTitle || $('meta[name="twitter:title"]').attr('content');
	const description = sceneDetails.sceneDescription
		|| data.description
		|| $('meta[name="twitter:description"]').attr('content');

	// date in data object is not the release date of the scene, but the date the entry was added
	const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate();

	const actors = (sceneDetails.sceneActors || data.actor).map(actor => actor.actorName || actor.name);

	// rescale the aggregate rating to a five-star scale
	const { aggregateRating } = data;
	const stars = (aggregateRating.ratingValue / aggregateRating.bestRating) * 5;

	const durationParts = data.duration.slice(2).split(':');
	const duration = moment.duration(durationParts).asSeconds();

	// only AllBlackX has no twitter domain, no other useful hints available
	const siteDomain = $('meta[name="twitter:domain"]').attr('content') || 'allblackx.com';
	const siteSlug = siteDomain.split('.')[0].toLowerCase();
	const siteUrl = `https://www.${siteDomain}`;

	const poster = videoData.picPreview;
	const trailer = `${videoData.playerOptions.host}${videoData.url}`;

	// each variant swaps the first 'hd' in the trailer address for another label;
	// a null label keeps the address as-is (probably 540p)
	const trailerVariants = [
		['sm', 240],
		['med', 360],
		['big', 480],
		[null, parseInt(videoData.sizeOnLoad, 10)],
		['720p', 720],
		['1080p', 1080],
		['4k', 2160],
	];

	const photos = await getPhotos($('.picturesItem a').attr('href'), siteDomain, site);

	const tags = data.keywords.split(', ');

	const scenePath = new URL(url).pathname.split('/').slice(-2).join('/');

	return {
		url: `${siteUrl}/en/video/${scenePath}`,
		entryId,
		title,
		date,
		actors,
		director: 'Mason',
		description,
		duration,
		poster,
		photos,
		trailer: trailerVariants.map(([label, quality]) => ({
			src: label === null ? trailer : trailer.replace('hd', label),
			quality,
		})),
		tags,
		rating: {
			stars,
		},
		site,
		channel: siteSlug,
	};
}
|
||||
|
||||
/**
 * Download one page of a site's video listing and scrape the scenes on it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing address.
 * @param {number} [page=1] - Page number placed at the end of the listing address.
 * @returns {Promise<*>} Whatever `scrape` returns for the downloaded markup.
 */
async function fetchLatest(site, page = 1) {
	const listingUrl = `${site.url}/en/videos/AllCategories/0/${page}`;
	const { body } = await bhttp.get(listingUrl);
	const html = body.toString();

	return scrape(html, site);
}
|
||||
|
||||
/**
 * Download the first page of a site's upcoming-videos listing and scrape it.
 *
 * @param {Object} site - Site record; `site.url` is the base of the listing address.
 * @returns {Promise<*>} Whatever `scrape` returns for the downloaded markup.
 */
async function fetchUpcoming(site) {
	const upcomingUrl = `${site.url}/en/videos/AllCategories/0/1/upcoming`;
	const { body } = await bhttp.get(upcomingUrl);
	const html = body.toString();

	return scrape(html, site);
}
|
||||
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
}
|
||||
const release = await scrapeScene(res.body.toString(), url, site);
|
||||
|
||||
async function xEmpireFetchProfile(actorName) {
|
||||
return fetchProfile(actorName, 'xempire');
|
||||
const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
|
||||
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
|
||||
// const siteUrl = siteDomain && `https://www.${siteDomain}`;
|
||||
|
||||
release.channel = siteSlug;
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile: xEmpireFetchProfile,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
fetchScene,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue