Added Wicked network. Merged Evil Angel, XEmpire and Wicked into generic Gamma scraper.

ThePendulum 2020-02-01 01:15:40 +01:00
parent 37ab07356e
commit 94bf207397
17 changed files with 385 additions and 431 deletions
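
In practice the consolidation means each Gamma-based network module shrinks to a re-export of the shared scrapers in src/scrapers/gamma.js: Evil Angel and Wicked wire up the Algolia-backed fetchApi* variants, while XEmpire keeps the HTML-backed fetchLatest/fetchUpcoming and only wraps fetchScene to work out the channel. Site-specific behaviour comes from the site object (site.url, site.name) and the siteSlug that scrapeActors() now passes to fetchProfile. A minimal sketch of the Algolia-backed flavour, matching the new wicked.js further down:

'use strict';

// Shared Gamma platform scrapers; the site object each call receives and the
// siteSlug handed to the profile scraper are what vary per network.
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');

module.exports = {
  fetchLatest: fetchApiLatest,
  fetchProfile: fetchApiProfile,
  fetchScene,
  fetchUpcoming: fetchApiUpcoming,
};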

View File

@@ -4,8 +4,6 @@ import { curateRelease } from '../curate';
function initReleasesActions(store, _router) {
async function fetchReleases({ _commit }, { limit = 100 }) {
console.log(store.state.ui.filter, store.getters.after, store.getters.before);
const { releases } = await graphql(`
query Releases(
$limit:Int = 1000,

Binary file added (not shown), 1.0 KiB

Binary file added (not shown), 20 KiB

Binary file added (not shown), 20 KiB

View File

@@ -191,6 +191,12 @@ const networks = [
url: 'https://www.vogov.com',
description: 'Fantastic collection of exclusive porn movies with the most beautiful porn models in leading roles saisfies the most picky visitor of the site.',
},
{
slug: 'wicked',
name: 'Wicked',
url: 'https://www.wicked.com',
description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos',
},
{
slug: 'xempire',
name: 'XEmpire',

View File

@@ -1217,7 +1217,7 @@ function getSites(networksMap) {
{
slug: 'evilangel',
name: 'Evil Angel',
url: 'https://evilangel.com',
url: 'https://www.evilangel.com',
description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.',
parameters: JSON.stringify({ independent: true }),
network_id: networksMap.evilangel,
@@ -3340,6 +3340,15 @@ function getSites(networksMap) {
description: 'Top rated models. Graceful locations. Best gonzo scenes. 4K UHD 60 FPS. So, in general Vogov is a website that is worth visiting and exploring carefully. It gives a chance to spend a fantastic night with gorgeous girls ready to experiment and to full around with their lovers.',
network_id: networksMap.vogov,
},
// WICKED
{
slug: 'wicked',
name: 'Wicked',
url: 'https://www.wicked.com',
description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos',
parameters: JSON.stringify({ independent: true }),
network_id: networksMap.wicked,
},
// XEMPIRE
{
slug: 'hardx',

View File

@@ -589,6 +589,7 @@ function getTags(groupsMap) {
name: 'MILF',
slug: 'milf',
alias_for: null,
priority: 7,
group_id: groupsMap.age,
},
{
@@ -1578,6 +1579,10 @@ function getTagAliases(tagsMap) {
name: 'trans',
alias_for: tagsMap.transsexual,
},
{
name: 'transgender',
alias_for: tagsMap.transsexual,
},
{
name: 'trimmed pussy',
alias_for: tagsMap.trimmed,

View File

@@ -342,7 +342,7 @@ async function scrapeActors(actorNames) {
const profiles = await Promise.map(sources, async ([scraperSlug, scraper]) => {
try {
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName);
const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug);
return {
...profile,

View File

@@ -35,6 +35,12 @@ const { argv } = yargs
alias: 'with-scenes',
default: false,
})
.option('with-profiles', {
describe: 'Scrape profiles for new actors after fetching scenes',
type: 'boolean',
alias: 'with-actors',
default: true,
})
.option('scene', {
describe: 'Scrape scene info from URL',
type: 'array',
@@ -55,6 +61,16 @@ const { argv } = yargs
type: 'boolean',
default: true,
})
.option('latest', {
describe: 'Scrape latest releases if available',
type: 'boolean',
default: true,
})
.option('upcoming', {
describe: 'Scrape upcoming releases if available',
type: 'boolean',
default: true,
})
.option('redownload', {
describe: 'Don\'t ignore duplicates, update existing entries',
type: 'boolean',
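
A note on how these flags are consumed elsewhere in this commit: yargs camel-cases dashed option names by default, so --with-profiles is readable as argv.withProfiles (and, via its alias, as --with-actors), while --latest and --upcoming toggle argv.latest and argv.upcoming; those are exactly the properties checked in the store-releases.js and scrape-sites.js hunks below. A minimal, self-contained sketch assuming the same yargs setup as this file:

'use strict';

const yargs = require('yargs');

// Camel-case expansion and boolean negation are yargs defaults, so
// --with-profiles, --no-latest and --no-upcoming all behave as expected.
const { argv } = yargs
  .option('with-profiles', { type: 'boolean', alias: 'with-actors', default: true })
  .option('latest', { type: 'boolean', default: true })
  .option('upcoming', { type: 'boolean', default: true });

// e.g. invoked with --no-upcoming  ->  true true false
console.log(argv.withProfiles, argv.latest, argv.upcoming);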

View File

@@ -425,7 +425,9 @@ async function storeReleases(releases) {
storeReleaseAssets(storedReleases),
]);
if (argv.withProfiles) {
await scrapeBasicActors();
}
return {
releases: storedReleases,

View File

@@ -65,8 +65,12 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') {
: await scraper.fetchMovie(url, site, release);
return {
url,
...scrapedRelease,
...release,
...(scrapedRelease && release?.tags && {
tags: release.tags.concat(scrapedRelease.tags),
}),
site,
};
}
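
The conditional spread above leans on the fact that spreading a falsy value into an object literal is a no-op: tags are only merged when a scraped release came back and the known release has tags to merge into. A small self-contained illustration with made-up values:

'use strict';

const release = { url: 'https://example.com/scene', tags: ['anal'] };
const scrapedRelease = { title: 'Example', tags: ['gonzo'] };

// If scrapedRelease were null or release had no tags, the third spread would
// expand a falsy value and contribute nothing, leaving whichever tags exist.
const merged = {
  ...scrapedRelease,
  ...release,
  ...(scrapedRelease && release?.tags && {
    tags: release.tags.concat(scrapedRelease.tags),
  }),
};

console.log(merged.tags); // [ 'anal', 'gonzo' ]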

View File

@@ -30,6 +30,10 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) {
}
async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) {
if (!argv.latest || !scraper.fetchLatest) {
return [];
}
const latestReleases = await scraper.fetchLatest(site, page);
if (latestReleases.length === 0) {
@@ -58,7 +62,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a
}
async function scrapeUpcomingReleases(scraper, site) {
if (scraper.fetchUpcoming) {
if (argv.upcoming && scraper.fetchUpcoming) {
const upcomingReleases = await scraper.fetchUpcoming(site);
return upcomingReleases.map(release => ({ ...release, upcoming: true }));
@@ -100,7 +104,9 @@ async function scrapeSiteReleases(scraper, site) {
scrapeUpcomingReleases(scraper, site), // fetch basic release info from upcoming overview
]);
logger.info(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`);
if (argv.upcoming) {
logger.info(`${site.name}: ${argv.latest ? 'Found' : 'Ignoring'} ${newReleases.length || ''}latest releases, ${argv.upcoming ? '' : 'ignoring '}${upcomingReleases.length || ''} upcoming releases`);
}
const baseReleases = [...newReleases, ...upcomingReleases];

View File

@@ -1,235 +1,10 @@
'use strict';
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const { getPhotos } = require('./gamma');
async function scrape(json, site) {
return Promise.all(json.map(async (scene) => {
const {
title,
description,
length,
master_categories: tags,
ratings_up: likes,
ratings_down: dislikes,
} = scene;
const entryId = scene.clip_id;
const url = `https://evilangel.com/en/video/${scene.url_title}/${entryId}`;
const date = moment(scene.release_date, 'YYYY-MM-DD').toDate();
const actors = scene.actors.map(({ name }) => name);
const director = scene.directors[0].name;
const poster = `https://images-evilangel.gammacdn.com/movies${scene.pictures.resized}`;
const movie = `https://evilangel.com/en/movie/${scene.url_movie_title}/${scene.movie_id}`;
return {
url,
entryId,
title,
description,
length,
actors,
director,
date,
tags,
poster,
rating: {
likes,
dislikes,
},
movie,
site,
};
}));
}
async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const json = $('script[type="application/ld+json"]').html();
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
const [data, data2] = JSON.parse(json);
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
const entryId = new URL(url).pathname.split('/').slice(-1)[0];
const {
name: title,
description,
} = data;
// date in data object is not the release date of the scene, but the date the entry was added
const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate();
const actors = data.actor.map(actor => actor.name);
const hasTrans = data.actor.some(actor => actor.gender === 'shemale');
const director = (data.director && data.director[0].name) || (data2.director && data2.director[0].name) || null;
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();
const rawTags = data.keywords.split(', ');
const tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
const poster = videoData.picPreview;
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
const photos = await getPhotos($('.picturesItem a').attr('href'), 'evilangel.com', site);
return {
url,
entryId,
title,
date,
actors,
director,
description,
duration,
tags,
poster,
photos,
trailer: {
src: trailer,
quality: parseInt(videoData.sizeOnLoad, 10),
},
rating: {
stars,
},
site,
};
}
function scrapeActor(data, releases) {
const actor = {};
if (data.male === 1) actor.gender = 'male';
if (data.female === 1) actor.gender = 'female';
if (data.shemale === 1 || data.trans === 1) actor.gender = 'transsexual';
if (data.description) actor.description = data.description.trim();
if (data.attributes.ethnicity) actor.ethnicity = data.attributes.ethnicity;
if (data.attributes.eye_color) actor.eyes = data.attributes.eye_color;
if (data.attributes.hair_color) actor.hair = data.attributes.hair_color;
const avatarPath = Object.values(data.pictures).reverse()[0];
actor.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;
actor.releases = releases.map(release => `https://evilangel.com/en/video/${release.url_title}/${release.clip_id}`);
return actor;
}
async function fetchApiCredentials() {
const res = await bhttp.get('https://evilangel.com/en/videos');
const body = res.body.toString();
const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey'));
const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
const apiData = JSON.parse(apiSerial);
const { applicationID: appId, apiKey } = apiData.api.algolia;
const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
return {
appId,
apiKey,
userAgent,
apiUrl,
};
}
async function fetchLatest(site, page = 1, upcoming = false) {
const { apiUrl } = await fetchApiCredentials();
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
},
],
}, {
headers: {
Referer: 'https://www.evilangel.com/en/videos',
},
encodeJSON: true,
});
return scrape(res.body.results[0].hits, site);
}
async function fetchUpcoming(site) {
return fetchLatest(site, 1, true);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
async function fetchActorScenes(actorName, apiUrl) {
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
},
],
}, {
headers: {
Referer: 'https://www.evilangel.com/en/videos',
},
encodeJSON: true,
});
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
return res.body.results[0].hits;
}
return [];
}
async function fetchProfile(actorName) {
const { apiUrl } = await fetchApiCredentials();
const actorSlug = encodeURI(actorName);
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_actors',
params: `query=${actorSlug}`,
},
],
}, {
headers: {
Referer: `https://www.evilangel.com/en/search?query=${actorSlug}&tab=actors`,
},
encodeJSON: true,
});
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
const actorData = res.body.results[0].hits.find(actor => actor.name === actorName);
if (actorData) {
const actorScenes = await fetchActorScenes(actorName, apiUrl);
return scrapeActor(actorData, actorScenes);
}
}
return null;
}
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest,
fetchProfile,
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming,
fetchUpcoming: fetchApiUpcoming,
};

View File

@ -4,6 +4,7 @@ const Promise = require('bluebird');
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const cheerio = require('cheerio');
const moment = require('moment');
async function fetchPhotos(url) {
const res = await bhttp.get(url);
@@ -39,18 +40,21 @@ function scrapePhotos(html) {
});
}
async function getPhotos(albumPath, siteDomain) {
const albumUrl = `https://${siteDomain}${albumPath}`;
async function getPhotos(albumPath, site) {
const albumUrl = `${site.url}${albumPath}`;
try {
const html = await fetchPhotos(albumUrl);
const $ = cheerio.load(html, { normalizeWhitespace: true });
const photos = scrapePhotos(html);
const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray();
const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0];
const otherPhotos = await Promise.map(pages, async (page) => {
const pageUrl = `https://${siteDomain}${page}`;
if (lastPage) {
const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1);
const otherPhotos = await Promise.map(otherPages, async (page) => {
const pageUrl = `${site.url}/${albumPath}/${page}`;
const pageHtml = await fetchPhotos(pageUrl);
return scrapePhotos(pageHtml);
@@ -59,13 +63,168 @@ async function getPhotos(albumPath, siteDomain) {
});
return photos.concat(otherPhotos.flat());
}
return photos;
} catch (error) {
console.error(`Failed to fetch ${siteDomain} photos from ${albumPath}: ${error.message}`);
console.error(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`);
return [];
}
}
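
A quick worked example of the page list built above: page 1 was already fetched for the first batch of photos and lastPage comes from the Gamma_Paginator "last" link, so the remaining album pages are just the numbers 2..lastPage.

'use strict';

// With a "last" link ending in 4, the full range is [1, 2, 3, 4] and slice(1)
// drops the page that was already scraped, leaving [2, 3, 4] to request.
const lastPage = '4'; // e.g. matched from the .Gamma_Paginator a.last href
const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1);

console.log(otherPages); // [ 2, 3, 4 ]
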
async function scrapeApiReleases(json, site) {
return json.map((scene) => {
const release = {
entryId: scene.clip_id,
title: scene.title,
description: scene.description,
duration: scene.length,
likes: scene.ratings_up,
dislikes: scene.ratings_down,
};
release.url = `${site.url}/en/video/${scene.url_title}/${release.entryId}`;
release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
release.actors = scene.actors.map(({ name }) => name);
release.director = scene.directors[0].name;
release.tags = scene.master_categories.concat(scene.categories?.map(category => category.name));
const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]);
if (posterPath) {
release.poster = [
`https://images-evilangel.gammacdn.com/movies${posterPath}`,
`https://transform.gammacdn.com/movies${posterPath}`,
];
}
release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`;
return release;
});
}
function scrapeAll(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const entryId = $(element).attr('data-itemid');
const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate();
const actors = $(element).find('.sceneActors a')
.map((actorIndex, actorElement) => $(actorElement).attr('title'))
.toArray();
const [likes, dislikes] = $(element).find('.value')
.toArray()
.map(value => Number($(value).text()));
const poster = $(element).find('.imgLink img').attr('data-original');
const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`;
return {
url,
entryId,
title,
actors,
director: 'Mason',
date,
poster,
trailer: {
src: trailer,
quality: 224,
},
rating: {
likes,
dislikes,
},
site,
};
});
}
async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const release = { $ };
const json = $('script[type="application/ld+json"]').html();
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
const [data, data2] = JSON.parse(json);
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1));
[release.entryId] = new URL(url).pathname.split('/').slice(-1);
release.title = data.name;
release.description = data.description;
// date in data object is not the release date of the scene, but the date the entry was added
const dateString = $('.updatedDate').first().text().trim();
const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0];
release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate();
release.director = data.director?.[0].name || data2?.director?.[0].name;
release.actors = data.actor.map(actor => actor.name);
const hasTrans = data.actor.some(actor => actor.gender === 'shemale');
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
if (stars) release.rating = { stars };
release.duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();
const rawTags = data.keywords?.split(', ');
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
release.poster = videoData.picPreview;
release.photos = await getPhotos($('.picturesItem a').attr('href'), site);
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
release.trailer = [
{
src: trailer.replace('hd', 'sm'),
quality: 240,
},
{
src: trailer.replace('hd', 'med'),
quality: 360,
},
{
src: trailer.replace('hd', 'big'),
quality: 480,
},
{
// probably 540p
src: trailer,
quality: parseInt(videoData.sizeOnLoad, 10),
},
{
src: trailer.replace('hd', '720p'),
quality: 720,
},
{
src: trailer.replace('hd', '1080p'),
quality: 1080,
},
{
src: trailer.replace('hd', '4k'),
quality: 2160,
},
];
return release;
}
function scrapeActorSearch(html, url, actorName) {
const { document } = new JSDOM(html).window;
const actorLink = document.querySelector(`a[title="${actorName}" i]`);
@@ -112,6 +271,113 @@ function scrapeProfile(html, url, actorName, siteSlug) {
return profile;
}
function scrapeApiProfile(data, releases, siteSlug) {
const profile = {};
if (data.male === 1) profile.gender = 'male';
if (data.female === 1) profile.gender = 'female';
if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual';
if (data.description) profile.description = data.description.trim();
if (data.attributes.ethnicity) profile.ethnicity = data.attributes.ethnicity;
if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color;
if (data.attributes.hair_color) profile.hair = data.attributes.hair_color;
const avatarPath = Object.values(data.pictures).reverse()[0];
if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`;
profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`);
return profile;
}
async function fetchApiCredentials(referer) {
const res = await bhttp.get(referer);
const body = res.body.toString();
const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey'));
const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
const apiData = JSON.parse(apiSerial);
const { applicationID: appId, apiKey } = apiData.api.algolia;
const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
return {
appId,
apiKey,
userAgent,
apiUrl,
};
}
async function fetchApiLatest(site, page = 1, upcoming = false) {
const referer = `${site.url}/en/videos`;
const { apiUrl } = await fetchApiCredentials(referer);
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
},
],
}, {
headers: {
Referer: referer,
},
encodeJSON: true,
});
return scrapeApiReleases(res.body.results[0].hits, site);
}
async function fetchApiUpcoming(site) {
return fetchApiLatest(site, 1, true);
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`);
return scrapeAll(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);
return scrapeAll(res.body.toString(), site);
}
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
async function fetchActorScenes(actorName, apiUrl, siteSlug) {
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`,
},
],
}, {
headers: {
Referer: `https://www.${siteSlug}.com/en/videos`,
},
encodeJSON: true,
});
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
return res.body.results[0].hits;
}
return [];
}
async function fetchProfile(actorName, siteSlug, altSearchUrl) {
const actorSlug = actorName.toLowerCase().replace(/\s+/, '+');
const searchUrl = altSearchUrl
@@ -139,38 +405,17 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl) {
return null;
}
async function fetchApiCredentials(referer) {
const res = await bhttp.get(referer);
const body = res.body.toString();
async function fetchApiProfile(actorName, siteSlug) {
const actorSlug = encodeURI(actorName);
const referer = `https://www.${siteSlug}.com/en/search?query=${actorSlug}&tab=actors`;
const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey'));
const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1);
const apiData = JSON.parse(apiSerial);
const { applicationID: appId, apiKey } = apiData.api.algolia;
const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0';
const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`;
return {
appId,
apiKey,
userAgent,
apiUrl,
};
}
async function fetchLatest(site, page = 1, upcoming = false) {
const referer = `${site.url}/en/videos`;
const { apiUrl } = await fetchApiCredentials(referer);
console.log(referer);
const res = await bhttp.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`,
indexName: 'all_actors',
params: `query=${actorSlug}`,
},
],
}, {
@@ -180,14 +425,31 @@ async function fetchLatest(site, page = 1, upcoming = false) {
encodeJSON: true,
});
console.log(res.body.results);
if (res.statusCode === 200 && res.body.results[0].hits.length > 0) {
const actorData = res.body.results[0].hits.find(actor => actor.name === actorName);
// return scrape(res.body.results[0].hits, site);
if (actorData) {
const actorScenes = await fetchActorScenes(actorName, apiUrl, siteSlug);
return scrapeApiProfile(actorData, actorScenes, siteSlug);
}
}
return null;
}
module.exports = {
getPhotos,
fetchProfile,
scrapeProfile,
fetchApiLatest,
fetchApiProfile,
fetchApiUpcoming,
fetchLatest,
fetchProfile,
fetchScene,
fetchUpcoming,
getPhotos,
scrapeApiProfile,
scrapeApiReleases,
scrapeProfile,
scrapeAll,
scrapeScene,
};

View File

@@ -36,6 +36,7 @@ const mofos = require('./mofos');
const naughtyamerica = require('./naughtyamerica');
const twentyonesextury = require('./21sextury');
const xempire = require('./xempire');
const wicked = require('./wicked');
// profiles
const boobpedia = require('./boobpedia');
@@ -80,12 +81,14 @@ module.exports = {
teamskeet,
vixen,
vogov,
wicked,
xempire,
},
actors: {
// ordered by data priority
'21sextury': twentyonesextury,
evilangel,
wicked,
mofos,
realitykings,
digitalplayground,

src/scrapers/wicked.js (new file, 10 additions)
View File

@@ -0,0 +1,10 @@
'use strict';
const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma');
module.exports = {
fetchLatest: fetchApiLatest,
fetchProfile: fetchApiProfile,
fetchScene,
fetchUpcoming: fetchApiUpcoming,
};

View File

@@ -1,168 +1,26 @@
'use strict';
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const { getPhotos, fetchProfile } = require('./gamma');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('li[data-itemtype=scene]').toArray();
return scenesElements.map((element) => {
const sceneLinkElement = $(element).find('.sceneTitle a');
const url = `${site.url}${sceneLinkElement.attr('href')}`;
const title = sceneLinkElement.attr('title');
const entryId = $(element).attr('data-itemid');
const date = moment
.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY')
.toDate();
const actors = $(element).find('.sceneActors a')
.map((actorIndex, actorElement) => $(actorElement).attr('title'))
.toArray();
const [likes, dislikes] = $(element).find('.value')
.toArray()
.map(value => Number($(value).text()));
const poster = $(element).find('.imgLink img').attr('data-original');
const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`;
return {
url,
entryId,
title,
actors,
director: 'Mason',
date,
poster,
trailer: {
src: trailer,
quality: 224,
},
rating: {
likes,
dislikes,
},
site,
};
});
}
async function scrapeScene(html, url, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const json = $('script[type="application/ld+json"]').html();
const json2 = $('script:contains("dataLayer = ")').html();
const videoJson = $('script:contains("window.ScenePlayerOptions")').html();
const data = JSON.parse(json)[0];
const data2 = JSON.parse(json2.slice(json2.indexOf('[{'), -1))[0];
const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"id":'), videoJson.indexOf('};') + 1));
const entryId = data2.sceneDetails.sceneId || new URL(url).pathname.split('/').slice(-1)[0];
const title = data2.sceneDetails.sceneTitle || $('meta[name="twitter:title"]').attr('content');
const description = data2.sceneDetails.sceneDescription || data.description || $('meta[name="twitter:description"]').attr('content');
// date in data object is not the release date of the scene, but the date the entry was added
const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate();
const actors = (data2.sceneDetails.sceneActors || data.actor).map(actor => actor.actorName || actor.name);
const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5;
const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();
const siteDomain = $('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
const siteUrl = siteDomain && `https://www.${siteDomain}`;
const poster = videoData.picPreview;
const trailer = `${videoData.playerOptions.host}${videoData.url}`;
const photos = await getPhotos($('.picturesItem a').attr('href'), siteDomain, site);
const tags = data.keywords.split(', ');
return {
url: `${siteUrl}/en/video/${new URL(url).pathname.split('/').slice(-2).join('/')}`,
entryId,
title,
date,
actors,
director: 'Mason',
description,
duration,
poster,
photos,
trailer: [
{
src: trailer.replace('hd', 'sm'),
quality: 240,
},
{
src: trailer.replace('hd', 'med'),
quality: 360,
},
{
src: trailer.replace('hd', 'big'),
quality: 480,
},
{
// probably 540p
src: trailer,
quality: parseInt(videoData.sizeOnLoad, 10),
},
{
src: trailer.replace('hd', '720p'),
quality: 720,
},
{
src: trailer.replace('hd', '1080p'),
quality: 1080,
},
{
src: trailer.replace('hd', '4k'),
quality: 2160,
},
],
tags,
rating: {
stars,
},
site,
channel: siteSlug,
};
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`);
return scrape(res.body.toString(), site);
}
async function fetchUpcoming(site) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`);
return scrape(res.body.toString(), site);
}
const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');
async function fetchScene(url, site) {
const res = await bhttp.get(url);
return scrapeScene(res.body.toString(), url, site);
}
const release = await scrapeScene(res.body.toString(), url, site);
async function xEmpireFetchProfile(actorName) {
return fetchProfile(actorName, 'xempire');
const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
// const siteUrl = siteDomain && `https://www.${siteDomain}`;
release.channel = siteSlug;
return release;
}
module.exports = {
fetchLatest,
fetchProfile: xEmpireFetchProfile,
fetchProfile,
fetchUpcoming,
fetchScene,
};
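
Read together with the gamma.js changes above, the new xempire.js is a thin wrapper: it keeps gamma's HTML-backed fetchLatest/fetchUpcoming, re-exports fetchProfile (which now receives the scraper slug from scrapeActors(), per the actors.js hunk), and only wraps fetchScene to derive the channel from the twitter:domain meta tag through the cheerio handle ($) that gamma's scrapeScene() attaches to the release. A reconstruction of the resulting module as this hunk reads, offered as a sketch rather than the authoritative file:

'use strict';

const bhttp = require('bhttp');

const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma');

async function fetchScene(url, site) {
  const res = await bhttp.get(url);
  const release = await scrapeScene(res.body.toString(), url, site);

  // Only AllBlackX exposes no twitter:domain meta tag; every other XEmpire
  // channel can be identified from it.
  const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com';
  const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();

  release.channel = siteSlug;

  return release;
}

module.exports = {
  fetchLatest,
  fetchProfile,
  fetchScene,
  fetchUpcoming,
};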