diff --git a/assets/js/releases/actions.js b/assets/js/releases/actions.js index e26c2efe6..7c97decc7 100644 --- a/assets/js/releases/actions.js +++ b/assets/js/releases/actions.js @@ -4,8 +4,6 @@ import { curateRelease } from '../curate'; function initReleasesActions(store, _router) { async function fetchReleases({ _commit }, { limit = 100 }) { - console.log(store.state.ui.filter, store.getters.after, store.getters.before); - const { releases } = await graphql(` query Releases( $limit:Int = 1000, diff --git a/public/img/logos/wicked/favicon.png b/public/img/logos/wicked/favicon.png new file mode 100644 index 000000000..3abc516d6 Binary files /dev/null and b/public/img/logos/wicked/favicon.png differ diff --git a/public/img/logos/wicked/network.png b/public/img/logos/wicked/network.png new file mode 100644 index 000000000..7a4e481da Binary files /dev/null and b/public/img/logos/wicked/network.png differ diff --git a/public/img/logos/wicked/wicked.png b/public/img/logos/wicked/wicked.png new file mode 100644 index 000000000..eab5eb667 Binary files /dev/null and b/public/img/logos/wicked/wicked.png differ diff --git a/seeds/00_networks.js b/seeds/00_networks.js index 29237e463..6b23b8712 100644 --- a/seeds/00_networks.js +++ b/seeds/00_networks.js @@ -191,6 +191,12 @@ const networks = [ url: 'https://www.vogov.com', description: 'Fantastic collection of exclusive porn movies with the most beautiful porn models in leading roles saisfies the most picky visitor of the site.', }, + { + slug: 'wicked', + name: 'Wicked', + url: 'https://www.wicked.com', + description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos', + }, { slug: 'xempire', name: 'XEmpire', diff --git a/seeds/01_sites.js b/seeds/01_sites.js index c3dc1be57..9cbd3ae05 100644 --- a/seeds/01_sites.js +++ b/seeds/01_sites.js @@ -1217,7 +1217,7 @@ function getSites(networksMap) { { slug: 'evilangel', name: 'Evil Angel', - url: 'https://evilangel.com', + url: 'https://www.evilangel.com', description: 'Welcome to the award winning Evil Angel website, home to the most popular pornstars of today, yesterday and tomorrow in their most extreme and hardcore porn scenes to date. We feature almost 30 years of rough sex videos and hardcore anal porn like you\'ve never seen before, and have won countless AVN and XBiz awards including \'Best Site\' and \'Best Studio\'.', parameters: JSON.stringify({ independent: true }), network_id: networksMap.evilangel, @@ -3340,6 +3340,15 @@ function getSites(networksMap) { description: 'Top rated models. Graceful locations. Best gonzo scenes. 4K UHD 60 FPS. So, in general Vogov is a website that is worth visiting and exploring carefully. It gives a chance to spend a fantastic night with gorgeous girls ready to experiment and to full around with their lovers.', network_id: networksMap.vogov, }, + // WICKED + { + slug: 'wicked', + name: 'Wicked', + url: 'https://www.wicked.com', + description: 'Welcome to the new Wicked.com! Watch over 25 years of Wicked Pictures\' brand of award-winning porn for couples and women in 4k HD movies & xxx videos', + parameters: JSON.stringify({ independent: true }), + network_id: networksMap.wicked, + }, // XEMPIRE { slug: 'hardx', diff --git a/seeds/03_tags.js b/seeds/03_tags.js index 90731f895..46d1de245 100644 --- a/seeds/03_tags.js +++ b/seeds/03_tags.js @@ -589,6 +589,7 @@ function getTags(groupsMap) { name: 'MILF', slug: 'milf', alias_for: null, + priority: 7, group_id: groupsMap.age, }, { @@ -1578,6 +1579,10 @@ function getTagAliases(tagsMap) { name: 'trans', alias_for: tagsMap.transsexual, }, + { + name: 'transgender', + alias_for: tagsMap.transsexual, + }, { name: 'trimmed pussy', alias_for: tagsMap.trimmed, diff --git a/src/actors.js b/src/actors.js index 9d37e2121..4cc60e9b3 100644 --- a/src/actors.js +++ b/src/actors.js @@ -342,7 +342,7 @@ async function scrapeActors(actorNames) { const profiles = await Promise.map(sources, async ([scraperSlug, scraper]) => { try { - const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName); + const profile = await scraper.fetchProfile(actorEntry ? actorEntry.name : actorName, scraperSlug); return { ...profile, diff --git a/src/argv.js b/src/argv.js index b6981767c..ae6085431 100644 --- a/src/argv.js +++ b/src/argv.js @@ -35,6 +35,12 @@ const { argv } = yargs alias: 'with-scenes', default: false, }) + .option('with-profiles', { + describe: 'Scrape profiles for new actors after fetching scenes', + type: 'boolean', + alias: 'with-actors', + default: true, + }) .option('scene', { describe: 'Scrape scene info from URL', type: 'array', @@ -55,6 +61,16 @@ const { argv } = yargs type: 'boolean', default: true, }) + .option('latest', { + describe: 'Scrape latest releases if available', + type: 'boolean', + default: true, + }) + .option('upcoming', { + describe: 'Scrape upcoming releases if available', + type: 'boolean', + default: true, + }) .option('redownload', { describe: 'Don\'t ignore duplicates, update existing entries', type: 'boolean', diff --git a/src/releases.js b/src/releases.js index e465f89b5..2e66bc176 100644 --- a/src/releases.js +++ b/src/releases.js @@ -425,7 +425,9 @@ async function storeReleases(releases) { storeReleaseAssets(storedReleases), ]); - await scrapeBasicActors(); + if (argv.withProfiles) { + await scrapeBasicActors(); + } return { releases: storedReleases, diff --git a/src/scrape-releases.js b/src/scrape-releases.js index f8954d76e..78d022c55 100644 --- a/src/scrape-releases.js +++ b/src/scrape-releases.js @@ -65,8 +65,12 @@ async function scrapeRelease(source, basicRelease = null, type = 'scene') { : await scraper.fetchMovie(url, site, release); return { + url, ...scrapedRelease, ...release, + ...(scrapedRelease && release?.tags && { + tags: release.tags.concat(scrapedRelease.tags), + }), site, }; } diff --git a/src/scrape-sites.js b/src/scrape-sites.js index 512a8035b..2f181d684 100644 --- a/src/scrape-sites.js +++ b/src/scrape-sites.js @@ -30,6 +30,10 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) { } async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) { + if (!argv.latest || !scraper.fetchLatest) { + return []; + } + const latestReleases = await scraper.fetchLatest(site, page); if (latestReleases.length === 0) { @@ -58,7 +62,7 @@ async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), a } async function scrapeUpcomingReleases(scraper, site) { - if (scraper.fetchUpcoming) { + if (argv.upcoming && scraper.fetchUpcoming) { const upcomingReleases = await scraper.fetchUpcoming(site); return upcomingReleases.map(release => ({ ...release, upcoming: true })); @@ -100,7 +104,9 @@ async function scrapeSiteReleases(scraper, site) { scrapeUpcomingReleases(scraper, site), // fetch basic release info from upcoming overview ]); - logger.info(`${site.name}: Found ${newReleases.length} recent releases, ${upcomingReleases.length} upcoming releases`); + if (argv.upcoming) { + logger.info(`${site.name}: ${argv.latest ? 'Found' : 'Ignoring'} ${newReleases.length || ''}latest releases, ${argv.upcoming ? '' : 'ignoring '}${upcomingReleases.length || ''} upcoming releases`); + } const baseReleases = [...newReleases, ...upcomingReleases]; diff --git a/src/scrapers/evilangel.js b/src/scrapers/evilangel.js index e58794b36..c31fa6e80 100644 --- a/src/scrapers/evilangel.js +++ b/src/scrapers/evilangel.js @@ -1,235 +1,10 @@ 'use strict'; -const bhttp = require('bhttp'); -const cheerio = require('cheerio'); -const moment = require('moment'); - -const { getPhotos } = require('./gamma'); - -async function scrape(json, site) { - return Promise.all(json.map(async (scene) => { - const { - title, - description, - length, - master_categories: tags, - ratings_up: likes, - ratings_down: dislikes, - } = scene; - - const entryId = scene.clip_id; - const url = `https://evilangel.com/en/video/${scene.url_title}/${entryId}`; - const date = moment(scene.release_date, 'YYYY-MM-DD').toDate(); - const actors = scene.actors.map(({ name }) => name); - const director = scene.directors[0].name; - - const poster = `https://images-evilangel.gammacdn.com/movies${scene.pictures.resized}`; - const movie = `https://evilangel.com/en/movie/${scene.url_movie_title}/${scene.movie_id}`; - - return { - url, - entryId, - title, - description, - length, - actors, - director, - date, - tags, - poster, - rating: { - likes, - dislikes, - }, - movie, - site, - }; - })); -} - -async function scrapeScene(html, url, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const json = $('script[type="application/ld+json"]').html(); - const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); - - const [data, data2] = JSON.parse(json); - const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); - const entryId = new URL(url).pathname.split('/').slice(-1)[0]; - - const { - name: title, - description, - } = data; - // date in data object is not the release date of the scene, but the date the entry was added - const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate(); - - const actors = data.actor.map(actor => actor.name); - const hasTrans = data.actor.some(actor => actor.gender === 'shemale'); - - const director = (data.director && data.director[0].name) || (data2.director && data2.director[0].name) || null; - const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; - - const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); - - const rawTags = data.keywords.split(', '); - const tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; - - const poster = videoData.picPreview; - const trailer = `${videoData.playerOptions.host}${videoData.url}`; - - const photos = await getPhotos($('.picturesItem a').attr('href'), 'evilangel.com', site); - - return { - url, - entryId, - title, - date, - actors, - director, - description, - duration, - tags, - poster, - photos, - trailer: { - src: trailer, - quality: parseInt(videoData.sizeOnLoad, 10), - }, - rating: { - stars, - }, - site, - }; -} - -function scrapeActor(data, releases) { - const actor = {}; - - if (data.male === 1) actor.gender = 'male'; - if (data.female === 1) actor.gender = 'female'; - if (data.shemale === 1 || data.trans === 1) actor.gender = 'transsexual'; - - if (data.description) actor.description = data.description.trim(); - - if (data.attributes.ethnicity) actor.ethnicity = data.attributes.ethnicity; - if (data.attributes.eye_color) actor.eyes = data.attributes.eye_color; - if (data.attributes.hair_color) actor.hair = data.attributes.hair_color; - - const avatarPath = Object.values(data.pictures).reverse()[0]; - actor.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`; - - actor.releases = releases.map(release => `https://evilangel.com/en/video/${release.url_title}/${release.clip_id}`); - - return actor; -} - -async function fetchApiCredentials() { - const res = await bhttp.get('https://evilangel.com/en/videos'); - const body = res.body.toString(); - - const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); - const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); - const apiData = JSON.parse(apiSerial); - - const { applicationID: appId, apiKey } = apiData.api.algolia; - const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; - - const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; - - return { - appId, - apiKey, - userAgent, - apiUrl, - }; -} - -async function fetchLatest(site, page = 1, upcoming = false) { - const { apiUrl } = await fetchApiCredentials(); - - const res = await bhttp.post(apiUrl, { - requests: [ - { - indexName: 'all_scenes', - params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`, - }, - ], - }, { - headers: { - Referer: 'https://www.evilangel.com/en/videos', - }, - encodeJSON: true, - }); - - return scrape(res.body.results[0].hits, site); -} - -async function fetchUpcoming(site) { - return fetchLatest(site, 1, true); -} - -async function fetchScene(url, site) { - const res = await bhttp.get(url); - - return scrapeScene(res.body.toString(), url, site); -} - -async function fetchActorScenes(actorName, apiUrl) { - const res = await bhttp.post(apiUrl, { - requests: [ - { - indexName: 'all_scenes', - params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, - }, - ], - }, { - headers: { - Referer: 'https://www.evilangel.com/en/videos', - }, - encodeJSON: true, - }); - - if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { - return res.body.results[0].hits; - } - - return []; -} - -async function fetchProfile(actorName) { - const { apiUrl } = await fetchApiCredentials(); - const actorSlug = encodeURI(actorName); - - const res = await bhttp.post(apiUrl, { - requests: [ - { - indexName: 'all_actors', - params: `query=${actorSlug}`, - }, - ], - }, { - headers: { - Referer: `https://www.evilangel.com/en/search?query=${actorSlug}&tab=actors`, - }, - encodeJSON: true, - }); - - if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { - const actorData = res.body.results[0].hits.find(actor => actor.name === actorName); - - if (actorData) { - const actorScenes = await fetchActorScenes(actorName, apiUrl); - - return scrapeActor(actorData, actorScenes); - } - } - - return null; -} +const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma'); module.exports = { - fetchLatest, - fetchProfile, + fetchLatest: fetchApiLatest, + fetchProfile: fetchApiProfile, fetchScene, - fetchUpcoming, + fetchUpcoming: fetchApiUpcoming, }; diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index f1de3b113..33243b3c5 100644 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -4,6 +4,7 @@ const Promise = require('bluebird'); const bhttp = require('bhttp'); const { JSDOM } = require('jsdom'); const cheerio = require('cheerio'); +const moment = require('moment'); async function fetchPhotos(url) { const res = await bhttp.get(url); @@ -39,33 +40,191 @@ function scrapePhotos(html) { }); } -async function getPhotos(albumPath, siteDomain) { - const albumUrl = `https://${siteDomain}${albumPath}`; +async function getPhotos(albumPath, site) { + const albumUrl = `${site.url}${albumPath}`; try { const html = await fetchPhotos(albumUrl); const $ = cheerio.load(html, { normalizeWhitespace: true }); const photos = scrapePhotos(html); - const pages = $('.paginatorPages a').map((pageIndex, pageElement) => $(pageElement).attr('href')).toArray(); + const lastPage = $('.Gamma_Paginator a.last').attr('href')?.match(/\d+$/)[0]; - const otherPhotos = await Promise.map(pages, async (page) => { - const pageUrl = `https://${siteDomain}${page}`; - const pageHtml = await fetchPhotos(pageUrl); + if (lastPage) { + const otherPages = Array.from({ length: Number(lastPage) }, (_value, index) => index + 1).slice(1); - return scrapePhotos(pageHtml); - }, { - concurrency: 2, - }); + const otherPhotos = await Promise.map(otherPages, async (page) => { + const pageUrl = `${site.url}/${albumPath}/${page}`; + const pageHtml = await fetchPhotos(pageUrl); - return photos.concat(otherPhotos.flat()); + return scrapePhotos(pageHtml); + }, { + concurrency: 2, + }); + + return photos.concat(otherPhotos.flat()); + } + + return photos; } catch (error) { - console.error(`Failed to fetch ${siteDomain} photos from ${albumPath}: ${error.message}`); + console.error(`Failed to fetch ${site.name} photos from ${albumUrl}: ${error.message}`); return []; } } +async function scrapeApiReleases(json, site) { + return json.map((scene) => { + const release = { + entryId: scene.clip_id, + title: scene.title, + description: scene.description, + duration: scene.length, + likes: scene.ratings_up, + dislikes: scene.ratings_down, + }; + + release.url = `${site.url}/en/video/${scene.url_title}/${release.entryId}`; + release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate(); + release.actors = scene.actors.map(({ name }) => name); + release.director = scene.directors[0].name; + + release.tags = scene.master_categories.concat(scene.categories?.map(category => category.name)); + + const posterPath = scene.pictures.resized || (scene.pictures.nsfw?.top && Object.values(scene.pictures.nsfw.top)[0]); + + if (posterPath) { + release.poster = [ + `https://images-evilangel.gammacdn.com/movies${posterPath}`, + `https://transform.gammacdn.com/movies${posterPath}`, + ]; + } + + release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`; + + return release; + }); +} + +function scrapeAll(html, site) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const scenesElements = $('li[data-itemtype=scene]').toArray(); + + return scenesElements.map((element) => { + const sceneLinkElement = $(element).find('.sceneTitle a'); + + const url = `${site.url}${sceneLinkElement.attr('href')}`; + const title = sceneLinkElement.attr('title'); + + const entryId = $(element).attr('data-itemid'); + + const date = moment + .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') + .toDate(); + + const actors = $(element).find('.sceneActors a') + .map((actorIndex, actorElement) => $(actorElement).attr('title')) + .toArray(); + + const [likes, dislikes] = $(element).find('.value') + .toArray() + .map(value => Number($(value).text())); + + const poster = $(element).find('.imgLink img').attr('data-original'); + const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`; + + return { + url, + entryId, + title, + actors, + director: 'Mason', + date, + poster, + trailer: { + src: trailer, + quality: 224, + }, + rating: { + likes, + dislikes, + }, + site, + }; + }); +} + +async function scrapeScene(html, url, site) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const release = { $ }; + + const json = $('script[type="application/ld+json"]').html(); + const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); + + const [data, data2] = JSON.parse(json); + const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{'), videoJson.indexOf('};') + 1)); + + [release.entryId] = new URL(url).pathname.split('/').slice(-1); + + release.title = data.name; + release.description = data.description; + + // date in data object is not the release date of the scene, but the date the entry was added + const dateString = $('.updatedDate').first().text().trim(); + const dateMatch = dateString.match(/\d{2,4}-\d{2}-\d{2,4}/)?.[0]; + release.date = moment.utc(dateMatch, ['MM-DD-YYYY', 'YYYY-MM-DD']).toDate(); + + release.director = data.director?.[0].name || data2?.director?.[0].name; + release.actors = data.actor.map(actor => actor.name); + const hasTrans = data.actor.some(actor => actor.gender === 'shemale'); + + const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; + if (stars) release.rating = { stars }; + + release.duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); + + const rawTags = data.keywords?.split(', '); + release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags; + + release.poster = videoData.picPreview; + release.photos = await getPhotos($('.picturesItem a').attr('href'), site); + + const trailer = `${videoData.playerOptions.host}${videoData.url}`; + release.trailer = [ + { + src: trailer.replace('hd', 'sm'), + quality: 240, + }, + { + src: trailer.replace('hd', 'med'), + quality: 360, + }, + { + src: trailer.replace('hd', 'big'), + quality: 480, + }, + { + // probably 540p + src: trailer, + quality: parseInt(videoData.sizeOnLoad, 10), + }, + { + src: trailer.replace('hd', '720p'), + quality: 720, + }, + { + src: trailer.replace('hd', '1080p'), + quality: 1080, + }, + { + src: trailer.replace('hd', '4k'), + quality: 2160, + }, + ]; + + return release; +} + function scrapeActorSearch(html, url, actorName) { const { document } = new JSDOM(html).window; const actorLink = document.querySelector(`a[title="${actorName}" i]`); @@ -112,6 +271,113 @@ function scrapeProfile(html, url, actorName, siteSlug) { return profile; } +function scrapeApiProfile(data, releases, siteSlug) { + const profile = {}; + + if (data.male === 1) profile.gender = 'male'; + if (data.female === 1) profile.gender = 'female'; + if (data.shemale === 1 || data.trans === 1) profile.gender = 'transsexual'; + + if (data.description) profile.description = data.description.trim(); + + if (data.attributes.ethnicity) profile.ethnicity = data.attributes.ethnicity; + if (data.attributes.eye_color) profile.eyes = data.attributes.eye_color; + if (data.attributes.hair_color) profile.hair = data.attributes.hair_color; + + const avatarPath = Object.values(data.pictures).reverse()[0]; + if (avatarPath) profile.avatar = `https://images01-evilangel.gammacdn.com/actors${avatarPath}`; + + profile.releases = releases.map(release => `https://${siteSlug}.com/en/video/${release.url_title}/${release.clip_id}`); + + return profile; +} + +async function fetchApiCredentials(referer) { + const res = await bhttp.get(referer); + const body = res.body.toString(); + + const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); + const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); + const apiData = JSON.parse(apiSerial); + + const { applicationID: appId, apiKey } = apiData.api.algolia; + const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; + + const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; + + return { + appId, + apiKey, + userAgent, + apiUrl, + }; +} + +async function fetchApiLatest(site, page = 1, upcoming = false) { + const referer = `${site.url}/en/videos`; + const { apiUrl } = await fetchApiCredentials(referer); + + const res = await bhttp.post(apiUrl, { + requests: [ + { + indexName: 'all_scenes', + params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`, + }, + ], + }, { + headers: { + Referer: referer, + }, + encodeJSON: true, + }); + + return scrapeApiReleases(res.body.results[0].hits, site); +} + +async function fetchApiUpcoming(site) { + return fetchApiLatest(site, 1, true); +} + +async function fetchLatest(site, page = 1) { + const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`); + + return scrapeAll(res.body.toString(), site); +} + +async function fetchUpcoming(site) { + const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`); + + return scrapeAll(res.body.toString(), site); +} + +async function fetchScene(url, site) { + const res = await bhttp.get(url); + + return scrapeScene(res.body.toString(), url, site); +} + +async function fetchActorScenes(actorName, apiUrl, siteSlug) { + const res = await bhttp.post(apiUrl, { + requests: [ + { + indexName: 'all_scenes', + params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=0&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["actors.name:${actorName}"]]`, + }, + ], + }, { + headers: { + Referer: `https://www.${siteSlug}.com/en/videos`, + }, + encodeJSON: true, + }); + + if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { + return res.body.results[0].hits; + } + + return []; +} + async function fetchProfile(actorName, siteSlug, altSearchUrl) { const actorSlug = actorName.toLowerCase().replace(/\s+/, '+'); const searchUrl = altSearchUrl @@ -139,38 +405,17 @@ async function fetchProfile(actorName, siteSlug, altSearchUrl) { return null; } -async function fetchApiCredentials(referer) { - const res = await bhttp.get(referer); - const body = res.body.toString(); +async function fetchApiProfile(actorName, siteSlug) { + const actorSlug = encodeURI(actorName); + const referer = `https://www.${siteSlug}.com/en/search?query=${actorSlug}&tab=actors`; - const apiLine = body.split('\n').find(bodyLine => bodyLine.match('apiKey')); - const apiSerial = apiLine.slice(apiLine.indexOf('{'), apiLine.indexOf('};') + 1); - const apiData = JSON.parse(apiSerial); - - const { applicationID: appId, apiKey } = apiData.api.algolia; - const userAgent = 'Algolia for vanilla JavaScript (lite) 3.27.0;instantsearch.js 2.7.4;JS Helper 2.26.0'; - - const apiUrl = `https://${appId.toLowerCase()}-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=${userAgent}&x-algolia-application-id=${appId}&x-algolia-api-key=${apiKey}`; - - return { - appId, - apiKey, - userAgent, - apiUrl, - }; -} - -async function fetchLatest(site, page = 1, upcoming = false) { - const referer = `${site.url}/en/videos`; const { apiUrl } = await fetchApiCredentials(referer); - console.log(referer); - const res = await bhttp.post(apiUrl, { requests: [ { - indexName: 'all_scenes', - params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]`, + indexName: 'all_actors', + params: `query=${actorSlug}`, }, ], }, { @@ -180,14 +425,31 @@ async function fetchLatest(site, page = 1, upcoming = false) { encodeJSON: true, }); - console.log(res.body.results); + if (res.statusCode === 200 && res.body.results[0].hits.length > 0) { + const actorData = res.body.results[0].hits.find(actor => actor.name === actorName); - // return scrape(res.body.results[0].hits, site); + if (actorData) { + const actorScenes = await fetchActorScenes(actorName, apiUrl, siteSlug); + + return scrapeApiProfile(actorData, actorScenes, siteSlug); + } + } + + return null; } module.exports = { - getPhotos, - fetchProfile, - scrapeProfile, + fetchApiLatest, + fetchApiProfile, + fetchApiUpcoming, fetchLatest, + fetchProfile, + fetchScene, + fetchUpcoming, + getPhotos, + scrapeApiProfile, + scrapeApiReleases, + scrapeProfile, + scrapeAll, + scrapeScene, }; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 024ccccb4..ecee79667 100644 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -36,6 +36,7 @@ const mofos = require('./mofos'); const naughtyamerica = require('./naughtyamerica'); const twentyonesextury = require('./21sextury'); const xempire = require('./xempire'); +const wicked = require('./wicked'); // profiles const boobpedia = require('./boobpedia'); @@ -80,12 +81,14 @@ module.exports = { teamskeet, vixen, vogov, + wicked, xempire, }, actors: { // ordered by data priority '21sextury': twentyonesextury, evilangel, + wicked, mofos, realitykings, digitalplayground, diff --git a/src/scrapers/wicked.js b/src/scrapers/wicked.js new file mode 100644 index 000000000..c31fa6e80 --- /dev/null +++ b/src/scrapers/wicked.js @@ -0,0 +1,10 @@ +'use strict'; + +const { fetchApiLatest, fetchApiUpcoming, fetchScene, fetchApiProfile } = require('./gamma'); + +module.exports = { + fetchLatest: fetchApiLatest, + fetchProfile: fetchApiProfile, + fetchScene, + fetchUpcoming: fetchApiUpcoming, +}; diff --git a/src/scrapers/xempire.js b/src/scrapers/xempire.js index 94bf56901..355dcea8d 100644 --- a/src/scrapers/xempire.js +++ b/src/scrapers/xempire.js @@ -1,168 +1,26 @@ 'use strict'; const bhttp = require('bhttp'); -const cheerio = require('cheerio'); -const moment = require('moment'); -const { getPhotos, fetchProfile } = require('./gamma'); - -function scrape(html, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const scenesElements = $('li[data-itemtype=scene]').toArray(); - - return scenesElements.map((element) => { - const sceneLinkElement = $(element).find('.sceneTitle a'); - - const url = `${site.url}${sceneLinkElement.attr('href')}`; - const title = sceneLinkElement.attr('title'); - - const entryId = $(element).attr('data-itemid'); - - const date = moment - .utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY') - .toDate(); - - const actors = $(element).find('.sceneActors a') - .map((actorIndex, actorElement) => $(actorElement).attr('title')) - .toArray(); - - const [likes, dislikes] = $(element).find('.value') - .toArray() - .map(value => Number($(value).text())); - - const poster = $(element).find('.imgLink img').attr('data-original'); - const trailer = `https://videothumb.gammacdn.com/307x224/${entryId}.mp4`; - - return { - url, - entryId, - title, - actors, - director: 'Mason', - date, - poster, - trailer: { - src: trailer, - quality: 224, - }, - rating: { - likes, - dislikes, - }, - site, - }; - }); -} - -async function scrapeScene(html, url, site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const json = $('script[type="application/ld+json"]').html(); - const json2 = $('script:contains("dataLayer = ")').html(); - const videoJson = $('script:contains("window.ScenePlayerOptions")').html(); - - const data = JSON.parse(json)[0]; - const data2 = JSON.parse(json2.slice(json2.indexOf('[{'), -1))[0]; - const videoData = JSON.parse(videoJson.slice(videoJson.indexOf('{"id":'), videoJson.indexOf('};') + 1)); - - const entryId = data2.sceneDetails.sceneId || new URL(url).pathname.split('/').slice(-1)[0]; - - const title = data2.sceneDetails.sceneTitle || $('meta[name="twitter:title"]').attr('content'); - const description = data2.sceneDetails.sceneDescription || data.description || $('meta[name="twitter:description"]').attr('content'); - // date in data object is not the release date of the scene, but the date the entry was added - const date = moment.utc($('.updatedDate').first().text(), 'MM-DD-YYYY').toDate(); - - const actors = (data2.sceneDetails.sceneActors || data.actor).map(actor => actor.actorName || actor.name); - const stars = (data.aggregateRating.ratingValue / data.aggregateRating.bestRating) * 5; - - const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds(); - - const siteDomain = $('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available - const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase(); - const siteUrl = siteDomain && `https://www.${siteDomain}`; - - const poster = videoData.picPreview; - const trailer = `${videoData.playerOptions.host}${videoData.url}`; - - const photos = await getPhotos($('.picturesItem a').attr('href'), siteDomain, site); - - const tags = data.keywords.split(', '); - - return { - url: `${siteUrl}/en/video/${new URL(url).pathname.split('/').slice(-2).join('/')}`, - entryId, - title, - date, - actors, - director: 'Mason', - description, - duration, - poster, - photos, - trailer: [ - { - src: trailer.replace('hd', 'sm'), - quality: 240, - }, - { - src: trailer.replace('hd', 'med'), - quality: 360, - }, - { - src: trailer.replace('hd', 'big'), - quality: 480, - }, - { - // probably 540p - src: trailer, - quality: parseInt(videoData.sizeOnLoad, 10), - }, - { - src: trailer.replace('hd', '720p'), - quality: 720, - }, - { - src: trailer.replace('hd', '1080p'), - quality: 1080, - }, - { - src: trailer.replace('hd', '4k'), - quality: 2160, - }, - ], - tags, - rating: { - stars, - }, - site, - channel: siteSlug, - }; -} - -async function fetchLatest(site, page = 1) { - const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`); - - return scrape(res.body.toString(), site); -} - -async function fetchUpcoming(site) { - const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/1/upcoming`); - - return scrape(res.body.toString(), site); -} +const { fetchLatest, fetchUpcoming, scrapeScene, fetchProfile } = require('./gamma'); async function fetchScene(url, site) { const res = await bhttp.get(url); - return scrapeScene(res.body.toString(), url, site); -} + const release = await scrapeScene(res.body.toString(), url, site); -async function xEmpireFetchProfile(actorName) { - return fetchProfile(actorName, 'xempire'); + const siteDomain = release.$('meta[name="twitter:domain"]').attr('content') || 'allblackx.com'; // only AllBlackX has no twitter domain, no other useful hints available + const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase(); + // const siteUrl = siteDomain && `https://www.${siteDomain}`; + + release.channel = siteSlug; + + return release; } module.exports = { fetchLatest, - fetchProfile: xEmpireFetchProfile, + fetchProfile, fetchUpcoming, fetchScene, };