Added Private Black, improved Private scraper to handle their non-private.com sites.
| After Width: | Height: | Size: 45 KiB | 
| After Width: | Height: | Size: 46 KiB | 
| After Width: | Height: | Size: 13 KiB | 
| After Width: | Height: | Size: 31 KiB | 
| After Width: | Height: | Size: 3.4 KiB | 
| After Width: | Height: | Size: 4.3 KiB | 
| After Width: | Height: | Size: 19 KiB | 
| After Width: | Height: | Size: 9.1 KiB | 
| After Width: | Height: | Size: 32 KiB | 
| After Width: | Height: | Size: 39 KiB | 
| After Width: | Height: | Size: 3.7 KiB | 
| After Width: | Height: | Size: 30 KiB | 
| After Width: | Height: | Size: 26 KiB | 
| After Width: | Height: | Size: 40 KiB | 
| After Width: | Height: | Size: 30 KiB | 
| After Width: | Height: | Size: 37 KiB | 
|  | @ -1759,7 +1759,6 @@ function getSites(networksMap) { | ||||||
|             network_id: networksMap['pervcity'], |             network_id: networksMap['pervcity'], | ||||||
|             parameters: JSON.stringify({ tourId: 9 }), |             parameters: JSON.stringify({ tourId: 9 }), | ||||||
|         }, |         }, | ||||||
|         // PORN PROS
 |  | ||||||
|         // PRIVATE
 |         // PRIVATE
 | ||||||
|         { |         { | ||||||
|             slug: 'analintroductions', |             slug: 'analintroductions', | ||||||
|  | @ -1810,6 +1809,13 @@ function getSites(networksMap) { | ||||||
|             url: 'https://www.private.com/site/blacks-on-sluts', |             url: 'https://www.private.com/site/blacks-on-sluts', | ||||||
|             network_id: networksMap['private'], |             network_id: networksMap['private'], | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             slug: 'privateblack', | ||||||
|  |             name: 'Private Black', | ||||||
|  |             description: 'Private Black is number 1 for European Interracial Porn with exclusive interracial content in HD and Ultra 4K featuring the freshest young faces from Europe and the most popular European porn stars.', | ||||||
|  |             url: 'https://www.privateblack.com', | ||||||
|  |             network_id: networksMap['private'], | ||||||
|  |         }, | ||||||
|         { |         { | ||||||
|             slug: 'privatefetish', |             slug: 'privatefetish', | ||||||
|             name: 'Private Fetish', |             name: 'Private Fetish', | ||||||
|  |  | ||||||
|  | @ -867,6 +867,10 @@ function getTagAliases(tagsMap) { | ||||||
|             name: 'ball gag', |             name: 'ball gag', | ||||||
|             alias_for: tagsMap['gag'], |             alias_for: tagsMap['gag'], | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             name: 'boob fucking', | ||||||
|  |             alias_for: tagsMap['titty-fuck'], | ||||||
|  |         }, | ||||||
|         { |         { | ||||||
|             name: 'mfm', |             name: 'mfm', | ||||||
|             alias_for: tagsMap['mfm'], |             alias_for: tagsMap['mfm'], | ||||||
|  |  | ||||||
|  | @ -128,7 +128,7 @@ async function storeRelease(release) { | ||||||
|                 ? storePhotos(release, releaseEntry) : Promise.resolve(), |                 ? storePhotos(release, releaseEntry) : Promise.resolve(), | ||||||
|             release.poster |             release.poster | ||||||
|                 ? storePoster(release, releaseEntry) : Promise.resolve(), |                 ? storePoster(release, releaseEntry) : Promise.resolve(), | ||||||
|             release.trailer |             release.trailer && release.trailer.src | ||||||
|                 ? storeTrailer(release, releaseEntry) : Promise.resolve(), |                 ? storeTrailer(release, releaseEntry) : Promise.resolve(), | ||||||
|         ]); |         ]); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -75,7 +75,7 @@ async function scrapeScene(html, url, site) { | ||||||
|     const siteElement = $('.niche-site-logo'); |     const siteElement = $('.niche-site-logo'); | ||||||
|     const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`; |     const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`; | ||||||
|     const siteName = siteElement.attr('title'); |     const siteName = siteElement.attr('title'); | ||||||
|     const siteSlug = siteName.replace(/\s+/g, ''); |     const siteSlug = siteName.replace(/\s+/g, '').toLowerCase(); | ||||||
| 
 | 
 | ||||||
|     const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); |     const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||||
| 
 | 
 | ||||||
|  | @ -85,7 +85,13 @@ async function scrapeScene(html, url, site) { | ||||||
| 
 | 
 | ||||||
|     const [tags, channelSite] = await Promise.all([ |     const [tags, channelSite] = await Promise.all([ | ||||||
|         matchTags(rawTags), |         matchTags(rawTags), | ||||||
|         site.isFallback ? fetchSites(null, siteSlug, siteName, siteUrl) : site, |         site.isFallback | ||||||
|  |             ? [fetchSites({ | ||||||
|  |                 slug: siteSlug, | ||||||
|  |                 name: siteName, | ||||||
|  |                 url: siteUrl, | ||||||
|  |             })] | ||||||
|  |             : site, | ||||||
|     ]); |     ]); | ||||||
| 
 | 
 | ||||||
|     return { |     return { | ||||||
|  |  | ||||||
|  | @ -5,11 +5,13 @@ const bhttp = require('bhttp'); | ||||||
| const cheerio = require('cheerio'); | const cheerio = require('cheerio'); | ||||||
| const moment = require('moment'); | const moment = require('moment'); | ||||||
| 
 | 
 | ||||||
| const knex = require('../knex'); | const fetchSites = require('../sites'); | ||||||
| const { matchTags } = require('../tags'); | const { matchTags } = require('../tags'); | ||||||
| 
 | 
 | ||||||
| async function getPhotos(shootId) { | async function getPhotos(shootId, site) { | ||||||
|     const res = await bhttp.get(`https://www.private.com/gallery.php?type=highres&id=${shootId}`); |     const { hostname } = new URL(site.url); | ||||||
|  | 
 | ||||||
|  |     const res = await bhttp.get(`https://${hostname}/gallery.php?type=highres&id=${shootId}`); | ||||||
|     const html = res.body.toString(); |     const html = res.body.toString(); | ||||||
| 
 | 
 | ||||||
|     const $ = cheerio.load(html, { normalizeWhitespace: true }); |     const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||||
|  | @ -18,28 +20,49 @@ async function getPhotos(shootId) { | ||||||
|     return photos; |     return photos; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | async function getChannelSite($, site) { | ||||||
|  |     const { hostname } = new URL(site.url); | ||||||
|  | 
 | ||||||
|  |     if (site.isFallback && hostname.match('private.com')) { | ||||||
|  |         const siteElement = $('.content-wrapper .logos-sites a'); | ||||||
|  |         const siteUrl = siteElement.attr('href').slice(0, -1); | ||||||
|  |         const siteName = siteElement.text(); | ||||||
|  |         const siteSlug = siteName.replace(/\s+/g, '').toLowerCase(); | ||||||
|  | 
 | ||||||
|  |         const channelSite = await fetchSites({ | ||||||
|  |             slug: siteSlug, | ||||||
|  |             name: siteName, | ||||||
|  |             url: siteUrl, | ||||||
|  |         }); | ||||||
|  | 
 | ||||||
|  |         return channelSite; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return site; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| function scrapeLatest(html, site) { | function scrapeLatest(html, site) { | ||||||
|     const $ = cheerio.load(html, { normalizeWhitespace: true }); |     const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||||
|     const sceneElements = $('.content-wrapper.container .scene').toArray(); |     const sceneElements = $('.content-wrapper .scene').toArray(); | ||||||
| 
 | 
 | ||||||
|     return sceneElements.map((element) => { |     return sceneElements.map((element) => { | ||||||
|         const sceneLinkElement = $(element).find('a[data-track="TITLE_LINK"]'); |         const sceneLinkElement = $(element).find('h3 a'); | ||||||
| 
 | 
 | ||||||
|         const url = sceneLinkElement.attr('href'); |         const url = sceneLinkElement.attr('href'); | ||||||
|         const title = sceneLinkElement.text(); |         const title = sceneLinkElement.text(); | ||||||
|         const shootId = url.split('/').slice(-1)[0]; |         const shootId = url.split('/').slice(-1)[0]; | ||||||
| 
 | 
 | ||||||
|         const thumbnailElement = $(element).find('img.thumbs_onhover'); |         const date = moment.utc($(element).find('.scene-date'), 'MM/DD/YYYY').toDate(); | ||||||
|  | 
 | ||||||
|  |         const actors = $(element).find('.scene-models a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); | ||||||
|  |         const likes = Number($(element).find('.scene-votes').text()); | ||||||
|  | 
 | ||||||
|  |         const thumbnailElement = $(element).find('img.img-responsive'); | ||||||
|         const photoCount = Number(thumbnailElement.attr('thumbs_num')); |         const photoCount = Number(thumbnailElement.attr('thumbs_num')); | ||||||
|         const poster = thumbnailElement.attr('src'); |         const poster = thumbnailElement.attr('src'); | ||||||
|         const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`)); |         const photos = Array.from({ length: photoCount }, (val, index) => thumbnailElement.attr(`src${index + 1}`)); | ||||||
| 
 | 
 | ||||||
|         const date = moment.utc($(element).find('.scene-date'), 'MM/DD/YYYY').toDate(); |         const scene = { | ||||||
| 
 |  | ||||||
|         const actors = $(element).find('a[data-track="PORNSTAR_LINK"]').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); |  | ||||||
|         const likes = Number($(element).find('.scene-votes').text()); |  | ||||||
| 
 |  | ||||||
|         return { |  | ||||||
|             url, |             url, | ||||||
|             shootId, |             shootId, | ||||||
|             title, |             title, | ||||||
|  | @ -52,6 +75,8 @@ function scrapeLatest(html, site) { | ||||||
|             }, |             }, | ||||||
|             site, |             site, | ||||||
|         }; |         }; | ||||||
|  | 
 | ||||||
|  |         return scene; | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -70,25 +95,18 @@ async function scrapeScene(html, url, site) { | ||||||
| 
 | 
 | ||||||
|     const poster = $('meta[property="og:image"]').attr('content'); |     const poster = $('meta[property="og:image"]').attr('content'); | ||||||
|     const trailer = $('meta[property="og:video"]').attr('content'); |     const trailer = $('meta[property="og:video"]').attr('content'); | ||||||
|     const photos = await getPhotos(shootId); |  | ||||||
| 
 | 
 | ||||||
|     const likes = Number($('.content-desc #social-actions #likes').text()); |     const likes = Number($('.content-desc #social-actions #likes').text()); | ||||||
| 
 | 
 | ||||||
|     const siteElement = $('.content-wrapper .logos-sites a'); |  | ||||||
|     const siteUrl = siteElement.attr('href').slice(0, -1); |  | ||||||
|     const siteName = siteElement.text(); |  | ||||||
| 
 |  | ||||||
|     const rawTags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); |     const rawTags = $('.content-desc .scene-tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||||
| 
 | 
 | ||||||
|     const [tags, channelSite] = await Promise.all([ |     const [tags, photos, channelSite] = await Promise.all([ | ||||||
|         matchTags(rawTags), |         matchTags(rawTags), | ||||||
|         knex('sites') |         getPhotos(shootId, site), | ||||||
|             .where({ url: siteUrl }) |         getChannelSite($, site), | ||||||
|             .orWhere({ name: siteName }) |  | ||||||
|             .first(), |  | ||||||
|     ]); |     ]); | ||||||
| 
 | 
 | ||||||
|     return { |     const scene = { | ||||||
|         url, |         url, | ||||||
|         shootId, |         shootId, | ||||||
|         title, |         title, | ||||||
|  | @ -107,14 +125,24 @@ async function scrapeScene(html, url, site) { | ||||||
|         }, |         }, | ||||||
|         site: channelSite || site, |         site: channelSite || site, | ||||||
|     }; |     }; | ||||||
|  | 
 | ||||||
|  |     return scene; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function fetchLatest(site, page = 1) { | async function fetchLatest(site, page = 1) { | ||||||
|  |     const { hostname } = new URL(site.url); | ||||||
|  | 
 | ||||||
|  |     if (hostname.match('private.com')) { | ||||||
|         const res = await bhttp.get(`${site.url}/${page}/`); |         const res = await bhttp.get(`${site.url}/${page}/`); | ||||||
| 
 | 
 | ||||||
|         return scrapeLatest(res.body.toString(), site); |         return scrapeLatest(res.body.toString(), site); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     const res = await bhttp.get(`${site.url}/scenes/${page}/`); | ||||||
|  | 
 | ||||||
|  |     return scrapeLatest(res.body.toString(), site); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| async function fetchScene(url, site) { | async function fetchScene(url, site) { | ||||||
|     const res = await bhttp.get(url); |     const res = await bhttp.get(url); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||