Updated Naughty America scraper, added media support.
|  | @ -184,7 +184,6 @@ exports.up = knex => Promise.resolve() | ||||||
| 
 | 
 | ||||||
|         table.string('shoot_id'); |         table.string('shoot_id'); | ||||||
|         table.string('entry_id'); |         table.string('entry_id'); | ||||||
|         table.unique(['site_id', 'shoot_id']); |  | ||||||
|         table.unique(['site_id', 'entry_id']); |         table.unique(['site_id', 'entry_id']); | ||||||
| 
 | 
 | ||||||
|         table.string('url', 1000); |         table.string('url', 1000); | ||||||
|  |  | ||||||
| After Width: | Height: | Size: 79 KiB | 
| After Width: | Height: | Size: 58 KiB | 
| After Width: | Height: | Size: 31 KiB | 
| After Width: | Height: | Size: 9.6 KiB | 
| After Width: | Height: | Size: 21 KiB | 
| After Width: | Height: | Size: 14 KiB | 
| After Width: | Height: | Size: 12 KiB | 
| After Width: | Height: | Size: 15 KiB | 
| After Width: | Height: | Size: 48 KiB | 
| After Width: | Height: | Size: 19 KiB | 
| After Width: | Height: | Size: 11 KiB | 
| After Width: | Height: | Size: 9.3 KiB | 
| After Width: | Height: | Size: 76 KiB | 
| After Width: | Height: | Size: 19 KiB | 
| After Width: | Height: | Size: 17 KiB | 
| After Width: | Height: | Size: 20 KiB | 
| After Width: | Height: | Size: 64 KiB | 
| After Width: | Height: | Size: 17 KiB | 
| After Width: | Height: | Size: 37 KiB | 
| After Width: | Height: | Size: 48 KiB | 
| After Width: | Height: | Size: 38 KiB | 
| After Width: | Height: | Size: 54 KiB | 
| After Width: | Height: | Size: 50 KiB | 
| After Width: | Height: | Size: 57 KiB | 
| After Width: | Height: | Size: 21 KiB | 
| After Width: | Height: | Size: 29 KiB | 
| After Width: | Height: | Size: 12 KiB | 
| After Width: | Height: | Size: 15 KiB | 
| After Width: | Height: | Size: 36 KiB | 
| After Width: | Height: | Size: 25 KiB | 
| After Width: | Height: | Size: 33 KiB | 
| After Width: | Height: | Size: 15 KiB | 
| After Width: | Height: | Size: 11 KiB | 
| After Width: | Height: | Size: 30 KiB | 
| After Width: | Height: | Size: 42 KiB | 
| After Width: | Height: | Size: 28 KiB | 
| After Width: | Height: | Size: 5.5 KiB | 
| After Width: | Height: | Size: 77 KiB | 
| After Width: | Height: | Size: 20 KiB | 
| After Width: | Height: | Size: 25 KiB | 
| After Width: | Height: | Size: 7.7 KiB | 
| After Width: | Height: | Size: 36 KiB | 
| After Width: | Height: | Size: 73 KiB | 
| After Width: | Height: | Size: 38 KiB | 
| After Width: | Height: | Size: 14 KiB | 
| After Width: | Height: | Size: 56 KiB | 
| After Width: | Height: | Size: 31 KiB | 
| After Width: | Height: | Size: 66 KiB | 
| Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 25 KiB | 
| After Width: | Height: | Size: 17 KiB | 
| After Width: | Height: | Size: 29 KiB | 
| After Width: | Height: | Size: 102 KiB | 
| After Width: | Height: | Size: 18 KiB | 
| After Width: | Height: | Size: 15 KiB | 
| After Width: | Height: | Size: 16 KiB | 
| After Width: | Height: | Size: 16 KiB | 
| After Width: | Height: | Size: 55 KiB | 
| After Width: | Height: | Size: 14 KiB | 
| After Width: | Height: | Size: 16 KiB | 
| After Width: | Height: | Size: 12 KiB | 
| After Width: | Height: | Size: 14 KiB | 
| After Width: | Height: | Size: 15 KiB | 
| After Width: | Height: | Size: 12 KiB | 
| After Width: | Height: | Size: 22 KiB | 
|  | @ -223,12 +223,16 @@ async function fetchTagReleases(queryObject, options = {}) { | ||||||
| async function storeReleaseAssets(release, releaseId) { | async function storeReleaseAssets(release, releaseId) { | ||||||
|     await createReleaseMediaDirectory(release, releaseId); |     await createReleaseMediaDirectory(release, releaseId); | ||||||
| 
 | 
 | ||||||
|     await Promise.all([ |     try { | ||||||
|         associateTags(release, releaseId), |         await Promise.all([ | ||||||
|         storePhotos(release, releaseId), |             associateTags(release, releaseId), | ||||||
|         storePoster(release, releaseId), |             storePhotos(release, releaseId), | ||||||
|         storeTrailer(release, releaseId), |             storePoster(release, releaseId), | ||||||
|     ]); |             storeTrailer(release, releaseId), | ||||||
|  |         ]); | ||||||
|  |     } catch (error) { | ||||||
|  |         console.log(release, error); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function storeRelease(release) { | async function storeRelease(release) { | ||||||
|  |  | ||||||
|  | @ -5,9 +5,6 @@ const bhttp = require('bhttp'); | ||||||
| const cheerio = require('cheerio'); | const cheerio = require('cheerio'); | ||||||
| const moment = require('moment'); | const moment = require('moment'); | ||||||
| 
 | 
 | ||||||
| const knex = require('../knex'); |  | ||||||
| const { matchTags } = require('../tags'); |  | ||||||
| 
 |  | ||||||
| function titleExtractor(pathname) { | function titleExtractor(pathname) { | ||||||
|     const components = pathname.split('/')[2].split('-'); |     const components = pathname.split('/')[2].split('-'); | ||||||
|     const entryId = components.slice(-1)[0]; |     const entryId = components.slice(-1)[0]; | ||||||
|  | @ -34,6 +31,9 @@ function scrapeLatest(html, site) { | ||||||
| 
 | 
 | ||||||
|         const duration = Number(element.find('.scene-runtime').text().slice(0, -4)) * 60; |         const duration = Number(element.find('.scene-runtime').text().slice(0, -4)) * 60; | ||||||
| 
 | 
 | ||||||
|  |         const posterString = sceneLinkElement.find('img[data-srcset]').attr('data-srcset') || sceneLinkElement.find('img[data-src]').attr('data-src'); | ||||||
|  |         const poster = `https:${posterString.match(/[\w/.]+$/)[0]}`; | ||||||
|  | 
 | ||||||
|         return { |         return { | ||||||
|             url, |             url, | ||||||
|             entryId, |             entryId, | ||||||
|  | @ -41,6 +41,7 @@ function scrapeLatest(html, site) { | ||||||
|             actors, |             actors, | ||||||
|             date, |             date, | ||||||
|             duration, |             duration, | ||||||
|  |             poster, | ||||||
|             rating: null, |             rating: null, | ||||||
|             site, |             site, | ||||||
|         }; |         }; | ||||||
|  | @ -63,18 +64,17 @@ async function scrapeScene(html, url, site) { | ||||||
| 
 | 
 | ||||||
|     const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60; |     const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60; | ||||||
| 
 | 
 | ||||||
|  |     const poster = `https:${$('video, dl8-video').attr('poster')}`; | ||||||
|  |     const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray(); | ||||||
|  | 
 | ||||||
|  |     const trailerEl = $('source'); | ||||||
|  |     const trailerSrc = trailerEl.attr('src'); | ||||||
|  |     const trailerType = trailerEl.attr('type'); | ||||||
|  | 
 | ||||||
|     const siteName = sceneElement.find('a.site-title').text(); |     const siteName = sceneElement.find('a.site-title').text(); | ||||||
|     const siteId = siteName.replace(/[\s']+/g, '').toLowerCase(); |     const channel = siteName.replace(/[\s']+/g, '').toLowerCase(); | ||||||
| 
 | 
 | ||||||
|     const rawTags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); |     const tags = $('.categories a.cat-tag').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||||
| 
 |  | ||||||
|     const [channelSite, tags] = await Promise.all([ |  | ||||||
|         knex('sites') |  | ||||||
|             .where({ slug: siteId }) |  | ||||||
|             .orWhere({ name: siteName }) |  | ||||||
|             .first(), |  | ||||||
|         matchTags(rawTags), |  | ||||||
|     ]); |  | ||||||
| 
 | 
 | ||||||
|     return { |     return { | ||||||
|         url, |         url, | ||||||
|  | @ -85,8 +85,15 @@ async function scrapeScene(html, url, site) { | ||||||
|         date, |         date, | ||||||
|         duration, |         duration, | ||||||
|         tags, |         tags, | ||||||
|  |         photos, | ||||||
|  |         poster, | ||||||
|  |         trailer: { | ||||||
|  |             src: trailerSrc, | ||||||
|  |             type: trailerType, | ||||||
|  |         }, | ||||||
|         rating: null, |         rating: null, | ||||||
|         site: channelSite || site, |         site, | ||||||
|  |         channel, | ||||||
|     }; |     }; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||