Added media to LegalPorno scraper.
This commit is contained in:
		
							parent
							
								
									5b7880a37d
								
							
						
					
					
						commit
						a4d936523b
					
				|  | @ -1,4 +1,5 @@ | |||
| $primary: #ff886c; | ||||
| /* $primary: #ff886c; */ | ||||
| $primary: #ff6c88; | ||||
| 
 | ||||
| $text: #222; | ||||
| $text-contrast: #fff; | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| /* $primary: #ff886c; */ | ||||
| .filters-bar[data-v-5533e378] { | ||||
|   display: block; | ||||
|   background: rgba(0, 0, 0, 0.1); | ||||
|  | @ -27,7 +28,7 @@ | |||
| } | ||||
| .filters .toggle.active[data-v-5533e378] { | ||||
|       color: #fff; | ||||
|       background: #ff886c; | ||||
|       background: #ff6c88; | ||||
| } | ||||
| .filter[data-v-5533e378] { | ||||
|   display: inline-block; | ||||
|  | @ -142,6 +143,7 @@ | |||
|   width: 300px; | ||||
| } | ||||
| 
 | ||||
| /* $primary: #ff886c; */ | ||||
| .banner[data-v-2bc41e74] { | ||||
|   background: #222; | ||||
|   white-space: nowrap; | ||||
|  | @ -182,6 +184,7 @@ | |||
|     width: .6rem; | ||||
| } | ||||
| 
 | ||||
| /* $primary: #ff886c; */ | ||||
| .noselect { | ||||
|   user-select: none; | ||||
|   -webkit-user-select: none; | ||||
|  | @ -206,12 +209,13 @@ body { | |||
|     display: inline-block; } | ||||
| 
 | ||||
| .heading { | ||||
|   color: #ff886c; | ||||
|   color: #ff6c88; | ||||
|   margin: 0 0 1rem 0; } | ||||
| 
 | ||||
| /* $primary: #ff886c; */ | ||||
| .header[data-v-10b7ec04] { | ||||
|   color: #fff; | ||||
|   background: #ff886c; | ||||
|   background: #ff6c88; | ||||
|   padding: 1rem; | ||||
| } | ||||
| .logo-link[data-v-10b7ec04] { | ||||
|  | @ -241,6 +245,7 @@ body { | |||
|   overflow-y: auto; | ||||
| } | ||||
| 
 | ||||
| /* $primary: #ff886c; */ | ||||
| .icon { | ||||
|   fill: #222; | ||||
|   display: inline-block; | ||||
|  |  | |||
|  | @ -142,10 +142,11 @@ async function storePhotos(release, releaseEntry) { | |||
| async function storePoster(release, releaseEntry) { | ||||
|     console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`); | ||||
| 
 | ||||
|     const { pathname } = new URL(release.poster); | ||||
|     const mimetype = mime.getType(pathname); | ||||
| 
 | ||||
|     const res = await bhttp.get(release.poster); | ||||
| 
 | ||||
|     const { pathname } = new URL(release.poster); | ||||
|     const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg'; | ||||
| 
 | ||||
|     const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`); | ||||
|     await fs.writeFile(path.join(config.photoPath, filepath), res.body); | ||||
| 
 | ||||
|  |  | |||
|  | @ -15,6 +15,28 @@ function extractTitle(originalTitle) { | |||
|     return { shootId, title }; | ||||
| } | ||||
| 
 | ||||
/**
 * Resolve a poster URL for a scene thumbnail element.
 *
 * The element's inline `style` attribute is tried first: whatever sits between
 * the first `(` and the last `)` is returned, with surrounding quotes removed.
 * If that yields nothing, the `data-casting` attribute is parsed as JSON; one
 * entry is picked at random and used to build a casting URL for `sceneId`.
 * Entries may be a plain number, a numeric string, or a `"a-b"` range string
 * (bounds in either order), from which a random integer is drawn inclusively.
 *
 * @param {{ attr: (name: string) => (string|undefined) }} posterElement
 *     Object exposing an `attr(name)` accessor (a cheerio selection in the caller).
 * @param {string|number} sceneId - Scene identifier interpolated into the casting URL.
 * @returns {string|null} The poster URL, or `null` when neither attribute
 *     provides usable data.
 * @throws {SyntaxError} If `data-casting` is present but is not valid JSON.
 */
function getPoster(posterElement, sceneId) {
    const posterStyle = posterElement.attr('style');

    if (posterStyle) {
        const open = posterStyle.indexOf('(');
        const close = posterStyle.lastIndexOf(')');

        if (open !== -1 && close > open) {
            // slicing up to the last ')' tolerates a trailing ';' after the url(...) value
            const styleUrl = posterStyle
                .slice(open + 1, close)
                .trim()
                .replace(/^['"]|['"]$/g, '');

            if (styleUrl) {
                return styleUrl;
            }
        }
    }

    const posterRange = posterElement.attr('data-casting');
    const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

    // no attribute, or not a non-empty list: there is nothing to pick from
    if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
        return null;
    }

    const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

    if (typeof posterTimeRange === 'number') {
        // poster time is already a single time value
        return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
    }

    const bounds = String(posterTimeRange)
        .split('-')
        .map(part => part.trim())
        .filter(part => part !== '')
        .map(Number)
        .filter(Number.isFinite);

    if (bounds.length === 0) {
        return null;
    }

    // order-independent so both "10-20" and "20-10" sample the same inclusive range
    const min = Math.min(...bounds);
    const max = Math.max(...bounds);
    const posterTime = Math.floor(Math.random() * (max - min + 1)) + min;

    return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
| 
 | ||||
| function scrapeLatest(html, site) { | ||||
|     const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||
|     const scenesElements = $('.thumbnails > div').toArray(); | ||||
|  | @ -29,19 +51,27 @@ function scrapeLatest(html, site) { | |||
| 
 | ||||
|         const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); | ||||
| 
 | ||||
|         const sceneId = $(element).attr('data-content'); | ||||
|         const posterElement = $(element).find('.thumbnail-avatar'); | ||||
| 
 | ||||
|         const poster = getPoster(posterElement, sceneId); | ||||
| 
 | ||||
|         return { | ||||
|             url, | ||||
|             shootId, | ||||
|             entryId, | ||||
|             title, | ||||
|             date, | ||||
|             poster, | ||||
|             site, | ||||
|         }; | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| async function scrapeScene(html, url, site) { | ||||
| async function scrapeScene(html, url, site, useGallery) { | ||||
|     const $ = cheerio.load(html, { normalizeWhitespace: true }); | ||||
|     const playerObject = $('script:contains("new VideoPlayer")').html(); | ||||
|     const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1)); | ||||
| 
 | ||||
|     const originalTitle = $('h1.watchpage-title').text().trim(); | ||||
|     const { shootId, title } = extractTitle(originalTitle); | ||||
|  | @ -56,6 +86,15 @@ async function scrapeScene(html, url, site) { | |||
| 
 | ||||
|     const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); | ||||
| 
 | ||||
|     const posterStyle = $('#player').attr('style'); | ||||
|     const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1); | ||||
| 
 | ||||
|     const photos = useGallery | ||||
|         ? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray() | ||||
|         : $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray(); | ||||
| 
 | ||||
|     const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd'); | ||||
| 
 | ||||
|     const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); | ||||
|     const tags = await matchTags(rawTags); | ||||
| 
 | ||||
|  | @ -67,6 +106,13 @@ async function scrapeScene(html, url, site) { | |||
|         date, | ||||
|         actors, | ||||
|         duration, | ||||
|         poster, | ||||
|         photos, | ||||
|         trailer: { | ||||
|             src: trailer.src, | ||||
|             type: trailer.type, | ||||
|             quality: trailer.quality === 'vga' ? 480 : 720, | ||||
|         }, | ||||
|         tags, | ||||
|         site, | ||||
|     }; | ||||
|  | @ -79,9 +125,13 @@ async function fetchLatest(site, page = 1) { | |||
| } | ||||
| 
 | ||||
| async function fetchScene(url, site) { | ||||
|     const res = await bhttp.get(url); | ||||
|     const useGallery = true; | ||||
| 
 | ||||
|     return scrapeScene(res.body.toString(), url, site); | ||||
|     const res = useGallery | ||||
|         ? await bhttp.get(`${url}/gallery#gallery`) | ||||
|         : await bhttp.get(`${url}/screenshots#screenshots`); | ||||
| 
 | ||||
|     return scrapeScene(res.body.toString(), url, site, useGallery); | ||||
| } | ||||
| 
 | ||||
| module.exports = { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue