forked from DebaucheryLibrarian/traxxx
				
			Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.
This commit is contained in:
		
							parent
							
								
									88c8bb1ced
								
							
						
					
					
						commit
						bd041c528d
					
				|  | @ -133,7 +133,9 @@ async function storeRelease(release) { | |||
| async function storeReleases(releases) { | ||||
|     return Promise.map(releases, async (release) => { | ||||
|         try { | ||||
|             return storeRelease(release); | ||||
|             const releaseId = await storeRelease(release); | ||||
| 
 | ||||
|             return releaseId; | ||||
|         } catch (error) { | ||||
|             console.error(error); | ||||
| 
 | ||||
|  |  | |||
|  | @ -24,8 +24,8 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) { | |||
|     // include accumulated releases as duplicates to prevent an infinite
 | ||||
|     // loop when the next page contains the same releases as the previous
 | ||||
|     return new Set(duplicateReleases | ||||
|         .map(release => release.entry_id) | ||||
|         .concat(accReleases.map(release => release.entryId))); | ||||
|         .map(release => String(release.entry_id)) | ||||
|         .concat(accReleases.map(release => String(release.entryId)))); | ||||
| } | ||||
| 
 | ||||
| async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) { | ||||
|  | @ -122,11 +122,11 @@ async function scrapeReleases() { | |||
|             } | ||||
|         } catch (error) { | ||||
|             if (argv.debug) { | ||||
|                 console.error(`${site.id}: Failed to fetch releases`, error); | ||||
|                 console.error(`${site.id}: Failed to scrape releases`, error); | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             console.warn(`${site.id}: Failed to fetch releases`); | ||||
|             console.warn(`${site.id}: Failed to scrape releases`); | ||||
|         } | ||||
|     }, { | ||||
|         concurrency: 2, | ||||
|  |  | |||
|  | @ -8,6 +8,20 @@ const { JSDOM } = require('jsdom'); | |||
| 
 | ||||
| const { matchTags } = require('../tags'); | ||||
| 
 | ||||
/**
 * Collect the XL image URLs for a scene.
 *
 * Uses the `poster` image set when present. Otherwise falls back to the
 * `card_main_rect` images followed by any `card_secondary_rect` images,
 * removing the first `.thumb` occurrence from each of those URLs.
 *
 * @param {Object} scene - Scene data; may carry an `images` object.
 * @returns {string[]} Image URLs; empty when no usable image is found.
 */
function getThumbs(scene) {
    // Tolerate scenes that carry no `images` object at all.
    const images = (scene && scene.images) || {};
    // Skip entries lacking an XL rendition instead of throwing on them.
    const hasXlUrl = image => Boolean(image && image.xl && image.xl.url);

    if (images.poster) {
        return images.poster
            .filter(hasXlUrl)
            .map(image => image.xl.url);
    }

    if (images.card_main_rect) {
        return images.card_main_rect
            .concat(images.card_secondary_rect || [])
            .filter(hasXlUrl)
            .map(image => image.xl.url.replace('.thumb', ''));
    }

    return [];
}
| 
 | ||||
| async function scrapeLatest(html, site) { | ||||
|     const { document } = new JSDOM(html).window; | ||||
| 
 | ||||
|  | @ -29,24 +43,17 @@ async function scrapeLatest(html, site) { | |||
|             description, | ||||
|         } = scene; | ||||
| 
 | ||||
|         const url = `https://www.realitykings.com/scene/${entryId}`; | ||||
|         const url = `https://www.realitykings.com/scene/${entryId}/`; | ||||
|         const date = new Date(scene.dateReleased); | ||||
|         const actors = scene.actors.map(actorId => actorsMap[actorId].name); | ||||
|         const duration = scene.videos.mediabook && scene.videos.mediabook.length; | ||||
| 
 | ||||
|         const rawTags = scene.tags.map(tagId => tagsMap[tagId].name); | ||||
|         const tags = await matchTags(rawTags); | ||||
| 
 | ||||
|         if (!scene.images.poster) { | ||||
|             console.log(site.name, site.id); | ||||
|             console.log(scene); | ||||
|             console.log(title, url, scene.images); | ||||
|         } | ||||
| 
 | ||||
|         const [poster, ...photos] = scene.images.poster.map(image => image.xl.url); | ||||
| 
 | ||||
|         const duration = scene.videos.mediabook.length; | ||||
|         const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view; | ||||
|         const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view; | ||||
|         const [poster, ...photos] = getThumbs(scene); | ||||
|         const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view; | ||||
|         const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view; | ||||
| 
 | ||||
|         const { likes, dislikes } = scene.stats; | ||||
| 
 | ||||
|  | @ -86,11 +93,11 @@ async function scrapeScene(data, url, site) { | |||
|     const rawTags = data.tags.map(tag => tag.name); | ||||
|     const tags = await matchTags(rawTags); | ||||
| 
 | ||||
|     const [poster, ...photos] = data.images.poster.map(image => image.xl.url); | ||||
|     const [poster, ...photos] = getThumbs(data); | ||||
| 
 | ||||
|     const duration = data.videos.mediabook.length; | ||||
|     const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view; | ||||
|     const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view; | ||||
|     const duration = data.videos.mediabook && data.videos.mediabook.length; | ||||
|     const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view; | ||||
|     const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view; | ||||
| 
 | ||||
|     return { | ||||
|         url, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue