Fixed ID number/string incompatibility in duplicate detection. Expanded Reality Kings scraper to handle older scenes and fix URLs.
This commit is contained in:
		
							parent
							
								
									88c8bb1ced
								
							
						
					
					
						commit
						bd041c528d
					
				|  | @ -133,7 +133,9 @@ async function storeRelease(release) { | ||||||
| async function storeReleases(releases) { | async function storeReleases(releases) { | ||||||
|     return Promise.map(releases, async (release) => { |     return Promise.map(releases, async (release) => { | ||||||
|         try { |         try { | ||||||
|             return storeRelease(release); |             const releaseId = await storeRelease(release); | ||||||
|  | 
 | ||||||
|  |             return releaseId; | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             console.error(error); |             console.error(error); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -24,8 +24,8 @@ async function findDuplicateReleaseIds(latestReleases, accReleases) { | ||||||
|     // include accumulated releases as duplicates to prevent an infinite
 |     // include accumulated releases as duplicates to prevent an infinite
 | ||||||
|     // loop when the next page contains the same releases as the previous
 |     // loop when the next page contains the same releases as the previous
 | ||||||
|     return new Set(duplicateReleases |     return new Set(duplicateReleases | ||||||
|         .map(release => release.entry_id) |         .map(release => String(release.entry_id)) | ||||||
|         .concat(accReleases.map(release => release.entryId))); |         .concat(accReleases.map(release => String(release.entryId)))); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) { | async function scrapeUniqueReleases(scraper, site, afterDate = getAfterDate(), accReleases = [], page = 1) { | ||||||
|  | @ -122,11 +122,11 @@ async function scrapeReleases() { | ||||||
|             } |             } | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             if (argv.debug) { |             if (argv.debug) { | ||||||
|                 console.error(`${site.id}: Failed to fetch releases`, error); |                 console.error(`${site.id}: Failed to scrape releases`, error); | ||||||
|                 return; |                 return; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             console.warn(`${site.id}: Failed to fetch releases`); |             console.warn(`${site.id}: Failed to scrape releases`); | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         concurrency: 2, |         concurrency: 2, | ||||||
|  |  | ||||||
|  | @ -8,6 +8,20 @@ const { JSDOM } = require('jsdom'); | ||||||
| 
 | 
 | ||||||
| const { matchTags } = require('../tags'); | const { matchTags } = require('../tags'); | ||||||
| 
 | 
 | ||||||
|  | function getThumbs(scene) { | ||||||
|  |     if (scene.images.poster) { | ||||||
|  |         return scene.images.poster.map(image => image.xl.url); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (scene.images.card_main_rect) { | ||||||
|  |         return scene.images.card_main_rect | ||||||
|  |             .concat(scene.images.card_secondary_rect || []) | ||||||
|  |             .map(image => image.xl.url.replace('.thumb', '')); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return []; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| async function scrapeLatest(html, site) { | async function scrapeLatest(html, site) { | ||||||
|     const { document } = new JSDOM(html).window; |     const { document } = new JSDOM(html).window; | ||||||
| 
 | 
 | ||||||
|  | @ -29,24 +43,17 @@ async function scrapeLatest(html, site) { | ||||||
|             description, |             description, | ||||||
|         } = scene; |         } = scene; | ||||||
| 
 | 
 | ||||||
|         const url = `https://www.realitykings.com/scene/${entryId}`; |         const url = `https://www.realitykings.com/scene/${entryId}/`; | ||||||
|         const date = new Date(scene.dateReleased); |         const date = new Date(scene.dateReleased); | ||||||
|         const actors = scene.actors.map(actorId => actorsMap[actorId].name); |         const actors = scene.actors.map(actorId => actorsMap[actorId].name); | ||||||
|  |         const duration = scene.videos.mediabook && scene.videos.mediabook.length; | ||||||
| 
 | 
 | ||||||
|         const rawTags = scene.tags.map(tagId => tagsMap[tagId].name); |         const rawTags = scene.tags.map(tagId => tagsMap[tagId].name); | ||||||
|         const tags = await matchTags(rawTags); |         const tags = await matchTags(rawTags); | ||||||
| 
 | 
 | ||||||
|         if (!scene.images.poster) { |         const [poster, ...photos] = getThumbs(scene); | ||||||
|             console.log(site.name, site.id); |         const trailer720p = scene.videos.mediabook && scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view; | ||||||
|             console.log(scene); |         const trailer360p = scene.videos.mediabook && scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view; | ||||||
|             console.log(title, url, scene.images); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const [poster, ...photos] = scene.images.poster.map(image => image.xl.url); |  | ||||||
| 
 |  | ||||||
|         const duration = scene.videos.mediabook.length; |  | ||||||
|         const trailer720p = scene.videos.mediabook.files['720p'] && scene.videos.mediabook.files['720p'].urls.view; |  | ||||||
|         const trailer360p = scene.videos.mediabook.files['360p'] && scene.videos.mediabook.files['360p'].urls.view; |  | ||||||
| 
 | 
 | ||||||
|         const { likes, dislikes } = scene.stats; |         const { likes, dislikes } = scene.stats; | ||||||
| 
 | 
 | ||||||
|  | @ -86,11 +93,11 @@ async function scrapeScene(data, url, site) { | ||||||
|     const rawTags = data.tags.map(tag => tag.name); |     const rawTags = data.tags.map(tag => tag.name); | ||||||
|     const tags = await matchTags(rawTags); |     const tags = await matchTags(rawTags); | ||||||
| 
 | 
 | ||||||
|     const [poster, ...photos] = data.images.poster.map(image => image.xl.url); |     const [poster, ...photos] = getThumbs(data); | ||||||
| 
 | 
 | ||||||
|     const duration = data.videos.mediabook.length; |     const duration = data.videos.mediabook && data.videos.mediabook.length; | ||||||
|     const trailer720p = data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view; |     const trailer720p = data.videos.mediabook && data.videos.mediabook.files['720p'] && data.videos.mediabook.files['720p'].urls.view; | ||||||
|     const trailer360p = data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view; |     const trailer360p = data.videos.mediabook && data.videos.mediabook.files['360p'] && data.videos.mediabook.files['360p'].urls.view; | ||||||
| 
 | 
 | ||||||
|     return { |     return { | ||||||
|         url, |         url, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue