Added site aliases. Migrated various scrapers to qu. Added BAM Visions base.
This commit is contained in:
		
							parent
							
								
									c020d5659e
								
							
						
					
					
						commit
						37e188a0df
					
				|  | @ -151,6 +151,11 @@ exports.up = knex => Promise.resolve() | |||
|             .inTable('networks'); | ||||
| 
 | ||||
|         table.string('name'); | ||||
|         table.string('slug', 32) | ||||
|             .unique(); | ||||
| 
 | ||||
|         table.string('alias'); | ||||
| 
 | ||||
|         table.string('url'); | ||||
|         table.text('description'); | ||||
|         table.json('parameters'); | ||||
|  | @ -162,9 +167,6 @@ exports.up = knex => Promise.resolve() | |||
|         table.boolean('scrape') | ||||
|             .defaultTo(true); | ||||
| 
 | ||||
|         table.string('slug', 32) | ||||
|             .unique(); | ||||
| 
 | ||||
|         table.datetime('created_at') | ||||
|             .defaultTo(knex.fn.now()); | ||||
|     })) | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 8.7 KiB | 
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 2.5 KiB | 
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 7.6 KiB | 
|  | @ -78,6 +78,11 @@ const networks = [ | |||
|         url: 'https://www.babes.com', | ||||
|         parent: 'mindgeek', | ||||
|     }, | ||||
|     { | ||||
|         slug: 'bamvisions', | ||||
|         name: 'BAM Visions', | ||||
|         url: 'https://www.bamvisions.com', | ||||
|     }, | ||||
|     { | ||||
|         slug: 'bang', | ||||
|         name: 'Bang!', | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -375,10 +375,11 @@ async function updateReleasesSearch(releaseIds) { | |||
|             TO_TSVECTOR( | ||||
|                 'traxxx', | ||||
|                 releases.title || ' ' || | ||||
|                 sites.name || ' ' || | ||||
|                 sites.slug || ' ' || | ||||
|                 networks.name || ' ' || | ||||
|                 networks.slug || ' ' || | ||||
|                 sites.name || ' ' || | ||||
|                 sites.slug || ' ' || | ||||
|                 COALESCE(sites.alias, '') || ' ' || | ||||
|                 COALESCE(releases.shoot_id, '') || ' ' || | ||||
|                 COALESCE(TO_CHAR(releases.date, 'YYYY YY MM FMMM FMmonth mon DD FMDD'), '') || ' ' || | ||||
|                 STRING_AGG(COALESCE(actors.name, ''), ' ') || ' ' || | ||||
|  | @ -394,7 +395,7 @@ async function updateReleasesSearch(releaseIds) { | |||
|         LEFT JOIN tags ON local_tags.tag_id = tags.id | ||||
|         LEFT JOIN tags as tags_aliases ON local_tags.tag_id = tags_aliases.alias_for | ||||
|         ${releaseIds ? 'WHERE releases.id = ANY(?)' : ''} | ||||
|         GROUP BY releases.id, sites.name, sites.slug, networks.name, networks.slug; | ||||
|         GROUP BY releases.id, sites.name, sites.slug, sites.alias, networks.name, networks.slug; | ||||
|     `, releaseIds && [releaseIds]);
 | ||||
| 
 | ||||
|     if (documents.rows?.length > 0) { | ||||
|  |  | |||
|  | @ -41,22 +41,22 @@ function scrapeLatest(scenes, site, models) { | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| function scrapeScene({ html, q, qa, qd, qis }, url, site, models) { | ||||
| function scrapeScene({ html, qu }, url, site, models) { | ||||
|     const release = { url }; | ||||
| 
 | ||||
|     [release.entryId] = url.split('/').slice(-1); | ||||
|     release.title = q('.mas_title', true); | ||||
|     release.description = q('.mas_longdescription', true); | ||||
|     release.date = qd('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); | ||||
|     release.title = qu.q('.mas_title', true); | ||||
|     release.description = qu.q('.mas_longdescription', true); | ||||
|     release.date = qu.date('.mas_description', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); | ||||
| 
 | ||||
|     const actorString = q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, ''); | ||||
|     const actorString = qu.q('.mas_description', true).replace(/\w+ \d{1,2}, \d{4}/, ''); | ||||
|     const actors = matchActors(actorString, models); | ||||
|     if (actors.length > 0) release.actors = actors; | ||||
|     else release.actors = extractActors(actorString); | ||||
| 
 | ||||
|     release.tags = qa('.tags a', true); | ||||
|     release.tags = qu.all('.tags a', true); | ||||
| 
 | ||||
|     release.photos = qis('.stills img').map(photoPath => `${site.url}/${photoPath}`); | ||||
|     release.photos = qu.imgs('.stills img').map(photoPath => `${site.url}/${photoPath}`); | ||||
| 
 | ||||
|     const posterIndex = 'splash:'; | ||||
|     const poster = html.slice(html.indexOf('faceimages/', posterIndex), html.indexOf('.jpg', posterIndex) + 4); | ||||
|  |  | |||
|  | @ -0,0 +1,26 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const { geta } = require('../utils/qu'); | ||||
| 
 | ||||
/**
 * Build base releases from the items of a "latest" listing page.
 *
 * @param {Array<{ qu: object }>} scenes - one context object per listing item, as handed over
 *   from `res.items` by fetchLatest
 * @param {object} _site - site the listing belongs to; currently unused
 * @returns {Array<{ title: *, url: * }>} one release per listing item
 */
function scrapeLatest(scenes, _site) {
    // NOTE(review): assumes each item exposes its query helpers under a `qu` property, the way
    // the contexts destructured as `{ html, qu }` in the other scrapers do - confirm against utils/qu.
    return scenes.map(({ qu }) => {
        const release = {};

        release.title = qu.q('h3 a', true);
        release.url = qu.url('h3 a');

        return release;
    });
}
| 
 | ||||
/**
 * Fetch one page of the latest-movies listing and scrape its items.
 *
 * @param {object} site - site being scraped; passed through to scrapeLatest
 * @param {number} [page=1] - listing page number, interpolated into the URL
 * @returns {Promise<*>} the scraped releases when the response is flagged `ok`,
 *   otherwise the response's `status`
 */
async function fetchLatest(site, page = 1) {
    const url = `https://tour.bamvisions.com/categories/movies/${page}/latest/`;
    // Must be awaited: presumably geta returns a promise, in which case the un-awaited result
    // has no `ok`/`items`/`status` and this function would always resolve to undefined.
    // Awaiting is harmless should geta turn out to be synchronous.
    const res = await geta(url, '.item-episode');

    return res.ok ? scrapeLatest(res.items, site) : res.status;
}
| 
 | ||||
// Public interface of this scraper: only the latest-releases fetcher is exposed.
module.exports = { fetchLatest };
|  | @ -134,16 +134,16 @@ function scrapeAllTour(scenes) { | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { | ||||
| function scrapeScene({ html, qu }, site, url, baseRelease) { | ||||
|     const release = { url }; | ||||
| 
 | ||||
|     release.title = q('.centerwrap h2', true); | ||||
|     release.description = q('.videocontent p', true); | ||||
|     release.title = qu.q('.centerwrap h2', true); | ||||
|     release.description = qu.q('.videocontent p', true); | ||||
| 
 | ||||
|     release.date = qd('.videodetails .date', 'MM/DD/YYYY'); | ||||
|     release.duration = ql('.videodetails .date'); | ||||
|     release.date = qu.date('.videodetails .date', 'MM/DD/YYYY'); | ||||
|     release.duration = qu.dur('.videodetails .date'); | ||||
| 
 | ||||
|     release.actors = qa('.modelname a', true); | ||||
|     release.actors = qu.all('.modelname a', true); | ||||
| 
 | ||||
|     const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1]; | ||||
|     [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); | ||||
|  | @ -151,7 +151,7 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { | |||
|     const trailerPath = html.match(/\/trailers\/.*.mp4/); | ||||
|     if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` }; | ||||
| 
 | ||||
|     const stars = q('.modelrates + p', true).match(/\d.\d/)?.[0]; | ||||
|     const stars = qu.q('.modelrates + p', true).match(/\d.\d/)?.[0]; | ||||
|     if (stars) release.stars = Number(stars); | ||||
| 
 | ||||
|     // release.entryId = html.match(/set-target-(\d+)/)[1];
 | ||||
|  | @ -160,31 +160,31 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { | |||
|     return release; | ||||
| } | ||||
| 
 | ||||
| function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, channelRegExp) { | ||||
| function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) { | ||||
|     const release = { url }; | ||||
| 
 | ||||
|     release.title = q('.trailer-section-head .section-title', true); | ||||
|     release.description = qtx('.row .update-info-block'); | ||||
|     release.title = qu.q('.trailer-section-head .section-title', true); | ||||
|     release.description = qu.text('.row .update-info-block'); | ||||
| 
 | ||||
|     release.date = qd('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); | ||||
|     release.duration = ql('.update-info-row:nth-child(2)'); | ||||
|     release.date = qu.date('.update-info-row', 'MMM D, YYYY', /\w+ \d{1,2}, \d{4}/); | ||||
|     release.duration = qu.dur('.update-info-row:nth-child(2)'); | ||||
| 
 | ||||
|     release.actors = qa('.models-list-thumbs a').map(el => ({ | ||||
|         name: q(el, 'span', true), | ||||
|         avatar: getImageWithFallbacks(q, 'img', site, el), | ||||
|     release.actors = qu.all('.models-list-thumbs a').map(el => ({ | ||||
|         name: qu.q(el, 'span', true), | ||||
|         avatar: getImageWithFallbacks(qu.q, 'img', site, el), | ||||
|     })); | ||||
| 
 | ||||
|     release.tags = qa('.tags a', true); | ||||
|     release.tags = qu.all('.tags a', true); | ||||
| 
 | ||||
|     // const posterPath = html.match(/poster="(.*\.jpg)/)?.[1];
 | ||||
|     const posterPath = q('.player-thumb img', 'src0_1x'); | ||||
|     const posterPath = qu.q('.player-thumb img', 'src0_1x'); | ||||
|     [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); | ||||
| 
 | ||||
|     const trailer = html.match(/<video.*src="(.*\.mp4)/)?.[1]; | ||||
|     if (trailer && /^http/.test(trailer)) release.trailer = { src: trailer, referer: url }; | ||||
|     else if (trailer) release.trailer = { src: `${site.parameters?.media || site.url}${trailer}`, referer: url }; | ||||
| 
 | ||||
|     const stars = q('.update-rating', true).match(/\d.\d/)?.[0]; | ||||
|     const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0]; | ||||
|     if (stars) release.stars = Number(stars); | ||||
| 
 | ||||
|     if (channelRegExp) { | ||||
|  | @ -204,26 +204,26 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease, cha | |||
|     return release; | ||||
| } | ||||
| 
 | ||||
| function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) { | ||||
| function scrapeSceneTour({ html, qu }, site, url) { | ||||
|     const release = {}; | ||||
| 
 | ||||
|     if (url) release.url = url; | ||||
|     release.title = q('.update_title, .video-title', true); | ||||
|     release.description = q('.latest_update_description, .video-summary', true); | ||||
|     release.title = qu.q('.update_title, .video-title', true); | ||||
|     release.description = qu.q('.latest_update_description, .video-summary', true); | ||||
| 
 | ||||
|     const date = qd('.availdate, .update_date', 'YYYY-MM-DD'); | ||||
|     const date = qu.date('.availdate, .update_date', 'YYYY-MM-DD'); | ||||
|     if (date) release.date = date; | ||||
| 
 | ||||
|     release.actors = qa('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true); | ||||
|     release.tags = qa('.update_tags a, .tour_update_tags a', true); | ||||
|     release.actors = qu.all('.update_block_info .tour_update_models a, .video-model .tour_update_models a', true); | ||||
|     release.tags = qu.all('.update_tags a, .tour_update_tags a', true); | ||||
| 
 | ||||
|     const [photo, poster, ...photos] = qis('.update_image img:not(.play_icon_overlay)'); | ||||
|     const [photo, poster, ...photos] = qu.imgs('.update_image img:not(.play_icon_overlay)'); | ||||
|     if (poster || photo) release.poster = poster || photo; | ||||
|     if ((photo && poster) || photos) release.photos = poster ? [photo, ...photos] : photos; // don't use first photo when already used as fallback poster
 | ||||
| 
 | ||||
|     if (release.date) release.entryId = deriveEntryId(release); | ||||
| 
 | ||||
|     const trailerCode = q('.update_image a', 'onclick'); | ||||
|     const trailerCode = qu.q('.update_image a', 'onclick'); | ||||
|     const trailerPath = trailerCode?.match(/tload\('(.*)'\)/)?.[1] || html.match(/\/trailer\/.*\.mp4/)?.[0]; | ||||
|     if (trailerPath && /^http/.test(trailerPath)) release.trailer = { src: trailerPath }; | ||||
|     else if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` }; | ||||
|  | @ -231,10 +231,10 @@ function scrapeSceneTour({ html, q, qd, qa, qis }, site, url) { | |||
|     return release; | ||||
| } | ||||
| 
 | ||||
| function scrapeProfile({ el, q, qtxs }, site) { | ||||
| function scrapeProfile({ el, qu }, site) { | ||||
|     const profile = {}; | ||||
| 
 | ||||
|     const bio = qtxs('.stats p').reduce((acc, info) => { | ||||
|     const bio = qu.texts('.stats p').reduce((acc, info) => { | ||||
|         const [key, value] = info.split(':'); | ||||
| 
 | ||||
|         return { | ||||
|  | @ -254,7 +254,7 @@ function scrapeProfile({ el, q, qtxs }, site) { | |||
|     if (bio.age) profile.age = Number(bio.age); | ||||
|     if (bio.height) profile.height = feetInchesToCm(bio.height); | ||||
| 
 | ||||
|     profile.avatar = getImageWithFallbacks(q, '.profileimg img', site); | ||||
|     profile.avatar = getImageWithFallbacks(qu.q, '.profileimg img', site); | ||||
| 
 | ||||
|     const qReleases = ctxa(el, '.modelFeatures .modelfeature'); | ||||
|     profile.releases = scrapeAll(qReleases, site); | ||||
|  | @ -262,10 +262,10 @@ function scrapeProfile({ el, q, qtxs }, site) { | |||
|     return profile; | ||||
| } | ||||
| 
 | ||||
| function scrapeProfileT1({ el, q, qa }, site) { | ||||
| function scrapeProfileT1({ el, qu }, site) { | ||||
|     const profile = {}; | ||||
| 
 | ||||
|     const bio = qa('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => { | ||||
|     const bio = qu.all('.detail-div + .detail-div p, .detail-div p', true).reduce((acc, info) => { | ||||
|         const [key, value] = info.split(':'); | ||||
| 
 | ||||
|         if (!value) return acc; | ||||
|  | @ -292,7 +292,7 @@ function scrapeProfileT1({ el, q, qa }, site) { | |||
|     if (heightMetric) profile.height = Number(heightMetric[1]); | ||||
|     if (heightImperial) profile.height = feetInchesToCm(Number(heightImperial[0]), Number(heightImperial[1])); | ||||
| 
 | ||||
|     profile.avatar = getImageWithFallbacks(q, '.img-div img', site); | ||||
|     profile.avatar = getImageWithFallbacks(qu.q, '.img-div img', site); | ||||
| 
 | ||||
|     const qReleases = ctxa(el, '.item-video'); | ||||
|     profile.releases = scrapeAllT1(qReleases, site); | ||||
|  | @ -300,10 +300,10 @@ function scrapeProfileT1({ el, q, qa }, site) { | |||
|     return profile; | ||||
| } | ||||
| 
 | ||||
| function scrapeProfileTour({ el, q, qtxs }, site) { | ||||
| function scrapeProfileTour({ el, qu }, site) { | ||||
|     const profile = {}; | ||||
| 
 | ||||
|     const bio = qtxs('.model_bio').reduce((acc, info) => { | ||||
|     const bio = qu.texts('.model_bio').reduce((acc, info) => { | ||||
|         const [key, value] = info.split(':'); | ||||
| 
 | ||||
|         return { | ||||
|  | @ -339,7 +339,7 @@ function scrapeProfileTour({ el, q, qtxs }, site) { | |||
| 
 | ||||
|     if (bio.aliases) profile.aliases = bio.aliases.split(',').map(alias => alias.trim()); | ||||
| 
 | ||||
|     profile.avatar = getImageWithFallbacks(q, '.model_picture img', site); | ||||
|     profile.avatar = getImageWithFallbacks(qu.q, '.model_picture img', site); | ||||
| 
 | ||||
|     const qReleases = ctxa(el, '.update_block'); | ||||
|     profile.releases = qReleases.map((qRelease) => { | ||||
|  |  | |||
|  | @ -4,6 +4,7 @@ const adulttime = require('./adulttime'); | |||
| const assylum = require('./assylum'); | ||||
| const amateurallure = require('./amateurallure'); | ||||
| const babes = require('./babes'); | ||||
| const bamvisions = require('./bamvisions'); | ||||
| const bang = require('./bang'); | ||||
| const bangbros = require('./bangbros'); | ||||
| const blowpass = require('./blowpass'); | ||||
|  | @ -70,6 +71,7 @@ module.exports = { | |||
|         amateurallure, | ||||
|         assylum, | ||||
|         babes, | ||||
|         bamvisions, | ||||
|         bang, | ||||
|         bangbros, | ||||
|         blowpass, | ||||
|  | @ -108,7 +110,7 @@ module.exports = { | |||
|         perfectgonzo, | ||||
|         pervcity, | ||||
|         pimpxxx: cherrypimps, | ||||
|         pornpros: whalemember, | ||||
|         pornpros: whalemember, | ||||
|         private: privateNetwork, | ||||
|         puretaboo, | ||||
|         realitykings, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue