Added Gaywire, modified Bang Bros scraper to accommodate.

2021-01-17 01:43:55 +01:00
parent 8387f676fc
commit 251bb9476d
44 changed files with 228 additions and 70 deletions
--- a/src/scrapers/bangbros.js
+++ b/src/scrapers/bangbros.js
@@ -7,66 +7,57 @@ const moment = require('moment');
 const logger = require('../logger')(__filename);
 const slugify = require('../utils/slugify');
 const http = require('../utils/http');
-const { get, getAll, ex } = require('../utils/q');
+const qu = require('../utils/qu');

 function scrape(html, site) {
 	const $ = cheerio.load(html, { normalizeWhitespace: true });
 	const sceneElements = $('.echThumb').toArray();

 	return sceneElements.map((element) => {
-		const sceneLinkElement = $(element).find('.thmb_lnk');
-		const title = sceneLinkElement.attr('title');
-		const url = site.parameters?.legacy
-			? `https://${site.url}{sceneLinkElement.attr('href')}`
-			: `https://bangbros.com${sceneLinkElement.attr('href')}`;
-		const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
-		const entryId = url.split('/')[3].slice(5);
+		const release = {};

-		const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
-		const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
+		const sceneLinkElement = $(element).find('.thmb_lnk');
+
+		release.title = sceneLinkElement.attr('title');
+		release.url = site.parameters?.legacy
+			? `${site.url}{sceneLinkElement.attr('href')}`
+			: `${site.parent.url}${sceneLinkElement.attr('href')}`;
+
+		release.shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
+		release.entryId = new URL(release.url).pathname.match(/video(\d+)/)?.[1];
+
+		release.date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
+		release.actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();

 		const photoElement = $(element).find('.rollover-image');
-		const poster = `https:${photoElement.attr('data-original')}`;
-
 		const photosUrl = photoElement.attr('data-rollover-url');
 		const photosMaxIndex = photoElement.attr('data-rollover-max-index');
-		const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);

-		const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
-		const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
+		release.poster = `https:${photoElement.attr('data-original')}`;
+		release.photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);

-		return {
-			url,
-			entryId,
-			shootId,
-			title,
-			actors,
-			date,
-			duration,
-			poster,
-			photos,
-			rating: null,
-			site,
-			channel,
-		};
+		release.duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
+		release.channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
+
+		return release;
 	});
 }

 function scrapeLegacy(scenes, site) {
-	return scenes.map(({ qu }) => {
+	return scenes.map(({ query }) => {
 		const release = {};

-		const pathname = qu.url('.mainplayer a, .palyer a'); // sic
+		const pathname = query.url('.mainplayer a, .palyer a'); // sic
 		release.url = `${site.url}${pathname}`;
-		release.entryId = pathname.match(/video\d+/)?.[0];
+		release.entryId = pathname.match(/video(\d+)/)?.[1];

-		release.title = qu.q('h2', true);
-		release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
-		release.description = qu.q('div + .videoDisc p', true);
-		release.duration = qu.dur('.videoTag .title');
+		release.title = query.q('h2', true);
+		release.date = query.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
+		release.description = query.q('div + .videoDisc p', true);
+		release.duration = query.dur('.videoTag .title');

-		release.poster = qu.img('.mainplayer img, .palyer img'); // sic
-		release.photos = qu.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);
+		release.poster = query.img('.mainplayer img, .palyer img'); // sic
+		release.photos = query.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);

 		return release;
 	});
@@ -102,32 +93,38 @@ function scrapeUpcoming(html, site) {
 */

 function scrapeScene(html, url, _site) {
-	const { qu } = ex(html, '.playerSection');
+	const { query } = qu.ex(html, '.playerSection');
 	const release = {};

-	[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
-	[release.entryId] = url.split('/')[3].match(/\d+$/);
-	release.title = qu.q('.ps-vdoHdd h1', true);
-	release.description = qu.q('.vdoDesc', true);
+	const { pathname, hostname } = new URL(url);

-	release.actors = qu.all('a[href*="/model"]', true);
-	release.tags = qu.all('.vdoTags a', true);
+	[release.shootId] = query.cnt('.vdoTags + .vdoCast')?.match(/\w+$/) || [];
+	release.entryId = pathname.match(/video(\d+)/)?.[1];

-	release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
+	release.title = query.cnt('.ps-vdoHdd h1');
+	release.description = query.cnt('.vdoDesc');

-	const poster = qu.img('img#player-overlay-image');
-	release.poster = [
-		poster,
-		poster.replace('/big_trailer', '/members/450x340'), // load error fallback
-	];
+	release.actors = query.all('a[href*="/model"]', true);
+	release.tags = query.all('.vdoTags a', true);

-	release.trailer = { src: qu.trailer() };
+	release.stars = Number(query.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
+
+	const poster = query.img('img#player-overlay-image, img.playerPic');
+
+	if (poster) {
+		release.poster = [
+			poster,
+			poster.replace('/big_trailer', '/members/450x340'), // load error fallback
+		];
+	}
+
+	release.trailer = query.trailer() || qu.prefixUrl(html.match(/'(\/\/trailers.*mp4)'/)?.[1], hostname);

 	// all scenes seem to have 12 album photos available, not always included on the page
-	const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
+	const firstPhotoUrl = qu.ex(html).query.img('img[data-slider-index="1"]');
 	release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));

-	const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
+	const [channel] = query.url('a[href*="/websites"]').match(/\w+$/);

 	if (channel === 'bangcasting') release.channel = 'bangbroscasting';
 	if (channel === 'remaster') release.channel = 'bangbrosremastered';
@@ -136,25 +133,25 @@ function scrapeScene(html, url, _site) {
 	return release;
 }

-function scrapeSceneLegacy({ qu }, url) {
+function scrapeSceneLegacy({ query }, url) {
 	const release = {};

 	release.entryId = new URL(url).pathname.match(/video\d+/)?.[0];

-	release.title = qu.q('h1', true);
-	release.description = qu.q('.videoDetail', true);
-	release.duration = qu.dur('.tags p span');
+	release.title = query.q('h1', true);
+	release.description = query.q('.videoDetail', true);
+	release.duration = query.dur('.tags p span');

-	release.poster = qu.img('#video_container + div img, .videoOverlay img');
+	release.poster = query.img('#video_container + div img, .videoOverlay img');

 	return release;
 }

 function scrapeProfile(html, scope) {
-	const { q } = ex(html);
+	const { query } = qu.ex(html);
 	const profile = {};

-	const avatar = q('.profilePic img', 'src');
+	const avatar = query.q('.profilePic img', 'src');
 	if (avatar) profile.avatar = `https:${avatar}`;

 	profile.releases = scrape(html, scope.network);
@@ -163,16 +160,16 @@ function scrapeProfile(html, scope) {
 }

 function scrapeProfileSearch(html, actorName) {
-	const { qu } = ex(html);
-	const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
+	const { query } = qu.ex(html);
+	const actorLink = query.url(`a[title="${actorName}" i][href*="model"]`);

 	return actorLink ? `https://bangbros.com${actorLink}` : null;
 }

 async function fetchLatest(site, page = 1) {
 	if (site.parameters?.legacy) {
-		const url = `${site.url}/videos/${page}`;
-		const res = await getAll(url, '.videoList');
+		const url = `${site.parameters?.latest || site.url}/videos/${page}`;
+		const res = await qu.getAll(url, '.videoList');

 		if (res.ok) {
 			return scrapeLegacy(res.items, site);
@@ -181,7 +178,7 @@ async function fetchLatest(site, page = 1) {
 		return res.status;
 	}

-	const res = await get(`${site.url}/${page}`);
+	const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);

 	if (res.ok) {
 		return scrape(res.item.html, site);
@@ -204,7 +201,7 @@ async function fetchScene(url, site, release) {
 	}

 	const { origin } = new URL(url);
-	const res = await get(url);
+	const res = await qu.get(url);

 	if (!res.ok) {
 		return res.status;
@@ -214,8 +211,8 @@ async function fetchScene(url, site, release) {
 		return scrapeSceneLegacy(res.item, url, site);
 	}

-	if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
-		throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
+	if (!/https?:\/\/(www.)?(bangbros|gaywire).com\/?$/.test(origin)) {
+		throw new Error('Cannot fetch from this URL. Please find the scene on Bang Bros or Gaywire and try again.');
 	}

 	return scrapeScene(res.item.html, url, site);
--- a/src/scrapers/gaywire.js
+++ b/src/scrapers/gaywire.js
@@ -0,0 +1,28 @@
+'use strict';
+
+const qu = require('../utils/qu');
+
+function scrapeAll(scenes) {
+	return scenes.map(({ query }) => {
+		const release = {};
+
+		release.title = query.cnt('.thmb_ttl');
+
+		console.log(release);
+		return release;
+	});
+}
+
+async function fetchLatest(channel, page) {
+	const res = await qu.getAll(`https://gaywire.com/h1/websites/${channel.slug}/${page}`);
+
+	if (res.ok) {
+		return scrapeAll(res.items, '.echThumb');
+	}
+
+	return res.status;
+}
+
+module.exports = {
+	fetchLatest,
+};
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -25,6 +25,7 @@ const fantasymassage = require('./fantasymassage');
 const firstanalquest = require('./firstanalquest');
 const fcuk = require('./fcuk');
 const fullpornnetwork = require('./fullpornnetwork');
+const gaywire = require('./gaywire');
 const girlsway = require('./girlsway');
 const hitzefrei = require('./hitzefrei');
 const hookuphotshot = require('./hookuphotshot');
@@ -117,6 +118,7 @@ const scrapers = {
 		firstanalquest,
 		forbondage: porndoe,
 		fullpornnetwork,
+		gaywire: bangbros,
 		girlsway,
 		girlgirl: julesjordan,
 		hitzefrei,