Added Gaywire, modified Bang Bros scraper to accommodate.

This commit is contained in:
DebaucheryLibrarian 2021-01-17 01:43:55 +01:00
parent 8387f676fc
commit 251bb9476d
44 changed files with 228 additions and 70 deletions

View File

@ -142,6 +142,7 @@ A GraphQL API is available at `/graphql`, and a REST API is available at the fol
* First Anal Quest * First Anal Quest
* ForBondage * ForBondage
* Full Porn Network (Analized, James Deen) * Full Porn Network (Analized, James Deen)
* Gaywire
* Girlsway * Girlsway
* Hitzefrei * Hitzefrei
* Hookup Hotshot * Hookup Hotshot

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 579 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 813 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 610 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

View File

@ -244,6 +244,12 @@ const networks = [
url: 'https://www.fullpornnetwork.com', url: 'https://www.fullpornnetwork.com',
description: 'FullPornNetwork.com is the latest and greatest for one stop shop porn sites. Check out the expanding library of the multi-site network. All of fan\'s favorite content from ANALIZED.COM, DTFsluts.com, YourMomDoesPorn.com and many more. Give die hard porn fans access to an array of premium content available in 4k and 1080p. Full access included streaming hd and unlimited downloads. Be exclusive, be a member to FullPornNetwork.com Today.', description: 'FullPornNetwork.com is the latest and greatest for one stop shop porn sites. Check out the expanding library of the multi-site network. All of fan\'s favorite content from ANALIZED.COM, DTFsluts.com, YourMomDoesPorn.com and many more. Give die hard porn fans access to an array of premium content available in 4k and 1080p. Full access included streaming hd and unlimited downloads. Be exclusive, be a member to FullPornNetwork.com Today.',
}, },
{
slug: 'gaywire',
name: 'Gaywire',
url: 'https://www.gaywire.com',
tags: ['gay'],
},
{ {
slug: 'girlsway', slug: 'girlsway',
name: 'Girlsway', name: 'Girlsway',

View File

@ -2946,6 +2946,130 @@ const sites = [
tags: ['facefucking', 'blowjob'], tags: ['facefucking', 'blowjob'],
parent: 'fullpornnetwork', parent: 'fullpornnetwork',
}, },
// GAYWIRE
{
slug: 'outinpublic',
name: 'Out In Public',
url: 'http://www.outinpublic.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/barebackcasting',
},
},
{
slug: 'rubhim',
name: 'Rub Him',
url: 'http://www.rubhim.com',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
parameters: {
latest: 'https://gaywire.com/h1/websites/rubhim',
},
},
{
slug: 'ungloryhole',
name: 'UngloryHole',
url: 'http://www.ungloryhole.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/ungloryhole',
},
},
{
slug: 'barebackcasting',
name: 'Bareback Casting',
url: 'https://gaywire.com/h1/websites/barebackcasting',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'itsgonnahurt',
name: 'It\'s Gonna Hurt',
url: 'http://www.itsgonnahurt.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/itsgonnahurt',
},
},
{
slug: 'barebackattack',
name: 'Bareback Attack',
url: 'https://gaywire.com/h1/websites/barebackattack',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'hazehim',
name: 'Haze Him',
url: 'http://www.hazehim.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/hazehim',
},
},
{
slug: 'thughunter',
name: 'Thug Hunter',
url: 'http://www.thughunter.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/thughunter',
},
},
{
slug: 'poundhisass',
name: 'Pound His Ass',
url: 'https://gaywire.com/h1/websites/poundhisass',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'exbf',
name: 'ExBF',
url: 'http://www.exbf.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/exbf',
},
},
{
slug: 'sausageparty',
name: 'Sausage Party',
url: 'http://www.sausageparty.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/sausageparty',
},
},
{
slug: 'projectcitybus',
name: 'Project City Bus',
url: 'http://www.projectcitybus.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/projectcitybus',
},
},
{
slug: 'urbaninvasion',
name: 'Urban Invasion',
url: 'https://gaywire.com/h1/websites/urbaninvasion',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
// GIRLSWAY // GIRLSWAY
{ {
slug: 'girlsway', slug: 'girlsway',
@ -9361,7 +9485,7 @@ exports.seed = knex => Promise.resolve()
priority: site.priority, priority: site.priority,
independent: !!site.independent, independent: !!site.independent,
visible: site.visible, visible: site.visible,
has_logo: site.hasLogo, has_logo: site.hasLogo === undefined ? true : site.hasLogo,
})); }));
const { inserted, updated } = await upsert('entities', sitesWithNetworks, ['slug', 'type'], knex); const { inserted, updated } = await upsert('entities', sitesWithNetworks, ['slug', 'type'], knex);

View File

@ -7,66 +7,57 @@ const moment = require('moment');
const logger = require('../logger')(__filename); const logger = require('../logger')(__filename);
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
const http = require('../utils/http'); const http = require('../utils/http');
const { get, getAll, ex } = require('../utils/q'); const qu = require('../utils/qu');
function scrape(html, site) { function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.echThumb').toArray(); const sceneElements = $('.echThumb').toArray();
return sceneElements.map((element) => { return sceneElements.map((element) => {
const sceneLinkElement = $(element).find('.thmb_lnk'); const release = {};
const title = sceneLinkElement.attr('title');
const url = site.parameters?.legacy
? `https://${site.url}{sceneLinkElement.attr('href')}`
: `https://bangbros.com${sceneLinkElement.attr('href')}`;
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
const entryId = url.split('/')[3].slice(5);
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate(); const sceneLinkElement = $(element).find('.thmb_lnk');
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
release.title = sceneLinkElement.attr('title');
release.url = site.parameters?.legacy
? `${site.url}{sceneLinkElement.attr('href')}`
: `${site.parent.url}${sceneLinkElement.attr('href')}`;
release.shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
release.entryId = new URL(release.url).pathname.match(/video(\d+)/)?.[1];
release.date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
release.actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
const photoElement = $(element).find('.rollover-image'); const photoElement = $(element).find('.rollover-image');
const poster = `https:${photoElement.attr('data-original')}`;
const photosUrl = photoElement.attr('data-rollover-url'); const photosUrl = photoElement.attr('data-rollover-url');
const photosMaxIndex = photoElement.attr('data-rollover-max-index'); const photosMaxIndex = photoElement.attr('data-rollover-max-index');
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds(); release.poster = `https:${photoElement.attr('data-original')}`;
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0]; release.photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
return { release.duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
url, release.channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
entryId,
shootId, return release;
title,
actors,
date,
duration,
poster,
photos,
rating: null,
site,
channel,
};
}); });
} }
function scrapeLegacy(scenes, site) { function scrapeLegacy(scenes, site) {
return scenes.map(({ qu }) => { return scenes.map(({ query }) => {
const release = {}; const release = {};
const pathname = qu.url('.mainplayer a, .palyer a'); // sic const pathname = query.url('.mainplayer a, .palyer a'); // sic
release.url = `${site.url}${pathname}`; release.url = `${site.url}${pathname}`;
release.entryId = pathname.match(/video\d+/)?.[0]; release.entryId = pathname.match(/video(\d+)/)?.[1];
release.title = qu.q('h2', true); release.title = query.q('h2', true);
release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/); release.date = query.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
release.description = qu.q('div + .videoDisc p', true); release.description = query.q('div + .videoDisc p', true);
release.duration = qu.dur('.videoTag .title'); release.duration = query.dur('.videoTag .title');
release.poster = qu.img('.mainplayer img, .palyer img'); // sic release.poster = query.img('.mainplayer img, .palyer img'); // sic
release.photos = qu.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean); release.photos = query.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);
return release; return release;
}); });
@ -102,32 +93,38 @@ function scrapeUpcoming(html, site) {
*/ */
function scrapeScene(html, url, _site) { function scrapeScene(html, url, _site) {
const { qu } = ex(html, '.playerSection'); const { query } = qu.ex(html, '.playerSection');
const release = {}; const release = {};
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/); const { pathname, hostname } = new URL(url);
[release.entryId] = url.split('/')[3].match(/\d+$/);
release.title = qu.q('.ps-vdoHdd h1', true);
release.description = qu.q('.vdoDesc', true);
release.actors = qu.all('a[href*="/model"]', true); [release.shootId] = query.cnt('.vdoTags + .vdoCast')?.match(/\w+$/) || [];
release.tags = qu.all('.vdoTags a', true); release.entryId = pathname.match(/video(\d+)/)?.[1];
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20; release.title = query.cnt('.ps-vdoHdd h1');
release.description = query.cnt('.vdoDesc');
const poster = qu.img('img#player-overlay-image'); release.actors = query.all('a[href*="/model"]', true);
release.poster = [ release.tags = query.all('.vdoTags a', true);
poster,
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
];
release.trailer = { src: qu.trailer() }; release.stars = Number(query.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
const poster = query.img('img#player-overlay-image, img.playerPic');
if (poster) {
release.poster = [
poster,
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
];
}
release.trailer = query.trailer() || qu.prefixUrl(html.match(/'(\/\/trailers.*mp4)'/)?.[1], hostname);
// all scenes seem to have 12 album photos available, not always included on the page // all scenes seem to have 12 album photos available, not always included on the page
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]'); const firstPhotoUrl = qu.ex(html).query.img('img[data-slider-index="1"]');
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`)); release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/); const [channel] = query.url('a[href*="/websites"]').match(/\w+$/);
if (channel === 'bangcasting') release.channel = 'bangbroscasting'; if (channel === 'bangcasting') release.channel = 'bangbroscasting';
if (channel === 'remaster') release.channel = 'bangbrosremastered'; if (channel === 'remaster') release.channel = 'bangbrosremastered';
@ -136,25 +133,25 @@ function scrapeScene(html, url, _site) {
return release; return release;
} }
function scrapeSceneLegacy({ qu }, url) { function scrapeSceneLegacy({ query }, url) {
const release = {}; const release = {};
release.entryId = new URL(url).pathname.match(/video\d+/)?.[0]; release.entryId = new URL(url).pathname.match(/video\d+/)?.[0];
release.title = qu.q('h1', true); release.title = query.q('h1', true);
release.description = qu.q('.videoDetail', true); release.description = query.q('.videoDetail', true);
release.duration = qu.dur('.tags p span'); release.duration = query.dur('.tags p span');
release.poster = qu.img('#video_container + div img, .videoOverlay img'); release.poster = query.img('#video_container + div img, .videoOverlay img');
return release; return release;
} }
function scrapeProfile(html, scope) { function scrapeProfile(html, scope) {
const { q } = ex(html); const { query } = qu.ex(html);
const profile = {}; const profile = {};
const avatar = q('.profilePic img', 'src'); const avatar = query.q('.profilePic img', 'src');
if (avatar) profile.avatar = `https:${avatar}`; if (avatar) profile.avatar = `https:${avatar}`;
profile.releases = scrape(html, scope.network); profile.releases = scrape(html, scope.network);
@ -163,16 +160,16 @@ function scrapeProfile(html, scope) {
} }
function scrapeProfileSearch(html, actorName) { function scrapeProfileSearch(html, actorName) {
const { qu } = ex(html); const { query } = qu.ex(html);
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`); const actorLink = query.url(`a[title="${actorName}" i][href*="model"]`);
return actorLink ? `https://bangbros.com${actorLink}` : null; return actorLink ? `https://bangbros.com${actorLink}` : null;
} }
async function fetchLatest(site, page = 1) { async function fetchLatest(site, page = 1) {
if (site.parameters?.legacy) { if (site.parameters?.legacy) {
const url = `${site.url}/videos/${page}`; const url = `${site.parameters?.latest || site.url}/videos/${page}`;
const res = await getAll(url, '.videoList'); const res = await qu.getAll(url, '.videoList');
if (res.ok) { if (res.ok) {
return scrapeLegacy(res.items, site); return scrapeLegacy(res.items, site);
@ -181,7 +178,7 @@ async function fetchLatest(site, page = 1) {
return res.status; return res.status;
} }
const res = await get(`${site.url}/${page}`); const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);
if (res.ok) { if (res.ok) {
return scrape(res.item.html, site); return scrape(res.item.html, site);
@ -204,7 +201,7 @@ async function fetchScene(url, site, release) {
} }
const { origin } = new URL(url); const { origin } = new URL(url);
const res = await get(url); const res = await qu.get(url);
if (!res.ok) { if (!res.ok) {
return res.status; return res.status;
@ -214,8 +211,8 @@ async function fetchScene(url, site, release) {
return scrapeSceneLegacy(res.item, url, site); return scrapeSceneLegacy(res.item, url, site);
} }
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) { if (!/https?:\/\/(www.)?(bangbros|gaywire).com\/?$/.test(origin)) {
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.'); throw new Error('Cannot fetch from this URL. Please find the scene on Bang Bros or Gaywire and try again.');
} }
return scrapeScene(res.item.html, url, site); return scrapeScene(res.item.html, url, site);

28
src/scrapers/gaywire.js Normal file
View File

@ -0,0 +1,28 @@
'use strict';
const qu = require('../utils/qu');
/**
 * Turn the scene contexts of a listing page into release objects.
 *
 * @param {Array<{ query: { cnt: Function } }>} scenes - one context per scene
 *   thumbnail, each exposing a `query` helper
 * @returns {Array<{ title: * }>} one release per scene, holding whatever
 *   `query.cnt('.thmb_ttl')` yields as its title
 */
function scrapeAll(scenes) {
	// No debug logging here: the scraper should only return data.
	return scenes.map(({ query }) => ({
		title: query.cnt('.thmb_ttl'),
	}));
}
/**
 * Fetch one page of a channel's scene listing on gaywire.com and scrape it.
 *
 * @param {{ slug: string }} channel - channel whose slug forms the listing URL
 * @param {number} [page=1] - listing page number
 * @returns {Promise<*>} the scraped releases when the request succeeded,
 *   otherwise the `status` reported by the request helper
 */
async function fetchLatest(channel, page = 1) {
	const url = `https://gaywire.com/h1/websites/${channel.slug}/${page}`;

	// The item selector goes to getAll (as in the Bang Bros scraper's
	// qu.getAll(url, '.videoList')); scrapeAll only takes the scenes.
	const res = await qu.getAll(url, '.echThumb');

	if (res.ok) {
		return scrapeAll(res.items);
	}

	return res.status;
}
module.exports = {
fetchLatest,
};

View File

@ -25,6 +25,7 @@ const fantasymassage = require('./fantasymassage');
const firstanalquest = require('./firstanalquest'); const firstanalquest = require('./firstanalquest');
const fcuk = require('./fcuk'); const fcuk = require('./fcuk');
const fullpornnetwork = require('./fullpornnetwork'); const fullpornnetwork = require('./fullpornnetwork');
const gaywire = require('./gaywire');
const girlsway = require('./girlsway'); const girlsway = require('./girlsway');
const hitzefrei = require('./hitzefrei'); const hitzefrei = require('./hitzefrei');
const hookuphotshot = require('./hookuphotshot'); const hookuphotshot = require('./hookuphotshot');
@ -117,6 +118,7 @@ const scrapers = {
firstanalquest, firstanalquest,
forbondage: porndoe, forbondage: porndoe,
fullpornnetwork, fullpornnetwork,
gaywire: bangbros,
girlsway, girlsway,
girlgirl: julesjordan, girlgirl: julesjordan,
hitzefrei, hitzefrei,