Added Gaywire, modified Bang Bros scraper to accommodate.

This commit is contained in:
DebaucheryLibrarian 2021-01-17 01:43:55 +01:00
parent 8387f676fc
commit 251bb9476d
44 changed files with 228 additions and 70 deletions

View File

@ -142,6 +142,7 @@ A GraphQL API is available at `/graphql`, and a REST API is available at the fol
* First Anal Quest
* ForBondage
* Full Porn Network (Analized, James Deen)
* Gaywire
* Girlsway
* Hitzefrei
* Hookup Hotshot

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 579 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 813 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 610 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

View File

@ -244,6 +244,12 @@ const networks = [
url: 'https://www.fullpornnetwork.com',
description: 'FullPornNetwork.com is the latest and greatest for one stop shop porn sites. Check out the expanding library of the multi-site network. All of fan\'s favorite content from ANALIZED.COM, DTFsluts.com, YourMomDoesPorn.com and many more. Give die hard porn fans access to an array of premium content available in 4k and 1080p. Full access included streaming hd and unlimited downloads. Be exclusive, be a member to FullPornNetwork.com Today.',
},
{
slug: 'gaywire',
name: 'Gaywire',
url: 'https://www.gaywire.com',
tags: ['gay'],
},
{
slug: 'girlsway',
name: 'Girlsway',

View File

@ -2946,6 +2946,130 @@ const sites = [
tags: ['facefucking', 'blowjob'],
parent: 'fullpornnetwork',
},
// GAYWIRE
{
slug: 'outinpublic',
name: 'Out In Public',
url: 'http://www.outinpublic.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/barebackcasting',
},
},
{
slug: 'rubhim',
name: 'Rub Him',
url: 'http://www.rubhim.com',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
parameters: {
latest: 'https://gaywire.com/h1/websites/rubhim',
},
},
{
slug: 'ungloryhole',
name: 'UngloryHole',
url: 'http://www.ungloryhole.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/ungloryhole',
},
},
{
slug: 'barebackcasting',
name: 'Bareback Casting',
url: 'https://gaywire.com/h1/websites/barebackcasting',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'itsgonnahurt',
name: 'It\'s Gonna Hurt',
url: 'http://www.itsgonnahurt.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/itsgonnahurt',
},
},
{
slug: 'barebackattack',
name: 'Bareback Attack',
url: 'https://gaywire.com/h1/websites/barebackattack',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'hazehim',
name: 'Haze Him',
url: 'http://www.hazehim.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/hazehim',
},
},
{
slug: 'thughunter',
name: 'Thug Hunter',
url: 'http://www.thughunter.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/thughunter',
},
},
{
slug: 'poundhisass',
name: 'Pound His Ass',
url: 'https://gaywire.com/h1/websites/poundhisass',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
{
slug: 'exbf',
name: 'ExBF',
url: 'http://www.exbf.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/exbf',
},
},
{
slug: 'sausageparty',
name: 'Sausage Party',
url: 'http://www.sausageparty.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/sausageparty',
},
},
{
slug: 'projectcitybus',
name: 'Project City Bus',
url: 'http://www.projectcitybus.com',
tags: ['gay'],
parent: 'gaywire',
parameters: {
latest: 'https://gaywire.com/h1/websites/projectcitybus',
},
},
{
slug: 'urbaninvasion',
name: 'Urban Invasion',
url: 'https://gaywire.com/h1/websites/urbaninvasion',
tags: ['gay'],
parent: 'gaywire',
hasLogo: false,
},
// GIRLSWAY
{
slug: 'girlsway',
@ -9361,7 +9485,7 @@ exports.seed = knex => Promise.resolve()
priority: site.priority,
independent: !!site.independent,
visible: site.visible,
has_logo: site.hasLogo,
has_logo: site.hasLogo === undefined ? true : site.hasLogo,
}));
const { inserted, updated } = await upsert('entities', sitesWithNetworks, ['slug', 'type'], knex);

View File

@ -7,66 +7,57 @@ const moment = require('moment');
const logger = require('../logger')(__filename);
const slugify = require('../utils/slugify');
const http = require('../utils/http');
const { get, getAll, ex } = require('../utils/q');
const qu = require('../utils/qu');
function scrape(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const sceneElements = $('.echThumb').toArray();
return sceneElements.map((element) => {
const sceneLinkElement = $(element).find('.thmb_lnk');
const title = sceneLinkElement.attr('title');
const url = site.parameters?.legacy
? `https://${site.url}{sceneLinkElement.attr('href')}`
: `https://bangbros.com${sceneLinkElement.attr('href')}`;
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
const entryId = url.split('/')[3].slice(5);
const release = {};
const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
const sceneLinkElement = $(element).find('.thmb_lnk');
release.title = sceneLinkElement.attr('title');
release.url = site.parameters?.legacy
? `${site.url}{sceneLinkElement.attr('href')}`
: `${site.parent.url}${sceneLinkElement.attr('href')}`;
release.shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
release.entryId = new URL(release.url).pathname.match(/video(\d+)/)?.[1];
release.date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
release.actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
const photoElement = $(element).find('.rollover-image');
const poster = `https:${photoElement.attr('data-original')}`;
const photosUrl = photoElement.attr('data-rollover-url');
const photosMaxIndex = photoElement.attr('data-rollover-max-index');
const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
const channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
release.poster = `https:${photoElement.attr('data-original')}`;
release.photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
return {
url,
entryId,
shootId,
title,
actors,
date,
duration,
poster,
photos,
rating: null,
site,
channel,
};
release.duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
release.channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];
return release;
});
}
function scrapeLegacy(scenes, site) {
return scenes.map(({ qu }) => {
return scenes.map(({ query }) => {
const release = {};
const pathname = qu.url('.mainplayer a, .palyer a'); // sic
const pathname = query.url('.mainplayer a, .palyer a'); // sic
release.url = `${site.url}${pathname}`;
release.entryId = pathname.match(/video\d+/)?.[0];
release.entryId = pathname.match(/video(\d+)/)?.[1];
release.title = qu.q('h2', true);
release.date = qu.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
release.description = qu.q('div + .videoDisc p', true);
release.duration = qu.dur('.videoTag .title');
release.title = query.q('h2', true);
release.date = query.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
release.description = query.q('div + .videoDisc p', true);
release.duration = query.dur('.videoTag .title');
release.poster = qu.img('.mainplayer img, .palyer img'); // sic
release.photos = qu.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);
release.poster = query.img('.mainplayer img, .palyer img'); // sic
release.photos = query.imgs('article img').concat(qu.imgs('article img', 'data-original')).filter(Boolean);
return release;
});
@ -102,32 +93,38 @@ function scrapeUpcoming(html, site) {
*/
function scrapeScene(html, url, _site) {
const { qu } = ex(html, '.playerSection');
const { query } = qu.ex(html, '.playerSection');
const release = {};
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
[release.entryId] = url.split('/')[3].match(/\d+$/);
release.title = qu.q('.ps-vdoHdd h1', true);
release.description = qu.q('.vdoDesc', true);
const { pathname, hostname } = new URL(url);
release.actors = qu.all('a[href*="/model"]', true);
release.tags = qu.all('.vdoTags a', true);
[release.shootId] = query.cnt('.vdoTags + .vdoCast')?.match(/\w+$/) || [];
release.entryId = pathname.match(/video(\d+)/)?.[1];
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
release.title = query.cnt('.ps-vdoHdd h1');
release.description = query.cnt('.vdoDesc');
const poster = qu.img('img#player-overlay-image');
release.poster = [
poster,
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
];
release.actors = query.all('a[href*="/model"]', true);
release.tags = query.all('.vdoTags a', true);
release.trailer = { src: qu.trailer() };
release.stars = Number(query.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
const poster = query.img('img#player-overlay-image, img.playerPic');
if (poster) {
release.poster = [
poster,
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
];
}
release.trailer = query.trailer() || qu.prefixUrl(html.match(/'(\/\/trailers.*mp4)'/)?.[1], hostname);
// all scenes seem to have 12 album photos available, not always included on the page
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
const firstPhotoUrl = qu.ex(html).query.img('img[data-slider-index="1"]');
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
const [channel] = query.url('a[href*="/websites"]').match(/\w+$/);
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
if (channel === 'remaster') release.channel = 'bangbrosremastered';
@ -136,25 +133,25 @@ function scrapeScene(html, url, _site) {
return release;
}
function scrapeSceneLegacy({ qu }, url) {
function scrapeSceneLegacy({ query }, url) {
const release = {};
release.entryId = new URL(url).pathname.match(/video\d+/)?.[0];
release.title = qu.q('h1', true);
release.description = qu.q('.videoDetail', true);
release.duration = qu.dur('.tags p span');
release.title = query.q('h1', true);
release.description = query.q('.videoDetail', true);
release.duration = query.dur('.tags p span');
release.poster = qu.img('#video_container + div img, .videoOverlay img');
release.poster = query.img('#video_container + div img, .videoOverlay img');
return release;
}
function scrapeProfile(html, scope) {
const { q } = ex(html);
const { query } = qu.ex(html);
const profile = {};
const avatar = q('.profilePic img', 'src');
const avatar = query.q('.profilePic img', 'src');
if (avatar) profile.avatar = `https:${avatar}`;
profile.releases = scrape(html, scope.network);
@ -163,16 +160,16 @@ function scrapeProfile(html, scope) {
}
function scrapeProfileSearch(html, actorName) {
const { qu } = ex(html);
const actorLink = qu.url(`a[title="${actorName}" i][href*="model"]`);
const { query } = qu.ex(html);
const actorLink = query.url(`a[title="${actorName}" i][href*="model"]`);
return actorLink ? `https://bangbros.com${actorLink}` : null;
}
async function fetchLatest(site, page = 1) {
if (site.parameters?.legacy) {
const url = `${site.url}/videos/${page}`;
const res = await getAll(url, '.videoList');
const url = `${site.parameters?.latest || site.url}/videos/${page}`;
const res = await qu.getAll(url, '.videoList');
if (res.ok) {
return scrapeLegacy(res.items, site);
@ -181,7 +178,7 @@ async function fetchLatest(site, page = 1) {
return res.status;
}
const res = await get(`${site.url}/${page}`);
const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);
if (res.ok) {
return scrape(res.item.html, site);
@ -204,7 +201,7 @@ async function fetchScene(url, site, release) {
}
const { origin } = new URL(url);
const res = await get(url);
const res = await qu.get(url);
if (!res.ok) {
return res.status;
@ -214,8 +211,8 @@ async function fetchScene(url, site, release) {
return scrapeSceneLegacy(res.item, url, site);
}
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
if (!/https?:\/\/(www.)?(bangbros|gaywire).com\/?$/.test(origin)) {
throw new Error('Cannot fetch from this URL. Please find the scene on Bang Bros or Gaywire and try again.');
}
return scrapeScene(res.item.html, url, site);

28
src/scrapers/gaywire.js Normal file
View File

@ -0,0 +1,28 @@
'use strict';
const qu = require('../utils/qu');
/**
 * Build a release object for every scene item of a listing page.
 *
 * @param {Array<{ query: object }>} scenes - listing items; each one exposes a
 *   `query` helper whose `cnt(selector)` is used to read the title.
 * @returns {Array<{ title: * }>} one release per item, currently carrying only the title.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.thmb_ttl');

		// no debug logging here: this runs once per scene on every listing page
		return release;
	});
}
/**
 * Fetch one page of a Gaywire channel listing and scrape its scenes.
 *
 * @param {{ slug: string }} channel - channel entity; its slug selects the listing path.
 * @param {number} [page=1] - listing page number; defaults to the first page so the
 *   URL never ends in `/undefined`.
 * @returns {Promise<Array<object>|*>} the scraped releases when the response is ok,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	// the item selector goes to qu.getAll (as the Bang Bros scraper does with '.videoList');
	// scrapeAll only accepts the resulting items
	const res = await qu.getAll(`https://gaywire.com/h1/websites/${channel.slug}/${page}`, '.echThumb');

	if (res.ok) {
		return scrapeAll(res.items);
	}

	return res.status;
}
module.exports = {
fetchLatest,
};

View File

@ -25,6 +25,7 @@ const fantasymassage = require('./fantasymassage');
const firstanalquest = require('./firstanalquest');
const fcuk = require('./fcuk');
const fullpornnetwork = require('./fullpornnetwork');
const gaywire = require('./gaywire');
const girlsway = require('./girlsway');
const hitzefrei = require('./hitzefrei');
const hookuphotshot = require('./hookuphotshot');
@ -117,6 +118,7 @@ const scrapers = {
firstanalquest,
forbondage: porndoe,
fullpornnetwork,
gaywire: bangbros,
girlsway,
girlgirl: julesjordan,
hitzefrei,