Added Gaywire, modified Bang Bros scraper to accommodate.
|
@ -142,6 +142,7 @@ A GraphQL API is available at `/graphql`, and a REST API is available at the fol
|
|||
* First Anal Quest
|
||||
* ForBondage
|
||||
* Full Porn Network (Analized, James Deen)
|
||||
* Gaywire
|
||||
* Girlsway
|
||||
* Hitzefrei
|
||||
* Hookup Hotshot
|
||||
|
|
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 113 KiB |
After Width: | Height: | Size: 1.5 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 3.9 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 3.1 KiB |
After Width: | Height: | Size: 2.4 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 2.2 KiB |
After Width: | Height: | Size: 3.8 KiB |
After Width: | Height: | Size: 5.5 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 900 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 579 KiB |
After Width: | Height: | Size: 813 KiB |
After Width: | Height: | Size: 610 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 90 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 16 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 91 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 50 KiB |
|
@ -244,6 +244,12 @@ const networks = [
|
|||
url: 'https://www.fullpornnetwork.com',
|
||||
description: 'FullPornNetwork.com is the latest and greatest for one stop shop porn sites. Check out the expanding library of the multi-site network. All of fan\'s favorite content from ANALIZED.COM, DTFsluts.com, YourMomDoesPorn.com and many more. Give die hard porn fans access to an array of premium content available in 4k and 1080p. Full access included streaming hd and unlimited downloads. Be exclusive, be a member to FullPornNetwork.com Today.',
|
||||
},
|
||||
{
|
||||
slug: 'gaywire',
|
||||
name: 'Gaywire',
|
||||
url: 'https://www.gaywire.com',
|
||||
tags: ['gay'],
|
||||
},
|
||||
{
|
||||
slug: 'girlsway',
|
||||
name: 'Girlsway',
|
||||
|
|
|
@ -2946,6 +2946,130 @@ const sites = [
|
|||
tags: ['facefucking', 'blowjob'],
|
||||
parent: 'fullpornnetwork',
|
||||
},
|
||||
// GAYWIRE
|
||||
{
|
||||
slug: 'outinpublic',
|
||||
name: 'Out In Public',
|
||||
url: 'http://www.outinpublic.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/barebackcasting',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'rubhim',
|
||||
name: 'Rub Him',
|
||||
url: 'http://www.rubhim.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
hasLogo: false,
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/rubhim',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'ungloryhole',
|
||||
name: 'UngloryHole',
|
||||
url: 'http://www.ungloryhole.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/ungloryhole',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'barebackcasting',
|
||||
name: 'Bareback Casting',
|
||||
url: 'https://gaywire.com/h1/websites/barebackcasting',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
hasLogo: false,
|
||||
},
|
||||
{
|
||||
slug: 'itsgonnahurt',
|
||||
name: 'It\'s Gonna Hurt',
|
||||
url: 'http://www.itsgonnahurt.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/itsgonnahurt',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'barebackattack',
|
||||
name: 'Bareback Attack',
|
||||
url: 'https://gaywire.com/h1/websites/barebackattack',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
hasLogo: false,
|
||||
},
|
||||
{
|
||||
slug: 'hazehim',
|
||||
name: 'Haze Him',
|
||||
url: 'http://www.hazehim.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/hazehim',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'thughunter',
|
||||
name: 'Thug Hunter',
|
||||
url: 'http://www.thughunter.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/thughunter',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'poundhisass',
|
||||
name: 'Pound His Ass',
|
||||
url: 'https://gaywire.com/h1/websites/poundhisass',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
hasLogo: false,
|
||||
},
|
||||
{
|
||||
slug: 'exbf',
|
||||
name: 'ExBF',
|
||||
url: 'http://www.exbf.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/exbf',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'sausageparty',
|
||||
name: 'Sausage Party',
|
||||
url: 'http://www.sausageparty.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/sausageparty',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'projectcitybus',
|
||||
name: 'Project City Bus',
|
||||
url: 'http://www.projectcitybus.com',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
parameters: {
|
||||
latest: 'https://gaywire.com/h1/websites/projectcitybus',
|
||||
},
|
||||
},
|
||||
{
|
||||
slug: 'urbaninvasion',
|
||||
name: 'Urban Invasion',
|
||||
url: 'https://gaywire.com/h1/websites/urbaninvasion',
|
||||
tags: ['gay'],
|
||||
parent: 'gaywire',
|
||||
hasLogo: false,
|
||||
},
|
||||
// GIRLSWAY
|
||||
{
|
||||
slug: 'girlsway',
|
||||
|
@ -9361,7 +9485,7 @@ exports.seed = knex => Promise.resolve()
|
|||
priority: site.priority,
|
||||
independent: !!site.independent,
|
||||
visible: site.visible,
|
||||
has_logo: site.hasLogo,
|
||||
has_logo: site.hasLogo === undefined ? true : site.hasLogo,
|
||||
}));
|
||||
|
||||
const { inserted, updated } = await upsert('entities', sitesWithNetworks, ['slug', 'type'], knex);
|
||||
|
|
|
@ -7,66 +7,57 @@ const moment = require('moment');
|
|||
const logger = require('../logger')(__filename);
|
||||
const slugify = require('../utils/slugify');
|
||||
const http = require('../utils/http');
|
||||
const { get, getAll, ex } = require('../utils/q');
|
||||
const qu = require('../utils/qu');
|
||||
|
||||
/**
 * Scrape every scene tile (`.echThumb`) found on a listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {Object} site - Entity being scraped; reads `url`, `parameters.legacy` and `parent.url`.
 * @returns {Object[]} One release object per tile.
 */
function scrape(html, site) {
	const $ = cheerio.load(html, { normalizeWhitespace: true });
	const sceneElements = $('.echThumb').toArray();

	// Each tile's href is appended to a base URL: the site's own URL for legacy sites, the parent's
	// URL otherwise. Fall back to the entity's own URL when it has no parent, so that being handed
	// a parent-less entity does not throw (NOTE(review): scrapeProfile passes a network — confirm).
	const baseUrl = site.parameters?.legacy
		? site.url
		: (site.parent?.url ?? site.url);

	return sceneElements.map((element) => {
		const release = {};
		const sceneLinkElement = $(element).find('.thmb_lnk');

		release.title = sceneLinkElement.attr('title');
		release.url = `${baseUrl}${sceneLinkElement.attr('href')}`;

		// the shoot ID is the second dash-separated part of the link's id attribute
		release.shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
		// only the digits following "video" in the path
		release.entryId = new URL(release.url).pathname.match(/video(\d+)/)?.[1];

		release.date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
		release.actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();

		const photoElement = $(element).find('.rollover-image');
		const photosUrl = photoElement.attr('data-rollover-url');
		const photosMaxIndex = photoElement.attr('data-rollover-max-index');

		release.poster = `https:${photoElement.attr('data-original')}`;
		// rollover images are numbered big1.jpg .. big<max>.jpg under the rollover URL
		release.photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);

		release.duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
		// last path segment of the tile's "/websites/..." link
		release.channel = $(element).find('a[href*="/websites"]').attr('href').split('/').slice(-1)[0];

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape scene tiles from a legacy-layout listing page.
 *
 * @param {Object[]} scenes - Items that each expose a `query` helper for one tile.
 * @param {Object} site - Entity being scraped; its `url` is prefixed to every scene path.
 * @returns {Object[]} One release object per item.
 */
function scrapeLegacy(scenes, site) {
	return scenes.map(({ query }) => {
		const release = {};

		const pathname = query.url('.mainplayer a, .palyer a'); // sic

		release.url = `${site.url}${pathname}`;
		// only the digits following "video" in the path
		release.entryId = pathname.match(/video(\d+)/)?.[1];

		release.title = query.q('h2', true);
		release.date = query.date('div:not(.videoDisc)', 'MMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);
		release.description = query.q('div + .videoDisc p', true);
		release.duration = query.dur('.videoTag .title');

		release.poster = query.img('.mainplayer img, .palyer img'); // sic

		// Both lookups must go through the per-item helper; the second one previously used `qu`,
		// which after the rename refers to the module import rather than this tile.
		release.photos = query.imgs('article img').concat(query.imgs('article img', 'data-original')).filter(Boolean);

		return release;
	});
}
|
||||
|
@ -102,32 +93,38 @@ function scrapeUpcoming(html, site) {
|
|||
*/
|
||||
|
||||
function scrapeScene(html, url, _site) {
|
||||
const { qu } = ex(html, '.playerSection');
|
||||
const { query } = qu.ex(html, '.playerSection');
|
||||
const release = {};
|
||||
|
||||
[release.shootId] = qu.q('.vdoTags + .vdoCast', true).match(/\w+$/);
|
||||
[release.entryId] = url.split('/')[3].match(/\d+$/);
|
||||
release.title = qu.q('.ps-vdoHdd h1', true);
|
||||
release.description = qu.q('.vdoDesc', true);
|
||||
const { pathname, hostname } = new URL(url);
|
||||
|
||||
release.actors = qu.all('a[href*="/model"]', true);
|
||||
release.tags = qu.all('.vdoTags a', true);
|
||||
[release.shootId] = query.cnt('.vdoTags + .vdoCast')?.match(/\w+$/) || [];
|
||||
release.entryId = pathname.match(/video(\d+)/)?.[1];
|
||||
|
||||
release.stars = Number(qu.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
release.title = query.cnt('.ps-vdoHdd h1');
|
||||
release.description = query.cnt('.vdoDesc');
|
||||
|
||||
const poster = qu.img('img#player-overlay-image');
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
release.actors = query.all('a[href*="/model"]', true);
|
||||
release.tags = query.all('.vdoTags a', true);
|
||||
|
||||
release.trailer = { src: qu.trailer() };
|
||||
release.stars = Number(query.q('div[class*="like"]', true).match(/^\d+/)[0]) / 20;
|
||||
|
||||
const poster = query.img('img#player-overlay-image, img.playerPic');
|
||||
|
||||
if (poster) {
|
||||
release.poster = [
|
||||
poster,
|
||||
poster.replace('/big_trailer', '/members/450x340'), // load error fallback
|
||||
];
|
||||
}
|
||||
|
||||
release.trailer = query.trailer() || qu.prefixUrl(html.match(/'(\/\/trailers.*mp4)'/)?.[1], hostname);
|
||||
|
||||
// all scenes seem to have 12 album photos available, not always included on the page
|
||||
const firstPhotoUrl = ex(html).qu.img('img[data-slider-index="1"]');
|
||||
const firstPhotoUrl = qu.ex(html).query.img('img[data-slider-index="1"]');
|
||||
release.photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
|
||||
|
||||
const [channel] = qu.url('a[href*="/websites"]').match(/\w+$/);
|
||||
const [channel] = query.url('a[href*="/websites"]').match(/\w+$/);
|
||||
|
||||
if (channel === 'bangcasting') release.channel = 'bangbroscasting';
|
||||
if (channel === 'remaster') release.channel = 'bangbrosremastered';
|
||||
|
@ -136,25 +133,25 @@ function scrapeScene(html, url, _site) {
|
|||
return release;
|
||||
}
|
||||
|
||||
/**
 * Scrape a single scene page of a legacy-layout site.
 *
 * @param {Object} item - Page context; only its `query` helper is used.
 * @param {string} url - Absolute URL of the scene page; the entry ID is taken from its path.
 * @returns {Object} Release with `entryId`, `title`, `description`, `duration` and `poster`.
 */
function scrapeSceneLegacy({ query }, url) {
	const release = {};

	// Capture only the digits, matching the `/video(\d+)/` form the other scrapers in this file
	// use, so a listing result and its scene page yield the same ID.
	release.entryId = new URL(url).pathname.match(/video(\d+)/)?.[1];

	release.title = query.q('h1', true);
	release.description = query.q('.videoDetail', true);
	release.duration = query.dur('.tags p span');

	release.poster = query.img('#video_container + div img, .videoOverlay img');

	return release;
}
|
||||
|
||||
function scrapeProfile(html, scope) {
|
||||
const { q } = ex(html);
|
||||
const { query } = qu.ex(html);
|
||||
const profile = {};
|
||||
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
const avatar = query.q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
profile.releases = scrape(html, scope.network);
|
||||
|
@ -163,16 +160,16 @@ function scrapeProfile(html, scope) {
|
|||
}
|
||||
|
||||
/**
 * Find an actor's profile link in a search results page.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} actorName - Name matched case-insensitively against link `title` attributes.
 * @returns {string|null} Profile URL on bangbros.com, or null when no matching link is found.
 */
function scrapeProfileSearch(html, actorName) {
	const selector = `a[title="${actorName}" i][href*="model"]`;
	const modelPath = qu.ex(html).query.url(selector);

	if (!modelPath) {
		return null;
	}

	return `https://bangbros.com${modelPath}`;
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (site.parameters?.legacy) {
|
||||
const url = `${site.url}/videos/${page}`;
|
||||
const res = await getAll(url, '.videoList');
|
||||
const url = `${site.parameters?.latest || site.url}/videos/${page}`;
|
||||
const res = await qu.getAll(url, '.videoList');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeLegacy(res.items, site);
|
||||
|
@ -181,7 +178,7 @@ async function fetchLatest(site, page = 1) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
const res = await get(`${site.url}/${page}`);
|
||||
const res = await qu.get(`${site.parameters?.latest || site.url}/${page}`);
|
||||
|
||||
if (res.ok) {
|
||||
return scrape(res.item.html, site);
|
||||
|
@ -204,7 +201,7 @@ async function fetchScene(url, site, release) {
|
|||
}
|
||||
|
||||
const { origin } = new URL(url);
|
||||
const res = await get(url);
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (!res.ok) {
|
||||
return res.status;
|
||||
|
@ -214,8 +211,8 @@ async function fetchScene(url, site, release) {
|
|||
return scrapeSceneLegacy(res.item, url, site);
|
||||
}
|
||||
|
||||
if (!/https?:\/\/(www.)?bangbros.com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
|
||||
if (!/https?:\/\/(www.)?(bangbros|gaywire).com\/?$/.test(origin)) {
|
||||
throw new Error('Cannot fetch from this URL. Please find the scene on Bang Bros or Gaywire and try again.');
|
||||
}
|
||||
|
||||
return scrapeScene(res.item.html, url, site);
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
|
||||
/**
 * Build a release object for every scene item of a listing page.
 *
 * @param {Object[]} scenes - Items that each expose a `query` helper for one scene tile.
 * @returns {Object[]} One release per item; currently only `title` is filled in.
 */
function scrapeAll(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.thmb_ttl');

		return release;
	});
}
|
||||
|
||||
/**
 * Fetch one page of a Gaywire channel's listing and scrape its scene tiles.
 *
 * @param {Object} channel - Channel entity; its `slug` selects the listing path.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]|number>} Scraped releases, or the response status when the request is not ok.
 */
async function fetchLatest(channel, page = 1) {
	// The tile selector goes to getAll so that res.items holds one entry per `.echThumb`;
	// scrapeAll takes only the items.
	const res = await qu.getAll(`https://gaywire.com/h1/websites/${channel.slug}/${page}`, '.echThumb');

	if (res.ok) {
		return scrapeAll(res.items);
	}

	return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
};
|
|
@ -25,6 +25,7 @@ const fantasymassage = require('./fantasymassage');
|
|||
const firstanalquest = require('./firstanalquest');
|
||||
const fcuk = require('./fcuk');
|
||||
const fullpornnetwork = require('./fullpornnetwork');
|
||||
const gaywire = require('./gaywire');
|
||||
const girlsway = require('./girlsway');
|
||||
const hitzefrei = require('./hitzefrei');
|
||||
const hookuphotshot = require('./hookuphotshot');
|
||||
|
@ -117,6 +118,7 @@ const scrapers = {
|
|||
firstanalquest,
|
||||
forbondage: porndoe,
|
||||
fullpornnetwork,
|
||||
gaywire: bangbros,
|
||||
girlsway,
|
||||
girlgirl: julesjordan,
|
||||
hitzefrei,
|
||||
|
|