diff --git a/.gitignore b/.gitignore index d59fb887..64ed4414 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,4 @@ node_modules/ dist/ config/* !config/default.js -src/utils/link-result.json db.sqlite diff --git a/README.md b/README.md index 657d61d3..959f813b 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js * `--debug`: Show full error stack trace. ## Supported networks & sites -233 sites on 14 networks, continuously expanding! +285 sites on 15 networks, continuously expanding! * **21Sextury** * Anal Teen Angels * Asshole Fever @@ -38,6 +38,59 @@ Do not modify `config/default.js`, but instead create a copy at `config/local.js * Gapeland * Lez Cuties * Pix and Video +* **Bang Bros** + * Ass Parade + * AvaSpice + * Back Room Facials + * Backroom MILF + * Ball Honeys + * Bang Bus + * Bang Casting + * Bang POV + * Bang Tryouts + * BangBros 18 + * BangBros Angels + * BangBros Remastered + * Bangbros Clips + * Big Mouthfuls + * Big Tit Cream Pie + * Big Tits, Round Asses + * BlowJob Fridays + * Blowjob Ninjas + * Boob Squad + * Brown Bunnies + * Can He Score? + * Casting + * Chongas + * Colombia Fuck Fest + * Dirty World Tour + * Dorm Invasion + * Facial Fest + * Fuck Team Five + * Glory Hole Loads + * Latina Rampage + * Living With Anna + * MILF Lessons + * Magical Feet + * Milf Soup + * MomIsHorny + * Monsters of Cock + * Mr CamelToe + * Mr. 
Anal + * My Dirty Maid + * My Life In Brazil + * Newbie Black + * Party of 3 + * Pawg + * Penny Show + * Porn Star Spa + * Power Munch + * Public Bang + * Slutty White Girls + * Stepmom Videos + * Street Ranger + * Tugjobs + * Working Latinas * **Blowpass** * 1000 Facials * Immoral Live diff --git a/config/default.js b/config/default.js index 187aab48..69ecefea 100644 --- a/config/default.js +++ b/config/default.js @@ -49,11 +49,15 @@ module.exports = { }, { value: 'network', - width: 20, + width: 15, }, { value: 'site', - width: 30, + width: 25, + }, + { + value: 'shootId', + width: 15, }, { value: 'title', diff --git a/seeds/sites.js b/seeds/sites.js index 201a0b93..a69ecfdc 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -91,7 +91,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Ass Parade', - url: 'http://assparade.com', + url: 'https://bangbros.com/websites/assparade', description: null, parameters: null, }, @@ -109,7 +109,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Back Room Facials', - url: 'http://backroomfacials.com', + url: 'https://bangbros.com/websites/backroomfacials', description: null, parameters: null, }, @@ -118,7 +118,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Backroom MILF', - url: 'http://backroommilf.com', + url: 'https://bangbros.com/websites/backroommilf', description: null, parameters: null, }, @@ -136,7 +136,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'BangBros 18', - url: 'http://bangbros18.com', + url: 'https://bangbros.com/websites/bangbros18', description: null, parameters: null, }, @@ -154,7 +154,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Bangbros Clips', - url: 'http://bangbrosclips.com', + url: 'https://bangbros.com/websites/bangbrosclips', description: null, parameters: null, }, @@ -163,7 +163,7 @@ 
exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'BangBros Remastered', - url: 'https://bangbros.com/websites/remaster', + url: 'https://bangbros.com/websites/bangbrosremastered', description: null, parameters: null, }, @@ -172,7 +172,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Bang Bus', - url: 'http://bangbus.com', + url: 'https://bangbros.com/websites/bangbus', description: null, parameters: null, }, @@ -190,7 +190,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Bang POV', - url: 'http://bangpov.com', + url: 'https://bangbros.com/websites/bangpov', description: null, parameters: null, }, @@ -208,7 +208,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Big Mouthfuls', - url: 'http://bigmouthfuls.com', + url: 'https://bangbros.com/websites/bigmouthfuls', description: null, parameters: null, }, @@ -217,7 +217,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Big Tit Cream Pie', - url: 'http://bigtitcreampie.com', + url: 'https://bangbros.com/websites/bigtitcreampie', description: null, parameters: null, }, @@ -226,7 +226,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Big Tits, Round Asses', - url: 'http://bigtitsroundasses.com', + url: 'https://bangbros.com/websites/bigtitsroundasses', description: null, parameters: null, }, @@ -235,7 +235,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'BlowJob Fridays', - url: 'http://blowjobfridays.com', + url: 'https://bangbros.com/websites/blowjobfridays', description: null, parameters: null, }, @@ -244,7 +244,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Blowjob Ninjas', - url: 'http://blowjobninjas.com', + url: 'https://bangbros.com/websites/blowjobninjas', description: null, parameters: null, }, @@ 
-253,7 +253,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Boob Squad', - url: 'http://boobsquad.com', + url: 'https://bangbros.com/websites/boobsquad', description: null, parameters: null, }, @@ -262,7 +262,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Brown Bunnies', - url: 'http://brownbunnies.com', + url: 'https://bangbros.com/websites/brownbunnies', description: null, parameters: null, }, @@ -271,7 +271,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Can He Score?', - url: 'http://canhescore.com', + url: 'https://bangbros.com/websites/canhescore', description: null, parameters: null, }, @@ -289,7 +289,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Chongas', - url: 'http://chongas.com', + url: 'https://bangbros.com/websites/chongas', description: null, parameters: null, }, @@ -298,7 +298,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Colombia Fuck Fest', - url: 'http://colombiafuckfest.com', + url: 'https://bangbros.com/websites/colombiafuckfest', description: null, parameters: null, }, @@ -316,7 +316,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Dorm Invasion', - url: 'http://dorminvasion.com', + url: 'https://bangbros.com/websites/dorminvasion', description: null, parameters: null, }, @@ -325,7 +325,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Facial Fest', - url: 'http://facialfest.com', + url: 'https://bangbros.com/websites/facialfest', description: null, parameters: null, }, @@ -334,7 +334,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Fuck Team Five', - url: 'http://fuckteamfive.com', + url: 'https://bangbros.com/websites/fuckteamfive', description: null, parameters: null, }, @@ -352,7 +352,7 @@ exports.seed = knex 
=> Promise.resolve() label: null, network_id: 'bangbros', name: 'Latina Rampage', - url: 'http://latinarampage.com', + url: 'https://bangbros.com/websites/latinarampage', description: null, parameters: null, }, @@ -361,7 +361,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Living With Anna', - url: 'http://livingwithanna.com', + url: 'https://bangbros.com/websites/livingwithanna', description: null, parameters: null, }, @@ -370,7 +370,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Magical Feet', - url: 'http://magicalfeet.com', + url: 'https://bangbros.com/websites/magicalfeet', description: null, parameters: null, }, @@ -388,7 +388,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Milf Soup', - url: 'http://milfsoup.com', + url: 'https://bangbros.com/websites/milfsoup', description: null, parameters: null, }, @@ -397,7 +397,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'MomIsHorny', - url: 'http://momishorny.com', + url: 'https://bangbros.com/websites/momishorny', description: null, parameters: null, }, @@ -406,7 +406,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Monsters of Cock', - url: 'http://monstersofcock.com', + url: 'https://bangbros.com/websites/monstersofcock', description: null, parameters: null, }, @@ -415,7 +415,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Mr. 
Anal', - url: 'http://mranal.com', + url: 'https://bangbros.com/websites/mranal', description: null, parameters: null, }, @@ -433,7 +433,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'My Dirty Maid', - url: 'http://mydirtymaid.com', + url: 'https://bangbros.com/websites/mydirtymaid', description: null, parameters: null, }, @@ -442,7 +442,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'My Life In Brazil', - url: 'http://mylifeinbrazil.com', + url: 'https://bangbros.com/websites/mylifeinbrazil', description: null, parameters: null, }, @@ -460,7 +460,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Party of 3', - url: 'http://partyof3.com', + url: 'https://bangbros.com/websites/partyof3', description: null, parameters: null, }, @@ -469,7 +469,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Pawg', - url: 'http://pawg.com', + url: 'https://bangbros.com/websites/pawg', description: null, parameters: null, }, @@ -487,7 +487,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Porn Star Spa', - url: 'http://pornstarspa.com', + url: 'https://bangbros.com/websites/pornstarspa', description: null, parameters: null, }, @@ -505,7 +505,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Public Bang', - url: 'http://publicbang.com', + url: 'https://bangbros.com/websites/publicbang', description: null, parameters: null, }, @@ -523,7 +523,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Stepmom Videos', - url: 'http://stepmomvideos.com', + url: 'https://bangbros.com/websites/stepmomvideos', description: null, parameters: null, }, @@ -532,7 +532,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Street Ranger', - url: 'https://bangbros.com/websites/thewheeler', + 
url: 'https://bangbros.com/websites/streetranger', description: null, parameters: null, }, @@ -541,7 +541,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Tugjobs', - url: 'http://tugjobs.com', + url: 'https://bangbros.com/websites/tugjobs', description: null, parameters: null, }, @@ -550,7 +550,7 @@ exports.seed = knex => Promise.resolve() label: null, network_id: 'bangbros', name: 'Working Latinas', - url: 'http://workinglatinas.com', + url: 'https://bangbros.com/websites/workinglatinas', description: null, parameters: null, }, diff --git a/seeds/tags.js b/seeds/tags.js index aa08ebcb..3f4e76ea 100644 --- a/seeds/tags.js +++ b/seeds/tags.js @@ -57,6 +57,10 @@ exports.seed = knex => Promise.resolve() alias_for: null, group_id: 'penetration', }, + { + tag: 'amateur', + alias_for: null, + }, { tag: 'American', alias_for: null,
diff --git a/src/scrapers/bangbros.js b/src/scrapers/bangbros.js
new file mode 100644
index 00000000..9ffe5f74
--- /dev/null
+++ b/src/scrapers/bangbros.js
@@ -0,0 +1,162 @@
+'use strict';
+
+/* eslint-disable newline-per-chained-call */
+const bhttp = require('bhttp');
+const cheerio = require('cheerio');
+const moment = require('moment');
+
+const knex = require('../knex');
+const { matchTags } = require('../tags');
+
+/**
+ * Extract the entry ID from a scene URL.
+ *
+ * @param {string} url - Absolute scene URL.
+ * @returns {string} Fourth '/'-separated segment of the URL minus its first five characters.
+ */
+function getEntryId(url) {
+    // NOTE(review): presumably the segment looks like 'video<entryId>' - confirm against live URLs.
+    return url.split('/')[3].slice(5);
+}
+
+/**
+ * Look up the channel site matching a site name scraped from a scene page.
+ *
+ * @param {string} [siteName] - Site name as shown on the page.
+ * @returns {Promise<Object>|null} Query for the first site matching by ID or name, or null without a name.
+ */
+function findChannelSite(siteName) {
+    // Without a name there is nothing to match on; the caller falls back to the requested site.
+    if (!siteName) {
+        return null;
+    }
+
+    const siteId = siteName.replace(/[\s']+/g, '').toLowerCase();
+
+    return knex('sites')
+        .where({ id: siteId })
+        .orWhere({ name: siteName })
+        .first();
+}
+
+/**
+ * Scrape the scenes listed on a site overview page.
+ *
+ * @param {string} html - Markup of the overview page.
+ * @param {Object} site - Site the page belongs to; attached to every scene as-is.
+ * @returns {Object[]} One release object per `.echThumb` element.
+ */
+function scrapeLatest(html, site) {
+    const $ = cheerio.load(html, { normalizeWhitespace: true });
+    const sceneElements = $('.echThumb').toArray();
+
+    return sceneElements.map((element) => {
+        const sceneLinkElement = $(element).find('.thmb_lnk');
+        const title = sceneLinkElement.attr('title');
+        const url = `https://bangbros.com${sceneLinkElement.attr('href')}`;
+        const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
+        const entryId = getEntryId(url);
+
+        const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
+        const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
+
+        // The thumbnail text is prefixed with '0:' before being parsed as a duration.
+        const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();
+
+        return {
+            url,
+            entryId,
+            shootId,
+            title,
+            actors,
+            date,
+            duration,
+            rating: null,
+            site,
+        };
+    });
+}
+
+/**
+ * Scrape a single scene page.
+ *
+ * @param {string} html - Markup of the scene page.
+ * @param {string} url - URL the markup was fetched from.
+ * @param {Object} site - Fallback site, used when no channel site matches the page.
+ * @returns {Promise<Object>} The scraped release.
+ */
+async function scrapeScene(html, url, site) {
+    const $ = cheerio.load(html, { normalizeWhitespace: true });
+    const sceneElement = $('.playerSection');
+
+    const shootId = sceneElement.find('.vdoCast:contains("Release")').text().replace('Release: ', '').trim();
+    const entryId = getEntryId(url);
+    const title = sceneElement.find('.ps-vdoHdd h1').text();
+    const description = sceneElement.find('.vdoDesc').text().trim();
+
+    // The first cast link is treated as the site name, the remaining links as actors.
+    const [siteName, ...actors] = sceneElement.find('.vdoCast a').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();
+
+    const rawTags = $('.vdoTags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
+
+    const [channelSite, tags] = await Promise.all([
+        findChannelSite(siteName),
+        matchTags(rawTags),
+    ]);
+
+    // The page shows a like percentage; dividing by 20 maps 0-100 onto 0-5.
+    const stars = Number(sceneElement.find('.bVdPl_it_like .bVdPl_txt').text().replace('% like', '')) / 20;
+
+    return {
+        url,
+        shootId,
+        entryId,
+        title,
+        description,
+        actors,
+        tags,
+        rating: {
+            stars,
+        },
+        site: channelSite || site,
+    };
+}
+
+/**
+ * Fetch and scrape a page of a site's latest scenes.
+ *
+ * @param {Object} site - Site to fetch; its `id` is used as the URL slug.
+ * @param {number} [page=1] - Page number appended to the URL.
+ * @returns {Promise<Object[]>} Scenes scraped from the page.
+ */
+async function fetchLatest(site, page = 1) {
+    const res = await bhttp.get(`https://bangbros.com/websites/${site.id}/${page}`);
+
+    return scrapeLatest(res.body.toString(), site);
+}
+
+/**
+ * Fetch and scrape a single scene.
+ *
+ * @param {string} url - Scene URL; its origin must be https://bangbros.com.
+ * @param {Object} site - Fallback site passed through to the scene scraper.
+ * @returns {Promise<Object>} The scraped release.
+ * @throws {Error} When the URL has a different origin.
+ */
+async function fetchScene(url, site) {
+    const { origin } = new URL(url);
+
+    // Validate first so that no request is ever sent to an unsupported host.
+    if (origin !== 'https://bangbros.com') {
+        throw new Error('Cannot fetch from this URL. Please find the scene on https://bangbros.com and try again.');
+    }
+
+    const res = await bhttp.get(url);
+
+    return scrapeScene(res.body.toString(), url, site);
+}
+
+module.exports = {
+    fetchLatest,
+    fetchScene,
+};
diff --git a/src/scrapers/index.js b/src/scrapers/index.js index 43b07dba..3c087e88 100644 --- a/src/scrapers/index.js +++ b/src/scrapers/index.js @@ -1,6 +1,7 @@ 'use strict'; const twentyonesextury = require('./21sextury'); +const bangbros = require('./bangbros'); const blowpass = require('./blowpass'); const brazzers = require('./brazzers'); const ddfnetwork = require('./ddfnetwork'); @@ -17,6 +18,7 @@ const xempire = require('./xempire'); module.exports = { '21sextury': twentyonesextury, + bangbros, blowpass, brazzers, ddfnetwork, diff --git a/src/utils/try-links.js b/src/utils/try-links.js deleted file mode 100644 index 2bc2fe7e..00000000 --- a/src/utils/try-links.js +++ /dev/null @@ -1,51 +0,0 @@ -'use strict'; - -const Promise = require('bluebird'); -const bhttp = require('bhttp'); -const fs = Promise.promisifyAll(require('fs')); - -const knex = require('../knex'); -const argv = require('../argv'); - -const options = { - responseTimeout: 30000, -}; - -async function tryLinks() { - const sites = await knex('sites').whereIn('network_id', argv.network); - - const results = await Promise.all(sites.map(async (site) => { - console.log(`Trying ${site.name} URLs`); - - const [resHttp, resHttpWww, resHttps, resHttpsWww] = await Promise.all([ - bhttp.get(`http://${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), - bhttp.get(`http://www.${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), - bhttp.get(`https://${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), - bhttp.get(`https://www.${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), - ]); - - console.log(`Got results for ${site.name}`); - - return { - ...site, - url: (resHttp.statusCode === 200 && `http://${site.id}.com`) - || 
(resHttpWww.statusCode === 200 && `http://www.${site.id}.com`) - || (resHttps.statusCode === 200 && `https://${site.id}.com`) - || (resHttpsWww.statusCode === 200 && `https://www.${site.id}.com`) - || site.url, - network_id: site.network_id, - }; - })); - - const sortedResults = results.sort((siteA, siteB) => { - if (siteA.id > siteB.id) return 1; - if (siteA.id < siteB.id) return -1; - - return 0; - }); - - console.log(sortedResults); - await fs.writeFileAsync('./src/utils/link-results.json', JSON.stringify(sortedResults, null, 4)); -} - -tryLinks();