From bc01cbe6dc267afe4bed953578d0da207b7ab04f Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Wed, 10 Apr 2019 03:42:20 +0200 Subject: [PATCH] Added Bang Bros data (no scraper yet). Added 21Sextury scene fetch date fallback. --- .gitignore | 1 + config/default.js | 4 + seeds/networks.js | 6 + seeds/sites.js | 469 +++++++++++++++++++++++++++++++++++ src/fetch-releases.js | 31 ++- src/fetch-scene.js | 6 +- src/scrapers/21sextury.js | 6 +- src/scrapers/realitykings.js | 2 +- src/tui/formatters.js | 1 + src/utils/try-links.js | 51 ++++ 10 files changed, 566 insertions(+), 11 deletions(-) create mode 100644 src/utils/try-links.js diff --git a/.gitignore b/.gitignore index 64ed4414..d59fb887 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules/ dist/ config/* !config/default.js +src/utils/link-result.json db.sqlite diff --git a/config/default.js b/config/default.js index 04a13fb4..187aab48 100644 --- a/config/default.js +++ b/config/default.js @@ -47,6 +47,10 @@ module.exports = { format: 'MMM DD, YYYY', width: 14, }, + { + value: 'network', + width: 20, + }, { value: 'site', width: 30, diff --git a/seeds/networks.js b/seeds/networks.js index ee6423cc..22137894 100644 --- a/seeds/networks.js +++ b/seeds/networks.js @@ -10,6 +10,12 @@ exports.seed = knex => Promise.resolve() url: 'https://www.21sextury.com', description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.', }, + { + id: 'bangbros', + name: 'Bang Bros', + url: 'https://bangbros.com', + description: 'Here at Bang Bros, we only film the best highest quality porn with the sexiest Amateur girls and the top pornstars. 
Updated daily on Bangbros.com.', + }, { id: 'blowpass', name: 'Blowpass', diff --git a/seeds/sites.js b/seeds/sites.js index 4d049e8d..201a0b93 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -85,6 +85,475 @@ exports.seed = knex => Promise.resolve() network_id: '21sextury', parameters: JSON.stringify({ filter: true }), }, + // BANGBROS + { + id: 'assparade', + label: null, + network_id: 'bangbros', + name: 'Ass Parade', + url: 'http://assparade.com', + description: null, + parameters: null, + }, + { + id: 'avaspice', + label: null, + network_id: 'bangbros', + name: 'AvaSpice', + url: 'https://bangbros.com/websites/avaspice', + description: null, + parameters: null, + }, + { + id: 'backroomfacials', + label: null, + network_id: 'bangbros', + name: 'Back Room Facials', + url: 'http://backroomfacials.com', + description: null, + parameters: null, + }, + { + id: 'backroommilf', + label: null, + network_id: 'bangbros', + name: 'Backroom MILF', + url: 'http://backroommilf.com', + description: null, + parameters: null, + }, + { + id: 'ballhoneys', + label: null, + network_id: 'bangbros', + name: 'Ball Honeys', + url: 'https://bangbros.com/websites/ballhoneys', + description: null, + parameters: null, + }, + { + id: 'bangbros18', + label: null, + network_id: 'bangbros', + name: 'BangBros 18', + url: 'http://bangbros18.com', + description: null, + parameters: null, + }, + { + id: 'bangbrosangels', + label: null, + network_id: 'bangbros', + name: 'BangBros Angels', + url: 'https://bangbros.com/websites/bangbrosangels', + description: null, + parameters: null, + }, + { + id: 'bangbrosclips', + label: null, + network_id: 'bangbros', + name: 'Bangbros Clips', + url: 'http://bangbrosclips.com', + description: null, + parameters: null, + }, + { + id: 'bangbrosremastered', + label: null, + network_id: 'bangbros', + name: 'BangBros Remastered', + url: 'https://bangbros.com/websites/remaster', + description: null, + parameters: null, + }, + { + id: 'bangbus', + label: null, + 
network_id: 'bangbros', + name: 'Bang Bus', + url: 'http://bangbus.com', + description: null, + parameters: null, + }, + { + id: 'bangcasting', + label: null, + network_id: 'bangbros', + name: 'Bang Casting', + url: 'https://bangbros.com/websites/bangcasting', + description: null, + parameters: null, + }, + { + id: 'bangpov', + label: null, + network_id: 'bangbros', + name: 'Bang POV', + url: 'http://bangpov.com', + description: null, + parameters: null, + }, + { + id: 'bangtryouts', + label: null, + network_id: 'bangbros', + name: 'Bang Tryouts', + url: 'https://bangbros.com/websites/bangtryouts', + description: null, + parameters: null, + }, + { + id: 'bigmouthfuls', + label: null, + network_id: 'bangbros', + name: 'Big Mouthfuls', + url: 'http://bigmouthfuls.com', + description: null, + parameters: null, + }, + { + id: 'bigtitcreampie', + label: null, + network_id: 'bangbros', + name: 'Big Tit Cream Pie', + url: 'http://bigtitcreampie.com', + description: null, + parameters: null, + }, + { + id: 'bigtitsroundasses', + label: null, + network_id: 'bangbros', + name: 'Big Tits, Round Asses', + url: 'http://bigtitsroundasses.com', + description: null, + parameters: null, + }, + { + id: 'blowjobfridays', + label: null, + network_id: 'bangbros', + name: 'BlowJob Fridays', + url: 'http://blowjobfridays.com', + description: null, + parameters: null, + }, + { + id: 'blowjobninjas', + label: null, + network_id: 'bangbros', + name: 'Blowjob Ninjas', + url: 'http://blowjobninjas.com', + description: null, + parameters: null, + }, + { + id: 'boobsquad', + label: null, + network_id: 'bangbros', + name: 'Boob Squad', + url: 'http://boobsquad.com', + description: null, + parameters: null, + }, + { + id: 'brownbunnies', + label: null, + network_id: 'bangbros', + name: 'Brown Bunnies', + url: 'http://brownbunnies.com', + description: null, + parameters: null, + }, + { + id: 'canhescore', + label: null, + network_id: 'bangbros', + name: 'Can He Score?', + url: 
'http://canhescore.com', + description: null, + parameters: null, + }, + { + id: 'casting', + label: null, + network_id: 'bangbros', + name: 'Casting', + url: 'https://bangbros.com/websites/casting', + description: null, + parameters: null, + }, + { + id: 'chongas', + label: null, + network_id: 'bangbros', + name: 'Chongas', + url: 'http://chongas.com', + description: null, + parameters: null, + }, + { + id: 'colombiafuckfest', + label: null, + network_id: 'bangbros', + name: 'Colombia Fuck Fest', + url: 'http://colombiafuckfest.com', + description: null, + parameters: null, + }, + { + id: 'dirtyworldtour', + label: null, + network_id: 'bangbros', + name: 'Dirty World Tour', + url: 'https://bangbros.com/websites/dirtyworldtour', + description: null, + parameters: null, + }, + { + id: 'dorminvasion', + label: null, + network_id: 'bangbros', + name: 'Dorm Invasion', + url: 'http://dorminvasion.com', + description: null, + parameters: null, + }, + { + id: 'facialfest', + label: null, + network_id: 'bangbros', + name: 'Facial Fest', + url: 'http://facialfest.com', + description: null, + parameters: null, + }, + { + id: 'fuckteamfive', + label: null, + network_id: 'bangbros', + name: 'Fuck Team Five', + url: 'http://fuckteamfive.com', + description: null, + parameters: null, + }, + { + id: 'gloryholeloads', + label: null, + network_id: 'bangbros', + name: 'Glory Hole Loads', + url: 'https://bangbros.com/websites/gloryholeloads', + description: null, + parameters: null, + }, + { + id: 'latinarampage', + label: null, + network_id: 'bangbros', + name: 'Latina Rampage', + url: 'http://latinarampage.com', + description: null, + parameters: null, + }, + { + id: 'livingwithanna', + label: null, + network_id: 'bangbros', + name: 'Living With Anna', + url: 'http://livingwithanna.com', + description: null, + parameters: null, + }, + { + id: 'magicalfeet', + label: null, + network_id: 'bangbros', + name: 'Magical Feet', + url: 'http://magicalfeet.com', + description: null, + 
parameters: null, + }, + { + id: 'milflessons', + label: null, + network_id: 'bangbros', + name: 'MILF Lessons', + url: 'https://bangbros.com/websites/milflessons', + description: null, + parameters: null, + }, + { + id: 'milfsoup', + label: null, + network_id: 'bangbros', + name: 'Milf Soup', + url: 'http://milfsoup.com', + description: null, + parameters: null, + }, + { + id: 'momishorny', + label: null, + network_id: 'bangbros', + name: 'MomIsHorny', + url: 'http://momishorny.com', + description: null, + parameters: null, + }, + { + id: 'monstersofcock', + label: null, + network_id: 'bangbros', + name: 'Monsters of Cock', + url: 'http://monstersofcock.com', + description: null, + parameters: null, + }, + { + id: 'mranal', + label: null, + network_id: 'bangbros', + name: 'Mr. Anal', + url: 'http://mranal.com', + description: null, + parameters: null, + }, + { + id: 'mrcameltoe', + label: null, + network_id: 'bangbros', + name: 'Mr CamelToe', + url: 'https://bangbros.com/websites/mrcameltoe', + description: null, + parameters: null, + }, + { + id: 'mydirtymaid', + label: null, + network_id: 'bangbros', + name: 'My Dirty Maid', + url: 'http://mydirtymaid.com', + description: null, + parameters: null, + }, + { + id: 'mylifeinbrazil', + label: null, + network_id: 'bangbros', + name: 'My Life In Brazil', + url: 'http://mylifeinbrazil.com', + description: null, + parameters: null, + }, + { + id: 'newbieblack', + label: null, + network_id: 'bangbros', + name: 'Newbie Black', + url: 'https://bangbros.com/websites/newbieblack', + description: null, + parameters: null, + }, + { + id: 'partyof3', + label: null, + network_id: 'bangbros', + name: 'Party of 3', + url: 'http://partyof3.com', + description: null, + parameters: null, + }, + { + id: 'pawg', + label: null, + network_id: 'bangbros', + name: 'Pawg', + url: 'http://pawg.com', + description: null, + parameters: null, + }, + { + id: 'pennyshow', + label: null, + network_id: 'bangbros', + name: 'Penny Show', + url: 
'https://bangbros.com/websites/pennyshow', + description: null, + parameters: null, + }, + { + id: 'pornstarspa', + label: null, + network_id: 'bangbros', + name: 'Porn Star Spa', + url: 'http://pornstarspa.com', + description: null, + parameters: null, + }, + { + id: 'powermunch', + label: null, + network_id: 'bangbros', + name: 'Power Munch', + url: 'https://bangbros.com/websites/powermunch', + description: null, + parameters: null, + }, + { + id: 'publicbang', + label: null, + network_id: 'bangbros', + name: 'Public Bang', + url: 'http://publicbang.com', + description: null, + parameters: null, + }, + { + id: 'sluttywhitegirls', + label: null, + network_id: 'bangbros', + name: 'Slutty White Girls', + url: 'https://bangbros.com/websites/sluttywhitegirls', + description: null, + parameters: null, + }, + { + id: 'stepmomvideos', + label: null, + network_id: 'bangbros', + name: 'Stepmom Videos', + url: 'http://stepmomvideos.com', + description: null, + parameters: null, + }, + { + id: 'streetranger', + label: null, + network_id: 'bangbros', + name: 'Street Ranger', + url: 'https://bangbros.com/websites/thewheeler', + description: null, + parameters: null, + }, + { + id: 'tugjobs', + label: null, + network_id: 'bangbros', + name: 'Tugjobs', + url: 'http://tugjobs.com', + description: null, + parameters: null, + }, + { + id: 'workinglatinas', + label: null, + network_id: 'bangbros', + name: 'Working Latinas', + url: 'http://workinglatinas.com', + description: null, + parameters: null, + }, // BLOWPASS { id: '1000facials', diff --git a/src/fetch-releases.js b/src/fetch-releases.js index 921217b8..ec6b2e63 100644 --- a/src/fetch-releases.js +++ b/src/fetch-releases.js @@ -33,7 +33,10 @@ function curateSites(sites) { name: site.name, description: site.description, url: site.url, - networkId: site.network_id, + network: { + id: site.network_id, + name: site.network_name, + }, parameters: JSON.parse(site.parameters), })); } @@ -41,8 +44,10 @@ function curateSites(sites) { 
async function accumulateIncludedSites() { if (argv.networks || argv.sites) { const rawSites = await knex('sites') - .whereIn('id', argv.sites || []) - .orWhereIn('network_id', argv.networks || []); + .select('sites.*', 'networks.name as network_name') + .whereIn('sites.id', argv.sites || []) + .orWhereIn('network_id', argv.networks || []) + .leftJoin('networks', 'sites.network_id', 'networks.id'); return curateSites(rawSites); } @@ -50,8 +55,10 @@ async function accumulateIncludedSites() { const included = destructConfigNetworks(config.include); const rawSites = await knex('sites') - .whereIn('id', included.sites) - .orWhereIn('network_id', included.networks); + .select('sites.*', 'networks.name as network_name') + .whereIn('sites.id', included.sites) + .orWhereIn('network_id', included.networks) + .leftJoin('networks', 'sites.network_id', 'networks.id'); return curateSites(rawSites); } @@ -127,7 +134,7 @@ async function fetchReleases() { const sites = await accumulateIncludedSites(); const scenesPerSite = await Promise.all(sites.map(async (site) => { - const scraper = scrapers[site.id] || scrapers[site.networkId]; + const scraper = scrapers[site.id] || scrapers[site.network.id]; if (scraper) { try { @@ -144,7 +151,17 @@ async function fetchReleases() { await storeReleases(newReleases); } - return [...newReleases, ...upcomingReleases.map(release => ({ ...release, upcoming: true }))]; + return [ + ...newReleases.map(release => ({ + ...release, + network: site.network, + })), + ...upcomingReleases.map(release => ({ + ...release, + network: site.network, + upcoming: true, + })), + ]; } catch (error) { if (argv.debug) { console.error(`${site.id}: Failed to fetch releases`, error); diff --git a/src/fetch-scene.js b/src/fetch-scene.js index f253026f..09a43a19 100644 --- a/src/fetch-scene.js +++ b/src/fetch-scene.js @@ -35,7 +35,9 @@ async function findSite(url) { name: site.name, description: site.description, url: site.url, - networkId: site.network_id || site.id, + 
network: { + id: site.network_id || site.id, + }, parameters: site.parameters && JSON.parse(site.parameters), isFallback: site.network_id === undefined, }; @@ -72,7 +74,7 @@ function deriveFilename(scene) { async function fetchScene(url) { const site = await findSite(url); - const scraper = scrapers[site.id] || scrapers[site.networkId]; + const scraper = scrapers[site.id] || scrapers[site.network.id]; if (!scraper) { throw new Error('Could not find scraper for URL'); diff --git a/src/scrapers/21sextury.js b/src/scrapers/21sextury.js index d2c6855a..c6b7030b 100644 --- a/src/scrapers/21sextury.js +++ b/src/scrapers/21sextury.js @@ -63,7 +63,11 @@ async function scrapeScene(html, url, site) { const entryId = new URL(url).pathname.split('/').slice(-1)[0]; const title = data.name; - const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); + const dataDate = moment.utc(data.dateCreated, 'YYYY-MM-DD'); + + const date = dataDate.isValid() + ? dataDate.toDate() + : moment.utc(sceneElement.find('.updatedDate').text().trim(), 'MM-DD-YYYY').toDate(); const actors = data.actor .sort(({ gender: genderA }, { gender: genderB }) => { diff --git a/src/scrapers/realitykings.js b/src/scrapers/realitykings.js index 640d2da8..72b740ca 100644 --- a/src/scrapers/realitykings.js +++ b/src/scrapers/realitykings.js @@ -101,7 +101,7 @@ async function fetchScene(url, site) { }, }); - return scrapeScene(res.body.result.parent, url, site); + return scrapeScene(res.body.result.parent || res.body.result, url, site); } module.exports = { diff --git a/src/tui/formatters.js b/src/tui/formatters.js index 208ec6b0..a9a846b3 100644 --- a/src/tui/formatters.js +++ b/src/tui/formatters.js @@ -4,6 +4,7 @@ const moment = require('moment'); const formatters = { site: site => site.name, + network: network => network.name, date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'), actors: actors => actors.join(', '), rating: (rating) => { diff --git a/src/utils/try-links.js 
b/src/utils/try-links.js
new file mode 100644
index 00000000..2bc2fe7e
--- /dev/null
+++ b/src/utils/try-links.js
@@ -0,0 +1,105 @@
+'use strict';
+
+/**
+ * One-off utility: for every site of the requested network(s), probe the
+ * conventional `<site id>.com` hosts over HTTP and HTTPS, with and without
+ * `www.`, and write the site rows (using the first URL that answered 200,
+ * or the stored URL when none did) to a JSON file next to this script.
+ */
+
+const path = require('path');
+
+const Promise = require('bluebird');
+const bhttp = require('bhttp');
+const fs = Promise.promisifyAll(require('fs'));
+
+const knex = require('../knex');
+const argv = require('../argv');
+
+const options = {
+    responseTimeout: 30000,
+};
+
+// Must stay in sync with the `src/utils/link-result.json` entry in .gitignore.
+const outputPath = path.join(__dirname, 'link-result.json');
+
+// URL prefixes to try for each site, in order of preference.
+const prefixes = ['http://', 'http://www.', 'https://', 'https://www.'];
+
+/**
+ * Check whether a URL answers with HTTP 200.
+ * A failed request counts as unreachable instead of aborting the whole run.
+ *
+ * @param {string} url - URL to request.
+ * @returns {Promise<boolean>} True when the response status code is 200.
+ */
+async function isReachable(url) {
+    try {
+        const res = await bhttp.get(url, options);
+
+        return res.statusCode === 200;
+    } catch (error) {
+        if (argv.debug) {
+            console.error(`${url}: ${error.message}`);
+        }
+
+        return false;
+    }
+}
+
+/**
+ * Probe the candidate URLs of a single site.
+ *
+ * @param {Object} site - Row from the `sites` table.
+ * @returns {Promise<Object>} Copy of the row; `url` is the first reachable candidate, or the stored URL.
+ */
+async function tryLinksForSite(site) {
+    console.log(`Trying ${site.name} URLs`);
+
+    const candidates = prefixes.map(prefix => `${prefix}${site.id}.com`);
+
+    // Requests run in parallel and use a trailing slash; the stored URL does not.
+    const reachable = await Promise.all(candidates.map(candidate => isReachable(`${candidate}/`)));
+
+    console.log(`Got results for ${site.name}`);
+
+    return {
+        ...site,
+        url: candidates.find((candidate, index) => reachable[index]) || site.url,
+    };
+}
+
+/**
+ * Probe every site of the networks passed on the command line and write the
+ * results, sorted by site ID, to `outputPath`.
+ *
+ * @returns {Promise<void>}
+ */
+async function tryLinks() {
+    // Accept a single value or a list; an absent argument selects no sites.
+    // NOTE(review): other modules read `argv.networks`; confirm `network` is defined in ../argv.
+    const networks = [].concat(argv.network || []);
+
+    try {
+        const sites = await knex('sites').whereIn('network_id', networks);
+        const results = await Promise.all(sites.map(site => tryLinksForSite(site)));
+
+        const sortedResults = results.sort((siteA, siteB) => {
+            if (siteA.id > siteB.id) return 1;
+            if (siteA.id < siteB.id) return -1;
+
+            return 0;
+        });
+
+        console.log(sortedResults);
+        await fs.writeFileAsync(outputPath, JSON.stringify(sortedResults, null, 4));
+    } finally {
+        // Tear down the connection pool once the run is over, also on failure.
+        await knex.destroy();
+    }
+}
+
+tryLinks().catch((error) => {
+    console.error('Failed to try links', error);
+    process.exitCode = 1;
+});