From bc01cbe6dc267afe4bed953578d0da207b7ab04f Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Wed, 10 Apr 2019 03:42:20 +0200 Subject: [PATCH] Added Bang Bros data (no scraper yet). Added 21Sextury scene fetch date fallback. --- .gitignore | 1 + config/default.js | 4 + seeds/networks.js | 6 + seeds/sites.js | 469 +++++++++++++++++++++++++++++++++++ src/fetch-releases.js | 31 ++- src/fetch-scene.js | 6 +- src/scrapers/21sextury.js | 6 +- src/scrapers/realitykings.js | 2 +- src/tui/formatters.js | 1 + src/utils/try-links.js | 51 ++++ 10 files changed, 566 insertions(+), 11 deletions(-) create mode 100644 src/utils/try-links.js diff --git a/.gitignore b/.gitignore index 64ed44148..d59fb887c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules/ dist/ config/* !config/default.js +src/utils/link-result.json db.sqlite diff --git a/config/default.js b/config/default.js index 04a13fb41..187aab48c 100644 --- a/config/default.js +++ b/config/default.js @@ -47,6 +47,10 @@ module.exports = { format: 'MMM DD, YYYY', width: 14, }, + { + value: 'network', + width: 20, + }, { value: 'site', width: 30, diff --git a/seeds/networks.js b/seeds/networks.js index ee6423ccb..221378948 100644 --- a/seeds/networks.js +++ b/seeds/networks.js @@ -10,6 +10,12 @@ exports.seed = knex => Promise.resolve() url: 'https://www.21sextury.com', description: 'Watch all the latest scenes and porn video updates on 21Sextury.com, the best European porn site with the hottest pornstars from all over the world! Watch porn videos from the large network here.', }, + { + id: 'bangbros', + name: 'Bang Bros', + url: 'https://bangbros.com', + description: 'Here at Bang Bros, we only film the best highest quality porn with the sexiest Amateur girls and the top pornstars. Updated daily on Bangbros.com.', + }, { id: 'blowpass', name: 'Blowpass', diff --git a/seeds/sites.js b/seeds/sites.js index 4d049e8d6..201a0b933 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -85,6 +85,475 @@ exports.seed = knex => Promise.resolve() network_id: '21sextury', parameters: JSON.stringify({ filter: true }), }, + // BANGBROS + { + id: 'assparade', + label: null, + network_id: 'bangbros', + name: 'Ass Parade', + url: 'http://assparade.com', + description: null, + parameters: null, + }, + { + id: 'avaspice', + label: null, + network_id: 'bangbros', + name: 'AvaSpice', + url: 'https://bangbros.com/websites/avaspice', + description: null, + parameters: null, + }, + { + id: 'backroomfacials', + label: null, + network_id: 'bangbros', + name: 'Back Room Facials', + url: 'http://backroomfacials.com', + description: null, + parameters: null, + }, + { + id: 'backroommilf', + label: null, + network_id: 'bangbros', + name: 'Backroom MILF', + url: 'http://backroommilf.com', + description: null, + parameters: null, + }, + { + id: 'ballhoneys', + label: null, + network_id: 'bangbros', + name: 'Ball Honeys', + url: 'https://bangbros.com/websites/ballhoneys', + description: null, + parameters: null, + }, + { + id: 'bangbros18', + label: null, + network_id: 'bangbros', + name: 'BangBros 18', + url: 'http://bangbros18.com', + description: null, + parameters: null, + }, + { + id: 'bangbrosangels', + label: null, + network_id: 'bangbros', + name: 'BangBros Angels', + url: 'https://bangbros.com/websites/bangbrosangels', + description: null, + parameters: null, + }, + { + id: 'bangbrosclips', + label: null, + network_id: 'bangbros', + name: 'Bangbros Clips', + url: 'http://bangbrosclips.com', + description: null, + parameters: null, + }, + { + id: 'bangbrosremastered', + label: null, + network_id: 'bangbros', + name: 'BangBros Remastered', + url: 'https://bangbros.com/websites/remaster', + description: null, + parameters: null, + }, + { + id: 'bangbus', + label: null, + network_id: 'bangbros', + name: 'Bang Bus', + url: 'http://bangbus.com', + description: null, + parameters: null, + }, + { + id: 'bangcasting', + label: null, + network_id: 'bangbros', + name: 'Bang Casting', + url: 'https://bangbros.com/websites/bangcasting', + description: null, + parameters: null, + }, + { + id: 'bangpov', + label: null, + network_id: 'bangbros', + name: 'Bang POV', + url: 'http://bangpov.com', + description: null, + parameters: null, + }, + { + id: 'bangtryouts', + label: null, + network_id: 'bangbros', + name: 'Bang Tryouts', + url: 'https://bangbros.com/websites/bangtryouts', + description: null, + parameters: null, + }, + { + id: 'bigmouthfuls', + label: null, + network_id: 'bangbros', + name: 'Big Mouthfuls', + url: 'http://bigmouthfuls.com', + description: null, + parameters: null, + }, + { + id: 'bigtitcreampie', + label: null, + network_id: 'bangbros', + name: 'Big Tit Cream Pie', + url: 'http://bigtitcreampie.com', + description: null, + parameters: null, + }, + { + id: 'bigtitsroundasses', + label: null, + network_id: 'bangbros', + name: 'Big Tits, Round Asses', + url: 'http://bigtitsroundasses.com', + description: null, + parameters: null, + }, + { + id: 'blowjobfridays', + label: null, + network_id: 'bangbros', + name: 'BlowJob Fridays', + url: 'http://blowjobfridays.com', + description: null, + parameters: null, + }, + { + id: 'blowjobninjas', + label: null, + network_id: 'bangbros', + name: 'Blowjob Ninjas', + url: 'http://blowjobninjas.com', + description: null, + parameters: null, + }, + { + id: 'boobsquad', + label: null, + network_id: 'bangbros', + name: 'Boob Squad', + url: 'http://boobsquad.com', + description: null, + parameters: null, + }, + { + id: 'brownbunnies', + label: null, + network_id: 'bangbros', + name: 'Brown Bunnies', + url: 'http://brownbunnies.com', + description: null, + parameters: null, + }, + { + id: 'canhescore', + label: null, + network_id: 'bangbros', + name: 'Can He Score?', + url: 'http://canhescore.com', + description: null, + parameters: null, + }, + { + id: 'casting', + label: null, + network_id: 'bangbros', + name: 'Casting', + url: 'https://bangbros.com/websites/casting', + description: null, + parameters: null, + }, + { + id: 'chongas', + label: null, + network_id: 'bangbros', + name: 'Chongas', + url: 'http://chongas.com', + description: null, + parameters: null, + }, + { + id: 'colombiafuckfest', + label: null, + network_id: 'bangbros', + name: 'Colombia Fuck Fest', + url: 'http://colombiafuckfest.com', + description: null, + parameters: null, + }, + { + id: 'dirtyworldtour', + label: null, + network_id: 'bangbros', + name: 'Dirty World Tour', + url: 'https://bangbros.com/websites/dirtyworldtour', + description: null, + parameters: null, + }, + { + id: 'dorminvasion', + label: null, + network_id: 'bangbros', + name: 'Dorm Invasion', + url: 'http://dorminvasion.com', + description: null, + parameters: null, + }, + { + id: 'facialfest', + label: null, + network_id: 'bangbros', + name: 'Facial Fest', + url: 'http://facialfest.com', + description: null, + parameters: null, + }, + { + id: 'fuckteamfive', + label: null, + network_id: 'bangbros', + name: 'Fuck Team Five', + url: 'http://fuckteamfive.com', + description: null, + parameters: null, + }, + { + id: 'gloryholeloads', + label: null, + network_id: 'bangbros', + name: 'Glory Hole Loads', + url: 'https://bangbros.com/websites/gloryholeloads', + description: null, + parameters: null, + }, + { + id: 'latinarampage', + label: null, + network_id: 'bangbros', + name: 'Latina Rampage', + url: 'http://latinarampage.com', + description: null, + parameters: null, + }, + { + id: 'livingwithanna', + label: null, + network_id: 'bangbros', + name: 'Living With Anna', + url: 'http://livingwithanna.com', + description: null, + parameters: null, + }, + { + id: 'magicalfeet', + label: null, + network_id: 'bangbros', + name: 'Magical Feet', + url: 'http://magicalfeet.com', + description: null, + parameters: null, + }, + { + id: 'milflessons', + label: null, + network_id: 'bangbros', + name: 'MILF Lessons', + url: 'https://bangbros.com/websites/milflessons', + description: null, + parameters: null, + }, + { + id: 'milfsoup', + label: null, + network_id: 'bangbros', + name: 'Milf Soup', + url: 'http://milfsoup.com', + description: null, + parameters: null, + }, + { + id: 'momishorny', + label: null, + network_id: 'bangbros', + name: 'MomIsHorny', + url: 'http://momishorny.com', + description: null, + parameters: null, + }, + { + id: 'monstersofcock', + label: null, + network_id: 'bangbros', + name: 'Monsters of Cock', + url: 'http://monstersofcock.com', + description: null, + parameters: null, + }, + { + id: 'mranal', + label: null, + network_id: 'bangbros', + name: 'Mr. Anal', + url: 'http://mranal.com', + description: null, + parameters: null, + }, + { + id: 'mrcameltoe', + label: null, + network_id: 'bangbros', + name: 'Mr CamelToe', + url: 'https://bangbros.com/websites/mrcameltoe', + description: null, + parameters: null, + }, + { + id: 'mydirtymaid', + label: null, + network_id: 'bangbros', + name: 'My Dirty Maid', + url: 'http://mydirtymaid.com', + description: null, + parameters: null, + }, + { + id: 'mylifeinbrazil', + label: null, + network_id: 'bangbros', + name: 'My Life In Brazil', + url: 'http://mylifeinbrazil.com', + description: null, + parameters: null, + }, + { + id: 'newbieblack', + label: null, + network_id: 'bangbros', + name: 'Newbie Black', + url: 'https://bangbros.com/websites/newbieblack', + description: null, + parameters: null, + }, + { + id: 'partyof3', + label: null, + network_id: 'bangbros', + name: 'Party of 3', + url: 'http://partyof3.com', + description: null, + parameters: null, + }, + { + id: 'pawg', + label: null, + network_id: 'bangbros', + name: 'Pawg', + url: 'http://pawg.com', + description: null, + parameters: null, + }, + { + id: 'pennyshow', + label: null, + network_id: 'bangbros', + name: 'Penny Show', + url: 'https://bangbros.com/websites/pennyshow', + description: null, + parameters: null, + }, + { + id: 'pornstarspa', + label: null, + network_id: 'bangbros', + name: 'Porn Star Spa', + url: 'http://pornstarspa.com', + description: null, + parameters: null, + }, + { + id: 'powermunch', + label: null, + network_id: 'bangbros', + name: 'Power Munch', + url: 'https://bangbros.com/websites/powermunch', + description: null, + parameters: null, + }, + { + id: 'publicbang', + label: null, + network_id: 'bangbros', + name: 'Public Bang', + url: 'http://publicbang.com', + description: null, + parameters: null, + }, + { + id: 'sluttywhitegirls', + label: null, + network_id: 'bangbros', + name: 'Slutty White Girls', + url: 'https://bangbros.com/websites/sluttywhitegirls', + description: null, + parameters: null, + }, + { + id: 'stepmomvideos', + label: null, + network_id: 'bangbros', + name: 'Stepmom Videos', + url: 'http://stepmomvideos.com', + description: null, + parameters: null, + }, + { + id: 'streetranger', + label: null, + network_id: 'bangbros', + name: 'Street Ranger', + url: 'https://bangbros.com/websites/thewheeler', + description: null, + parameters: null, + }, + { + id: 'tugjobs', + label: null, + network_id: 'bangbros', + name: 'Tugjobs', + url: 'http://tugjobs.com', + description: null, + parameters: null, + }, + { + id: 'workinglatinas', + label: null, + network_id: 'bangbros', + name: 'Working Latinas', + url: 'http://workinglatinas.com', + description: null, + parameters: null, + }, // BLOWPASS { id: '1000facials', diff --git a/src/fetch-releases.js b/src/fetch-releases.js index 921217b83..ec6b2e632 100644 --- a/src/fetch-releases.js +++ b/src/fetch-releases.js @@ -33,7 +33,10 @@ function curateSites(sites) { name: site.name, description: site.description, url: site.url, - networkId: site.network_id, + network: { + id: site.network_id, + name: site.network_name, + }, parameters: JSON.parse(site.parameters), })); } @@ -41,8 +44,10 @@ function curateSites(sites) { async function accumulateIncludedSites() { if (argv.networks || argv.sites) { const rawSites = await knex('sites') - .whereIn('id', argv.sites || []) - .orWhereIn('network_id', argv.networks || []); + .select('sites.*', 'networks.name as network_name') + .whereIn('sites.id', argv.sites || []) + .orWhereIn('network_id', argv.networks || []) + .leftJoin('networks', 'sites.network_id', 'networks.id'); return curateSites(rawSites); } @@ -50,8 +55,10 @@ async function accumulateIncludedSites() { const included = destructConfigNetworks(config.include); const rawSites = await knex('sites') - .whereIn('id', included.sites) - .orWhereIn('network_id', included.networks); + .select('sites.*', 'networks.name as network_name') + .whereIn('sites.id', included.sites) + .orWhereIn('network_id', included.networks) + .leftJoin('networks', 'sites.network_id', 'networks.id'); return curateSites(rawSites); } @@ -127,7 +134,7 @@ async function fetchReleases() { const sites = await accumulateIncludedSites(); const scenesPerSite = await Promise.all(sites.map(async (site) => { - const scraper = scrapers[site.id] || scrapers[site.networkId]; + const scraper = scrapers[site.id] || scrapers[site.network.id]; if (scraper) { try { @@ -144,7 +151,17 @@ async function fetchReleases() { await storeReleases(newReleases); } - return [...newReleases, ...upcomingReleases.map(release => ({ ...release, upcoming: true }))]; + return [ + ...newReleases.map(release => ({ + ...release, + network: site.network, + })), + ...upcomingReleases.map(release => ({ + ...release, + network: site.network, + upcoming: true, + })), + ]; } catch (error) { if (argv.debug) { console.error(`${site.id}: Failed to fetch releases`, error); diff --git a/src/fetch-scene.js b/src/fetch-scene.js index f253026ff..09a43a191 100644 --- a/src/fetch-scene.js +++ b/src/fetch-scene.js @@ -35,7 +35,9 @@ async function findSite(url) { name: site.name, description: site.description, url: site.url, - networkId: site.network_id || site.id, + network: { + id: site.network_id || site.id, + }, parameters: site.parameters && JSON.parse(site.parameters), isFallback: site.network_id === undefined, }; @@ -72,7 +74,7 @@ function deriveFilename(scene) { async function fetchScene(url) { const site = await findSite(url); - const scraper = scrapers[site.id] || scrapers[site.networkId]; + const scraper = scrapers[site.id] || scrapers[site.network.id]; if (!scraper) { throw new Error('Could not find scraper for URL'); diff --git a/src/scrapers/21sextury.js b/src/scrapers/21sextury.js index d2c6855a3..c6b7030bf 100644 --- a/src/scrapers/21sextury.js +++ b/src/scrapers/21sextury.js @@ -63,7 +63,11 @@ async function scrapeScene(html, url, site) { const entryId = new URL(url).pathname.split('/').slice(-1)[0]; const title = data.name; - const date = moment.utc(data.dateCreated, 'YYYY-MM-DD').toDate(); + const dataDate = moment.utc(data.dateCreated, 'YYYY-MM-DD'); + + const date = dataDate.isValid() + ? dataDate.toDate() + : moment.utc(sceneElement.find('.updatedDate').text().trim(), 'MM-DD-YYYY').toDate(); const actors = data.actor .sort(({ gender: genderA }, { gender: genderB }) => { diff --git a/src/scrapers/realitykings.js b/src/scrapers/realitykings.js index 640d2da8e..72b740ca1 100644 --- a/src/scrapers/realitykings.js +++ b/src/scrapers/realitykings.js @@ -101,7 +101,7 @@ async function fetchScene(url, site) { }, }); - return scrapeScene(res.body.result.parent, url, site); + return scrapeScene(res.body.result.parent || res.body.result, url, site); } module.exports = { diff --git a/src/tui/formatters.js b/src/tui/formatters.js index 208ec6b08..a9a846b3c 100644 --- a/src/tui/formatters.js +++ b/src/tui/formatters.js @@ -4,6 +4,7 @@ const moment = require('moment'); const formatters = { site: site => site.name, + network: network => network.name, date: (date, column) => moment(date).format(column.format || 'MMM DD, YYYY'), actors: actors => actors.join(', '), rating: (rating) => { diff --git a/src/utils/try-links.js b/src/utils/try-links.js new file mode 100644 index 000000000..2bc2fe7ec --- /dev/null +++ b/src/utils/try-links.js @@ -0,0 +1,51 @@ +'use strict'; + +const Promise = require('bluebird'); +const bhttp = require('bhttp'); +const fs = Promise.promisifyAll(require('fs')); + +const knex = require('../knex'); +const argv = require('../argv'); + +const options = { + responseTimeout: 30000, +}; + +async function tryLinks() { + const sites = await knex('sites').whereIn('network_id', argv.network); + + const results = await Promise.all(sites.map(async (site) => { + console.log(`Trying ${site.name} URLs`); + + const [resHttp, resHttpWww, resHttps, resHttpsWww] = await Promise.all([ + bhttp.get(`http://${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), + bhttp.get(`http://www.${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), + bhttp.get(`https://${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), + bhttp.get(`https://www.${site.id}.com/`, options).catch(error => ({ statusCode: error.message })), + ]); + + console.log(`Got results for ${site.name}`); + + return { + ...site, + url: (resHttp.statusCode === 200 && `http://${site.id}.com`) + || (resHttpWww.statusCode === 200 && `http://www.${site.id}.com`) + || (resHttps.statusCode === 200 && `https://${site.id}.com`) + || (resHttpsWww.statusCode === 200 && `https://www.${site.id}.com`) + || site.url, + network_id: site.network_id, + }; + })); + + const sortedResults = results.sort((siteA, siteB) => { + if (siteA.id > siteB.id) return 1; + if (siteA.id < siteB.id) return -1; + + return 0; + }); + + console.log(sortedResults); + await fs.writeFileAsync('./src/utils/link-results.json', JSON.stringify(sortedResults, null, 4)); +} + +tryLinks();