From 3a90f98d4145fd4aa91f827b91a29181d103c83c Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Sat, 6 Apr 2019 23:24:26 +0200 Subject: [PATCH] Added Blowpass scraper. Split shootId and pageId. --- README.md | 6 ++ migrations/20190325001339_releases.js | 5 ++ seeds/networks.js | 6 ++ seeds/sites.js | 41 ++++++++++ seeds/tags.js | 12 +++ src/fetch-releases.js | 11 ++- src/fetch-scene.js | 1 + src/scrapers/blowpass.js | 110 ++++++++++++++++++++++++++ src/scrapers/index.js | 2 + src/scrapers/kink.js | 2 + src/scrapers/legalporno.js | 7 +- src/scrapers/private.js | 2 - src/scrapers/template.js | 10 ++- src/tui/formatters.js | 4 + 14 files changed, 208 insertions(+), 11 deletions(-) create mode 100644 src/scrapers/blowpass.js diff --git a/README.md b/README.md index 67b5e529..85cd405a 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,12 @@ The latest releases from your favorite porn studios in one place. ## Supported networks & sites +* **Blowpass** + * 1000 Facials + * Immoral Live + * Mommy Blows Best + * Only Teen Blowjobs + * Throated * **Brazzers** * Asses In Public * Baby Got Boobs diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index e4cc9bc6..c704972b 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -5,6 +5,8 @@ exports.up = knex => Promise.resolve() table.increments('id', 8); table.string('name'); + table.string('gender', 18); + table.integer('alias_for', 8) .references('id') .inTable('actors'); @@ -58,7 +60,9 @@ exports.up = knex => Promise.resolve() .inTable('sites'); table.string('shoot_id'); + table.string('entry_id'); table.unique(['site_id', 'shoot_id']); + table.unique(['site_id', 'entry_id']); table.string('url'); table.string('title'); @@ -116,4 +120,5 @@ exports.down = knex => Promise.resolve() .then(() => knex.schema.dropTable('sites')) .then(() => knex.schema.dropTable('networks')) .then(() => knex.schema.dropTable('actors')) + .then(() => knex.schema.dropTable('directors')) .then(() => knex.schema.dropTable('tags')); diff --git a/seeds/networks.js b/seeds/networks.js index f3e4f280..a27347dd 100644 --- a/seeds/networks.js +++ b/seeds/networks.js @@ -4,6 +4,12 @@ exports.seed = knex => Promise.resolve() .then(() => knex('networks').del()) .then(() => knex('networks').insert([ + { + id: 'blowpass', + name: 'Blowpass', + url: 'https://www.blowpass.com', + description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!', + }, { id: 'brazzers', name: 'Brazzers', diff --git a/seeds/sites.js b/seeds/sites.js index a351502d..718cf81b 100644 --- a/seeds/sites.js +++ b/seeds/sites.js @@ -4,6 +4,47 @@ exports.seed = knex => Promise.resolve() .then(() => knex('sites').del()) .then(() => knex('sites').insert([ + // BLOWPASS + { + id: '1000facials', + name: '1000 Facials', + label: '1000fc', + url: 'https://www.1000facials.com', + description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!', + network_id: 'blowpass', + }, + { + id: 'immorallive', + name: 'Immoral Live', + label: 'imlive', + url: 'https://www.immorallive.com', + description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside', + network_id: 'blowpass', + }, + { + id: 'mommyblowsbest', + name: 'Mommy Blows Best', + label: 'momblb', + url: 'https://www.mommyblowsbest.com', + description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!', + network_id: 'blowpass', + }, + { + id: 'onlyteenblowjobs', + name: 'Only Teen Blowjobs', + label: 'teenbj', + url: 'https://www.onlyteenblowjobs.com', + description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!', + network_id: 'blowpass', + }, + { + id: 'throated', + name: 'Throated', + label: 'throat', + url: 'https://www.throated.com', + description: 'Throated.com is your portal for extreme throat fuck porn, face fucking videos and deepthroat gagging pornstars. Watch teens and MILFs go balls deep, swallowing cock in HD!', + network_id: 'blowpass', + }, // BRAZZERS { id: 'momsincontrol', diff --git a/seeds/tags.js b/seeds/tags.js index 36db2d0f..5c62d20e 100644 --- a/seeds/tags.js +++ b/seeds/tags.js @@ -272,6 +272,10 @@ exports.seed = knex => Promise.resolve() tag: 'nipple clamps', alias_for: null, }, + { + tag: 'oral creampie', + alias_for: null, + }, { tag: 'pain', alias_for: null, @@ -527,6 +531,10 @@ exports.seed = knex => Promise.resolve() tag: 'crop', // a type of whip, not short for corporal alias_for: 'corporal punishment', }, + { + tag: 'cum in mouth', + alias_for: 'oral creampie', + }, { tag: 'cum swallowing', alias_for: 'swallowing', @@ -683,6 +691,10 @@ exports.seed = knex => Promise.resolve() tag: 'MFF', alias_for: 'FMF', }, + { + tag: 'oral', + alias_for: 'blowjob', + }, { tag: 'piercing', alias_for: 'piercings', diff --git a/src/fetch-releases.js b/src/fetch-releases.js index 2f212c92..fd86af33 100644 --- a/src/fetch-releases.js +++ b/src/fetch-releases.js @@ -56,17 +56,20 @@ async function accumulateIncludedSites() { return curateSites(rawSites); } -async function findDuplicateReleases(latestReleases) { - const latestReleasesIds = latestReleases.map(release => release.shootId); +async function findDuplicateReleases(latestReleases, _siteId) { + const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(release => release !== undefined); + const latestReleasesPageIds = latestReleases.map(release => release.pageId).filter(release => release !== undefined); return knex('releases') - .whereIn('shoot_id', latestReleasesIds); + .whereIn('shoot_id', latestReleasesShootIds) + .orWhereIn('shoot_id', latestReleasesPageIds); } async function storeReleases(releases) { const curatedReleases = releases.map(release => ({ site_id: release.site.id, shoot_id: release.shootId || null, + entry_id: release.entry_id || null, url: release.url, title: release.title, date: release.date, @@ -93,7 +96,7 @@ async function storeReleases(releases) { async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) { const latestReleases = await scraper.fetchLatest(site, page); - const duplicateReleases = await findDuplicateReleases(latestReleases); + const duplicateReleases = await findDuplicateReleases(latestReleases, site.id); const duplicateReleasesShootIds = new Set( duplicateReleases .map(release => release.shoot_id) diff --git a/src/fetch-scene.js b/src/fetch-scene.js index f9630bf7..b17c20c4 100644 --- a/src/fetch-scene.js +++ b/src/fetch-scene.js @@ -19,6 +19,7 @@ async function findSite(url) { .orWhere({ url: `${protocol}//${hostname}` }) .first(); + return { id: site.id, name: site.name, diff --git a/src/scrapers/blowpass.js b/src/scrapers/blowpass.js new file mode 100644 index 00000000..230b8bf9 --- /dev/null +++ b/src/scrapers/blowpass.js @@ -0,0 +1,110 @@ +'use strict'; + +/* eslint-disable */ +const bhttp = require('bhttp'); +const cheerio = require('cheerio'); +const moment = require('moment'); + +const { matchTags } = require('../tags'); + +function scrape(html, site) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const sceneElements = $('.sceneList .scene').toArray(); + + return sceneElements.map((element) => { + const entryId = $(element).attr('data-itemid'); + + const sceneLinkElement = $(element).find('.sceneTitle a'); + const title = sceneLinkElement.attr('title'); + const url = `${site.url}/en/scene/${sceneLinkElement.attr('href').split('/').slice(-2).join('/')}`; + + const date = moment.utc($(element).find('.sceneDate').text(), 'MM-DD-YYYY').toDate(); + const actors = $(element).find('.sceneActors a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); + + const likes = Number($(element).find('.rating .state_1 .value').text()); + + return { + url, + entryId, + title, + actors, + date, + rating: { + likes, + }, + site, + }; + }); +} + +async function scrapeScene(html, url, site) { + const $ = cheerio.load(html, { normalizeWhitespace: true }); + const json = $('script[type="application/ld+json"]').html(); + const data = JSON.parse(json).slice(-1)[0]; + const sceneElement = $('#wrapper'); + + const workName = data.isPartOf.name.split(' - '); + const shootId = workName.length > 1 ? workName[0] : null; + const entryId = url.split('/').slice(-1)[0]; + const title = data.name; + const description = data.description; + const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate(); + + // const actors = sceneElement.find('.sceneActors a').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray(); + const actors = data.actor + .sort(({ genderA }, { genderB }) => { + if (genderA === 'female' && genderB === 'male') return 1; + if (genderA === 'male' && genderB === 'female') return -1; + + return 0; + }) + .map(actor => actor.name); + + const likes = Number(sceneElement.find('.rating .state_1 .value').text()); + const dislikes = Number(sceneElement.find('.rating .state_2 .value').text()); + + const duration = moment.duration(data.duration.slice(2)).asSeconds(); + + const rawTags = data.keywords.split(', '); + const tags = await matchTags(rawTags); + + return { + url, + shootId, + entryId, + title, + actors, + date, + duration, + tags, + rating: { + likes, + dislikes, + }, + site, + }; +} + +async function fetchLatest(site, page = 1) { + const res = await bhttp.get(`https://www.blowpass.com/en/videos/${site.id}/latest/All-Categories/0/All-Pornstars/0/${page}`); + + return scrape(res.body.toString(), site); +} + +async function fetchUpcoming(site) { + const res = await bhttp.get(`https://www.blowpass.com/en/videos/${site.id}/upcoming`); + + return scrape(res.body.toString(), site); +} + +async function fetchScene(url, site) { + const res = await bhttp.get(`https://www.blowpass.com/en/video/${site.id}/${new URL(url).pathname.split('/').slice(-2).join('/')}`); + + return scrapeScene(res.body.toString(), url, site); +} + +module.exports = { + fetchLatest, + fetchUpcoming, + fetchScene, +}; diff --git a/src/scrapers/index.js b/src/scrapers/index.js index 976c0eb6..9296c5b6 100644 --- a/src/scrapers/index.js +++ b/src/scrapers/index.js @@ -1,5 +1,6 @@ 'use strict'; +const blowpass = require('./blowpass'); const brazzers = require('./brazzers'); const julesjordan = require('./julesjordan'); const kink = require('./kink'); @@ -10,6 +11,7 @@ const vixen = require('./vixen'); const xempire = require('./xempire'); module.exports = { + blowpass, brazzers, julesjordan, kink, diff --git a/src/scrapers/kink.js b/src/scrapers/kink.js index 4dcb6f4c..37d60486 100644 --- a/src/scrapers/kink.js +++ b/src/scrapers/kink.js @@ -29,6 +29,7 @@ function scrapeLatest(html, site) { return { url, shootId, + entryId: shootId, title, actors, date, @@ -73,6 +74,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) { return { url, shootId, + entryId: shootId, title, date, actors, diff --git a/src/scrapers/legalporno.js b/src/scrapers/legalporno.js index ab17cf0e..3467dd22 100644 --- a/src/scrapers/legalporno.js +++ b/src/scrapers/legalporno.js @@ -25,13 +25,14 @@ function scrapeLatest(html, site) { const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping const { shootId, title } = extractTitle(originalTitle); - const internalId = new URL(url).pathname.split('/')[2]; + const entryId = new URL(url).pathname.split('/')[2]; const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); return { url, - shootId: shootId || internalId, + shootId, + entryId, title, date, site, @@ -44,6 +45,7 @@ async function scrapeScene(html, url, site) { const originalTitle = $('h1.watchpage-title').text().trim(); const { shootId, title } = extractTitle(originalTitle); + const entryId = new URL(url).pathname.split('/')[2]; const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate(); @@ -60,6 +62,7 @@ async function scrapeScene(html, url, site) { return { url, shootId, + entryId, title, date, actors, diff --git a/src/scrapers/private.js b/src/scrapers/private.js index 29765f8c..8bbeac2d 100644 --- a/src/scrapers/private.js +++ b/src/scrapers/private.js @@ -32,7 +32,6 @@ function scrapeLatest(html, site) { date, rating: { likes, - dislikes: 0, }, site, }; @@ -79,7 +78,6 @@ async function scrapeScene(html, url, site) { tags, rating: { likes, - dislikes: 0, }, site: channelSite || site, }; diff --git a/src/scrapers/template.js b/src/scrapers/template.js index 9db93acf..b7c1ead4 100644 --- a/src/scrapers/template.js +++ b/src/scrapers/template.js @@ -49,16 +49,20 @@ function scrapeUpcoming(html, site) { }); } -function scrapeScene(html, url, site) { +async function scrapeScene(html, url, site) { const $ = cheerio.load(html, { normalizeWhitespace: true }); + const rawTags = []; + const tags = await matchTags(rawTags); + return { url, shootId, title, actors, - director: '', + director, date, + tags, rating: { likes, dislikes, @@ -68,7 +72,7 @@ function scrapeScene(html, url, site) { }; } -async function fetchLatest(site) { +async function fetchLatest(site, page = 1) { const res = await bhttp.get(`${site.url}/url`); return scrapeLatest(res.body.toString(), site); diff --git a/src/tui/formatters.js b/src/tui/formatters.js index d15e7a83..208ec6b0 100644 --- a/src/tui/formatters.js +++ b/src/tui/formatters.js @@ -11,6 +11,10 @@ const formatters = { return '\x1b[90mUnrated\x1b[0m'; } + if (rating.likes !== undefined && rating.dislikes === undefined) { + return `\x1b[93m★\x1b[0m N/A \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`; + } + if (rating.stars) { return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`; }