Added Blowpass scraper. Split shootId and pageId.

2019-04-06 23:24:26 +02:00
parent 069c2c1628
commit 3a90f98d41
14 changed files with 208 additions and 11 deletions
--- a/README.md
+++ b/README.md
@@ -2,6 +2,12 @@
 The latest releases from your favorite porn studios in one place.
 ## Supported networks & sites
 * **Blowpass**
    * 1000 Facials
    * Immoral Live
    * Mommy Blows Best
    * Only Teen Blowjobs
    * Throated
 * **Brazzers**
    * Asses In Public
    * Baby Got Boobs
--- a/migrations/20190325001339_releases.js
+++ b/migrations/20190325001339_releases.js
@@ -5,6 +5,8 @@ exports.up = knex => Promise.resolve()
        table.increments('id', 8);
        table.string('name');
        table.string('gender', 18);
        table.integer('alias_for', 8)
            .references('id')
            .inTable('actors');
@@ -58,7 +60,9 @@ exports.up = knex => Promise.resolve()
            .inTable('sites');
        table.string('shoot_id');
        table.string('entry_id');
        table.unique(['site_id', 'shoot_id']);
        table.unique(['site_id', 'entry_id']);
        table.string('url');
        table.string('title');
@@ -116,4 +120,5 @@ exports.down = knex => Promise.resolve()
    .then(() => knex.schema.dropTable('sites'))
    .then(() => knex.schema.dropTable('networks'))
    .then(() => knex.schema.dropTable('actors'))
    .then(() => knex.schema.dropTable('directors'))
    .then(() => knex.schema.dropTable('tags'));
--- a/seeds/networks.js
+++ b/seeds/networks.js
@@ -4,6 +4,12 @@
 exports.seed = knex => Promise.resolve()
    .then(() => knex('networks').del())
    .then(() => knex('networks').insert([
        {
            id: 'blowpass',
            name: 'Blowpass',
            url: 'https://www.blowpass.com',
            description: 'Welcome to Blowpass.com, your ultimate source for deepthroat porn, MILF and teen blowjob videos, big cumshots and any and everything oral!',
        },
        {
            id: 'brazzers',
            name: 'Brazzers',
--- a/seeds/sites.js
+++ b/seeds/sites.js
@@ -4,6 +4,47 @@
 exports.seed = knex => Promise.resolve()
    .then(() => knex('sites').del())
    .then(() => knex('sites').insert([
        // BLOWPASS
        {
            id: '1000facials',
            name: '1000 Facials',
            label: '1000fc',
            url: 'https://www.1000facials.com',
            description: 'Welcome to 1000Facials.com, your source for the best facial porn with huge cumshots on your favorite teen and MILF pornstars. Watch all the blowjob action inside!',
            network_id: 'blowpass',
        },
        {
            id: 'immorallive',
            name: 'Immoral Live',
            label: 'imlive',
            url: 'https://www.immorallive.com',
            description: 'Watch live sex shows and videos on ImmoralLive.com, featuring wild and crazy sex orgies, group sex, blowjob competitions and toy play from the famous Porno Dan. The hottest pornstars and amateur girls cum hard inside',
            network_id: 'blowpass',
        },
        {
            id: 'mommyblowsbest',
            name: 'Mommy Blows Best',
            label: 'momblb',
            url: 'https://www.mommyblowsbest.com',
            description: 'Welcome to MommyBlowsBest.com. Home to thousands of MILF blowjobs and hot mom porn! Come see why experience counts, right here at MommyBlowsBest.com!',
            network_id: 'blowpass',
        },
        {
            id: 'onlyteenblowjobs',
            name: 'Only Teen Blowjobs',
            label: 'teenbj',
            url: 'https://www.onlyteenblowjobs.com',
            description: 'OnlyTeenBlowjobs.com brings you the best teen blowjob porn featuring today\'s hottest young pornstars and amateurs. Watch as teens use their little mouths to suck and deepthroat the biggest of cocks!',
            network_id: 'blowpass',
        },
        {
            id: 'throated',
            name: 'Throated',
            label: 'throat',
            url: 'https://www.throated.com',
            description: 'Throated.com is your portal for extreme throat fuck porn, face fucking videos and deepthroat gagging pornstars. Watch teens and MILFs go balls deep, swallowing cock in HD!',
            network_id: 'blowpass',
        },
        // BRAZZERS
        {
            id: 'momsincontrol',
--- a/seeds/tags.js
+++ b/seeds/tags.js
@@ -272,6 +272,10 @@ exports.seed = knex => Promise.resolve()
            tag: 'nipple clamps',
            alias_for: null,
        },
        {
            tag: 'oral creampie',
            alias_for: null,
        },
        {
            tag: 'pain',
            alias_for: null,
@@ -527,6 +531,10 @@ exports.seed = knex => Promise.resolve()
            tag: 'crop', // a type of whip, not short for corporal
            alias_for: 'corporal punishment',
        },
        {
            tag: 'cum in mouth',
            alias_for: 'oral creampie',
        },
        {
            tag: 'cum swallowing',
            alias_for: 'swallowing',
@@ -683,6 +691,10 @@ exports.seed = knex => Promise.resolve()
            tag: 'MFF',
            alias_for: 'FMF',
        },
        {
            tag: 'oral',
            alias_for: 'blowjob',
        },
        {
            tag: 'piercing',
            alias_for: 'piercings',
--- a/src/fetch-releases.js
+++ b/src/fetch-releases.js
@@ -56,17 +56,20 @@ async function accumulateIncludedSites() {
    return curateSites(rawSites);
 }
-async function findDuplicateReleases(latestReleases) {
+async function findDuplicateReleases(latestReleases, _siteId) {
-    const latestReleasesIds = latestReleases.map(release => release.shootId);
+    const latestReleasesShootIds = latestReleases.map(release => release.shootId).filter(release => release !== undefined);
    const latestReleasesPageIds = latestReleases.map(release => release.pageId).filter(release => release !== undefined);
    return knex('releases')
-        .whereIn('shoot_id', latestReleasesIds);
+        .whereIn('shoot_id', latestReleasesShootIds)
        .orWhereIn('shoot_id', latestReleasesPageIds);
 }
 async function storeReleases(releases) {
    const curatedReleases = releases.map(release => ({
        site_id: release.site.id,
        shoot_id: release.shootId || null,
        entry_id: release.entry_id || null,
        url: release.url,
        title: release.title,
        date: release.date,
@@ -93,7 +96,7 @@ async function storeReleases(releases) {
 async function fetchNewReleases(scraper, site, afterDate, accReleases = [], page = 1) {
    const latestReleases = await scraper.fetchLatest(site, page);
-    const duplicateReleases = await findDuplicateReleases(latestReleases);
+    const duplicateReleases = await findDuplicateReleases(latestReleases, site.id);
    const duplicateReleasesShootIds = new Set(
        duplicateReleases
            .map(release => release.shoot_id)
--- a/src/fetch-scene.js
+++ b/src/fetch-scene.js
@@ -19,6 +19,7 @@ async function findSite(url) {
            .orWhere({ url: `${protocol}//${hostname}` })
            .first();
    return {
        id: site.id,
        name: site.name,
--- a/src/scrapers/blowpass.js
+++ b/src/scrapers/blowpass.js
@@ -0,0 +1,110 @@
 'use strict';
 /* eslint-disable */
 const bhttp = require('bhttp');
 const cheerio = require('cheerio');
 const moment = require('moment');
 const { matchTags } = require('../tags');
 /**
  * Extract release summaries from a scene listing page.
  *
  * @param {string} html - Markup of the listing page.
  * @param {Object} site - Site record; `site.url` prefixes each scene URL and the record is attached to every result.
  * @returns {Object[]} One summary per `.sceneList .scene` element found in the markup.
  */
 function scrape(html, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });

    return $('.sceneList .scene').toArray().map((sceneNode) => {
        const scene = $(sceneNode);
        const titleLink = scene.find('.sceneTitle a');
        // only the last two path segments of the listing link are kept for the scene URL
        const scenePath = titleLink.attr('href').split('/').slice(-2).join('/');
        const performerNames = scene
            .find('.sceneActors a')
            .map((performerIndex, performerNode) => $(performerNode).text())
            .toArray();

        return {
            url: `${site.url}/en/scene/${scenePath}`,
            entryId: scene.attr('data-itemid'),
            title: titleLink.attr('title'),
            actors: performerNames,
            date: moment.utc(scene.find('.sceneDate').text(), 'MM-DD-YYYY').toDate(),
            rating: {
                likes: Number(scene.find('.rating .state_1 .value').text()),
            },
            site,
        };
    });
 }
 /**
  * Extract full release details from a scene page.
  *
  * Most fields are read from the last entry of the page's JSON-LD block;
  * the like and dislike counts are read from the rendered markup.
  *
  * @param {string} html - Markup of the scene page.
  * @param {string} url - Scene URL; its last path segment is used as the entry ID.
  * @param {Object} site - Site record, attached to the result as-is.
  * @returns {Promise<Object>} Release details, with tags resolved through matchTags.
  */
 async function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const json = $('script[type="application/ld+json"]').html();
    const data = JSON.parse(json).slice(-1)[0];
    const sceneElement = $('#wrapper');
    // a shoot ID is only taken when the work name contains a ' - ' separator
    const workName = data.isPartOf.name.split(' - ');
    const shootId = workName.length > 1 ? workName[0] : null;
    const entryId = url.split('/').slice(-1)[0];
    const title = data.name;
    const { description } = data;
    const date = moment.utc(data.isPartOf.datePublished, 'YYYY-MM-DD').toDate();
    // The comparator has to read each actor's `gender` property; destructuring
    // `genderA`/`genderB` directly yields undefined and leaves the order untouched.
    // Sorting a copy keeps the parsed JSON-LD data unmodified.
    // NOTE(review): female-before-male is assumed to be the intended order — confirm.
    const actors = [...data.actor]
        .sort(({ gender: genderA }, { gender: genderB }) => {
            if (genderA === 'female' && genderB === 'male') return -1;
            if (genderA === 'male' && genderB === 'female') return 1;
            return 0;
        })
        .map(actor => actor.name);
    const likes = Number(sceneElement.find('.rating .state_1 .value').text());
    const dislikes = Number(sceneElement.find('.rating .state_2 .value').text());
    // the first two characters of the duration value are dropped before parsing
    const duration = moment.duration(data.duration.slice(2)).asSeconds();
    const rawTags = data.keywords.split(', ');
    const tags = await matchTags(rawTags);
    return {
        url,
        shootId,
        entryId,
        title,
        description,
        actors,
        date,
        duration,
        tags,
        rating: {
            likes,
            dislikes,
        },
        site,
    };
 }
 /**
  * Fetch one page of a site's latest-scenes listing and scrape it.
  *
  * @param {Object} site - Site record; `site.id` selects the channel in the listing URL.
  * @param {number} [page=1] - Listing page number.
  * @returns {Promise<Object[]>} Release summaries produced by scrape.
  */
 async function fetchLatest(site, page = 1) {
    const listingUrl = `https://www.blowpass.com/en/videos/${site.id}/latest/All-Categories/0/All-Pornstars/0/${page}`;
    const response = await bhttp.get(listingUrl);
    const html = response.body.toString();

    return scrape(html, site);
 }
 /**
  * Fetch a site's upcoming-scenes listing and scrape it.
  *
  * @param {Object} site - Site record; `site.id` selects the channel in the listing URL.
  * @returns {Promise<Object[]>} Release summaries produced by scrape.
  */
 async function fetchUpcoming(site) {
    const listingUrl = `https://www.blowpass.com/en/videos/${site.id}/upcoming`;
    const response = await bhttp.get(listingUrl);
    const html = response.body.toString();

    return scrape(html, site);
 }
 /**
  * Fetch a single scene page and scrape its details.
  *
  * The request goes to www.blowpass.com, built from `site.id` and the last
  * two path segments of the given URL; the given URL itself is passed on to
  * scrapeScene unchanged.
  *
  * @param {string} url - Scene URL.
  * @param {Object} site - Site record.
  * @returns {Promise<Object>} Release details produced by scrapeScene.
  */
 async function fetchScene(url, site) {
    const channel = site.id;
    const scenePath = new URL(url).pathname.split('/').slice(-2).join('/');
    const response = await bhttp.get(`https://www.blowpass.com/en/video/${channel}/${scenePath}`);
    const html = response.body.toString();

    return scrapeScene(html, url, site);
 }
 // Scraper interface exported by this module: the two listing fetchers and the single-scene fetcher.
 module.exports = {
    fetchLatest,
    fetchUpcoming,
    fetchScene,
 };
--- a/src/scrapers/index.js
+++ b/src/scrapers/index.js
@@ -1,5 +1,6 @@
 'use strict';
 const blowpass = require('./blowpass');
 const brazzers = require('./brazzers');
 const julesjordan = require('./julesjordan');
 const kink = require('./kink');
@@ -10,6 +11,7 @@ const vixen = require('./vixen');
 const xempire = require('./xempire');
 module.exports = {
    blowpass,
    brazzers,
    julesjordan,
    kink,
--- a/src/scrapers/kink.js
+++ b/src/scrapers/kink.js
@@ -29,6 +29,7 @@ function scrapeLatest(html, site) {
        return {
            url,
            shootId,
            entryId: shootId,
            title,
            actors,
            date,
@@ -73,6 +74,7 @@ async function scrapeScene(html, url, shootId, ratingRes, site) {
    return {
        url,
        shootId,
        entryId: shootId,
        title,
        date,
        actors,
--- a/src/scrapers/legalporno.js
+++ b/src/scrapers/legalporno.js
@@ -25,13 +25,14 @@ function scrapeLatest(html, site) {
        const originalTitle = sceneLinkElement.text().trim(); // title attribute breaks when they use \\ escaping
        const { shootId, title } = extractTitle(originalTitle);
-        const internalId = new URL(url).pathname.split('/')[2];
+        const entryId = new URL(url).pathname.split('/')[2];
        const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
        return {
            url,
-            shootId: shootId || internalId,
+            shootId,
            entryId,
            title,
            date,
            site,
@@ -44,6 +45,7 @@ async function scrapeScene(html, url, site) {
    const originalTitle = $('h1.watchpage-title').text().trim();
    const { shootId, title } = extractTitle(originalTitle);
    const entryId = new URL(url).pathname.split('/')[2];
    const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
@@ -60,6 +62,7 @@ async function scrapeScene(html, url, site) {
    return {
        url,
        shootId,
        entryId,
        title,
        date,
        actors,
--- a/src/scrapers/private.js
+++ b/src/scrapers/private.js
@@ -32,7 +32,6 @@ function scrapeLatest(html, site) {
            date,
            rating: {
                likes,
                dislikes: 0,
            },
            site,
        };
@@ -79,7 +78,6 @@ async function scrapeScene(html, url, site) {
        tags,
        rating: {
            likes,
            dislikes: 0,
        },
        site: channelSite || site,
    };
--- a/src/scrapers/template.js
+++ b/src/scrapers/template.js
@@ -49,16 +49,20 @@ function scrapeUpcoming(html, site) {
    });
 }
-function scrapeScene(html, url, site) {
+async function scrapeScene(html, url, site) {
    const $ = cheerio.load(html, { normalizeWhitespace: true });
    const rawTags = [];
    const tags = await matchTags(rawTags);
    return {
        url,
        shootId,
        title,
        actors,
-        director: '',
+        director,
        date,
        tags,
        rating: {
            likes,
            dislikes,
@@ -68,7 +72,7 @@ function scrapeScene(html, url, site) {
    };
 }
-async function fetchLatest(site) {
+async function fetchLatest(site, page = 1) {
    const res = await bhttp.get(`${site.url}/url`);
    return scrapeLatest(res.body.toString(), site);
--- a/src/tui/formatters.js
+++ b/src/tui/formatters.js
@@ -11,6 +11,10 @@ const formatters = {
            return '\x1b[90mUnrated\x1b[0m';
        }
        if (rating.likes !== undefined && rating.dislikes === undefined) {
            return `\x1b[93m★\x1b[0m N/A  \x1b[92m▲\x1b[0m ${String(rating.likes).padEnd(3)}`;
        }
        if (rating.stars) {
            return `\x1b[93m★ ${rating.stars.toFixed(2)}\x1b[0m`;
        }