Added media support to Bang Bros scraper. Added untracked files.

2019-10-31 01:53:26 +01:00
parent 78e9f73ea0
commit 2198c7ceb0
4 changed files with 169 additions and 4 deletions
--- a/.nvmrc
+++ b/.nvmrc
@@ -0,0 +1 @@
+12.13.0
--- a/seeds/04_studios.js
+++ b/seeds/04_studios.js
@@ -0,0 +1,145 @@
+'use strict';
+
+/* eslint-disable max-len */
+exports.seed = knex => Promise.resolve()
+    .then(async () => {
+        // find network IDs
+        const networks = await knex('networks').select('*');
+        const networksMap = networks.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {});
+
+        return knex.raw(`${knex('studios').insert([
+            // LegalPorno
+            {
+                slug: 'gonzocom',
+                name: 'Gonzo.com',
+                url: 'https://www.legalporno.com/studios/gonzo_com',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'giorgiograndi',
+                name: 'Giorgio Grandi',
+                url: 'https://www.legalporno.com/studios/giorgio-grandi',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'hardpornworld',
+                name: 'Hard Porn World',
+                url: 'https://www.legalporno.com/studios/hard-porn-world',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'interracialvision',
+                name: 'Interracial Vision',
+                url: 'https://www.legalporno.com/studios/interracial-vision',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'giorgioslab',
+                name: 'Giorgio\'s Lab',
+                url: 'https://www.legalporno.com/studios/giorgio--s-lab',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'americananal',
+                name: 'American Anal',
+                url: 'https://www.legalporno.com/studios/american-anal',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'assablanca',
+                name: 'Assablanca',
+                url: 'https://www.legalporno.com/studios/assablanca',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'focus',
+                name: 'Focus',
+                url: 'https://www.legalporno.com/studios/focus',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'analforever',
+                name: 'Anal Forever',
+                url: 'https://www.legalporno.com/studios/anal-forever',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'gonzoinbrazil',
+                name: 'Gonzo in Brazil',
+                url: 'https://www.legalporno.com/studios/gonzo-in-brazil',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'mranal',
+                name: 'Mr Anal',
+                url: 'https://www.legalporno.com/studios/mr-anal',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'tarrawhite',
+                name: 'Tarra White',
+                url: 'https://www.legalporno.com/studios/tarra-white',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'sineplexsos',
+                name: 'Sineplex SOS',
+                url: 'https://www.legalporno.com/studios/sineplex-sos',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'fmodels',
+                name: 'F Models',
+                url: 'https://www.legalporno.com/studios/f-models',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'sineplexcz',
+                name: 'Sineplex CZ',
+                url: 'https://www.legalporno.com/studios/sineplex-cz',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'gg',
+                name: 'GG',
+                url: 'https://www.legalporno.com/studios/gg',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'firstgape',
+                name: 'First Gape',
+                url: 'https://www.legalporno.com/studios/first-gape',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'omargalantiproductions',
+                name: 'Omar Galanti Productions',
+                url: 'https://www.legalporno.com/studios/omar-galanti-productions',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'norestfortheass',
+                name: 'No Rest For The Ass',
+                url: 'https://www.legalporno.com/studios/no-rest-for-the-ass',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'hairygonzo',
+                name: 'Hairy Gonzo',
+                url: 'https://www.legalporno.com/studios/hairy-gonzo',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'sineplexclassic',
+                name: 'Sineplex Classic',
+                url: 'https://www.legalporno.com/studios/sineplex-classic',
+                network_id: networksMap['legalporno'],
+            },
+            {
+                slug: 'sinemale',
+                name: 'Sinemale',
+                url: 'https://www.legalporno.com/studios/sinemale',
+                network_id: networksMap['legalporno'],
+            },
+        ]).toString()} ON CONFLICT DO NOTHING`);
+    });
--- a/src/fetch-releases.js
+++ b/src/fetch-releases.js
@@ -90,8 +90,6 @@ async function storeActors(release, releaseEntry) {
    const actors = await knex('actors').whereIn('name', release.actors);
    const newActors = release.actors.filter(actorName => !actors.some(actor => actor.name === actorName));

-    console.log(release.actors, actors, newActors);
-
    const { rows: insertedActors } = newActors.length
        ? await knex.raw(`${knex('actors').insert(newActors.map(actorName => ({
            name: actorName,
--- a/src/scrapers/bangbros.js
+++ b/src/scrapers/bangbros.js
@@ -22,6 +22,13 @@ function scrapeLatest(html, site) {
        const date = moment.utc($(element).find('.thmb_mr_2 span.faTxt').text(), 'MMM D, YYYY').toDate();
        const actors = $(element).find('.cast-wrapper a.cast').map((actorIndex, actorElement) => $(actorElement).text().trim()).toArray();

+        const photoElement = $(element).find('.rollover-image');
+        const poster = `https:${photoElement.attr('data-original')}`;
+
+        const photosUrl = photoElement.attr('data-rollover-url');
+        const photosMaxIndex = photoElement.attr('data-rollover-max-index');
+        const photos = Array.from({ length: photosMaxIndex }, (val, index) => `https:${photosUrl}big${index + 1}.jpg`);
+
        const duration = moment.duration(`0:${$(element).find('.thmb_pic b.tTm').text()}`).asSeconds();

        return {
@@ -32,6 +39,8 @@ function scrapeLatest(html, site) {
            actors,
            date,
            duration,
+            poster,
+            photos,
            rating: null,
            site,
        };
@@ -50,11 +59,18 @@ async function scrapeScene(html, url, site) {
    const [siteName, ...actors] = sceneElement.find('.vdoCast a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
    const siteId = siteName.replace(/[\s']+/g, '').toLowerCase();

+    const poster = `https:${$('img#player-overlay-image').attr('src')}`;
+    const trailer = `https:${$('source[type="video/mp4"]').attr('src')}`;
+
+    const firstPhotoUrl = `https:${$('img[data-slider-index="1"]').attr('src')}`;
+    // all scenes seem to have 12 album photos available, not always included on the page
+    const photos = Array.from({ length: 12 }, (val, index) => firstPhotoUrl.replace(/big\d+/, `big${index + 1}`));
+
    const rawTags = $('.vdoTags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();

    const [channelSite, tags] = await Promise.all([
        knex('sites')
-            .where({ id: siteId })
+            .where({ slug: siteId })
            .orWhere({ name: siteName })
            .first(),
        matchTags(rawTags),
@@ -70,6 +86,11 @@ async function scrapeScene(html, url, site) {
        description,
        actors,
        tags,
+        poster,
+        photos,
+        trailer: {
+            src: trailer,
+        },
        rating: {
            stars,
        },
@@ -78,7 +99,7 @@ async function scrapeScene(html, url, site) {
 }

 async function fetchLatest(site, page = 1) {
-    const res = await bhttp.get(`https://bangbros.com/websites/${site.id}/${page}`);
+    const res = await bhttp.get(`https://bangbros.com/websites/${site.slug}/${page}`);

    return scrapeLatest(res.body.toString(), site);
 }