Added teaser support. Added Score network with scraper for Scoreland. Improved q. Added assets.

2020-02-02 05:14:58 +01:00
parent 14e5695b6e
commit a97c6defca
52 changed files with 4291 additions and 3435 deletions
--- a/src/scrapers/boobpedia.js
+++ b/src/scrapers/boobpedia.js
@@ -5,7 +5,7 @@ const bhttp = require('bhttp');
 const { ex } = require('../utils/q');

 function scrapeProfile(html) {
-    const { q, qa, qd, qi, qu } = ex(html); /* eslint-disable-line object-curly-newline */
+    const { q, qa, qd, qi, qus } = ex(html); /* eslint-disable-line object-curly-newline */
    const profile = {};

    const bio = qa('.infobox tr[valign="top"]')
@@ -59,19 +59,15 @@ function scrapeProfile(html) {
    if (bio.Blood_group) profile.blood = bio.Blood_group;
    if (bio.Also_known_as) profile.aliases = bio.Also_known_as.split(', ');

-    const avatars = qi('.image img');
+    const avatarThumbPath = qi('.image img');

-    if (avatars.length > 0) {
-        const [avatarThumbPath] = avatars;
+    if (avatarThumbPath && !/NoImageAvailable/.test(avatarThumbPath)) {
+        const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');

-        if (!/NoImageAvailable/.test(avatarThumbPath)) {
-            const avatarPath = avatarThumbPath.slice(0, avatarThumbPath.lastIndexOf('/')).replace('thumb/', '');
-
-            profile.avatar = `http://www.boobpedia.com${avatarPath}`;
-        }
+        profile.avatar = `http://www.boobpedia.com${avatarPath}`;
    }

-    profile.social = qu('.infobox a.external');
+    profile.social = qus('.infobox a.external');

    return profile;
 }
--- a/src/scrapers/naughtyamerica.js
+++ b/src/scrapers/naughtyamerica.js
@@ -101,7 +101,7 @@ function scrapeScene(html, url, site) {
 }

 function scrapeProfile(html) {
-    const { q, qu } = ex(html);
+    const { q, qus } = ex(html);
    const profile = {};

    profile.description = q('.bio_about_text', true);
@@ -109,7 +109,7 @@ function scrapeProfile(html) {
    const avatar = q('img.performer-pic', 'src');
    if (avatar) profile.avatar = `https:${avatar}`;

-    profile.releases = qu('.scene-item > a:first-child');
+    profile.releases = qus('.scene-item > a:first-child');

    return profile;
 }
--- a/src/scrapers/score.js
+++ b/src/scrapers/score.js
@@ -0,0 +1,206 @@
+'use strict';
+
+const bhttp = require('bhttp');
+
+const { ex, exa } = require('../utils/q');
+const slugify = require('../utils/slugify');
+const { heightToCm, lbsToKg } = require('../utils/convert');
+
+/**
+ * Collect the photos of an album page.
+ *
+ * @param {string} html - Album page markup, handed to `ex` together with the
+ *   `#photos-page` selector.
+ * @returns {Array<Array<string>>} One `[enlarged, scraped]` pair per image, where
+ *   `enlarged` is the scraped source with `x_800` swapped for `x_xl` and `_tn` removed.
+ */
+function scrapePhotos(html) {
+    const { qis } = ex(html, '#photos-page');
+
+    // presumably qis yields the source string of every matching image; confirm in utils/q
+    return qis('img').map((source) => {
+        const enlarged = source.replace('x_800', 'x_xl').replace('_tn', '');
+
+        return [enlarged, source];
+    });
+}
+
+/**
+ * Download a photo album page and scrape its photos.
+ *
+ * @param {string} [url] - Address of the album page.
+ * @returns {Promise<Array>} Result of `scrapePhotos`, or an empty array when no
+ *   URL is given or the response status is not 200.
+ */
+async function fetchPhotos(url) {
+    // the scene scraper passes on whatever its album link lookup produced,
+    // so an empty value means there is nothing to request
+    if (!url) return [];
+
+    const res = await bhttp.get(url);
+
+    if (res.statusCode !== 200) return [];
+
+    return scrapePhotos(res.body.toString());
+}
+
+/**
+ * Scrape every release tile (`.container .video`) found in a listing page.
+ *
+ * Tiles whose link contains `photos/` are photo albums rather than scenes and
+ * are left out of the result.
+ *
+ * @param {string} html - Markup of the listing.
+ * @returns {Array<Object>} Releases with `title`, `url` (origin and pathname
+ *   only), `entryId`, `date`, `actors` and `duration`, plus `poster` when the
+ *   tile has an image and `teaser` when that image carries a GIF preview.
+ */
+function scrapeAll(html) {
+    return exa(html, '.container .video').map(({ q, qa, qd, ql }) => {
+        const release = {};
+
+        const linkEl = q('a.i-title');
+
+        release.title = linkEl.textContent.trim();
+
+        const url = new URL(linkEl.href);
+        release.url = `${url.origin}${url.pathname}`;
+
+        // this is a photo album, not a scene (used for profiles)
+        if (/photos\//.test(url.href)) return null;
+
+        // first of the last two pathname parts
+        [release.entryId] = url.pathname.split('/').slice(-2);
+
+        release.date = qd('.i-date', 'MMM DD', /\w+ \d{1,2}$/);
+        release.actors = qa('.i-model', true);
+        release.duration = ql('.i-amount');
+
+        const posterEl = q('.item-img img');
+
+        if (posterEl) {
+            release.poster = `https:${posterEl.src}`;
+
+            // without a data-gif-preview attribute the teaser would point at "https:undefined"
+            const { gifPreview } = posterEl.dataset;
+
+            if (gifPreview) {
+                release.teaser = {
+                    src: `https:${gifPreview}`,
+                };
+            }
+        }
+
+        return release;
+    }).filter(Boolean);
+}
+
+/**
+ * Scrape a single scene page and fetch its photo album.
+ *
+ * @param {string} html - Scene page markup, handed to `ex` together with the
+ *   `#videos-page` selector.
+ * @param {string} url - Address of the scene; the entry ID is taken from its pathname.
+ * @returns {Promise<Object>} Release with `entryId`, `title`, `description`,
+ *   `actors`, `tags`, `date`, `duration`, `photos` and `poster`, plus `trailer`
+ *   (720p and 360p sources) and `rating` when the page provides them.
+ */
+async function scrapeScene(html, url) {
+    const { q, qa, qd, ql, qu, qp, qt } = ex(html, '#videos-page');
+    const release = {};
+
+    [release.entryId] = new URL(url).pathname.split('/').slice(-2);
+
+    release.title = q('#breadcrumb-top + h1', true);
+    release.description = q('.p-desc', true);
+
+    release.actors = qa('a[href*=models]', true);
+    release.tags = qa('a[href*=tag]', true);
+
+    // date and duration are both picked from the .value elements by their text format
+    const valueEls = qa('.value');
+
+    const dateEl = valueEls.find(el => /\w+ \d+\w+, \d{4}/.test(el.textContent));
+    release.date = qd(dateEl, null, 'MMMM Do, YYYY');
+
+    const durationEl = valueEls.find(el => /\d{1,3}:\d{2}/.test(el.textContent));
+    release.duration = ql(durationEl);
+
+    const photosUrl = qu('a[href*=photos]');
+    release.photos = await fetchPhotos(photosUrl);
+    release.poster = qp('video'); // _800.jpg is larger than _xl.jpg in landscape
+
+    const trailer = qt();
+
+    // a scene without a trailer should still yield the rest of the release
+    if (trailer) {
+        release.trailer = [
+            {
+                // don't rely on trailer always being 720p by default
+                src: trailer.replace(/\d+p\.mp4/, '720p.mp4'),
+                quality: 720,
+            },
+            {
+                src: trailer.replace(/\d+p\.mp4/, '360p.mp4'),
+                quality: 360,
+            },
+        ];
+    }
+
+    // the rating box may be missing altogether
+    const stars = q('.rate-box')?.dataset.score;
+    if (stars) release.rating = { stars };
+
+    return release;
+}
+
+/**
+ * Look up a model on a listing page by the exact `title` of her link.
+ *
+ * @param {string} html - Markup of the model listing.
+ * @param {string} actorName - Name compared strictly against each `.model a` title.
+ * @returns {?string} The `href` of the matching link, or `null` when no link
+ *   matches or the match has no usable `href`.
+ */
+function scrapeModels(html, actorName) {
+    const { qa } = ex(html);
+    const match = qa('.model a').find(({ title }) => title === actorName);
+
+    if (!match) return null;
+
+    return match.href || null;
+}
+
+/**
+ * Scrape a model page into a profile.
+ *
+ * The `.stat` entries are first gathered into a lookup keyed by their slugified
+ * label (last character of the label dropped), then copied onto the profile
+ * field by field. The releases listed next to the profile are scraped with
+ * `scrapeAll`.
+ *
+ * @param {string} html - Markup of the model page.
+ * @returns {Object} Profile; `gender` is always `'female'`, `releases` is always
+ *   set, every other field only when the page provides it.
+ */
+function scrapeProfile(html) {
+    const { q, qa, qi } = ex(html, '#model-page');
+    const profile = { gender: 'female' };
+
+    const bio = Object.fromEntries(qa('.stat').map((statEl) => {
+        const label = q(statEl, '.label', true).slice(0, -1);
+        const key = slugify(label, false, '_');
+
+        return [key, q(statEl, '.value', true)];
+    }));
+
+    if (bio.location) {
+        // see Laura Lion
+        profile.residencePlace = bio.location.replace('Czech Repulic', 'Czech Republic');
+    }
+
+    if (bio.birthday) {
+        const [monthName] = bio.birthday.match(/^\w+/);
+        const [birthDay] = bio.birthday.match(/\d+/);
+
+        // currently unused, not to be confused with birthdate
+        profile.birthday = [monthName.toLowerCase(), birthDay];
+    }
+
+    if (bio.ethnicity) {
+        profile.ethnicity = bio.ethnicity;
+    }
+
+    if (bio.hair_color) {
+        profile.hair = bio.hair_color;
+    }
+
+    if (bio.height) {
+        profile.height = heightToCm(bio.height);
+    }
+
+    if (bio.weight) {
+        profile.weight = lbsToKg(bio.weight);
+    }
+
+    if (bio.bra_size) {
+        profile.bust = bio.bra_size;
+    }
+
+    if (bio.measurements) {
+        // the first part is skipped, bust comes from the bra size
+        const [, waist, hip] = bio.measurements.split('-');
+
+        profile.waist = waist;
+        profile.hip = hip;
+    }
+
+    if (bio.occupation) {
+        profile.occupation = bio.occupation;
+    }
+
+    const avatar = qi('img');
+
+    if (avatar) {
+        profile.avatar = avatar;
+    }
+
+    const { document: releasesEl } = ex(html, '#model-page + .container');
+    profile.releases = scrapeAll(releasesEl.outerHTML);
+
+    return profile;
+}
+
+/**
+ * Fetch one page of the site's video listing and scrape its releases.
+ *
+ * @param {Object} site - Site whose `url` is the base of the listing address.
+ * @param {number} [page=1] - Listing page to request.
+ * @returns {Promise<?Array<Object>>} Scraped releases, or `null` when the
+ *   response status is not 200.
+ */
+async function fetchLatest(site, page = 1) {
+    const res = await bhttp.get(`${site.url}/big-boob-videos?page=${page}`);
+
+    if (res.statusCode !== 200) return null;
+
+    return scrapeAll(res.body.toString(), site);
+}
+
+/**
+ * Fetch a scene page and scrape it.
+ *
+ * @param {string} url - Address of the scene.
+ * @param {Object} site - Passed along to `scrapeScene` as its third argument.
+ * @returns {Promise<?Object>} Scraped release, or `null` when the response
+ *   status is not 200.
+ */
+async function fetchScene(url, site) {
+    const res = await bhttp.get(url);
+    const isOk = res.statusCode === 200;
+
+    return isOk ? scrapeScene(res.body.toString(), url, site) : null;
+}
+
+/**
+ * Find a model in the alphabetical listing and scrape her profile.
+ *
+ * The listing for the first letter of the name is walked page by page until a
+ * link titled exactly like the name shows up.
+ *
+ * NOTE(review): the walk only stops on a match or a non-200 listing response;
+ * confirm the site answers with such a status past its last page.
+ *
+ * @param {string} actorName - Name to look for.
+ * @param {string} scraperSlug - Not used here, only handed on to the next page lookup.
+ * @param {number} [page=1] - Listing page to start from.
+ * @returns {Promise<?Object>} Scraped profile, or `null` when a listing page or
+ *   the profile page does not respond with status 200.
+ */
+async function fetchProfile(actorName, scraperSlug, page = 1) {
+    const letter = actorName.charAt(0).toUpperCase();
+    const listingRes = await bhttp.get(`https://www.scoreland.com/big-boob-models/browse/${letter}/?page=${page}`);
+
+    if (listingRes.statusCode !== 200) return null;
+
+    const actorUrl = scrapeModels(listingRes.body.toString(), actorName);
+
+    // not on this page, continue with the next one
+    if (!actorUrl) return fetchProfile(actorName, scraperSlug, page + 1);
+
+    const actorRes = await bhttp.get(actorUrl);
+
+    if (actorRes.statusCode !== 200) return null;
+
+    return scrapeProfile(actorRes.body.toString());
+}
+
+// exported entry points: latest releases, a single scene, and an actor profile
+module.exports = {
+    fetchLatest,
+    fetchScene,
+    fetchProfile,
+};
--- a/src/scrapers/scrapers.js
+++ b/src/scrapers/scrapers.js
@@ -34,6 +34,7 @@ const men = require('./men');
 const metrohd = require('./metrohd');
 const mofos = require('./mofos');
 const naughtyamerica = require('./naughtyamerica');
+const score = require('./score');
 const twentyonesextury = require('./21sextury');
 const xempire = require('./xempire');
 const wicked = require('./wicked');
@@ -78,6 +79,7 @@ module.exports = {
        puretaboo,
        naughtyamerica,
        realitykings,
+        score,
        teamskeet,
        vixen,
        vogov,
@@ -109,6 +111,7 @@ module.exports = {
        naughtyamerica,
        pornhub,
        realitykings,
+        score,
        transangels,
        wicked,
        xempire,
--- a/src/scrapers/vogov.js
+++ b/src/scrapers/vogov.js
@@ -116,7 +116,7 @@ function scrapeLatest(html) {
 }

 function scrapeScene(html, url) {
-    const { q, qa, qd, qu, ql, qm } = ex(html);
+    const { q, qa, qd, qus, ql, qm } = ex(html);
    const release = { url };

    // release.entryId = slugify(release.title);
@@ -131,7 +131,7 @@ function scrapeScene(html, url) {
    release.actors = qa('.info-video-models a', true);
    release.tags = qa('.info-video-category a', true);

-    release.photos = qu('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
+    release.photos = qus('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));
    release.poster = qm('meta[property="og:image"');

    if (!release.poster) {