Added the Sperm Swallowers and The Ass Factory sites to the Jules Jordan scraper. Added the '1024watermarked/' directory as an additional fallback photo source.

2019-12-12 05:18:43 +01:00 · 2019-12-12 05:18:43 +01:00 · a03a00f5d9
parent ad1a0376e7
commit a03a00f5d9
8 changed files with 43 additions and 11 deletions
--- a/public/img/logos/julesjordan/julesjordan.png
+++ b/public/img/logos/julesjordan/julesjordan.png
--- a/public/img/logos/julesjordan/network.png
+++ b/public/img/logos/julesjordan/network.png
--- a/public/img/logos/julesjordan/spermswallowers.png
+++ b/public/img/logos/julesjordan/spermswallowers.png
--- a/public/img/logos/julesjordan/theassfactory.png
+++ b/public/img/logos/julesjordan/theassfactory.png
--- a/seeds/01_sites.js
+++ b/seeds/01_sites.js
@ -1045,7 +1045,18 @@ function getSites(networksMap) {
            name: 'Jules Jordan',
            url: 'https://www.julesjordan.com',
            description: 'Jules Jordan\'s Official Membership Site',
-            parameters: JSON.stringify({ independent: true }),
+            network_id: networksMap.julesjordan,
+        },
+        {
+            slug: 'theassfactory',
+            name: 'The Ass Factory',
+            url: 'https://www.theassfactory.com',
+            network_id: networksMap.julesjordan,
+        },
+        {
+            slug: 'spermswallowers',
+            name: 'Sperm Swallowers',
+            url: 'https://www.spermswallowers.com',
            network_id: networksMap.julesjordan,
        },
        // KELLY MADISON MEDIA
--- a/src/media.js
+++ b/src/media.js
@ -112,8 +112,16 @@ async function filterHashDuplicates(files, domains = ['releases'], roles = ['pho

 async function fetchPhoto(photoUrl, index, identifier, attempt = 1) {
    if (Array.isArray(photoUrl)) {
-        return fetchPhoto(photoUrl[0], index, identifier);
-        // return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => fetchPhoto(url, index, identifier)), Promise.reject());
+        // return fetchPhoto(photoUrl[0], index, identifier);
+        return photoUrl.reduce(async (outcome, url) => outcome.catch(async () => {
+            const photo = await fetchPhoto(url, index, identifier);
+
+            if (photo) {
+                return photo;
+            }
+
+            throw new Error('Photo not available');
+        }), Promise.reject(new Error()));
    }

    try {
@ -214,6 +222,8 @@ async function storePhotos(release, releaseId) {
        concurrency: 10,
    }).filter(photo => photo);

+    console.log(metaFiles);
+
    const uniquePhotos = await filterHashDuplicates(metaFiles, 'releases', 'photo', `(${release.site.name}, ${releaseId}) "${release.title}"`);
    const savedPhotos = await savePhotos(uniquePhotos, release, releaseId);

--- a/src/scrape-sites.js
+++ b/src/scrape-sites.js
@ -142,8 +142,6 @@ async function scrapeReleases() {
        concurrency: 5,
    });

-    console.log(scrapedReleases.flat(2).map(release => release.movie));
-
    if (argv.save) {
        await storeReleases(scrapedReleases.flat(2));
    }
--- a/src/scrapers/julesjordan.js
+++ b/src/scrapers/julesjordan.js
@ -22,8 +22,12 @@ function scrapePhotos(html) {
        .map((photoElement) => {
            const src = $(photoElement).attr('src');

-            // high res often available in photos/ directory, but not always, provide original as fallback
-            return [src.replace('thumbs/', 'photos/'), src];
+            // high res often available in alternative directories, but not always, provide original as fallback
+            return [
+                src.replace('thumbs/', 'photos/'),
+                src.replace('thumbs/', '1024watermarked/'),
+                src,
+            ];
        });

    return photos;
@ -60,7 +64,14 @@ function scrapeLatest(html, site) {
    return scenesElements.map((element) => {
        const photoElement = $(element).find('a img.thumbs');
        const photoCount = Number(photoElement.attr('cnt'));
-        const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => photoElement.attr(`src${index}_1x`)).filter(photoUrl => photoUrl !== undefined);
+        const [poster, ...photos] = Array.from({ length: photoCount }, (value, index) => {
+            const src = photoElement.attr(`src${index}_1x`) || photoElement.attr(`src${index}`);
+
+            if (!src) return null;
+            if (src.match(/^http/)) return src;
+
+            return `${site.url}${src}`;
+        }).filter(photoUrl => photoUrl);

        const sceneLinkElement = $(element).children('a').eq(1);
        const url = sceneLinkElement.attr('href');
@ -116,7 +127,8 @@ function scrapeUpcoming(html, site) {
            .toDate();

        const photoElement = $(element).find('a img.thumbs');
-        const poster = photoElement.attr('src');
+        const posterPath = photoElement.attr('src');
+        const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;

        const videoClass = $(element).find('.update_thumbnail div').attr('class');
        const videoScript = $(element).find(`script:contains(${videoClass})`).html();
@ -159,7 +171,8 @@ async function scrapeScene(html, url, site) {
        .html()
        .split('\n');

-    const poster = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
+    const posterPath = infoLines.find(line => line.match('useimage')).replace('useimage = "', '').slice(0, -2);
+    const poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;

    const trailerLine = infoLines.find(line => line.match('movie["Trailer_720"]'));
    const trailer = trailerLine.slice(trailerLine.indexOf('path:"') + 6, trailerLine.indexOf('",movie'));
@ -167,7 +180,7 @@ async function scrapeScene(html, url, site) {
    const photos = await getPhotos(entryId, site);

    const tags = $('.update_tags a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
-    const movie = $('.update_dvds a').href;
+    const movie = $('.update_dvds a').attr('href');

    return {
        url,