From f10e4af29b09a11eebb6292b6951c68d2347c45c Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Thu, 5 Mar 2020 03:44:27 +0100 Subject: [PATCH] Allowing scrapers to force channel allocation attempt. Added Hush Pass subsite handling to Hussie Pass scraper. --- seeds/02_sites.js | 12 +++++- src/releases.js | 8 ++-- src/scrapers/hussiepass.js | 80 +++++++++++++++++++++----------------- 3 files changed, 58 insertions(+), 42 deletions(-) diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 62cfc5f6..fc0ce0ad 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -2072,14 +2072,22 @@ const sites = [ { slug: 'shotherfirst', name: 'Shot Her First', - url: 'https://hushpass.com/tgp/', + url: 'https://shotherfirst.com', network: 'hussiepass', + parameters: { + latest: 'https://hushpass.com/t1/categories/shot-her-first_%d_d.html', + media: 'https://hushpass.com', + t1: true, + }, }, { slug: 'hushpass', - name: 'hushpass', + name: 'Hush Pass', url: 'https://hushpass.com', network: 'hussiepass', + parameters: { + t1: true, + }, }, { slug: 'eyeontheguy', diff --git a/src/releases.js b/src/releases.js index 3f62e660..abb4b22d 100644 --- a/src/releases.js +++ b/src/releases.js @@ -154,7 +154,7 @@ function curateReleases(releases) { } async function attachChannelSite(release) { - if (!release.site?.isFallback) { + if (!release.site?.isFallback && !release.channel?.force) { return release; } @@ -163,8 +163,8 @@ async function attachChannelSite(release) { } const [site] = await fetchSites({ - name: release.channel, - slug: release.channel, + name: release.channel.name || release.channel, + slug: release.channel.slug || release.channel, }); if (site) { @@ -175,7 +175,7 @@ async function attachChannelSite(release) { } try { - const urlSite = await findSiteByUrl(release.channel); + const urlSite = await findSiteByUrl(release.channel.url || release.channel); return { ...release, diff --git a/src/scrapers/hussiepass.js b/src/scrapers/hussiepass.js index a8b739d4..e547f0fa 100644 --- a/src/scrapers/hussiepass.js +++ b/src/scrapers/hussiepass.js @@ -1,10 +1,30 @@ 'use strict'; -// const util = require('util'); +const util = require('util'); const { get, geta, fd } = require('../utils/q'); const slugify = require('../utils/slugify'); +function extractPoster(posterPath, site, baseRelease) { + if (posterPath && !/400.jpg/.test(posterPath)) { + const poster = `${site.parameters?.media || site.url}${posterPath}`; + const posterSources = [ + poster, + // upscaled + poster.replace('-1x', '-2x'), + poster.replace('-1x', '-3x'), + ]; + + if (baseRelease?.poster) { + return [posterSources, [baseRelease.poster]]; + } + + return [posterSources, []]; + } + + return [null, []]; +} + function scrapeLatest(scenes, site) { return scenes.map(({ q, qu, qd, ql }) => { const release = {}; @@ -23,7 +43,7 @@ function scrapeLatest(scenes, site) { q('.modelimg img', 'src0_3x'), q('.modelimg img', 'src0_2x'), q('.modelimg img', 'src0_1x'), - ].filter(Boolean).map(src => `${site.url}${src}`); + ].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`); return release; }); @@ -45,7 +65,7 @@ function scrapeLatestT1(scenes, site) { const posterPath = q('.img-div img', 'src0_1x') || qi('img.video_placeholder'); if (posterPath) { - const poster = /^http/.test(posterPath) ? posterPath : `${site.url}${posterPath}`; + const poster = /^http/.test(posterPath) ? posterPath : `${site.parameters?.media || site.url}${posterPath}`; release.poster = [ poster.replace('-1x', '-3x'), @@ -59,7 +79,7 @@ function scrapeLatestT1(scenes, site) { } function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { - const release = {}; + const release = { url }; release.entryId = html.match(/set-target-(\d+)/)[1]; release.title = q('.centerwrap h2', true); @@ -71,21 +91,10 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { release.actors = qa('.modelname a', true); const posterPath = html.match(/poster="([\w-/.]+)"/)?.[1]; - - if (posterPath && !/400.jpg/.test(posterPath)) { - const poster = `${site.url}${posterPath}`; - release.poster = [ - poster, - // upscaled - poster.replace('-1x', '-2x'), - poster.replace('-1x', '-3x'), - ]; - - if (baseRelease?.poster) release.photos = [baseRelease.poster]; - } + [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); const trailerPath = html.match(/\/trailers\/.*.mp4/); - if (trailerPath) release.trailer = { src: `${site.url}${trailerPath}` }; + if (trailerPath) release.trailer = { src: `${site.parameters?.media || site.url}${trailerPath}` }; const stars = q('.modelrates + p', true).match(/\d.\d/)?.[0]; if (stars) release.stars = Number(stars); @@ -94,7 +103,7 @@ function scrapeScene({ html, q, qa, qd, ql }, site, url, baseRelease) { } function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { - const release = {}; + const release = { url }; release.title = q('.trailer-section-head .section-title', true); release.description = qtx('.row .update-info-block'); @@ -111,39 +120,38 @@ function scrapeSceneT1({ html, q, qa, qd, ql, qtx }, site, url, baseRelease) { q(el, 'img', 'src0_3x'), q(el, 'img', 'src0_2x'), q(el, 'img', 'src0_1x'), - ].filter(Boolean).map(src => `${site.url}${src}`), + ].filter(Boolean).map(src => `${site.parameters?.media || site.url}${src}`), })); release.tags = qa('.tags a', true); - const posterPath = html.match(/poster="(.*\.jpg)/)?.[1]; - - if (posterPath && !/400.jpg/.test(posterPath)) { - const poster = `${site.url}${posterPath}`; - release.poster = [ - poster, - // upscaled - poster.replace('-1x', '-2x'), - poster.replace('-1x', '-3x'), - ]; - - if (baseRelease?.poster) release.photos = [baseRelease.poster]; - } + // const posterPath = html.match(/poster="(.*\.jpg)/)?.[1]; + const posterPath = q('.player-thumb img', 'src0_1x'); + [release.poster, release.photos] = extractPoster(posterPath, site, baseRelease); const trailer = html.match(/ /Shot Her First/.test(tag)); + if (channel) { + release.channel = { + force: true, + slug: slugify(channel, { delimiter: '' }), + }; + } + return release; } async function fetchLatest(site, page = 1) { - const url = site.parameters?.t1 - ? `${site.url}/t1/categories/movies_${page}_d.html` - : `${site.url}/categories/movies_${page}_d.html`; + const url = (site.parameters?.latest && util.format(site.parameters.latest, page)) + || (site.parameters?.t1 && `${site.url}/t1/categories/movies_${page}_d.html`) + || `${site.url}/categories/movies_${page}_d.html`; + const qLatest = await geta(url, '.modelfeature, .item-video'); if (!qLatest) return null;