From c3d771c8fc17233efd61d9b50d0c6b5f4390d5ba Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Sun, 23 Aug 2020 03:31:37 +0200
Subject: [PATCH] Hush scraper uses children from entity argument for filter
 regexp, instead of making its own database request.

---
 seeds/01_networks.js |  8 +++++---
 src/scrapers/hush.js | 27 ++++++---------------------
 2 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/seeds/01_networks.js b/seeds/01_networks.js
index b1ed6d1a..0530914d 100644
--- a/seeds/01_networks.js
+++ b/seeds/01_networks.js
@@ -230,24 +230,26 @@ const networks = [
   {
     slug: 'hussiepass',
     name: 'Hussie Pass',
-    url: 'http://www.hussiepass.com',
+    url: 'https://www.hussiepass.com',
     parent: 'hush',
   },
   {
     slug: 'hushpass',
     name: 'Hush Pass',
-    url: 'http://www.hushpass.com',
+    url: 'https://www.hushpass.com',
     parent: 'hush',
     parameters: {
+      t1: true,
       sequential: true,
     },
   },
   {
     slug: 'interracialpass',
     name: 'Interracial Pass',
-    url: 'http://www.interracialpass.com',
+    url: 'https://www.interracialpass.com',
     parent: 'hush',
     parameters: {
+      t1: true,
       sequential: true,
     },
   },
diff --git a/src/scrapers/hush.js b/src/scrapers/hush.js
index cdb8c750..28ffdf11 100644
--- a/src/scrapers/hush.js
+++ b/src/scrapers/hush.js
@@ -2,19 +2,10 @@

 const util = require('util');

-const knex = require('../knex');
 const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
 const slugify = require('../utils/slugify');
 const { feetInchesToCm } = require('../utils/convert');

-async function getChannelRegExp(site) {
-  if (!['hushpass', 'interracialpass'].includes(site.parent.slug)) return null;
-
-  const sites = await knex('entities').where('parent_id', site.parent.id);
-
-  return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
-}
-
 function deriveEntryId(release) {
   if (release.date && release.title) {
     return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
@@ -160,7 +151,7 @@ function scrapeScene({ html, qu }, site, url, baseRelease) {
   return release;
 }

-function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
+function scrapeSceneT1({ html, qu }, site, url, baseRelease) {
   const release = { url };

   release.title = qu.q('.trailer-section-head .section-title', true);
@@ -187,16 +178,12 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
   const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
   if (stars) release.stars = Number(stars);

-  console.log(channelRegExp, site);
-
-  if (channelRegExp) {
+  if (site.type === 'network') {
+    const channelRegExp = new RegExp(site.children.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
     const channel = release.tags.find(tag => channelRegExp.test(tag));

     if (channel) {
-      release.channel = {
-        force: true,
-        slug: slugify(channel, ''),
-      };
+      release.channel = slugify(channel, '');
     }
   }

@@ -372,12 +359,11 @@ async function fetchLatest(site, page = 1, include, { uniqueReleases, duplicateR
   return scrapeAll(res.items, site, uniqueReleases);
 }

-async function fetchScene(url, site, baseRelease, include, beforeFetchLatest) {
-  const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
+async function fetchScene(url, site, baseRelease) {
   const res = await get(url);

   if (!res.ok) return res.status;
-  if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
+  if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease);
   if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);

   return scrapeScene(res.item, site, url, baseRelease);
@@ -405,7 +391,6 @@ async function fetchProfile({ name: actorName }, { site }) {
 }

 module.exports = {
-  beforeFetchLatest: getChannelRegExp,
   fetchLatest,
   fetchScene,
   fetchProfile,