The Hush scraper now builds its channel filter regexp from the children of the entity argument, instead of making its own database request.

This commit is contained in:
DebaucheryLibrarian 2020-08-23 03:31:37 +02:00
parent 278b74e78c
commit c3d771c8fc
2 changed files with 11 additions and 24 deletions

View File

@ -230,24 +230,26 @@ const networks = [
{ {
slug: 'hussiepass', slug: 'hussiepass',
name: 'Hussie Pass', name: 'Hussie Pass',
url: 'http://www.hussiepass.com', url: 'https://www.hussiepass.com',
parent: 'hush', parent: 'hush',
}, },
{ {
slug: 'hushpass', slug: 'hushpass',
name: 'Hush Pass', name: 'Hush Pass',
url: 'http://www.hushpass.com', url: 'https://www.hushpass.com',
parent: 'hush', parent: 'hush',
parameters: { parameters: {
t1: true,
sequential: true, sequential: true,
}, },
}, },
{ {
slug: 'interracialpass', slug: 'interracialpass',
name: 'Interracial Pass', name: 'Interracial Pass',
url: 'http://www.interracialpass.com', url: 'https://www.interracialpass.com',
parent: 'hush', parent: 'hush',
parameters: { parameters: {
t1: true,
sequential: true, sequential: true,
}, },
}, },

View File

@ -2,19 +2,10 @@
const util = require('util'); const util = require('util');
const knex = require('../knex');
const { get, geta, ed, formatDate, ctxa } = require('../utils/q'); const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
const { feetInchesToCm } = require('../utils/convert'); const { feetInchesToCm } = require('../utils/convert');
/**
 * Build a case-insensitive RegExp that alternates over the entities sharing
 * this site's parent, for use as a channel filter.
 *
 * @param {object} site - Entity being scraped; `site.parent.slug` and
 *   `site.parent.id` are read.
 * @returns {Promise<RegExp|null>} `null` when the parent slug is neither
 *   'hushpass' nor 'interracialpass'; otherwise the combined RegExp.
 */
async function getChannelRegExp(site) {
// Only the two listed parent slugs get a channel filter; any other parent yields null.
if (!['hushpass', 'interracialpass'].includes(site.parent.slug)) return null;
// Query the `entities` table for every row whose parent_id equals this site's parent id.
const sites = await knex('entities').where('parent_id', site.parent.id);
// Each row contributes `parameters.match` when that is truthy, else its `name`; alternatives are joined with '|'.
// NOTE(review): values are interpolated unescaped, so regex metacharacters in a name are treated as pattern syntax.
// NOTE(review): an empty result set produces an empty pattern, which matches every string.
return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
}
function deriveEntryId(release) { function deriveEntryId(release) {
if (release.date && release.title) { if (release.date && release.title) {
return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`; return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
@ -160,7 +151,7 @@ function scrapeScene({ html, qu }, site, url, baseRelease) {
return release; return release;
} }
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) { function scrapeSceneT1({ html, qu }, site, url, baseRelease) {
const release = { url }; const release = { url };
release.title = qu.q('.trailer-section-head .section-title', true); release.title = qu.q('.trailer-section-head .section-title', true);
@ -187,16 +178,12 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0]; const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
if (stars) release.stars = Number(stars); if (stars) release.stars = Number(stars);
console.log(channelRegExp, site); if (site.type === 'network') {
const channelRegExp = new RegExp(site.children.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
if (channelRegExp) {
const channel = release.tags.find(tag => channelRegExp.test(tag)); const channel = release.tags.find(tag => channelRegExp.test(tag));
if (channel) { if (channel) {
release.channel = { release.channel = slugify(channel, '');
force: true,
slug: slugify(channel, ''),
};
} }
} }
@ -372,12 +359,11 @@ async function fetchLatest(site, page = 1, include, { uniqueReleases, duplicateR
return scrapeAll(res.items, site, uniqueReleases); return scrapeAll(res.items, site, uniqueReleases);
} }
async function fetchScene(url, site, baseRelease, include, beforeFetchLatest) { async function fetchScene(url, site, baseRelease) {
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
const res = await get(url); const res = await get(url);
if (!res.ok) return res.status; if (!res.ok) return res.status;
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp); if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease);
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease); if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
return scrapeScene(res.item, site, url, baseRelease); return scrapeScene(res.item, site, url, baseRelease);
@ -405,7 +391,6 @@ async function fetchProfile({ name: actorName }, { site }) {
} }
module.exports = { module.exports = {
beforeFetchLatest: getChannelRegExp,
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile, fetchProfile,