forked from DebaucheryLibrarian/traxxx
The Hush scraper now builds its channel filter regexp from the children of the entity argument, instead of making its own database request.
This commit is contained in:
parent
278b74e78c
commit
c3d771c8fc
|
@ -230,24 +230,26 @@ const networks = [
|
||||||
{
|
{
|
||||||
slug: 'hussiepass',
|
slug: 'hussiepass',
|
||||||
name: 'Hussie Pass',
|
name: 'Hussie Pass',
|
||||||
url: 'http://www.hussiepass.com',
|
url: 'https://www.hussiepass.com',
|
||||||
parent: 'hush',
|
parent: 'hush',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'hushpass',
|
slug: 'hushpass',
|
||||||
name: 'Hush Pass',
|
name: 'Hush Pass',
|
||||||
url: 'http://www.hushpass.com',
|
url: 'https://www.hushpass.com',
|
||||||
parent: 'hush',
|
parent: 'hush',
|
||||||
parameters: {
|
parameters: {
|
||||||
|
t1: true,
|
||||||
sequential: true,
|
sequential: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'interracialpass',
|
slug: 'interracialpass',
|
||||||
name: 'Interracial Pass',
|
name: 'Interracial Pass',
|
||||||
url: 'http://www.interracialpass.com',
|
url: 'https://www.interracialpass.com',
|
||||||
parent: 'hush',
|
parent: 'hush',
|
||||||
parameters: {
|
parameters: {
|
||||||
|
t1: true,
|
||||||
sequential: true,
|
sequential: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -2,19 +2,10 @@
|
||||||
|
|
||||||
const util = require('util');
|
const util = require('util');
|
||||||
|
|
||||||
const knex = require('../knex');
|
|
||||||
const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
|
const { get, geta, ed, formatDate, ctxa } = require('../utils/q');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
const { feetInchesToCm } = require('../utils/convert');
|
const { feetInchesToCm } = require('../utils/convert');
|
||||||
|
|
||||||
async function getChannelRegExp(site) {
|
|
||||||
if (!['hushpass', 'interracialpass'].includes(site.parent.slug)) return null;
|
|
||||||
|
|
||||||
const sites = await knex('entities').where('parent_id', site.parent.id);
|
|
||||||
|
|
||||||
return new RegExp(sites.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
|
|
||||||
}
|
|
||||||
|
|
||||||
function deriveEntryId(release) {
|
function deriveEntryId(release) {
|
||||||
if (release.date && release.title) {
|
if (release.date && release.title) {
|
||||||
return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
return `${slugify(formatDate(release.date, 'YYYY-MM-DD'))}-${slugify(release.title)}`;
|
||||||
|
@ -160,7 +151,7 @@ function scrapeScene({ html, qu }, site, url, baseRelease) {
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
|
function scrapeSceneT1({ html, qu }, site, url, baseRelease) {
|
||||||
const release = { url };
|
const release = { url };
|
||||||
|
|
||||||
release.title = qu.q('.trailer-section-head .section-title', true);
|
release.title = qu.q('.trailer-section-head .section-title', true);
|
||||||
|
@ -187,16 +178,12 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
|
||||||
const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
|
const stars = qu.q('.update-rating', true).match(/\d.\d/)?.[0];
|
||||||
if (stars) release.stars = Number(stars);
|
if (stars) release.stars = Number(stars);
|
||||||
|
|
||||||
console.log(channelRegExp, site);
|
if (site.type === 'network') {
|
||||||
|
const channelRegExp = new RegExp(site.children.map(channel => channel.parameters?.match || channel.name).join('|'), 'i');
|
||||||
if (channelRegExp) {
|
|
||||||
const channel = release.tags.find(tag => channelRegExp.test(tag));
|
const channel = release.tags.find(tag => channelRegExp.test(tag));
|
||||||
|
|
||||||
if (channel) {
|
if (channel) {
|
||||||
release.channel = {
|
release.channel = slugify(channel, '');
|
||||||
force: true,
|
|
||||||
slug: slugify(channel, ''),
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,12 +359,11 @@ async function fetchLatest(site, page = 1, include, { uniqueReleases, duplicateR
|
||||||
return scrapeAll(res.items, site, uniqueReleases);
|
return scrapeAll(res.items, site, uniqueReleases);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site, baseRelease, include, beforeFetchLatest) {
|
async function fetchScene(url, site, baseRelease) {
|
||||||
const channelRegExp = beforeFetchLatest || await getChannelRegExp(site);
|
|
||||||
const res = await get(url);
|
const res = await get(url);
|
||||||
|
|
||||||
if (!res.ok) return res.status;
|
if (!res.ok) return res.status;
|
||||||
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease, channelRegExp);
|
if (site.parameters?.t1) return scrapeSceneT1(res.item, site, url, baseRelease);
|
||||||
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
|
if (site.parameters?.tour) return scrapeSceneTour(res.item, site, url, baseRelease);
|
||||||
|
|
||||||
return scrapeScene(res.item, site, url, baseRelease);
|
return scrapeScene(res.item, site, url, baseRelease);
|
||||||
|
@ -405,7 +391,6 @@ async function fetchProfile({ name: actorName }, { site }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
beforeFetchLatest: getChannelRegExp,
|
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
fetchProfile,
|
fetchProfile,
|
||||||
|
|
Loading…
Reference in New Issue