Improved site and network pages. Fixed various issues.

This commit is contained in:
2019-11-12 01:22:20 +01:00
parent 3c76d39301
commit 832e96ced1
118 changed files with 327 additions and 224 deletions

View File

@@ -5,7 +5,7 @@ const bhttp = require('bhttp');
const cheerio = require('cheerio');
const moment = require('moment');
const knex = require('../knex');
const fetchSites = require('../sites');
const { matchTags } = require('../tags');
function scrape(html, site, upcoming) {
@@ -75,6 +75,7 @@ async function scrapeScene(html, url, site) {
const siteElement = $('.niche-site-logo');
const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
const siteName = siteElement.attr('title');
const siteSlug = siteName.replace(/\s+/g, '');
const rawTags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
@@ -84,10 +85,7 @@ async function scrapeScene(html, url, site) {
const [tags, channelSite] = await Promise.all([
matchTags(rawTags),
knex('sites')
.where({ url: siteUrl })
.orWhere({ name: siteName })
.first(),
site.isFallback ? fetchSites(null, siteSlug, siteName, siteUrl) : site,
]);
return {

View File

@@ -54,7 +54,7 @@ function scrapeLatest(html, site) {
return sceneElements.reduce((acc, element) => {
const siteUrl = element.querySelector('.help-block').textContent;
if (siteUrl.toLowerCase() !== new URL(site.url).host) {
if (`www.${siteUrl.toLowerCase()}` !== new URL(site.url).host) {
// different dogfart site
return acc;
}

View File

@@ -22,8 +22,8 @@ function scrapePhotos(html) {
const lockedThumbnails = $('.preview .imgLink.lockedPicture img')
.map((photoIndex, photoElement) => $(photoElement)
.attr('src')
.replace('_tb.jpg', '.jpg'))
.attr('src'))
// .replace('_tb.jpg', '.jpg') does not always work, so the thumbnail src is used as-is
.toArray();
return unlockedPhotos.concat(lockedThumbnails);