forked from DebaucheryLibrarian/traxxx
Removed site and tag matching from the Dogfart scraper. Duplicate tag associations are now filtered out before the insert is attempted.
This commit is contained in:
@@ -6,9 +6,6 @@ const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const knex = require('../knex');
|
||||
const { matchTags } = require('../tags');
|
||||
|
||||
async function getPhoto(url) {
|
||||
const res = await bhttp.get(url);
|
||||
const html = res.body.toString();
|
||||
@@ -92,7 +89,7 @@ async function scrapeScene(html, url, site) {
|
||||
.replace('...read more', '')
|
||||
.trim();
|
||||
|
||||
const siteSlug = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
|
||||
const channel = document.querySelector('.site-name').textContent.split('.')[0].toLowerCase();
|
||||
const date = new Date(document.querySelector('meta[itemprop="uploadDate"]').content);
|
||||
const duration = moment
|
||||
.duration(`00:${document
|
||||
@@ -110,17 +107,7 @@ async function scrapeScene(html, url, site) {
|
||||
const photos = await getPhotos(`${origin}${pathname}${lastPhotosUrl}`, site, url);
|
||||
|
||||
const stars = Math.floor(Number(document.querySelector('span[itemprop="average"]').textContent) / 2);
|
||||
const rawTags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);
|
||||
|
||||
const [channelSite, tags] = await Promise.all([
|
||||
site.isFallback
|
||||
? knex('sites')
|
||||
.where({ slug: siteSlug })
|
||||
.orWhere({ url: `https://${siteSlug}.com` })
|
||||
.first()
|
||||
: site,
|
||||
matchTags(rawTags),
|
||||
]);
|
||||
const tags = Array.from(document.querySelectorAll('.scene-details .categories a')).map(({ textContent }) => textContent);
|
||||
|
||||
return {
|
||||
url: `${origin}${pathname}`,
|
||||
@@ -138,7 +125,8 @@ async function scrapeScene(html, url, site) {
|
||||
rating: {
|
||||
stars,
|
||||
},
|
||||
site: channelSite || site,
|
||||
site,
|
||||
channel,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user