From 1d8483042396766c9301d2585693a29c9a0c5b99 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 7 Apr 2022 16:06:38 +0200 Subject: [PATCH] Supporting JSON in scenes file. Added Dogfart link finding utility. --- .gitignore | 1 + seeds/02_sites.js | 3 ++ src/deep.js | 4 +-- src/scrapers/dogfart.js | 2 -- src/tools/dogfart.js | 76 +++++++++++++++++++++++++++++++++++++++ src/utils/file-entries.js | 8 ++++- 6 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 src/tools/dogfart.js diff --git a/.gitignore b/.gitignore index 9eb45e8d8..72a493429 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ dist/ log/ media/ html/ +tmp/* public/js/* public/css/* config/* diff --git a/seeds/02_sites.js b/seeds/02_sites.js index a867a0b28..6fc3c4f5a 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -2794,6 +2794,7 @@ const sites = [ name: 'Dogfart Behind The Scenes', url: 'https://www.dogfartbehindthescenes.com', parent: 'dogfartnetwork', + tags: ['bts'], }, { slug: 'blackmeatwhitefeet', @@ -2848,12 +2849,14 @@ const sites = [ name: 'Blacks On Boys', url: 'https://www.blacksonboys.com', parent: 'dogfartnetwork', + tags: ['gay'], }, { slug: 'gloryholesandhandjobs', name: 'Gloryholes And Handjobs', url: 'https://www.gloryholesandhandjobs.com', parent: 'dogfartnetwork', + tags: ['gay'], }, // DORCEL { diff --git a/src/deep.js b/src/deep.js index a7ad4160d..af3625c2c 100644 --- a/src/deep.js +++ b/src/deep.js @@ -158,8 +158,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') { // filter out keys with null values to ensure original base value is used instead const curatedScrapedRelease = Object.entries(scrapedRelease).reduce((acc, [key, value]) => ({ ...acc, - ...(value !== null && value !== undefined && { - [key]: value, + ...(value !== null && value !== undefined && !(Array.isArray(value) && value.filter(Boolean).length === 0) && { + [key]: Array.isArray(value) ? value.filter(Boolean) : value, }), }), {}); diff --git a/src/scrapers/dogfart.js b/src/scrapers/dogfart.js index e17ec8127..a89aa7e4b 100644 --- a/src/scrapers/dogfart.js +++ b/src/scrapers/dogfart.js @@ -99,8 +99,6 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) { release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2)); - console.log(release.photos); - return release; } diff --git a/src/tools/dogfart.js b/src/tools/dogfart.js new file mode 100644 index 000000000..1b15d963a --- /dev/null +++ b/src/tools/dogfart.js @@ -0,0 +1,76 @@ +'use strict'; + +const fs = require('fs').promises; + +const Promise = require('bluebird'); + +const qu = require('../utils/qu'); + +const qualities = { + sm: 360, + med: 480, + big: 720, +}; + +/* +async function scrape() { + const urlsByPage = await Promise.map(Array.from({ length: 140 }), async (value, index) => { + const res = await qu.get(`https://www.dogfartnetwork.com/tour/scenes/?p=${index + 1}`); + + if (res.ok) { + return res.item.query.urls('.recent-updates > a', 'href', { origin: 'https://www.dogfartnetwork.com' }); + } + + return []; + }, { concurrency: 1 }); + + const urls = urlsByPage.flat(); + + await fs.writeFile('./dogfart-links', urls.join('\n')); + + console.log(`Saved ${urls.length} URLs to file`); +} + +async function compare() { + const newLinksFile = await fs.readFile('./dogfart-links', 'utf8'); + const oldLinksFile = await fs.readFile('./dogfart-got', 'utf8'); + + const newLinks = newLinksFile.split('\n').filter(Boolean); + const oldLinks = new Set(oldLinksFile.split('\n').filter(Boolean)); + + const getLinks = newLinks.filter((link) => !oldLinks.has(link)).map((link) => `https://dogfartnetwork.com/tour/sites${link}`); + + await fs.writeFile('./dogfart-new', getLinks.join('\n')); + + console.log(getLinks); +} +*/ + +async function scrapeMembers() { + const titlesByPage = await Promise.map(Array.from({ length: 1 }), async (value, index) => { + const res = await qu.get(`https://sbj1e2bdv33.dogfartnetwork.com/members/blacksonboys/index.php?page=${index + 1}`); + + if (res.ok) { + return qu.initAll(res.item.query.all('.scene-container')).map(({ query }) => ({ + url: `https://www.dogfartnetwork.com/tour/sites/BlacksOnBoys/${query.img('.video-container img').match(/\/(\w+).jpg/)[1]}/`, + actors: query.contents('a[href*="model.php"]'), + trailer: query.urls('.trailer-link a').map((url) => ({ + src: url, + quality: qualities[url.match(/_([a-z]+).mp4/)[1]], + })), + })); + } + + return []; + }, { concurrency: 1 }); + + const urls = titlesByPage.flat().map((data) => JSON.stringify(data)); + + console.log(urls); + + await fs.writeFile('./dogfart-blacksonboys', Array.from(new Set(urls)).join('\n')); + + console.log(`Saved ${urls.length} URLs to file`); +} + +scrapeMembers(); diff --git a/src/utils/file-entries.js b/src/utils/file-entries.js index d4b049337..94932d6a6 100644 --- a/src/utils/file-entries.js +++ b/src/utils/file-entries.js @@ -8,7 +8,13 @@ async function getFileEntries(location) { } const file = await fs.promises.readFile(location, 'utf-8'); - const entries = file.split(/\n/).map((entry) => entry.trim()).filter(Boolean); + const entries = file.split(/\n/).map((entry) => { + try { + return JSON.parse(entry); + } catch (error) { + return entry.trim(); + } + }).filter(Boolean); return entries; }