Supporting JSON in scenes file. Added Dogfart link finding utility.
parent 21a3bc44e6
commit 1d84830423
@@ -3,6 +3,7 @@ dist/
 log/
 media/
 html/
+tmp/*
 public/js/*
 public/css/*
 config/*
@@ -2794,6 +2794,7 @@ const sites = [
     name: 'Dogfart Behind The Scenes',
     url: 'https://www.dogfartbehindthescenes.com',
     parent: 'dogfartnetwork',
+    tags: ['bts'],
   },
   {
     slug: 'blackmeatwhitefeet',
@@ -2848,12 +2849,14 @@ const sites = [
     name: 'Blacks On Boys',
     url: 'https://www.blacksonboys.com',
     parent: 'dogfartnetwork',
+    tags: ['gay'],
   },
   {
     slug: 'gloryholesandhandjobs',
     name: 'Gloryholes And Handjobs',
     url: 'https://www.gloryholesandhandjobs.com',
     parent: 'dogfartnetwork',
+    tags: ['gay'],
   },
   // DORCEL
   {
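The two hunks above only attach tags arrays to existing Dogfart channel entries. A hypothetical usage sketch, assuming entries shaped like the ones in the diff (slugs omitted for brevity); the tag filter itself is not part of this commit:

'use strict';

// entry shapes follow the diff above; only the fields shown there are used
const sites = [
  { name: 'Blacks On Boys', url: 'https://www.blacksonboys.com', parent: 'dogfartnetwork', tags: ['gay'] },
  { name: 'Gloryholes And Handjobs', url: 'https://www.gloryholesandhandjobs.com', parent: 'dogfartnetwork', tags: ['gay'] },
];

// hypothetical helper: pick channels carrying a given tag
const taggedSites = sites.filter((site) => (site.tags || []).includes('gay'));

console.log(taggedSites.map((site) => site.name)); // [ 'Blacks On Boys', 'Gloryholes And Handjobs' ]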
@@ -158,8 +158,8 @@ async function scrapeRelease(baseRelease, entitiesBySlug, type = 'scene') {
   // filter out keys with null values to ensure original base value is used instead
   const curatedScrapedRelease = Object.entries(scrapedRelease).reduce((acc, [key, value]) => ({
     ...acc,
-    ...(value !== null && value !== undefined && {
-      [key]: value,
+    ...(value !== null && value !== undefined && !(Array.isArray(value) && value.filter(Boolean).length === 0) && {
+      [key]: Array.isArray(value) ? value.filter(Boolean) : value,
     }),
   }), {});
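A minimal standalone sketch of the curation step above, with placeholder baseRelease and scrapedRelease objects rather than real scraper output. It shows the new behaviour: null, undefined, and arrays with no truthy items are dropped so the base value survives, and populated arrays are compacted.

'use strict';

const baseRelease = { title: 'Base title', actors: ['Alice'], photos: ['a.jpg'] };
const scrapedRelease = { title: 'Scraped title', actors: null, photos: [null, undefined] };

// keep only keys with usable values; compact arrays and drop empty ones
const curatedScrapedRelease = Object.entries(scrapedRelease).reduce((acc, [key, value]) => ({
  ...acc,
  ...(value !== null && value !== undefined && !(Array.isArray(value) && value.filter(Boolean).length === 0) && {
    [key]: Array.isArray(value) ? value.filter(Boolean) : value,
  }),
}), {});

console.log({ ...baseRelease, ...curatedScrapedRelease });
// => { title: 'Scraped title', actors: [ 'Alice' ], photos: [ 'a.jpg' ] }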
@@ -99,8 +99,6 @@ async function scrapeScene({ query }, url, channel, baseScene, parameters) {
 
   release.stars = Number(((query.number('span[itemprop="average"], span[itemprop="ratingValue"]') || query.number('canvas[data-score]', null, 'data-score')) / 2).toFixed(2));
 
-  console.log(release.photos);
-
   return release;
 }
 
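For reference, the release.stars line kept above maps a 0-10 rating onto a 0-5 scale rounded to two decimals. A tiny sketch of that arithmetic, with a hypothetical score value in place of the DOM query:

'use strict';

const score = 8.7; // hypothetical value standing in for query.number(...)
const stars = Number((score / 2).toFixed(2));

console.log(stars); // 4.35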
@@ -0,0 +1,76 @@
+'use strict';
+
+const fs = require('fs').promises;
+
+const Promise = require('bluebird');
+
+const qu = require('../utils/qu');
+
+const qualities = {
+  sm: 360,
+  med: 480,
+  big: 720,
+};
+
+/*
+async function scrape() {
+  const urlsByPage = await Promise.map(Array.from({ length: 140 }), async (value, index) => {
+    const res = await qu.get(`https://www.dogfartnetwork.com/tour/scenes/?p=${index + 1}`);
+
+    if (res.ok) {
+      return res.item.query.urls('.recent-updates > a', 'href', { origin: 'https://www.dogfartnetwork.com' });
+    }
+
+    return [];
+  }, { concurrency: 1 });
+
+  const urls = urlsByPage.flat();
+
+  await fs.writeFile('./dogfart-links', urls.join('\n'));
+
+  console.log(`Saved ${urls.length} URLs to file`);
+}
+
+async function compare() {
+  const newLinksFile = await fs.readFile('./dogfart-links', 'utf8');
+  const oldLinksFile = await fs.readFile('./dogfart-got', 'utf8');
+
+  const newLinks = newLinksFile.split('\n').filter(Boolean);
+  const oldLinks = new Set(oldLinksFile.split('\n').filter(Boolean));
+
+  const getLinks = newLinks.filter((link) => !oldLinks.has(link)).map((link) => `https://dogfartnetwork.com/tour/sites${link}`);
+
+  await fs.writeFile('./dogfart-new', getLinks.join('\n'));
+
+  console.log(getLinks);
+}
+*/
+
+async function scrapeMembers() {
+  const titlesByPage = await Promise.map(Array.from({ length: 1 }), async (value, index) => {
+    const res = await qu.get(`https://sbj1e2bdv33.dogfartnetwork.com/members/blacksonboys/index.php?page=${index + 1}`);
+
+    if (res.ok) {
+      return qu.initAll(res.item.query.all('.scene-container')).map(({ query }) => ({
+        url: `https://www.dogfartnetwork.com/tour/sites/BlacksOnBoys/${query.img('.video-container img').match(/\/(\w+).jpg/)[1]}/`,
+        actors: query.contents('a[href*="model.php"]'),
+        trailer: query.urls('.trailer-link a').map((url) => ({
+          src: url,
+          quality: qualities[url.match(/_([a-z]+).mp4/)[1]],
+        })),
+      }));
+    }
+
+    return [];
+  }, { concurrency: 1 });
+
+  const urls = titlesByPage.flat().map((data) => JSON.stringify(data));
+
+  console.log(urls);
+
+  await fs.writeFile('./dogfart-blacksonboys', Array.from(new Set(urls)).join('\n'));
+
+  console.log(`Saved ${urls.length} URLs to file`);
+}
+
+scrapeMembers();
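The member scraper above writes one JSON-stringified object per line to ./dogfart-blacksonboys, which is the format the scenes-file change below is meant to accept. A small sketch of reading such a file back, assuming it was produced by scrapeMembers():

'use strict';

const fs = require('fs').promises;

async function readScrapedScenes() {
  const file = await fs.readFile('./dogfart-blacksonboys', 'utf8');

  // each non-empty line is a JSON object with url, actors and trailer fields
  return file.split('\n').filter(Boolean).map((line) => JSON.parse(line));
}

readScrapedScenes().then((scenes) => console.log(`${scenes.length} scenes`));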
@@ -8,7 +8,13 @@ async function getFileEntries(location) {
   }
 
   const file = await fs.promises.readFile(location, 'utf-8');
-  const entries = file.split(/\n/).map((entry) => entry.trim()).filter(Boolean);
+  const entries = file.split(/\n/).map((entry) => {
+    try {
+      return JSON.parse(entry);
+    } catch (error) {
+      return entry.trim();
+    }
+  }).filter(Boolean);
 
   return entries;
 }
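A self-contained sketch of the updated per-line behaviour, runnable on its own: lines that parse as JSON become objects, anything else falls back to a trimmed string, and blank lines are dropped. The function name, file name, and the guard preceding the read in the real function are simplified here.

'use strict';

const fs = require('fs');

async function getEntries(location) {
  const file = await fs.promises.readFile(location, 'utf-8');

  // JSON lines (e.g. output of the Dogfart utility) become objects,
  // plain lines such as bare URLs stay as trimmed strings
  return file.split(/\n/).map((entry) => {
    try {
      return JSON.parse(entry);
    } catch (error) {
      return entry.trim();
    }
  }).filter(Boolean);
}

// e.g. a scenes file mixing bare URLs and JSON lines
getEntries('./scenes').then((entries) => console.log(entries));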