Added Team Skeet scraper.

This commit is contained in:
2020-01-12 05:30:46 +01:00
parent bce2c73cbe
commit 97cb4a061e
46 changed files with 352 additions and 8 deletions

View File

@@ -4,6 +4,7 @@
const Promise = require('bluebird');
const bhttp = require('bhttp');
const logger = require('../logger');
const slugify = require('../utils/slugify');
async function fetchToken(site) {
@@ -30,12 +31,17 @@ async function fetchActors(entryId, site, { token, time }) {
async function fetchTrailerLocation(entryId, site) {
const url = `${site.url}/api/download/${entryId}/hd1080/stream`;
const res = await bhttp.get(url, {
followRedirects: false,
});
if (res.statusCode === 302) {
return res.headers.location;
try {
const res = await bhttp.get(url, {
followRedirects: false,
});
if (res.statusCode === 302) {
return res.headers.location;
}
} catch (error) {
logger.warn(`${site.name}: Unable to fetch trailer at '${url}': ${error.message}`);
}
return null;
@@ -72,11 +78,11 @@ function scrapeLatest(scenes, site, tokens) {
return Promise.map(scenes, async scene => scrapeScene(scene, site, tokens), { concurrency: 10 });
}
async function fetchLatest(site) {
async function fetchLatest(site, page = 1) {
const { time, token } = await fetchToken(site);
// transitParameters[v1] includes _resources, [v2] includes photos, [preset] is mandatory
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=20&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
const url = `${site.url}/sapi/${token}/${time}/content.load?limit=50&offset=${(page - 1) * 50}&transitParameters[v1]=OhUOlmasXD&transitParameters[v2]=OhUOlmasXD&transitParameters[preset]=videos`;
const res = await bhttp.get(url);
if (res.statusCode === 200 && res.body.status) {

View File

@@ -15,6 +15,7 @@ const pervcity = require('./pervcity');
const privateNetwork = require('./private'); // reserved keyword
const naughtyamerica = require('./naughtyamerica');
const realitykings = require('./realitykings');
const teamskeet = require('./teamskeet');
const vixen = require('./vixen');
// releases and profiles
@@ -52,6 +53,7 @@ module.exports = {
private: privateNetwork,
naughtyamerica,
realitykings,
teamskeet,
vixen,
xempire,
},

105
src/scrapers/teamskeet.js Normal file
View File

@@ -0,0 +1,105 @@
'use strict';
const bhttp = require('bhttp');
const { JSDOM } = require('jsdom');
const moment = require('moment');
/**
 * Build a human-readable title from a URL path.
 *
 * The first of the last two '/'-separated segments is taken as the slug,
 * split on underscores, and each piece gets its first character upper-cased
 * before the pieces are joined with single spaces.
 *
 * @param {string} pathname - URL path, e.g. '/a/b/some_scene_title/'.
 * @returns {string} The title-cased slug, e.g. 'Some Scene Title'.
 */
function extractTitle(pathname) {
    const capitalize = word => word.replace(/^./, initial => initial.toUpperCase());

    // With a trailing slash the last segment is empty, so the slug sits one before it.
    const [slug] = pathname.split('/').slice(-2);
    const words = slug.split('_');

    return words.map(capitalize).join(' ');
}
/**
 * Split a free-text actor listing into individual actor names.
 *
 * Names are separated by commas or by the standalone word "and". Entries
 * containing an ellipsis ("...") are dropped, as are entries that are empty
 * after trimming.
 *
 * @param {string} str - Actor listing, e.g. 'Jane Doe, Mary Sue and Ann Lee'.
 * @returns {string[]} Trimmed actor names in their original order.
 */
function extractActors(str) {
    return str
        // Word boundaries on both sides: only the whole word "and" is a separator,
        // so a name token that merely starts with "and" is left intact.
        .split(/,|\band\b/)
        .filter(actor => !/\.{3}/.test(actor))
        .map(actor => actor.trim())
        .filter(actor => actor.length > 0);
}
/**
 * Parse the "latest updates" listing HTML into release objects.
 *
 * Each `li.grey` / `li.white` item under `#updatesList` yields one release
 * with: site, entryId (first run of digits in the thumbnail's id), url,
 * title (derived from the URL path), date, poster, photos and actors.
 *
 * @param {string} html - Markup of the updates listing.
 * @param {Object} site - Site record, attached as-is to every release.
 * @returns {Object[]} One release object per listed scene.
 */
function scrapeLatest(html, site) {
    const { document } = new JSDOM(html).window;
    const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

    return scenes.map((scene) => {
        const release = { site };

        const link = scene.querySelector('.info a');
        const poster = scene.querySelector('img');

        // Read the href explicitly and supply a base so a relative link resolves
        // instead of making the URL constructor throw; absolute links ignore the base.
        const { pathname } = new URL(link.href, 'https://www.teamskeet.com');

        [release.entryId] = poster.id.match(/\d+/);

        release.url = `https://www.teamskeet.com${pathname}`;
        release.title = extractTitle(pathname);

        release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

        // Derive image URLs 01.jpg..05.jpg from the thumbnail's data-original URL.
        // The dot is escaped so only a literal '<digits>.jpg' file name is replaced.
        const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));

        // The first derived image serves as the poster, the remaining four as photos.
        [release.poster] = photos;
        release.photos = photos.slice(1);

        const actors = scene.querySelector('div span[rel="test"]').textContent;
        release.actors = extractActors(actors);

        return release;
    });
}
/**
 * Parse a single scene page into a release object.
 *
 * The page <title> is read as three '|'-separated parts: actors, scene title
 * and channel. Poster and trailer are optional and only set when the
 * corresponding element and attribute are present.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site record, attached as-is to the release.
 * @returns {Object} Release with site, entryId, description, title, actors,
 *   channel, tags, date, photos and, when available, poster and trailer.
 */
function scrapeScene(html, site) {
    const { document } = new JSDOM(html).window;
    const release = { site };

    release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
    release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

    const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

    release.title = title;
    release.actors = extractActors(actors);
    release.channel = channel.toLowerCase();

    // Tag names are carried in the rel attribute of the tag links.
    release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

    // The date text follows the first colon of the matched element's text.
    const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
    release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

    // Look the element up first so a page without <video> skips the poster
    // instead of throwing before the guard below can run.
    const video = document.querySelector('video');
    const poster = video && video.poster;

    // Posters whose URL contains 'gen' are skipped (presumably generic placeholders — TODO confirm).
    if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

    // Dot escaped so only a literal '<name>.jpg' file name is taken as the site id.
    const siteId = document.querySelector('#story-and-tags img').src.match(/\w+\.jpg/)[0].replace('.jpg', '');
    const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

    release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

    // Same reasoning as the poster: a missing trailer link must not throw.
    const trailerLink = document.querySelector('div.right.gray a');
    const trailer = trailerLink && trailerLink.href;

    if (trailer) release.trailer = { src: trailer };

    return release;
}
/**
 * Fetch one page of the newest updates for a site and scrape it.
 *
 * @param {Object} site - Site record; `site.parameters.id` is used as the site filter.
 * @param {number} [page=1] - Page number passed to the listing endpoint.
 * @returns {Promise<Object[]|null>} Scraped releases, or null when the
 *   response status is not 200.
 */
async function fetchLatest(site, page = 1) {
    const response = await bhttp.get(`https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`);

    if (response.statusCode !== 200) {
        return null;
    }

    const html = response.body.toString();

    return scrapeLatest(html, site);
}
/**
 * Fetch a scene page and scrape it into a release object.
 *
 * @param {string} url - URL of the scene page.
 * @param {Object} site - Site record, forwarded to the scraper.
 * @returns {Promise<Object>} The scraped release.
 */
async function fetchScene(url, site) {
    // The request goes through a bhttp session; per the original author's note
    // this is what lets redirects resolve.
    const { body } = await bhttp.session().get(url);
    const html = body.toString();

    return scrapeScene(html, site);
}
// Public scraper interface: the listing fetcher and the single-scene fetcher.
module.exports = {
fetchLatest,
fetchScene,
};

View File

@@ -1,5 +1,6 @@
'use strict';
const logger = require('./logger');
const knex = require('./knex');
const whereOr = require('./utils/where-or');
@@ -58,7 +59,7 @@ async function matchTags(rawTags) {
async function associateTags(release, releaseId) {
if (!release.tags || release.tags.length === 0) {
console.warn(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
logger.info(`No tags available for (${release.site.name}, ${releaseId}) "${release.title}"`);
return;
}