forked from DebaucheryLibrarian/traxxx
Added profile scraping and Hoby Buchanon to Team Skeet.
This commit is contained in:
@@ -232,6 +232,7 @@ module.exports = {
|
||||
silverstonedvd: famedigital,
|
||||
silviasaint: famedigital,
|
||||
swallowed: mikeadriano,
|
||||
teamskeet,
|
||||
teencoreclub,
|
||||
thatsitcomshow: nubiles,
|
||||
transangels,
|
||||
|
||||
@@ -1,180 +1,155 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
function extractTitle(pathname) {
|
||||
return pathname
|
||||
.split('/')
|
||||
.slice(-2)[0]
|
||||
.split('_')
|
||||
.map(seg => `${seg.charAt(0).toUpperCase()}${seg.slice(1)}`)
|
||||
.join(' ');
|
||||
}
|
||||
|
||||
function extractActors(str) {
|
||||
return str
|
||||
.split(/,|\band\b/ig)
|
||||
.filter(actor => !/\.{3}/.test(actor))
|
||||
.map(actor => actor.trim())
|
||||
.filter(actor => actor.length > 0);
|
||||
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));
|
||||
const qu = require('../utils/qu');
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
|
||||
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map((scene) => {
|
||||
const release = { site };
|
||||
const release = {};
|
||||
|
||||
const link = scene.querySelector('.info a');
|
||||
const poster = scene.querySelector('img');
|
||||
const { pathname } = new URL(link);
|
||||
release.entryId = scene.id;
|
||||
release.url = `https://teamskeet.com/movies/${release.entryId}`;
|
||||
|
||||
[release.entryId] = poster.id.match(/\d+/);
|
||||
release.title = scene.title;
|
||||
release.date = qu.extractDate(scene.publishedDate);
|
||||
|
||||
release.url = `https://www.teamskeet.com${pathname}`;
|
||||
release.title = extractTitle(pathname);
|
||||
release.actors = scene.models?.map(model => model.modelName) || [];
|
||||
|
||||
release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();
|
||||
release.poster = [
|
||||
scene.img.replace('med.jpg', 'hi.jpg'),
|
||||
scene.img,
|
||||
];
|
||||
|
||||
const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));
|
||||
[release.poster] = photos;
|
||||
release.photos = photos.slice(1);
|
||||
release.teaser = scene.videoTrailer;
|
||||
|
||||
const actors = scene.querySelector('div span[rel="test"]').textContent;
|
||||
release.actors = extractActors(actors);
|
||||
if (scene.video) {
|
||||
release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` };
|
||||
}
|
||||
|
||||
release.likes = scene.stats.likeCount;
|
||||
release.dislikes = scene.stats.dislikeCount;
|
||||
|
||||
release.channel = slugify(scene.site.name, '')
|
||||
.replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene(html, site, url) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
const release = { site };
|
||||
function scrapeScene(scene) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
|
||||
release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;
|
||||
const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());
|
||||
release.entryId = scene.id;
|
||||
|
||||
release.url = url;
|
||||
release.title = title;
|
||||
release.actors = extractActors(actors);
|
||||
release.channel = channel.toLowerCase();
|
||||
release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);
|
||||
release.title = scene.title;
|
||||
release.description = scene.description;
|
||||
|
||||
const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
|
||||
release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();
|
||||
release.date = qu.extractDate(scene.publishedDate);
|
||||
release.actors = scene.models?.map(model => model.modelName) || [];
|
||||
|
||||
const { poster } = document.querySelector('video');
|
||||
if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];
|
||||
release.poster = [
|
||||
scene.img.replace('med.jpg', 'hi.jpg'),
|
||||
scene.img,
|
||||
];
|
||||
|
||||
const siteId = document.querySelector('#story-and-tags img').src.match(/\w+.jpg/)[0].replace('.jpg', '');
|
||||
const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];
|
||||
release.channel = slugify(scene.site.name, '')
|
||||
.replace('hobybuchanon', 'tshobybuchanon'); // slug collision with his own site
|
||||
|
||||
release.photos = Array.from({ length: 5 }, (value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);
|
||||
|
||||
const trailer = document.querySelector('div.right.gray a').href;
|
||||
if (trailer) release.trailer = { src: trailer };
|
||||
if (scene.video) {
|
||||
release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` };
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeSceneA(html, site, sceneX, url) {
|
||||
const scene = sceneX || new JSDOM(html).window.document;
|
||||
const release = { site };
|
||||
function scrapeProfile(actor) {
|
||||
const profile = {};
|
||||
|
||||
release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();
|
||||
if (actor.bio.about) {
|
||||
// birthdate seems never/rarely correct
|
||||
const measurements = actor.bio.about.match(/Measurements: (\d+)(\w+)-(\d+)-(\d+)/i);
|
||||
|
||||
release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
|
||||
release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();
|
||||
if (measurements) {
|
||||
[profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1);
|
||||
} else {
|
||||
const breastSize = actor.bio.breastSize?.match(/(\d+)(\w+)/)?.slice(1) || actor.bio.about.match(/Measurements: (\d+)(\w+)/)?.slice(1);
|
||||
|
||||
const durationString = scene.querySelector('.time').textContent.trim();
|
||||
const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
|
||||
release.duration = moment.duration(duration).asSeconds();
|
||||
if (breastSize) {
|
||||
[profile.bust, profile.cup] = breastSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (sceneX) {
|
||||
const titleEl = scene.querySelector(':scope > a');
|
||||
profile.nationality = actor.bio.about.match(/Nationality: (\w+)/i)?.[1];
|
||||
profile.ethnicity = actor.bio.about.match(/Ethnicity: (\w+)/i)?.[1];
|
||||
profile.hairColor = actor.bio.about.match(/Hair Color: (\w+)/i)?.[1];
|
||||
|
||||
release.url = titleEl.href;
|
||||
release.entryId = titleEl.id;
|
||||
release.title = titleEl.title;
|
||||
const piercings = actor.bio.about.match(/Piercings: (\w+)/i)?.[1];
|
||||
const tattoos = actor.bio.about.match(/Tattoos: (\w+)/i)?.[1];
|
||||
|
||||
const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
|
||||
release.poster = [poster.replace('bio_big', 'video'), poster];
|
||||
release.photos = photos;
|
||||
if (slugify(piercings) === 'yes') profile.hasPiercings = true;
|
||||
if (slugify(piercings) === 'no') profile.hasPiercings = false;
|
||||
|
||||
if (slugify(tattoos) === 'yes') profile.hasTattoos = true;
|
||||
if (slugify(tattoos) === 'no') profile.hasTattoos = false;
|
||||
}
|
||||
|
||||
if (!sceneX) {
|
||||
release.title = scene.querySelector('.title span').textContent;
|
||||
release.url = url;
|
||||
|
||||
release.poster = scene.querySelector('video').poster;
|
||||
release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
|
||||
if (actor.bio.heightFeet && actor.bio.heightInches) {
|
||||
profile.height = feetInchesToCm(actor.bio.heightFeet, actor.bio.heightInches);
|
||||
}
|
||||
|
||||
const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
|
||||
release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;
|
||||
if (actor.bio.weight) {
|
||||
profile.weight = lbsToKg(actor.bio.weight);
|
||||
}
|
||||
|
||||
return release;
|
||||
profile.avatar = actor.img;
|
||||
profile.releases = scrapeAll(actor.movies);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
function scrapeLatestA(html, site) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
async function fetchLatest(channel, _page = 1) {
|
||||
// freshman year, layna landry
|
||||
if (!channel.parameters?.id) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const scenes = Array.from(document.querySelectorAll('.scenewrapper'));
|
||||
const url = `https://store.psmcdn.net/ts-organic-iiiokv9kyo/seriesContent/${channel.parameters.id}/latestMovies.json`;
|
||||
const res = await http.get(url);
|
||||
|
||||
return scenes.map(scene => scrapeSceneA(null, site, scene));
|
||||
if (res.ok) {
|
||||
return scrapeAll(Object.values(res.body), channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchLatestTeamSkeet(site, page = 1) {
|
||||
const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
|
||||
const res = await bhttp.get(url);
|
||||
async function fetchScene(url, channel) {
|
||||
const entryId = new URL(url).pathname.match(/\/movies\/(.+)$/)[1];
|
||||
const apiUrl = `https://store.psmcdn.net/ts-organic-iiiokv9kyo/videosContent/${entryId}.json`;
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatest(res.body.toString(), site);
|
||||
const res = await http.get(apiUrl);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.body, channel);
|
||||
}
|
||||
|
||||
return null;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchLatestA(site) {
|
||||
const url = `${site.url}/scenes`;
|
||||
const res = await bhttp.get(url);
|
||||
async function fetchProfile(baseActor) {
|
||||
const res = await http.get(`https://store.psmcdn.net/ts-organic-iiiokv9kyo/modelsContent/${slugify(baseActor.name)}.json`);
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeLatestA(res.body.toString(), site);
|
||||
if (res.ok && res.body) {
|
||||
return scrapeProfile(res.body);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
if (site.parameters.id) {
|
||||
return fetchLatestTeamSkeet(site, page);
|
||||
}
|
||||
|
||||
if (site.parameters.scraper === 'A') {
|
||||
return fetchLatestA(site, page);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const session = bhttp.session(); // resolve redirects
|
||||
const res = await session.get(url);
|
||||
|
||||
if (site.parameters?.scraper === 'A') {
|
||||
return scrapeSceneA(res.body.toString(), site, null, url);
|
||||
}
|
||||
|
||||
return scrapeScene(res.body.toString(), site, url);
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
|
||||
180
src/scrapers/teamskeet_legacy.js
Normal file
180
src/scrapers/teamskeet_legacy.js
Normal file
@@ -0,0 +1,180 @@
|
||||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Build a human-readable title from a URL pathname.
 *
 * Takes the second-to-last `/`-separated segment of `pathname`, splits it on
 * underscores and upper-cases the first character of every word.
 *
 * @param {string} pathname - URL path, e.g. `/a/b/some_title/123`.
 * @returns {string} The words of the slug joined by single spaces, e.g. `Some Title`.
 */
function extractTitle(pathname) {
	// the title slug is the penultimate segment; whatever follows it is ignored
	const [slug] = pathname.split('/').slice(-2);

	return slug
		.split('_')
		.map(word => `${word.charAt(0).toUpperCase()}${word.slice(1)}`)
		.join(' ');
}
|
||||
|
||||
/**
 * Split a free-text cast string into individual actor names.
 *
 * The string is split on commas and on the standalone word "and"
 * (case-insensitive). Fragments containing three consecutive dots are
 * discarded (presumably truncated names), the rest are trimmed and empty
 * fragments are removed.
 *
 * @param {string} str - Cast string, e.g. `Jane Doe, Mary Sue and Ann Lee`.
 * @returns {string[]} Trimmed, non-empty actor names in their original order.
 */
function extractActors(str) {
	const fragments = str.split(/,|\band\b/ig);

	return fragments
		.filter(fragment => !/\.{3}/.test(fragment))
		.map(fragment => fragment.trim())
		.filter(name => name.length > 0);
}
|
||||
|
||||
/**
 * Scrape the releases listed on an updates page.
 *
 * Every `li.grey` / `li.white` item inside `#updatesList` becomes one release.
 *
 * @param {string} html - Markup of the updates listing.
 * @param {object} site - Site the listing belongs to; stored as-is on each release.
 * @returns {object[]} Releases with `site`, `entryId`, `url`, `title`, `date`,
 *   `poster`, `photos` and `actors`.
 * @throws {TypeError} When an expected element is missing from a list item.
 */
function scrapeLatest(html, site) {
	const { document } = new JSDOM(html).window;
	const scenes = Array.from(document.querySelectorAll('#updatesList li.grey, #updatesList li.white'));

	return scenes.map((scene) => {
		const release = { site };

		const link = scene.querySelector('.info a');
		const poster = scene.querySelector('img');
		// read the href explicitly instead of relying on the anchor being stringified
		const { pathname } = new URL(link.href);

		// the numeric part of the poster element's id serves as the entry ID
		[release.entryId] = poster.id.match(/\d+/);

		release.url = `https://www.teamskeet.com${pathname}`;
		release.title = extractTitle(pathname);

		release.date = moment.utc(scene.querySelector('strong').textContent, 'MM/DD/YYYY').toDate();

		// derive 01.jpg through 05.jpg from the lazy-load image URL; the first doubles as poster
		const photos = Array.from({ length: 5 }, (_value, index) => poster.dataset.original.replace(/\d+\.jpg/, `${String(index + 1).padStart(2, '0')}.jpg`));

		[release.poster] = photos;
		release.photos = photos.slice(1);

		const actors = scene.querySelector('div span[rel="test"]').textContent;
		release.actors = extractActors(actors);

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape a single scene page.
 *
 * @param {string} html - Markup of the scene page.
 * @param {object} site - Site the scene belongs to; stored as-is on the release.
 * @param {string} url - URL the page was fetched from; stored as `release.url`.
 * @returns {object} Release with `site`, `entryId`, `description`, `url`,
 *   `title`, `actors`, `channel`, `tags`, `date`, `photos` and, when present
 *   on the page, `poster` and `trailer`.
 * @throws {TypeError} When a required element is missing from the page.
 */
function scrapeScene(html, site, url) {
	const { document } = new JSDOM(html).window;
	const release = { site };

	release.entryId = document.querySelector('#story-and-tags .scene_rater').attributes.rel.value;
	release.description = document.querySelector('#story-and-tags td:nth-child(2) div').textContent;

	// the page <title> is split on '|' into actors, scene title and channel, in that order
	const [actors, title, channel] = document.querySelector('title').textContent.split('|').map(item => item.trim());

	release.url = url;
	release.title = title;
	release.actors = extractActors(actors);
	release.channel = channel.toLowerCase();
	release.tags = Array.from(document.querySelectorAll('#story-and-tags tr:nth-child(2) a'), el => el.rel);

	// the date is whatever follows the first ':' in the matched element's text
	const date = document.querySelector('h3 ~ div:nth-child(4), h3 ~ div div.gray:not(.scene_rater)').textContent.split(':')[1].trim();
	release.date = moment.utc(date, 'MMMM Do, YYYY').toDate();

	// poster is optional: tolerate a missing <video> instead of throwing before the guard
	const poster = document.querySelector('video')?.poster;
	// posters containing "gen" are skipped (presumably generic placeholders)
	if (poster && !/gen/.test(poster)) release.poster = [poster.replace('low', 'hi'), poster];

	const siteId = document.querySelector('#story-and-tags img').src.match(/\w+\.jpg/)[0].replace('.jpg', '');
	const actorsSlug = document.querySelector('h3 a').href.split('/').slice(-2)[0];

	release.photos = Array.from({ length: 5 }, (_value, index) => `https://images.psmcdn.net/teamskeet/${siteId}/${actorsSlug}/shared/scenes/new/${String(index + 1).padStart(2, '0')}.jpg`);

	// trailer is optional: tolerate a missing link instead of throwing before the guard
	const trailer = document.querySelector('div.right.gray a')?.href;
	if (trailer) release.trailer = { src: trailer };

	return release;
}
|
||||
|
||||
/**
 * Scrape a scene for sites using the 'A' layout, either from a listing item
 * or from a full scene page.
 *
 * @param {?string} html - Markup of a scene page; only parsed when `sceneX` is not given.
 * @param {object} site - Site the scene belongs to; stored as-is on the release.
 * @param {?Element} sceneX - Listing element wrapping one scene; when given, `html` and `url` are ignored.
 * @param {string} [url] - URL of the scene page; used only when `sceneX` is not given.
 * @returns {object} Release with `site`, `description`, `date`, `actors`,
 *   `duration` (seconds), `url`, `title`, `poster`, `photos` and `entryId`.
 * @throws {TypeError} When an expected element is missing.
 */
function scrapeSceneA(html, site, sceneX, url) {
	const scene = sceneX || new JSDOM(html).window.document;
	const release = { site };

	release.description = scene.querySelector('.scene-story').textContent.replace('...read more', '...').trim();

	release.date = moment.utc(scene.querySelector('.scene-date').textContent, 'MM/DD/YYYY').toDate();
	release.actors = Array.from(scene.querySelectorAll('.starring span'), el => extractActors(el.textContent)).flat();

	const durationString = scene.querySelector('.time').textContent.trim();
	const duration = ['00'].concat(durationString.split(':')).slice(-3).join(':'); // ensure hh:mm:ss
	release.duration = moment.duration(duration).asSeconds();

	if (sceneX) {
		// listing item: URL and title come from the wrapping anchor
		const titleEl = scene.querySelector(':scope > a');

		release.url = titleEl.href;
		release.entryId = titleEl.id; // overwritten below by the ID taken from the URL
		release.title = titleEl.title;

		const [poster, ...photos] = Array.from(scene.querySelectorAll('.scene img'), el => el.src);
		release.poster = [poster.replace('bio_big', 'video'), poster];
		release.photos = photos;
	} else {
		// full scene page: title and poster come from the page itself
		release.title = scene.querySelector('.title span').textContent;
		release.url = url;

		release.poster = scene.querySelector('video').poster;
		release.photos = [release.poster.replace('video', 'bio_small'), release.poster.replace('video', 'bio_small2')];
	}

	// the entry ID is the first path segment, or the second when the first is 'scenes'
	const [, entryIdA, entryIdB] = new URL(release.url).pathname.split('/');
	release.entryId = entryIdA === 'scenes' ? entryIdB : entryIdA;

	return release;
}
|
||||
|
||||
/**
 * Scrape the scene listing of a site using the 'A' layout.
 *
 * @param {string} html - Markup of the listing page.
 * @param {object} site - Site the listing belongs to; passed through to each release.
 * @returns {object[]} One release per `.scenewrapper` element, as produced by `scrapeSceneA`.
 */
function scrapeLatestA(html, site) {
	const { document } = new JSDOM(html).window;
	const scenes = Array.from(document.querySelectorAll('.scenewrapper'));

	// html is null because each wrapper element is handed over directly
	return scenes.map(scene => scrapeSceneA(null, site, scene));
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the newest updates for a site identified by
 * `site.parameters.id`.
 *
 * @param {object} site - Site to fetch; `site.parameters.id` is used as the site filter.
 * @param {number} [page=1] - Page of the listing to request.
 * @returns {Promise<?object[]>} Scraped releases, or null when the response status is not 200.
 */
async function fetchLatestTeamSkeet(site, page = 1) {
	const url = `https://www.teamskeet.com/t1/updates/load?fltrs[site]=${site.parameters.id}&page=${page}&view=newest&fltrs[time]=ALL&order=DESC`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatest(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch and scrape the `/scenes` listing of a site using the 'A' layout.
 *
 * @param {object} site - Site to fetch; `site.url` is the base of the listing URL.
 * @returns {Promise<?object[]>} Scraped releases, or null when the response status is not 200.
 */
async function fetchLatestA(site) {
	const url = `${site.url}/scenes`;
	const res = await bhttp.get(url);

	if (res.statusCode === 200) {
		return scrapeLatestA(res.body.toString(), site);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch the latest releases for a site, choosing the scraper from the site's parameters.
 *
 * Sites with `parameters.id` use the Team Skeet updates endpoint; sites with
 * `parameters.scraper === 'A'` use the 'A' layout scraper.
 *
 * @param {object} site - Site to fetch; `site.parameters` may be absent.
 * @param {number} [page=1] - Page of the listing to request.
 * @returns {Promise<?object[]>} Scraped releases, or null when no scraper
 *   applies or the selected fetcher returns null.
 */
async function fetchLatest(site, page = 1) {
	// optional chaining: a site without parameters yields null instead of a TypeError
	if (site.parameters?.id) {
		return fetchLatestTeamSkeet(site, page);
	}

	if (site.parameters?.scraper === 'A') {
		return fetchLatestA(site, page);
	}

	return null;
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it with the scraper matching the site's layout.
 *
 * @param {string} url - URL of the scene page.
 * @param {object} site - Site the scene belongs to; `site.parameters.scraper === 'A'`
 *   selects the 'A' layout scraper.
 * @returns {Promise<object>} The scraped release.
 */
async function fetchScene(url, site) {
	const session = bhttp.session(); // resolve redirects
	const res = await session.get(url);

	// NOTE(review): unlike the fetchLatest* helpers, the response status is not checked here
	const html = res.body.toString();

	if (site.parameters?.scraper === 'A') {
		return scrapeSceneA(html, site, null, url);
	}

	return scrapeScene(html, site, url);
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
||||
Reference in New Issue
Block a user