// traxxx/src/scrapers/teamskeet.js
'use strict';
const format = require('template-format');
const qu = require('../utils/qu');
const http = require('../utils/http');
const slugify = require('../utils/slugify');
const { lbsToKg, feetInchesToCm } = require('../utils/convert');
2020-01-12 04:30:46 +00:00
/**
 * Resolve the slug of the channel a scene should be attributed to.
 *
 * @param {string} channelName - Site name reported for the scene.
 * @param {object} entity - Entity being scraped. When its `type` is 'channel' its own
 *   slug is returned; otherwise its `children` are searched.
 * @returns {string|null} The matching slug, or null when nothing matches.
 */
function getChannelSlug(channelName, entity) {
	if (entity.type === 'channel') {
		return entity.slug;
	}

	const channelSlug = slugify(channelName, '', { removePunctuation: true });

	// An empty slug is contained in every string, so it would "match" the first
	// child regardless of what the scene actually belongs to.
	if (!channelSlug) {
		return null;
	}

	// Substring search instead of new RegExp(): the slug is data, not a pattern.
	const channel = entity.children?.find((child) => child.slug?.includes(channelSlug));

	return channel?.slug || null;
}
/**
 * Map a scene document onto a release object.
 *
 * @param {object} scene - Scene document; passed in from search hits, direct lookups
 *   and an actor's `movies` list.
 * @param {object} channel - Entity the scene was requested for. A 'network' entity
 *   supplies the base URL itself, any other type uses `channel.parent.url`.
 * @returns {object} The release.
 */
function scrapeScene(scene, channel) {
	const release = {};

	release.entryId = scene.id;
	release.url = `${channel.type === 'network' ? channel.url : channel.parent.url}/movies/${release.entryId}`;

	release.title = scene.title;
	release.date = qu.extractDate(scene.publishedDate);

	// Stays undefined when the scene lists no models.
	release.actors = scene.models?.map((model) => ({
		name: model.modelName,
		avatar: `https://images.mylfcdn.net/tsv4/model/profiles/${slugify(model.modelName, '_')}.jpg`,
		url: `${channel.url}/models/www.mylf.com/models/${model.modelId}`,
	}));

	release.poster = [
		// The 'hi.jpg' variant of this image is not always from the same scene
		// (for example on Petite Teens 18), so the image is used as given.
		scene.img,
	];

	release.teaser = scene.videoTrailer;

	if (scene.video) {
		release.trailer = { stream: `https://videodelivery.net/${scene.video}/manifest/video.mpd` };
	}

	release.tags = scene.tags;

	// Not every document is guaranteed to carry stats or site data.
	release.likes = scene.stats?.likeCount;
	release.dislikes = scene.stats?.dislikeCount;

	const siteName = scene.site?.name || scene.site?.nickName;

	// Without a site name only a channel entity can be attributed unambiguously.
	release.channel = (siteName || channel.type === 'channel')
		? getChannelSlug(siteName, channel)
		: null;

	return release;
}
/**
 * Turn a list of search hits into releases.
 *
 * @param {object[]} scenes - Hits whose `_source` property holds the scene document.
 * @param {object} channel - Entity handed through to scrapeScene for every hit.
 * @returns {object[]} One scrapeScene result per hit, in the same order.
 */
function scrapeAll(scenes, channel) {
	const toRelease = (hit) => scrapeScene(hit._source, channel);

	return scenes.map(toRelease);
}
/**
 * Parse the "Key: value" lines of a free-text bio into an object keyed by the
 * slugified key. Lines that do not have that shape are skipped.
 */
function parseBioLines(about) {
	const entries = about.split(/\n/).filter(Boolean).flatMap((line) => {
		const match = line.match(/(.+): (.+)/);

		return match ? [[slugify(match[1], '_'), match[2].trim()]] : [];
	});

	return Object.fromEntries(entries);
}

/**
 * Interpret a piercings/tattoos entry: a yes/various or no/none answer only sets
 * the flag, anything else counts as present and is kept as the description.
 */
function resolveBodyModification(firstWord, description) {
	if (/yes|various/i.test(firstWord)) {
		return { present: true };
	}

	// Anchored so that descriptions such as "Nose" are not read as "no".
	if (/^(no|none)$/i.test(firstWord)) {
		return { present: false };
	}

	if (description) {
		return { present: true, description };
	}

	return {};
}

/**
 * Build an actor profile from an actor document.
 *
 * @param {object} actor - Actor document; reads `bio`, `img` and `movies`.
 * @param {object} entity - Entity handed through to scrapeScene for the actor's movies.
 * @returns {object} The profile; fields without data are left unset or undefined.
 */
function scrapeProfile(actor, entity) {
	const profile = {};
	const actorBio = actor.bio || {};
	const { about } = actorBio;

	// Bios containing the word "and" are not parsed as key/value lines.
	// NOTE(review): presumably those are prose descriptions — confirm.
	if (about && !/\band\b/.test(about)) {
		const bio = parseBioLines(about);

		// birthdate seems never/rarely correct
		if (bio.measurements) {
			profile.measurements = bio.measurements;
		} else {
			const breastSize = actorBio.breastSize?.match(/(\d+)(\w+)/)?.slice(1)
				|| about.match(/Measurements: (\d+)(\w+)/)?.slice(1);

			if (breastSize) {
				[profile.bust, profile.cup] = breastSize;
			}
		}

		profile.birthPlace = bio.birth_location;
		profile.nationality = bio.nationality;
		profile.ethnicity = bio.ethnicity;
		profile.hairColor = bio.hair_color;

		const piercings = resolveBodyModification(about.match(/Piercings: (\w+)/i)?.[1], bio.piercings);
		const tattoos = resolveBodyModification(about.match(/Tattoos: (\w+)/i)?.[1], bio.tattoos);

		if (piercings.present !== undefined) profile.hasPiercings = piercings.present;
		if (piercings.description) profile.piercings = piercings.description;

		if (tattoos.present !== undefined) profile.hasTattoos = tattoos.present;
		if (tattoos.description) profile.tattoos = tattoos.description;
	}

	const { heightFeet, heightInches } = actorBio;

	// Inches may legitimately be 0 (e.g. 5'0"), so only a missing value is rejected.
	if (heightFeet && heightInches != null && heightInches !== '') {
		profile.height = feetInchesToCm(heightFeet, heightInches);
	}

	if (actorBio.weight) {
		profile.weight = lbsToKg(actorBio.weight);
	}

	profile.avatar = actor.img;
	profile.scenes = actor.movies?.map((scene) => scrapeScene(scene, entity));

	return profile;
}
/**
 * Fetch one page of the most recently published scenes for a site.
 *
 * @param {object} channel - Entity handed through to scrapeAll.
 * @param {number} [page=1] - 1-based page number.
 * @param {object} options
 * @param {object} options.parameters - Reads `videos` (base URL of the video index)
 *   and `id` (value matched against `site.seo.seoSlug`).
 * @returns {Promise<object[]|number>} The scraped releases, or the response status
 *   when the request was not OK.
 */
async function fetchLatest(channel, page = 1, { parameters }) {
	// Single source of truth so `size` and the `from` offset cannot drift apart.
	const pageSize = 30;
	const offset = (page - 1) * pageSize;

	const res = await http.get(`${parameters.videos}/_search?q=site.seo.seoSlug:"${parameters.id}"&sort=publishedDate:desc&size=${pageSize}&from=${offset}`);

	if (res.ok) {
		return scrapeAll(res.body.hits.hits, channel);
	}

	return res.status;
}
/**
 * Fetch and scrape a single scene by its URL.
 *
 * @param {string} url - Scene URL; its last path segment is used as the lookup key.
 * @param {object} channel - Entity handed through to scrapeScene.
 * @param {object} [baseScene] - Scene already scraped from an overview, if any.
 * @param {object} options
 * @param {object} options.parameters - Reads `videos` (base URL of the video index).
 * @returns {Promise<object|number|null>} The release, the response status when the
 *   document could not be retrieved, or null when the URL holds no scene slug.
 */
async function fetchScene(url, channel, baseScene, { parameters }) {
	if (baseScene?.entryId) {
		// overview and deep data is the same, don't hit server unnecessarily
		return baseScene;
	}

	// Last path segment, tolerating a trailing slash.
	const sceneSlug = new URL(url).pathname.match(/\/([\w-]+)\/?$/)?.[1];

	if (!sceneSlug) {
		// Nothing to look up; avoids the TypeError an unmatched path used to raise.
		return null;
	}

	const res = await http.get(`${parameters.videos}/${sceneSlug}`);

	if (res.ok && res.body?.found) {
		return scrapeScene(res.body._source, channel);
	}

	return res.status;
}
/**
 * Fetch and scrape an actor profile.
 *
 * @param {object} baseActor - Reads `slug`, which is substituted into the profile URL template.
 * @param {object} options
 * @param {object} options.entity - Entity handed through to scrapeProfile.
 * @param {object} options.parameters - Reads `profiles` (URL template with a `slug` placeholder).
 * @returns {Promise<object|number>} The profile, or the response status when no
 *   profile document was returned.
 */
async function fetchProfile(baseActor, { entity, parameters }) {
	const url = format(parameters.profiles, { slug: baseActor.slug });

	// NOTE(review): the sibling fetchers use http.get for the same kind of
	// `_source` document — confirm qu.get exposes the parsed body as `res.body`.
	const res = await qu.get(url);

	// An OK response without a document would otherwise crash inside scrapeProfile.
	if (res.ok && res.body?._source) {
		return scrapeProfile(res.body._source, entity);
	}

	return res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
2020-01-12 04:30:46 +00:00
};