traxxx/src/scrapers/porndoe.js

146 lines
4.5 KiB
JavaScript

'use strict';
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
/**
 * Build release summaries from a list of scene tile contexts.
 *
 * @param {Array<{ query: Object }>} scenes - Tile contexts; only `query` is read from each.
 * @returns {Object[]} One release per tile, in input order, with `url`, `entryId`, `title`,
 *   `date`, `actors`, `teaser`, `channel` and, when a thumbnail is found, `poster`.
 *   `entryId` is `null` when the tile link is missing or has no `/watch/<digits>` segment.
 */
function scrapeAll(scenes) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('a');

    // Tiles without a usable scene link get a null ID rather than throwing,
    // so one odd tile does not abort the whole listing.
    const entryIdMatch = typeof release.url === 'string'
      ? release.url.match(/\/watch\/(\d+)/)
      : null;
    release.entryId = entryIdMatch ? entryIdMatch[1] : null;

    // Fall back to the link's title attribute when the title element has no content.
    release.title = query.cnt('[class*="item-title"] a') || query.q('.bottom .link', 'title');
    release.date = query.date('[class*="item-date"]', 'MMM DD, YYYY');

    release.actors = query.all('[class*="item-actors"] a').map((el) => ({
      name: query.cnt(el),
      url: query.url(el, null),
    }));

    // The thumbnail address is read from the data-bg attribute.
    const poster = query.img('[class*="thumb"]', 'data-bg');

    if (poster) {
      // Candidate list: the 1472x828 variant of the address first, then the address as found.
      release.poster = [
        poster.replace('512x288', '1472x828'),
        poster,
      ];
    }

    release.teaser = query.video('video-preview');
    release.channel = slugify(query.q('[class*="item-channel"] a', 'title'), '');

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object }} context - Page context; only `query` is read.
 * @param {string} url - Scene URL; the entry ID is the digits following `/watch/`.
 * @returns {Object} Release with title, entryId, description, date, actors, duration,
 *   tags, poster, photos, trailer and channel.
 * @throws {TypeError} When `url` contains no `/watch/<digits>` segment.
 */
function scrapeScene({ query }, url) {
  // Turns an actor link element into a { name, url } pair.
  const toActor = (actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null),
  });

  // Each gallery thumbnail becomes [1472x828 variant, address as found].
  const toPhotoSources = (thumb) => [thumb.replace('512x288', '1472x828'), thumb];

  const title = query.q('h1', true);

  const idMatch = url.match(/\/watch\/(\d+)/);
  const entryId = idMatch[1];

  const description = query.meta('name=description') || query.q('read-even-more', true);
  const date = query.date('.h5-published', 'MMM DD, YYYY', /\w{3} \d{1,2}, \d{4}/);

  const actorLinks = query.all('.video-top-details .actors a[href*="/models"]');
  const actors = actorLinks.map((actorEl) => toActor(actorEl));

  const duration = query.dur('meta[itemprop="duration"]', null, 'content');
  const tags = query.all('.video-top-details a[href*="/categories"], .video-top-details a[href*="/tags"]', true);
  const poster = query.img('.poster img') || query.meta('itemprop=thumbnailUrl');

  // The first gallery image duplicates the poster, so it is dropped.
  const galleryThumbs = query.imgs('#gallery-thumbs [class*="thumb"]', 'data-bg');
  const photos = galleryThumbs.slice(1).map((thumb) => toPhotoSources(thumb));

  const trailer = query.meta('itemprop=contentURL');

  const channelName = query.q('.video-top-details .actors a[href*="/channels"] strong', true);
  const channel = slugify(channelName, '');

  return {
    title,
    entryId,
    description,
    date,
    actors,
    duration,
    tags,
    poster,
    photos,
    trailer,
    channel,
  };
}
/**
 * Collect an actor's releases across paginated listing pages.
 *
 * Scrapes the tiles on the given page, then keeps requesting `${url}?page=<n>` for as long
 * as the current page has a link after the active pagination entry and the request succeeds.
 *
 * @param {{ query: Object }} context - Context of the first page to scrape.
 * @param {string} url - Listing URL that the page parameter is appended to.
 * @param {number} [page=1] - Number of the page represented by `context`.
 * @param {Object[]} [accReleases=[]] - Releases gathered so far; new ones are appended after them.
 * @returns {Promise<Object[]>} All gathered releases in page order.
 */
async function fetchActorReleases({ query }, url, page = 1, accReleases = []) {
  let pageQuery = query;
  let pageNumber = page;
  let collected = accReleases;

  for (;;) {
    const tiles = pageQuery.all('.main-content .card-video, .main-content .global-video-card');
    collected = collected.concat(scrapeAll(qu.initAll(tiles)));

    // No link after the active pagination entry: this was the last page.
    if (!pageQuery.exists('.pages a.active + a')) {
      return collected;
    }

    const res = await qu.get(`${url}?page=${pageNumber + 1}`);

    // A failed request ends pagination; everything gathered so far is still returned.
    if (!res.ok) {
      return collected;
    }

    ({ query: pageQuery } = res.item);
    pageNumber += 1;
  }
}
/**
 * Scrape an actor profile page.
 *
 * @param {{ query: Object }} context - Profile page context; only `query` is read.
 * @param {string} url - Profile URL, passed on when releases are fetched.
 * @param {{ releases: * }} include - When `include.releases` is truthy, the actor's releases
 *   are fetched and attached as `profile.releases`.
 * @returns {Promise<Object>} Profile with nationality, placeOfBirth, dateOfBirth, description
 *   and avatar, plus naturalBoobs, hairColor, hasTattoos, hasPiercings and releases when
 *   the page data or `include` call for them.
 */
async function scrapeProfile({ query }, url, include) {
  // Bio list items are keyed by the underscore-slugified text of the item,
  // with the content found under 'span' as the value.
  const bioEntries = query.all('[class*="list"] [class*="list-item"]').map((item) => [
    slugify(query.text(item), '_'),
    query.cnt(item, 'span'),
  ]);
  const bio = Object.fromEntries(bioEntries);

  // Selector text is kept flush-left so the string handed to cnts stays unchanged.
  const tags = query.cnts(`
[class$="description"] [class*="more-less"] a[href*="/categories"],
[class$="description"] [class*="more-less"] a[href*="/tags"],
[class*="seo-text"] a[href*="/categories"],
[class*="seo-text"] a[href*="/tags"]
`);

  const hasAnyTag = (...names) => names.some((name) => tags.includes(name));

  const profile = {
    nationality: bio.nationality,
    placeOfBirth: bio.birth_place,
    dateOfBirth: qu.extractDate(bio.birth_date, 'MMM D, YYYY'),
  };

  // Checked in this order so that 'natural' wins when both words appear.
  if (/enhanced/i.test(bio.tits_type)) {
    profile.naturalBoobs = false;
  }

  if (/natural/i.test(bio.tits_type)) {
    profile.naturalBoobs = true;
  }

  // Blonde is checked last, so it overrides brown when both tags are present.
  if (hasAnyTag('brunette', 'brown hair')) {
    profile.hairColor = 'brown';
  }

  if (hasAnyTag('blonde')) {
    profile.hairColor = 'blonde';
  }

  if (hasAnyTag('tattoo', 'tattoos')) {
    profile.hasTattoos = true;
  }

  if (hasAnyTag('piercing', 'piercings')) {
    profile.hasPiercings = true;
  }

  profile.description = query.text('[class$="description"] [class*="more-less"]');
  profile.avatar = query.img('[class*="poster"] img') || null;

  if (include.releases) {
    profile.releases = await fetchActorReleases({ query }, url);
  }

  return profile;
}
/**
 * Fetch one page of a channel's scene listing and scrape its tiles.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the listing address.
 * @param {number} [page=1] - Page number appended as the `page` query parameter.
 * @returns {Promise<Object[]|*>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(channel, page = 1) {
  const listingUrl = `${channel.url}?page=${page}`;
  const res = await qu.getAll(listingUrl, '.main-content .card-video, .main-content .global-video-card');

  if (!res.ok) {
    return res.status;
  }

  return scrapeAll(res.items, channel);
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene page address.
 * @param {Object} channel - Forwarded to the scene scraper as its third argument.
 * @returns {Promise<Object|*>} Scraped release, or the response status when the request failed.
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url, '.main-content');

  if (res.ok) {
    return scrapeScene(res.item, url, channel);
  }

  return res.status;
}
/**
 * Fetch and scrape an actor's profile page.
 *
 * The profile address is always built on letsdoeit.com from the slugified actor name;
 * `entity` is accepted but not read.
 *
 * @param {{ name: string }} actor - Actor whose `name` is slugified into the profile URL.
 * @param {Object} entity - Unused.
 * @param {Object} include - Forwarded to the profile scraper.
 * @returns {Promise<Object|*>} Scraped profile, or the response status when the request failed.
 */
async function fetchProfile({ name: actorName }, entity, include) {
  const actorSlug = slugify(actorName);
  const url = `http://letsdoeit.com/models/${actorSlug}.en.html`;
  const res = await qu.get(url);

  if (!res.ok) {
    return res.status;
  }

  return scrapeProfile(res.item, url, include);
}
// Functions exposed by this scraper module: listing fetch, scene fetch and profile fetch.
module.exports = { fetchLatest, fetchScene, fetchProfile };