traxxx/src/scrapers/vogov.js

92 lines
2.7 KiB
JavaScript

'use strict';
const bhttp = require('bhttp');
const { ex, ctxa } = require('../utils/q');
// const slugify = require('../utils/slugify');
/**
 * Turn a latest-videos listing page into a list of release summaries.
 *
 * Every `.video-post` element yields one release carrying `entryId`, `title`,
 * `url`, `date`, `actors`, `poster` and `trailer`.
 *
 * @param {string} html - Markup of the listing page.
 * @returns {Object[]} One release object per `.video-post` element.
 */
function scrapeLatest(html) {
  const page = ex(html);
  const posts = ctxa(page.document, '.video-post');

  return posts.map((post) => {
    const { q: find, qa: findAll, qd: findDate } = post;

    // The site's own video ID is exposed on the favourite button's data attributes.
    const entryId = find('.ico-fav-0').dataset.favVideoId;

    // The title link carries both the scene title and its URL.
    const { title, href } = find('.video-title-title');

    const date = findDate('.video-data em', 'MMM DD, YYYY');
    const actors = findAll('.video-model-list a', true);

    // The thumbnail's data attributes hold the poster image and a preview clip.
    const { src, preview } = find('img.thumb').dataset;

    return {
      entryId,
      title,
      url: href,
      date,
      actors,
      poster: src,
      trailer: preview,
    };
  });
}
/**
 * Locate a poster URL in the raw page source.
 *
 * Looks for the first `preview_url` occurrence and returns the text running
 * from the next `http` up to and including the following `.jpg`.
 *
 * @param {string} html - Raw markup of the scene page.
 * @returns {?string} The poster URL, or null when any of the markers is missing.
 */
function extractInlinePoster(html) {
  const previewStart = html.indexOf('preview_url');

  if (previewStart === -1) {
    return null;
  }

  const urlStart = html.indexOf('http', previewStart);

  if (urlStart === -1) {
    return null;
  }

  const extensionStart = html.indexOf('.jpg', urlStart);

  if (extensionStart === -1) {
    return null;
  }

  return html.slice(urlStart, extensionStart + '.jpg'.length);
}

/**
 * Parse a scene page into a release object.
 *
 * @param {string} html - Raw markup of the scene page.
 * @param {string} url - URL the page was fetched from; stored on the release as-is.
 * @returns {Object} Release with `url`, `entryId`, `title`, `description`, `date`,
 *   `duration`, `actors`, `tags`, `photos` and `poster`.
 * @throws {TypeError} When the canonical link is missing or its URL contains no digits.
 */
function scrapeScene(html, url) {
  const { q, qa, qd, qu, ql, qm } = ex(html);
  const release = { url };

  // The first run of digits in the canonical URL serves as the entry ID.
  [release.entryId] = q('link[rel="canonical"]').href.match(/\d+/);

  // Prefer Open Graph metadata, falling back to the visible page content.
  release.title = qm('meta[property="og:title"]') || q('.video-page-header h1', true);
  release.description = qm('meta[property="og:description"]') || q('.info-video-description', true);

  release.date = qd('.info-video-details li:first-child span', 'MMM DD, YYYY');
  release.duration = ql('.info-video-details li:nth-child(2) span');

  release.actors = qa('.info-video-models a', true);
  release.tags = qa('.info-video-category a', true);

  // Some photo links carry a stray slash after the extension; strip it.
  release.photos = qu('.swiper-wrapper .swiper-slide a').map(source => source.replace('.jpg/', '.jpg'));

  release.poster = qm('meta[property="og:image"]');

  if (!release.poster) {
    // Without an og:image tag, fall back to the `preview_url` found in the raw
    // markup, and only when a complete URL can actually be located there.
    const inlinePoster = extractInlinePoster(html);

    if (inlinePoster) {
      release.poster = inlinePoster;
    }
  }

  // NOTE: trailer extraction is disabled. The previous approach sliced the URL
  // following `video_url` (up to `.mp4`) out of the raw markup and appended a
  // `/?rnd=<timestamp>` suffix.

  return release;
}
/**
 * Fetch one page of the latest-videos listing and scrape its releases.
 *
 * @param {Object} site - Site descriptor; forwarded to `scrapeLatest` as its second argument.
 * @param {number} [page=1] - Value for the listing URL's `from` query parameter.
 * @returns {Promise<?Object[]>} Scraped releases, or null when the response status is not 200.
 */
async function fetchLatest(site, page = 1) {
  const response = await bhttp.get(`https://vogov.com/latest-videos/?sort_by=post_date&from=${page}`);

  if (response.statusCode !== 200) {
    return null;
  }

  const markup = response.body.toString();

  return scrapeLatest(markup, site);
}
/**
 * Fetch a scene page and scrape it into a release.
 *
 * @param {string} url - Address of the scene page.
 * @returns {Promise<?Object>} The scraped release, or null when the response status is not 200.
 */
async function fetchScene(url) {
  const response = await bhttp.get(url);
  const succeeded = response.statusCode === 200;

  return succeeded
    ? scrapeScene(response.body.toString(), url)
    : null;
}
// Public scraper interface: the listing-page fetcher and the scene-page fetcher.
module.exports = { fetchLatest, fetchScene };