Added basic actor and network overview. Added DDF Network actor scraper. Various bug fixes and layout improvements.

This commit is contained in:
2019-11-30 05:55:32 +01:00
parent 0dbe853f39
commit bead69de49
44 changed files with 1697 additions and 527 deletions

View File

@@ -3,9 +3,10 @@
const Promise = require('bluebird');
const bhttp = require('bhttp');
const cheerio = require('cheerio');
const { JSDOM } = require('jsdom');
const moment = require('moment');
const knex = require('../knex');
const { fetchSites } = require('../sites');
const { matchTags } = require('../tags');
const pluckPhotos = require('../utils/pluck-photos');
@@ -142,7 +143,7 @@ async function scrapeScene(html, url, site) {
const duration = moment.duration(data.duration.slice(2).split(':')).asSeconds();
const siteDomain = $('meta[name="twitter:domain"]').attr('content');
const siteId = siteDomain && siteDomain.split('.')[0].toLowerCase();
const siteSlug = siteDomain && siteDomain.split('.')[0].toLowerCase();
const siteUrl = siteDomain && `https://www.${siteDomain}`;
const poster = videoData.picPreview;
@@ -152,14 +153,14 @@ async function scrapeScene(html, url, site) {
const rawTags = data.keywords.split(', ');
const [channelSite, tags] = await Promise.all([
const [[channelSite], tags] = await Promise.all([
site.isFallback
? knex('sites')
.where({ url: siteUrl })
.orWhere({ slug: siteId })
.first()
: site,
matchTags([...defaultTags[siteId], ...rawTags]),
? fetchSites({
url: siteUrl,
slug: siteSlug,
})
: [site],
matchTags([...defaultTags[siteSlug], ...rawTags]),
]);
return {
@@ -185,6 +186,31 @@ async function scrapeScene(html, url, site) {
};
}
/**
 * Finds the profile link for an actor on a search results page.
 *
 * @param {string} html - Markup of the search results page.
 * @param {string} url - Address the markup was fetched from (not used by this function).
 * @param {string} actorName - Name to look for; compared case-insensitively with link titles.
 * @returns {?string} The `href` of the first link whose title equals the name, or null when none matches.
 */
function scrapeActorSearch(html, url, actorName) {
  const { document } = new JSDOM(html).window;

  // Escape backslashes and double quotes so a name such as `Mary "MJ" Jane`
  // cannot terminate the quoted attribute value and produce an invalid selector.
  const escapedName = String(actorName).replace(/["\\]/g, '\\$&');
  const actorLink = document.querySelector(`a[title="${escapedName}" i]`);

  return actorLink ? actorLink.href : null;
}
/**
 * Builds an actor profile from the markup of a profile page.
 *
 * @param {string} html - Markup of the actor's profile page.
 * @param {string} url - Address the markup was fetched from (not used by this function).
 * @param {string} actorName - Name stored on the resulting profile.
 * @returns {{name: string, avatar: (string|undefined), description: (string|undefined), releases: string[]}}
 *   Profile with the name, the avatar source and trimmed bio text when those
 *   elements are present, and one absolute xempire.com URL per listed scene link.
 */
function scrapeProfile(html, url, actorName) {
  const dom = new JSDOM(html);
  const page = dom.window.document;
  const profile = { name: actorName };

  const picture = page.querySelector('img.actorPicture');
  if (picture) {
    profile.avatar = picture.src;
  }

  // The bio container also holds a title paragraph, which the selector excludes.
  const bio = page.querySelector('.actorBio p:not(.bioTitle)');
  if (bio) {
    profile.description = bio.textContent.trim();
  }

  const releaseUrls = [];
  for (const sceneLink of page.querySelectorAll('.sceneList .scene a.imgLink')) {
    releaseUrls.push(`https://xempire.com${sceneLink.href}`);
  }
  profile.releases = releaseUrls;

  return profile;
}
async function fetchLatest(site, page = 1) {
const res = await bhttp.get(`${site.url}/en/videos/AllCategories/0/${page}`);
@@ -203,8 +229,34 @@ async function fetchScene(url, site) {
return scrapeScene(res.body.toString(), url, site);
}
/**
 * Searches xempire.com for an actor and scrapes the matching profile page.
 *
 * @param {string} actorName - Actor name used for the search and for matching the result link.
 * @returns {Promise<?Object>} The profile produced by `scrapeProfile`, or null when
 *   either request does not answer with status 200 or the search yields no matching link.
 */
async function fetchProfile(actorName) {
  // Replace every whitespace run with '+'. Without the global flag only the first
  // gap was replaced, leaving literal spaces in the URL for names of three or more words.
  const actorSlug = actorName.toLowerCase().replace(/\s+/g, '+');
  const searchUrl = `https://www.xempire.com/en/search/xempire/actor/${actorSlug}`;
  const searchRes = await bhttp.get(searchUrl);

  if (searchRes.statusCode !== 200) {
    return null;
  }

  const actorUrl = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName);

  if (!actorUrl) {
    return null;
  }

  // The search result link is treated as a site-relative path.
  const url = `https://xempire.com${actorUrl}`;
  const actorRes = await bhttp.get(url);

  if (actorRes.statusCode !== 200) {
    return null;
  }

  return scrapeProfile(actorRes.body.toString(), url, actorName);
}
// Functions this module makes available to code that requires it.
module.exports = {
fetchLatest,
fetchProfile,
fetchUpcoming,
fetchScene,
};