Filtering undefined scenes property from movies. Added movie page scraper to Elegant Angel.

This commit is contained in:
DebaucheryLibrarian
2020-08-08 18:10:59 +02:00
parent 7bfa5a6cc4
commit a7d5bef93f
7 changed files with 177 additions and 128 deletions

View File

@@ -1,6 +1,7 @@
'use strict';
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
@@ -27,14 +28,30 @@ function scrapeAll(scenes, channel) {
});
}
async function scrapeScene({ query, html }, url) {
/**
 * Build one release object per scene tile listed on a movie page.
 *
 * @param {Array<{ query: Object }>} scenes - Query contexts, one per scene tile.
 *   Each `query` must provide `cnt`, `url`, `number`, `cnts` and `img`.
 * @returns {Object[]} Releases carrying `title`, `url`, `entryId`, `duration`,
 *   `actors` and `poster`. `entryId` is `null` when the tile has no link or the
 *   link path has no numeric segment.
 */
function scrapeMovieScenes(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.scene-title a');
		release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' });

		// A tile without a link, or a link without a numeric path segment, must
		// not throw and abort the scrape of every other scene in the movie.
		const entryIdMatch = release.url
			? new URL(release.url).pathname.match(/\/(\d+)/)
			: null;

		release.entryId = entryIdMatch ? entryIdMatch[1] : null;

		// NOTE(review): the factor 60 presumably converts minutes to seconds — confirm against the site markup.
		release.duration = query.number('.scene-length') * 60;

		release.actors = query.cnts('.scene-cast-list a');
		release.poster = query.img('a img');

		return release;
	});
}
async function scrapeRelease({ query, html }, url, channel, type = 'scene') {
const release = {};
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
release.title = query.cnt('.scene-page .description');
release.title = query.cnt('.scene-page .description, .video-page .description');
release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);
release.duration = query.number('.release-date:last-child') * 60;
release.actors = query.all('.video-performer').map((el) => {
const avatar = qu.query.img(el, 'img', 'data-bgsrc');
@@ -48,8 +65,21 @@ async function scrapeScene({ query, html }, url) {
};
});
release.tags = query.cnts('.tags a');
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
release.tags = query.cnts('.tags a, .categories a');
release.studio = slugify(query.cnt('.studio span:last-child'), '');
if (type === 'scene') {
release.director = query.text('.director');
release.duration = query.number('.release-date:last-child') * 60;
release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');
}
if (type === 'movie') {
release.director = query.cnt('.director a');
release.covers = query.imgs('.carousel-item > img');
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
}
release.photos = query.imgs('#dv_frames a > img').map(photo => [
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
@@ -70,7 +100,6 @@ async function scrapeScene({ query, html }, url) {
}
}
// console.log(release);
return release;
}
@@ -116,7 +145,20 @@ async function fetchScene(url, channel) {
});
if (res.ok) {
return scrapeScene(res.item, url, channel);
return scrapeRelease(res.item, url, channel);
}
return res.status;
}
async function fetchMovie(url, channel) {
const res = await qu.get(url, null, null, {
// invalid certificate
rejectUnauthorized: false,
});
if (res.ok) {
return scrapeRelease(res.item, url, channel, 'movie');
}
return res.status;
@@ -139,4 +181,5 @@ module.exports = {
fetchLatest,
fetchScene,
fetchMovies,
fetchMovie,
};