Added experimental movie page scraping for Elegant Angel.

DebaucheryLibrarian 2020-07-17 04:33:05 +02:00
parent a88c2f0760
commit f59e809713
4 changed files with 31 additions and 19 deletions

View File

@@ -28,7 +28,7 @@ async function init() {
   const actors = actorNames.length > 0 && await scrapeActors(actorNames);
   const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
-  const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
+  const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies) && await fetchUpdates();
   const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
   const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);

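The gate above relies on short-circuit evaluation: when none of the flags are set, updateBaseScenes is false rather than an array, so downstream code has to tolerate a non-array value. A minimal sketch with a hypothetical argv and a stand-in fetcher:

(async () => {
  const argv = { movies: ['elegantangel'] }; // hypothetical parsed CLI arguments
  const fetchUpdates = async () => ['update A']; // stand-in for the real fetcher

  // mirrors the gate in init(): only fetch updates when a relevant flag is set
  const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies)
    && await fetchUpdates();

  console.log(updateBaseScenes); // ['update A'] here; false when no flag is set
})();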
View File

@@ -25,6 +25,10 @@ const { argv } = yargs
     type: 'array',
     alias: 'channel',
   })
+  .option('movies', {
+    describe: 'Scrape movies from channels',
+    type: 'array',
+  })
   .option('actors', {
     describe: 'Scrape actors by name or slug',
     type: 'array',
@@ -91,10 +95,10 @@ const { argv } = yargs
     type: 'boolean',
     default: true,
   })
-  .option('redownload', {
+  .option('force', {
     describe: 'Don\'t ignore duplicates, update existing entries',
     type: 'boolean',
-    alias: 'force',
+    alias: 'redownload',
   })
   .option('after', {
     describe: 'Don\'t fetch scenes older than',

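Assuming the tool is started through npm (the entrypoint and channel slug below are guesses, not taken from this commit), the new option and the renamed flag combine like so:

npm start -- --movies elegantangel --force

Because --movies is an array option, several channel slugs can follow it, and --redownload keeps working as an alias for --force.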
View File

@@ -1,7 +1,6 @@
 'use strict';
 
 const qu = require('../utils/q');
-const slugify = require('../utils/slugify');
 
 function scrapeAll(scenes, channel) {
   return scenes.map(({ query }) => {
@@ -75,20 +74,25 @@ async function scrapeScene({ query, html }, url)
   return release;
 }
 
-function scrapeProfile({ query, el }, actorName, entity, include) {
-  const profile = {};
+function scrapeMovies(movies, channel) {
+  return movies.map(({ query }) => {
+    const release = {};
 
-  profile.description = query.cnt('.bio-text');
-  profile.birthPlace = query.cnt('.birth-place span');
+    release.url = query.url('.boxcover', 'href', { origin: channel.url });
+    release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
 
-  profile.avatar = query.img('.actor-photo img');
+    release.title = query.cnt('span');
 
-  if (include.releases) {
-    return scrapeAll(qu.initAll(el, '.scene'));
-  }
+    const cover = query.img('picture img');
 
-  console.log(profile);
-  return profile;
+    release.covers = [
+      // filename is ignored, back-cover has suffix after media ID
+      cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
+      cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
+    ];
+
+    return release;
+  });
 }
 
 async function fetchLatest(channel, page = 1) {
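The covers array above derives both box-cover URLs from the grid thumbnail. A minimal sketch of that rewrite with a hypothetical thumbnail URL (the real path layout on elegantangel.com may differ):

const cover = 'https://www.elegantangel.com/files/product/123/456789_sq.jpg'; // hypothetical

const covers = [
  // front cover: the '_sq.jpg' thumbnail suffix becomes a '/front.jpg' filename
  cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
  // back cover: the media ID gains a 'b' suffix before '/back.jpg'
  cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
];

console.log(covers);
// [ 'https://www.elegantangel.com/files/product/500/456789/front.jpg',
//   'https://www.elegantangel.com/files/product/500/456789b/back.jpg' ]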
@@ -118,12 +122,14 @@ async function fetchScene(url, channel) {
   return res.status;
 }
 
-async function fetchProfile(actorName, entity, include) {
-  const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
-  const res = await qu.get(url);
+async function fetchMovies(channel, page = 1) {
+  const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, {
+    // invalid certificate
+    rejectUnauthorized: false,
+  });
 
   if (res.ok) {
-    return scrapeProfile(res.item, actorName, entity, include);
+    return scrapeMovies(res.items, channel);
   }
 
   return res.status;
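rejectUnauthorized: false is Node's standard TLS escape hatch for a site with a broken certificate chain. Assuming qu forwards request options to Node's HTTP stack (not confirmed by this commit), the equivalent bare request would be:

const https = require('https');

https.get(
  'https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=1',
  { rejectUnauthorized: false }, // skip certificate validation for this request only
  (res) => console.log(res.statusCode),
);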
@@ -132,5 +138,5 @@ async function fetchProfile(actorName, entity, include) {
 module.exports = {
   fetchLatest,
   fetchScene,
-  fetchProfile,
+  fetchMovies,
 };

View File

@@ -83,6 +83,8 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) {
     ? await scraper.fetchUpcoming(entity, page, preData, include)
     : await scraper.fetchLatest(entity, page, preData, include);
 
+  await scraper.fetchMovies(entity, page);
+
   if (!Array.isArray(latestReleases)) {
     // scraper is unable to fetch the releases and returned a HTTP code or null
     logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);
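If other scrapers don't implement fetchMovies yet, this unconditional await will throw a TypeError for them. A guarded variant (a sketch, not the commit's code) would be:

// only call the experimental hook when the scraper actually provides it
if (typeof scraper.fetchMovies === 'function') {
  await scraper.fetchMovies(entity, page);
}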