From f59e80971331b2189230d31c6f03ade400c2a81b Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Fri, 17 Jul 2020 04:33:05 +0200 Subject: [PATCH] Added experimental movie page scraping with Elegant Angel. --- src/app.js | 2 +- src/argv.js | 8 ++++++-- src/scrapers/elegantangel.js | 38 +++++++++++++++++++++--------------- src/updates.js | 2 ++ 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/app.js b/src/app.js index 2a0dd9a5..52aeff44 100644 --- a/src/app.js +++ b/src/app.js @@ -28,7 +28,7 @@ async function init() { const actors = actorNames.length > 0 && await scrapeActors(actorNames); const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean); - const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates(); + const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies) && await fetchUpdates(); const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile); const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []); diff --git a/src/argv.js b/src/argv.js index 7f656c34..f6ec567d 100644 --- a/src/argv.js +++ b/src/argv.js @@ -25,6 +25,10 @@ const { argv } = yargs type: 'array', alias: 'channel', }) + .option('movies', { + describe: 'Scrape movies from channels', + type: 'array', + }) .option('actors', { describe: 'Scrape actors by name or slug', type: 'array', @@ -91,10 +95,10 @@ const { argv } = yargs type: 'boolean', default: true, }) - .option('redownload', { + .option('force', { describe: 'Don\'t ignore duplicates, update existing entries', type: 'boolean', - alias: 'force', + alias: 'redownload', }) .option('after', { describe: 'Don\'t fetch scenes older than', diff --git a/src/scrapers/elegantangel.js b/src/scrapers/elegantangel.js index c2f349c6..e95df5ea 100644 --- a/src/scrapers/elegantangel.js +++ b/src/scrapers/elegantangel.js @@ -1,7 +1,6 @@ 'use strict'; const qu = require('../utils/q'); -const slugify = require('../utils/slugify'); function scrapeAll(scenes, channel) { return scenes.map(({ query }) => { @@ -75,20 +74,25 @@ async function scrapeScene({ query, html }, url) { return release; } -function scrapeProfile({ query, el }, actorName, entity, include) { - const profile = {}; +function scrapeMovies(movies, channel) { + return movies.map(({ query }) => { + const release = {}; - profile.description = query.cnt('.bio-text'); - profile.birthPlace = query.cnt('.birth-place span'); + release.url = query.url('.boxcover', 'href', { origin: channel.url }); + release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; - profile.avatar = query.img('.actor-photo img'); + release.title = query.cnt('span'); - if (include.releases) { - return scrapeAll(qu.initAll(el, '.scene')); - } + const cover = query.img('picture img'); - console.log(profile); - return profile; + release.covers = [ + // filename is ignored, back-cover has suffix after media ID + cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'), + cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'), + ]; + + return release; + }); } async function fetchLatest(channel, page = 1) { @@ -118,12 +122,14 @@ async function fetchScene(url, channel) { return res.status; } -async function fetchProfile(actorName, entity, include) { - const url = `${entity.url}/actors/${slugify(actorName, '_')}`; - const res = await qu.get(url); +async function fetchMovies(channel, page = 1) { + const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, { + // invalid certificate + rejectUnauthorized: false, + }); if (res.ok) { - return scrapeProfile(res.item, actorName, entity, include); + return scrapeMovies(res.items, channel); } return res.status; @@ -132,5 +138,5 @@ async function fetchProfile(actorName, entity, include) { module.exports = { fetchLatest, fetchScene, - fetchProfile, + fetchMovies, }; diff --git a/src/updates.js b/src/updates.js index 766b859f..c873f654 100644 --- a/src/updates.js +++ b/src/updates.js @@ -83,6 +83,8 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) { ? await scraper.fetchUpcoming(entity, page, preData, include) : await scraper.fetchLatest(entity, page, preData, include); + await scraper.fetchMovies(entity, page); + if (!Array.isArray(latestReleases)) { // scraper is unable to fetch the releases and returned a HTTP code or null logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);