Added experimental movie page scraping with Elegant Angel.

This commit is contained in:
DebaucheryLibrarian 2020-07-17 04:33:05 +02:00
parent a88c2f0760
commit f59e809713
4 changed files with 31 additions and 19 deletions

View File

@@ -28,7 +28,7 @@ async function init() {
const actors = actorNames.length > 0 && await scrapeActors(actorNames); const actors = actorNames.length > 0 && await scrapeActors(actorNames);
const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean); const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates(); const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies) && await fetchUpdates();
const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile); const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []); const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);

View File

@@ -25,6 +25,10 @@ const { argv } = yargs
type: 'array', type: 'array',
alias: 'channel', alias: 'channel',
}) })
.option('movies', {
describe: 'Scrape movies from channels',
type: 'array',
})
.option('actors', { .option('actors', {
describe: 'Scrape actors by name or slug', describe: 'Scrape actors by name or slug',
type: 'array', type: 'array',
@@ -91,10 +95,10 @@ const { argv } = yargs
type: 'boolean', type: 'boolean',
default: true, default: true,
}) })
.option('redownload', { .option('force', {
describe: 'Don\'t ignore duplicates, update existing entries', describe: 'Don\'t ignore duplicates, update existing entries',
type: 'boolean', type: 'boolean',
alias: 'force', alias: 'redownload',
}) })
.option('after', { .option('after', {
describe: 'Don\'t fetch scenes older than', describe: 'Don\'t fetch scenes older than',

View File

@@ -1,7 +1,6 @@
'use strict'; 'use strict';
const qu = require('../utils/q'); const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, channel) { function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => { return scenes.map(({ query }) => {
@@ -75,20 +74,25 @@ async function scrapeScene({ query, html }, url) {
return release; return release;
} }
function scrapeProfile({ query, el }, actorName, entity, include) { function scrapeMovies(movies, channel) {
const profile = {}; return movies.map(({ query }) => {
const release = {};
profile.description = query.cnt('.bio-text'); release.url = query.url('.boxcover', 'href', { origin: channel.url });
profile.birthPlace = query.cnt('.birth-place span'); release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
profile.avatar = query.img('.actor-photo img'); release.title = query.cnt('span');
if (include.releases) { const cover = query.img('picture img');
return scrapeAll(qu.initAll(el, '.scene'));
}
console.log(profile); release.covers = [
return profile; // filename is ignored, back-cover has suffix after media ID
cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
];
return release;
});
} }
async function fetchLatest(channel, page = 1) { async function fetchLatest(channel, page = 1) {
@@ -118,12 +122,14 @@ async function fetchScene(url, channel) {
return res.status; return res.status;
} }
async function fetchProfile(actorName, entity, include) { async function fetchMovies(channel, page = 1) {
const url = `${entity.url}/actors/${slugify(actorName, '_')}`; const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, {
const res = await qu.get(url); // invalid certificate
rejectUnauthorized: false,
});
if (res.ok) { if (res.ok) {
return scrapeProfile(res.item, actorName, entity, include); return scrapeMovies(res.items, channel);
} }
return res.status; return res.status;
@@ -132,5 +138,5 @@ async function fetchProfile(actorName, entity, include) {
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile, fetchMovies,
}; };

View File

@@ -83,6 +83,8 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) {
? await scraper.fetchUpcoming(entity, page, preData, include) ? await scraper.fetchUpcoming(entity, page, preData, include)
: await scraper.fetchLatest(entity, page, preData, include); : await scraper.fetchLatest(entity, page, preData, include);
await scraper.fetchMovies(entity, page);
if (!Array.isArray(latestReleases)) { if (!Array.isArray(latestReleases)) {
// scraper is unable to fetch the releases and returned a HTTP code or null // scraper is unable to fetch the releases and returned a HTTP code or null
logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`); logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);