Added experimental movie page scraping with Elegant Angel.
parent a88c2f0760
commit f59e809713
@@ -28,7 +28,7 @@ async function init() {
 	const actors = actorNames.length > 0 && await scrapeActors(actorNames);
 	const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
 
-	const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
+	const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies) && await fetchUpdates();
 
 	const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
 	const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);
@@ -25,6 +25,10 @@ const { argv } = yargs
 		type: 'array',
 		alias: 'channel',
 	})
+	.option('movies', {
+		describe: 'Scrape movies from channels',
+		type: 'array',
+	})
 	.option('actors', {
 		describe: 'Scrape actors by name or slug',
 		type: 'array',
@@ -91,10 +95,10 @@ const { argv } = yargs
 		type: 'boolean',
 		default: true,
 	})
-	.option('redownload', {
+	.option('force', {
 		describe: 'Don\'t ignore duplicates, update existing entries',
 		type: 'boolean',
-		alias: 'force',
+		alias: 'redownload',
 	})
 	.option('after', {
 		describe: 'Don\'t fetch scenes older than',
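A hedged sketch of what a run using the reworked flags might parse to (the entry-point path and channel slug are illustrative assumptions, not taken from this diff); the yargs alias makes --force and --redownload interchangeable:

// e.g. `node src/app.js --movies elegantangel --force` would yield roughly:
const argv = {
	movies: ['elegantangel'], // type: 'array'
	force: true,
	redownload: true, // filled in through the alias
};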
@@ -1,7 +1,6 @@
 'use strict';
 
 const qu = require('../utils/q');
-const slugify = require('../utils/slugify');
 
 function scrapeAll(scenes, channel) {
 	return scenes.map(({ query }) => {
@@ -75,20 +74,25 @@ async function scrapeScene({ query, html }, url) {
 	return release;
 }
 
-function scrapeProfile({ query, el }, actorName, entity, include) {
-	const profile = {};
-
-	profile.description = query.cnt('.bio-text');
-	profile.birthPlace = query.cnt('.birth-place span');
-
-	profile.avatar = query.img('.actor-photo img');
-
-	if (include.releases) {
-		return scrapeAll(qu.initAll(el, '.scene'));
-	}
-
-	console.log(profile);
-	return profile;
+function scrapeMovies(movies, channel) {
+	return movies.map(({ query }) => {
+		const release = {};
+
+		release.url = query.url('.boxcover', 'href', { origin: channel.url });
+		release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
+
+		release.title = query.cnt('span');
+
+		const cover = query.img('picture img');
+
+		release.covers = [
+			// filename is ignored, back-cover has suffix after media ID
+			cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
+			cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
+		];
+
+		return release;
+	});
 }
 
 async function fetchLatest(channel, page = 1) {
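To illustrate the cover rewriting above, a hypothetical boxcover URL (the path shape is an assumption inferred from the replace calls, not copied from the site):

const cover = 'https://www.elegantangel.com/files/product/12345/67890_sq.jpg'; // hypothetical
cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500');
// -> https://www.elegantangel.com/files/product/500/67890/front.jpg
cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500');
// -> https://www.elegantangel.com/files/product/500/67890b/back.jpg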
@@ -118,12 +122,14 @@ async function fetchScene(url, channel) {
 	return res.status;
 }
 
-async function fetchProfile(actorName, entity, include) {
-	const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
-	const res = await qu.get(url);
+async function fetchMovies(channel, page = 1) {
+	const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, {
+		// invalid certificate
+		rejectUnauthorized: false,
+	});
 
 	if (res.ok) {
-		return scrapeProfile(res.item, actorName, entity, include);
+		return scrapeMovies(res.items, channel);
 	}
 
 	return res.status;
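A minimal usage sketch for the new fetcher, assuming only that the channel object carries the base URL read by scrapeMovies (run inside an async function):

const channel = { url: 'https://www.elegantangel.com' }; // assumed minimal shape
const movies = await fetchMovies(channel, 1);
// on success: an array of { url, entryId, title, covers }; on failure: the HTTP status code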
@@ -132,5 +138,5 @@ async function fetchProfile(actorName, entity, include) {
 module.exports = {
 	fetchLatest,
 	fetchScene,
-	fetchProfile,
+	fetchMovies,
 };
|
@ -83,6 +83,8 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) {
|
|||
? await scraper.fetchUpcoming(entity, page, preData, include)
|
||||
: await scraper.fetchLatest(entity, page, preData, include);
|
||||
|
||||
await scraper.fetchMovies(entity, page);
|
||||
|
||||
if (!Array.isArray(latestReleases)) {
|
||||
// scraper is unable to fetch the releases and returned a HTTP code or null
|
||||
logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);
|
||||
|