Added experimental movie page scraping, starting with Elegant Angel.
parent a88c2f0760
commit f59e809713
@@ -28,7 +28,7 @@ async function init() {
   const actors = actorNames.length > 0 && await scrapeActors(actorNames);
   const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => actor.releases).flat().filter(Boolean);
 
-  const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates();
+  const updateBaseScenes = (argv.all || argv.channels || argv.networks || argv.movies) && await fetchUpdates();
 
   const scenesFromFile = argv.scenesFile && await getFileEntries(argv.scenesFile);
   const sceneUrls = (argv.scenes || []).concat(scenesFromFile || []);
@@ -25,6 +25,10 @@ const { argv } = yargs
     type: 'array',
     alias: 'channel',
   })
+  .option('movies', {
+    describe: 'Scrape movies from channels',
+    type: 'array',
+  })
   .option('actors', {
     describe: 'Scrape actors by name or slug',
     type: 'array',
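The new option only declares the flag; it is consumed in init() above, where argv.movies now also triggers fetchUpdates(). A minimal sketch of the parsing behavior (the entry point name in the comment is an assumption):

    // sketch: parse and consume the new --movies flag, assuming an
    // invocation such as `node src/app.js --movies elegantangel`
    const yargs = require('yargs');

    const { argv } = yargs
      .option('movies', {
        describe: 'Scrape movies from channels',
        type: 'array',
      });

    if (argv.movies) {
      // with `--movies elegantangel vixen`, argv.movies === ['elegantangel', 'vixen']
      console.log('scraping movies from:', argv.movies);
    }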
@@ -91,10 +95,10 @@ const { argv } = yargs
     type: 'boolean',
     default: true,
   })
-  .option('redownload', {
+  .option('force', {
     describe: 'Don\'t ignore duplicates, update existing entries',
     type: 'boolean',
-    alias: 'force',
+    alias: 'redownload',
   })
   .option('after', {
     describe: 'Don\'t fetch scenes older than',
@@ -1,7 +1,6 @@
 'use strict';
 
 const qu = require('../utils/q');
-const slugify = require('../utils/slugify');
 
 function scrapeAll(scenes, channel) {
   return scenes.map(({ query }) => {
@@ -75,20 +74,25 @@ async function scrapeScene({ query, html }, url) {
   return release;
 }
 
-function scrapeProfile({ query, el }, actorName, entity, include) {
-  const profile = {};
-
-  profile.description = query.cnt('.bio-text');
-  profile.birthPlace = query.cnt('.birth-place span');
-
-  profile.avatar = query.img('.actor-photo img');
-
-  if (include.releases) {
-    return scrapeAll(qu.initAll(el, '.scene'));
-  }
-
-  console.log(profile);
-  return profile;
+function scrapeMovies(movies, channel) {
+  return movies.map(({ query }) => {
+    const release = {};
+
+    release.url = query.url('.boxcover', 'href', { origin: channel.url });
+    release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
+
+    release.title = query.cnt('span');
+
+    const cover = query.img('picture img');
+
+    release.covers = [
+      // filename is ignored, back-cover has suffix after media ID
+      cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
+      cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
+    ];
+
+    return release;
+  });
 }
 
 async function fetchLatest(channel, page = 1) {
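The entryId and cover URLs are derived purely by string manipulation. A standalone sketch with hypothetical URLs (the real Elegant Angel paths and CDN layout may differ):

    // hypothetical product URL and square thumbnail; real paths may differ
    const url = 'https://www.elegantangel.com/1234567/some-movie.html';
    const cover = 'https://cdn.example.com/product/160/954321_sq.jpg';

    // the first group of digits in the pathname becomes the entry ID
    const entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; // '1234567'

    // swap the thumbnail suffix for front/back filenames and bump the size segment
    const covers = [
      cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
      cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
    ];
    // [ 'https://cdn.example.com/product/500/954321/front.jpg',
    //   'https://cdn.example.com/product/500/954321b/back.jpg' ]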
@@ -118,12 +122,14 @@ async function fetchScene(url, channel) {
   return res.status;
 }
 
-async function fetchProfile(actorName, entity, include) {
-  const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
-  const res = await qu.get(url);
+async function fetchMovies(channel, page = 1) {
+  const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, {
+    // invalid certificate
+    rejectUnauthorized: false,
+  });
 
   if (res.ok) {
-    return scrapeProfile(res.item, actorName, entity, include);
+    return scrapeMovies(res.items, channel);
   }
 
   return res.status;
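rejectUnauthorized: false disables TLS certificate validation for this request, per the comment about the site's invalid certificate. Assuming the qu helper forwards the option to Node's HTTP stack, the effect corresponds to this plain https sketch:

    const https = require('https');

    // options passed to https.get are forwarded to tls.connect, so
    // rejectUnauthorized: false accepts the site's invalid certificate
    https.get('https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=1', {
      rejectUnauthorized: false,
    }, (res) => {
      console.log(res.statusCode);
      res.resume(); // drain the response so the socket is released
    });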
@@ -132,5 +138,5 @@ async function fetchProfile(actorName, entity, include) {
 module.exports = {
   fetchLatest,
   fetchScene,
-  fetchProfile,
+  fetchMovies,
 };
@@ -83,6 +83,8 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) {
     ? await scraper.fetchUpcoming(entity, page, preData, include)
     : await scraper.fetchLatest(entity, page, preData, include);
 
+  await scraper.fetchMovies(entity, page);
+
   if (!Array.isArray(latestReleases)) {
     // scraper is unable to fetch the releases and returned a HTTP code or null
     logger.warn(`Scraper returned ${latestReleases} when fetching latest from '${entity.name}' (${entity.parent?.name})`);
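Note that the new call is awaited unconditionally and its result is discarded, so any scraper without a fetchMovies export would throw here. A guarded variant (an assumption, not part of this commit) would be:

    // only invoke the hook on scrapers that implement it, and keep the result
    const movies = typeof scraper.fetchMovies === 'function'
      ? await scraper.fetchMovies(entity, page)
      : [];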