Improved and documented actor profile scraping.

This commit is contained in:
DebaucheryLibrarian
2020-08-12 20:51:08 +02:00
parent 5cabeed19d
commit 7413d7db25
5 changed files with 64 additions and 32 deletions

View File

@@ -10,21 +10,6 @@ const include = require('./utils/argv-include')(argv);
const scrapers = require('./scrapers/scrapers');
const { fetchChannelsFromArgv, fetchChannelsFromConfig } = require('./entities');
// Cut-off Date computed once from `argv.after`, which is read either as a
// calendar date or as a time distance counted back from the current UTC time.
const afterDate = (() => {
  const looksLikeDate = /\d{2,4}-\d{2}-\d{2,4}/.test(argv.after);

  if (!looksLikeDate) {
    // No date pattern found: treat the value as a space-separated time
    // distance (e.g. "1 month") and subtract it from now.
    const now = moment.utc();

    return now.subtract(...argv.after.split(' ')).toDate();
  }

  // Date pattern found: parse as UTC, accepting year-first or day-first order.
  const acceptedFormats = ['YYYY-MM-DD', 'DD-MM-YYYY'];

  return moment.utc(argv.after, acceptedFormats).toDate();
})();
async function filterUniqueReleases(latestReleases, accReleases) {
const latestReleaseIdentifiers = latestReleases
.map(release => [release.entity.id, release.entryId]);
@@ -67,7 +52,7 @@ function needNextPage(uniqueReleases, pageAccReleases) {
.sort((releaseA, releaseB) => releaseB.date - releaseA.date)
.slice(-1)[0];
if (moment(oldestReleaseOnPage.date).isAfter(afterDate)) {
if (moment(oldestReleaseOnPage.date).isAfter(argv.after)) {
// oldest release on page is newer than the specified date cut-off
return true;
}
@@ -126,7 +111,7 @@ async function scrapeReleases(scraper, entity, preData, upcoming = false) {
}
if (releases.every(release => release.date)) {
return releases.filter(release => moment(release.date).isAfter(afterDate));
return releases.filter(release => moment(release.date).isAfter(argv.after));
}
return releases.slice(0, argv.nullDateLimit);