ripunzel/src/app.js

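// ripunzel app entry point: collects Reddit posts for the requested users/post IDs
// (or direct links), curates them, and downloads their content, optionally
// re-running on a cron schedule in watch mode.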
'use strict';
const config = require('config');
const Snoowrap = require('snoowrap');
const fs = require('fs-extra');
const Promise = require('bluebird');
const exiftool = require('node-exiftool');
const exiftoolBin = require('dist-exiftool');
const cron = require('node-cron');
const { format } = require('date-fns');
require('array.prototype.flatten').shim();
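// Reddit API client built from the configured credentials, plus the parsed CLI arguments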
const reddit = new Snoowrap(config.reddit.api);
const args = require('./cli.js')();
const dissectLink = require('./dissectLink.js');
const curatePosts = require('./curate/posts.js');
const { attachContentInfo, getInfo } = require('./fetch/info.js');
const { fetchSaveUserContent, fetchSaveDirectContent } = require('./fetch/content.js');
const getPosts = require('./sources/getPosts.js')(reddit, args);
const getUserPosts = require('./sources/getUserPosts.js')(reddit, args);
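
// Read a newline-separated list (usernames, post IDs, ...) from a file, dropping
// empty lines; on failure, log the error and return an empty list.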
async function getFileContents(location, label) {
  try {
    const fileContents = await fs.readFile(location, 'utf8');
    return fileContents.split('\n').filter(entry => entry);
  } catch (error) {
    console.log('\x1b[31m%s\x1b[0m', `Could not read ${label} file '${location}': ${error}.`);
    return [];
  }
}
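
// Gather usernames and post IDs from the CLI options and optional list files, fetch
// the corresponding posts from Reddit, filter them against the ignore list, and
// attach content info for each curated post.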
async function getCompletePosts() {
  let userPosts = {};
  let ignoreIds = [];
  let usernames = args.users || [];
  let postIds = args.posts || [];
  if (args.fileUsers) {
    usernames = usernames.concat(await getFileContents(args.fileUsers, 'username'));
  }
  if (args.filePosts) {
    postIds = postIds.concat(await getFileContents(args.filePosts, 'post ID'));
  }
  if (!usernames.length && !postIds.length) {
    throw new Error('Could not retrieve any posts. Did you supply --users, --posts, --file-users or --file-posts?');
  }
  if (usernames.length) {
    userPosts = await getUserPosts(usernames);
  }
  if (postIds.length) {
    userPosts = await getPosts(postIds, userPosts);
  }
  if (args.fileIgnore) {
    ignoreIds = await getFileContents(args.fileIgnore, 'ignore');
  }
  const curatedUserPosts = curatePosts(userPosts, ignoreIds, args);
  return attachContentInfo(curatedUserPosts);
}
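
// Download content for a list of direct links, resolving each link's host and info
// first; limited to 5 links in flight at a time to keep network load down.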
async function getDirectContent(links, ep) {
  return Promise.map(links, async (link) => {
    const host = dissectLink(link);
    const info = await getInfo(host);
    return fetchSaveDirectContent(info, host, ep);
  }, {
    concurrency: 5,
  });
}
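
// Direct-content mode: handle a single link (args.fetch) or a file of direct links
// (args.fileDirect); returns null when neither is given.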
async function getCompleteContents(ep) {
  if (args.fetch) {
    return getDirectContent([args.fetch], ep);
  }
  if (args.fileDirect) {
    return getDirectContent(await getFileContents(args.fileDirect, 'direct'), ep);
  }
  return null;
}
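
// Download and save content for every curated user's posts, one user at a time.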
function fetchSavePosts(userPosts, ep) {
  // use reduce instead of map so users are processed sequentially, limiting concurrency and network stress
  return Promise.reduce(Object.values(userPosts), (acc, user) => fetchSaveUserContent(user, ep, args), null);
}
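
// Main run: open an exiftool process for the download helpers, then either fetch
// direct content or fetch and save the curated user posts; errors are logged
// (with a stack trace when debug mode is enabled) so a watch-mode run can continue.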
async function initApp() {
  try {
    const ep = new exiftool.ExiftoolProcess(exiftoolBin);
    await ep.open();
    if (args.fetch || args.fileDirect) {
      await getCompleteContents(ep);
      await ep.close();
      return;
    }
    const userPosts = await getCompletePosts();
    await fetchSavePosts(userPosts, ep);
    await ep.close();
    if (args.watch) {
      console.log(`[${format(new Date(), 'YYYY-MM-DD HH:mm:ss')}] Watch-mode enabled, checking again for new posts according to crontab '${config.fetch.watch.schedule}'.`);
    }
  } catch (error) {
    if (args.debug) {
      console.log('\x1b[31m%s\x1b[0m', error.stack);
    } else {
      console.log('\x1b[31m%s\x1b[0m', error.message);
    }
  }
}
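
// Run once immediately; in watch mode, re-run on the configured cron schedule.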
initApp();
if (args.watch) {
  cron.schedule(config.fetch.watch.schedule, initApp);
}