Added dynamic dividers to patterns. Fixed PornHub module. Updated README.

This commit is contained in:
ThePendulum 2019-11-02 02:19:23 +01:00
parent bfc31c18ba
commit a7135f48a6
8 changed files with 899 additions and 122 deletions

106
README.md
View File

@ -15,6 +15,7 @@ Most features are optional and can easily be disabled!
* Reddit text/self, images and videos[\*](#reddit-videos)
* Imgur (requires API key as of late 2019)
* Gfycat
* PornHub (videos)
* Erome
* Vidble
* Eroshare archive
@ -35,16 +36,22 @@ reddit-post-dump requires a arbitrarily recent version of Node.js. Before use, d
`npm install`
## Usage
`npm start -- (--user <username> | --post <post-id>)`
`npm start -- (--user <username> | --post <post-id> | --fetch <content-url>)`
### Optional arguments
* `--users <username> [<username>...]`: You may fetch posts from multiple users by supplying a space-separated list of usernames to `--users`.
* `--posts <post-id> [<post-id>...]`: Fetch multiple posts by supplying a space-separated list of post IDs to `--posts`.
* `--fetch <content-url>`: Fetch content directly from a URL to an album or image on one of the supported hosts
* `--file-users <filepath>`: Fetch posts from multiple users by supplying a file with newline separated usernames
* `--file-posts <filepath>`: Fetch multiple posts by supplying a file with newline separated post IDs
* `--file-fetch <filepath>`: Fetch content directly from multiple sources by supplying a file with newline separated URLs
* `--limit <number>`: Maximum amount of posts per user to fetch content from. Limit is applied after filtering out ignored, cross- and reposts. Posts requested directly by ID may be discarded as duplicates, but are not otherwise affected by the limit.
* `--sort <method>`: How posts should be sorted while fetched. This affects the `$postIndex` variable, and in combination with a `--limit` decides what posts will be included.
* `--ignore <prop> [<prop>...]`: Ignore posts with any of the following properties: `pinned`, `stickied`, `hidden`, `over_18`, `spoiler`.
* `--exclude <source> [<source>...]`: Do not include posts from these sources (e.g. `self`, `reddit`, `imgur`, `gfycat`, ...). Should not be used in combination with `--include`.
* `--include <source> [<source>...]`: Only include posts from these sources (e.g. `self`, `reddit`, `imgur`, `gfycat`, ...). Should not be used in combination with `--exclude`.
* `--base <path>`: Overwrite the base path variables `{base.posts}` and `{base.direct}`, preserving the remainder of the filepath pattern.
* `--label <name>`: Arbitrary text made available as the `{label}` variable.
### Examples
* `npm start -- --user AWildSketchAppeared`
@ -66,66 +73,73 @@ Unfortunately, it is necessary to register for the reddit and imgur APIs for thi
Path patterns dictate where and how a file will be saved. Various variables and options are available, and you may use subdirectories divided by `/`.
#### Variables
#### Base
`$base` is an optional variable intended to set the beginning most paths have in common. The variable must be added to each path manually and is not prefixed automatically as to allow for exceptions.
#### Misc
* `{base.posts}` or `{base.direct}`: An optional variable intended to set the beginning most paths have in common, for content fetched via reddit and content fetched directly respectively. The variable must be added to each path manually and is not prefixed automatically, so as to allow for exceptions. The configuration for both will be overruled by the `--base` argument.
* `{label}`: Arbitrary text specified by the `--label` argument.
##### Item (individual image, video or text)
* `{item.id}`: The ID of the individual image or video
* `{item.title}`: The title of the individual image or video
* `{item.description}`: The description of the individual image or video
* `{item.date}`: The submission date of the individual image or video, formatted by the `dateFormat` configuration described below
* `{item.index}`: The index of the individual image or video in an album, offset by the `indexOffset` configuration described below
* `{*tags*.extracted}`: Whether the item has been extracted as the only item in an album
* `{*tags*.preview}`: Whether the image is a reddit preview because it was unavailable on the original host
* `{ext}`: The extension of the medium. Must typically be included, but may be omitted for self (text) posts on Unix systems
##### Album
* `{album.id}`: The ID of the media host album
* `{album.title}`: The title of the media host album
* `{album.description}`: The description of the media host album
* `{album.date}`: The submission date of the media host album, formatted by the `dateFormat` configuration described below
##### Post
* `{post.id}`: The ID of the reddit post
* `{post.title}`: The title of the reddit post
* `{post.user}`: The user that submitted the post, almost always equivalent to the `--user` command line argument
* `{post.date}`: The submission date of the reddit post, formatted by the `dateFormat` configuration described below
* `{post.index}`: The index of the post according to the sort method
* `{post.score}`: The current karma score of the post
* `{post.hash}`: The hash of the post
* `{post.subreddit}`: The name of the subreddit the post is submitted to
##### Host
* `{host.name}` or `{host.label}`: Name of the source the content was hosted on, e.g. 'imgur' or 'gfycat'
* `{host.id}`: ID of the source the content was hosted on
#### User
* `$user` or `$username`: The nickname of the reddit user that submitted the post
* `$userId`: The ID of the reddit user that submitted the post
* `$userCreated`: The creation date or birthday of the reddit user, formatted according to `dateFormat` described below
* `$userVerified` (boolean): Whether the reddit user is verified
* `$userVerifiedEmail` (boolean): Whether the reddit user has verified their e-mail address
* `$userGold` (boolean): Whether the reddit user is a gold member of reddit
* `{user.name}` or `{user.username}`: The nickname of the reddit user that submitted the post
* `{user.id}`: The ID of the reddit user that submitted the post
* `{user.created}`: The creation date or birthday of the reddit user, formatted according to `dateFormat` described below
* `{tags.verified}`: Whether the reddit user is verified
* `{tags.verifiedEmail}`: Whether the reddit user has verified their e-mail address
* `{tags.gold}`: Whether the reddit user is a gold member of reddit
#### Profile
Many reddit users have a 'subreddit' of their own in the form of a profile (not to be confused with users that have created an *actual* subreddit for themselves). These variables are only available for users that have enabled this.
* `$profileTitle`: The title of the reddit user's profile
* `$profileId`: The ID of the reddit user's profile
* `$profileDescription`: The description of the reddit user's profile
* `$profileOver18` (boolean): Whether the profile contains adult content and requires an 'over 18' age confirmation
* `{profile.title}`: The title of the reddit user's profile
* `{profile.id}`: The ID of the reddit user's profile
* `{profile.description}`: The description of the reddit user's profile
* `{tags.over18}` (boolean): Whether the profile contains adult content and requires an 'over 18' age confirmation
##### Post
* `$postId`: The ID of the reddit post
* `$postTitle`: The title of the reddit post
* `$postUser`: The user that submitted the post, almost always equivalent to the `--user` command line argument
* `$postDate`: The submission date of the reddit post, formatted by the `dateFormat` configuration described below
* `$postIndex`: The index of the post according to the sort method
* `$host`: Name of the source the content was hosted on
##### `{tags.*x*}`
Tags are variables that will only be inserted when another variable is present. When you use a tag, you must configure a string of text that is inserted in place of a tag variable when the associated variable is available.
##### Album
* `$albumId`: The ID of the media host album
* `$albumTitle`: The title of the media host album
* `$albumDescription`: The description of the media host album
* `$albumDate`: The submission date of the media host album, formatted by the `dateFormat` configuration described below
##### Item (individual image, video or text)
* `$itemId`: The ID of the individual image or video
* `$itemTitle`: The title of the individual image or video
* `$itemDescription`: The description of the individual image or video
* `$itemDate`: The submission date of the individual image or video, formatted by the `dateFormat` configuration described below
* `$itemIndex`: The index of the individual image or video in an album, offset by the `indexOffset` configuration described below
* `$extracted` (boolean): Whether the item has been extracted as the only item in an album
* `$preview` (boolean): Whether the image is a reddit preview because it was unavailable on the original host
* `$ext`: The extension of the medium. Must typically be included, but may be omitted for self (text) posts on Unix systems
##### `booleans`
Some variables are booleans and indicate whether or not a property applies. When you use a boolean variable, you must configure a string of text that is only inserted in place of a boolean variable when the variable is true.
##### `booleans`
Some variables are booleans and indicate whether or not a property applies. When you use a boolean variable, you must configure a string of text that is only inserted in place of a boolean variable when the variable is true.
#### `divider`, `{div}` and `{divs.*x*}`
The `{div}` variable will insert an arbitrary string as configured by the `divider` option, intended, of course, to be used as a divider between other components. Similar to tags, `{divs.*x*}` will insert a divider only when the specified variable is present. For example, `{divs.item.title}` will only insert a divider when `{item.title}` is present. This makes sure a filename will look like, for example, either `./20191101 - abc123 - Hello world!.jpeg` when a title is available or `./20191101 - abc123.jpeg` when no title is available, instead of `./20191101 - abc123 - .jpeg`.
##### `dateFormat`
Affects the representation of `$postDate`, `$albumDate` and `$itemDate` and defaults to `YYYYMMDD`. See [this documentation](https://date-fns.org/v1.29.0/docs/format) for an overview of all available tokens.
Affects the representation of `{item.date}`, `{album.date}` and `{post.date}` and defaults to `YYYYMMDD`. See [this documentation](https://date-fns.org/v1.29.0/docs/format) for an overview of all available tokens.
##### `titleLength`
Titles can sometimes be longer than you prefer your filenames to be, or even overflow the operating system's limit (255 bytes for Linux). This property cuts off titles at a fixed number of characters.
##### `indexOffset`
Arrays start at 0, but as to not tire myself out debating the matter, you may offset it my any numerical value you like. Affects the `$itemIndex` variable for album items.
Arrays start at 0, but as to not tire myself out debating the matter, you may offset it by any numerical value you like. Affects the `{item.index}` variable for album items.
##### `slashSubstitute`
The patterns represent Unix file paths, and a `/` therefore indicates a new directory. You may freely use directories in your paths, but titles or descriptions may contain a `/` that is not supposed to create a new directory. All instances of `/` in a variable value will be replaced with the configured slash substitute.
The patterns represent Unix file paths, and a `/` therefore indicates a new directory. You may freely use directories in your patterns, but titles or descriptions may contain a `/` that is not supposed to create a new directory. All instances of `/` in a variable value will be replaced with the configured slash substitute.
##### `album.extractSingleItem`
Some albums contain only one image or video. By setting `album.extractSingleItem` to `true` (default), the item will be saved in accordance to the individual item patterns rather than the album patterns. An extracted item will inherit the title and description of the album if it has none of its own. Extracted items will have a truthy `$extracted` boolean variable.
##### `extractSingleAlbumItem`
Some albums contain only one image or video. By setting `extractSingleAlbumItem` to `true` (default), the item will be saved in accordance with the individual item patterns rather than the album patterns. An extracted item will inherit the title and description of the album if it has none of its own. Extracted items will have a truthy `{tags.extracted}` variable.

View File

@ -3,31 +3,31 @@
module.exports = {
library: {
base: {
posts: 'output/{user.name}/',
direct: 'output/{host.name}/',
posts: 'output/{user.name}/{label}/',
direct: 'output/{host.name}/{label}/',
},
posts: {
image: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}',
video: '{base.posts}{post.date} - {tags.preview}{item.id} - {post.title}{ext}',
text: '{base.posts}{post.date} - {tags.preview}{post.id} - {post.title}',
image: '{base.posts}{post.date}{div}{tag.preview}{item.id}{div}{post.title}{ext}',
video: '{base.posts}{post.date}{div}{tag.preview}{item.id}{div}{post.title}{ext}',
text: '{base.posts}{post.date}{div}{tag.preview}{post.id}{div}{post.title}',
album: {
image: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}',
video: '{base.posts}{post.date} - {tags.preview}{album.id} - {post.title}/{item.index} - {item.id}{ext}',
image: '{base.posts}{post.date}{div}{tag.preview}{album.id}{div}{post.title}/{item.index}{div}{item.id}{ext}',
video: '{base.posts}{post.date}{div}{tag.preview}{album.id}{div}{post.title}/{item.index}{div}{item.id}{ext}',
},
},
direct: {
image: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}',
video: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}{ext}',
text: '{base.direct}{item.date} - {tags.preview}{item.id} - {item.title}',
image: '{base.direct}{item.date}{div}{tag.preview}{item.id}{divs.item.title}{item.title}{ext}',
video: '{base.direct}{item.date}{div}{tag.preview}{item.id}{divs.item.title}{item.title}{ext}',
text: '{base.direct}{item.date}{div}{tag.preview}{item.id}{divs.item.title}{item.title}',
album: {
image: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}',
video: '{base.direct}{album.date} - {tags.preview}{album.id} - {album.title}/{item.index} - {item.id}{ext}',
image: '{base.direct}{album.date}{div}{tag.preview}{album.id}{divs.album.title}{album.title}/{item.index}{div}{item.id}{ext}',
video: '{base.direct}{album.date}{div}{tag.preview}{album.id}{divs.album.title}{album.title}/{item.index}{div}{item.id}{ext}',
},
},
extractSingleAlbumItem: true,
profile: {
image: '{base.posts}{user.created} - profile{ext}',
description: '{base.posts}{user.created} - profile ({tags.verified}{tags.verifiedEmail}{tags.gold}{tags.over18})',
image: '{base.posts}{user.created}{div}profile{ext}',
description: '{base.posts}{user.created}{div}profile ({tag.verified}{tag.verifiedEmail}{tag.gold}{tag.over18})',
avoidAvatar: true,
},
index: {
@ -50,6 +50,7 @@ module.exports = {
truncator: '...',
},
indexOffset: 1,
divider: ' - ',
slashSubstitute: '#',
},
fetch: {

776
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -37,6 +37,7 @@
"fluent-ffmpeg": "^2.1.2",
"fs-extra": "^5.0.0",
"js-yaml": "^3.12.0",
"jsdom": "^15.2.0",
"mime-types": "^2.1.18",
"node-cron": "^1.2.1",
"node-exiftool": "^2.3.0",
@ -46,7 +47,8 @@
"snoowrap": "^1.15.2",
"template-format": "^1.2.4",
"url-pattern": "^1.0.3",
"yargs": "^11.0.0"
"yargs": "^11.0.0",
"youtube-dl": "^2.1.0"
},
"devDependencies": {
"eslint": "^4.19.1",

View File

@ -50,7 +50,7 @@ async function getCompletePosts() {
}
if (!usernames.length && !postIds.length) {
throw new Error('Could not retrieve any posts. Did you supply --users, --posts, --file-users or --file-posts?');
return null;
}
if (usernames.length) {
@ -105,12 +105,14 @@ async function initApp() {
if (args.fetch || args.fileDirect) {
await getCompleteContents(ep);
return;
}
const userPosts = await getCompletePosts();
if (userPosts) {
await fetchSavePosts(userPosts, ep);
}
await ep.close();
if (args.watch) {

View File

@ -34,6 +34,14 @@ function getArgs() {
type: 'string',
alias: 'file-fetch',
})
.option('label', {
describe: 'Arbitrary variable made available in path patterns. Useful to organize files from a URL lists in directory.',
type: 'string',
})
.option('base', {
describe: 'Alternative base path, overriding both the default posts and direct base paths.',
type: 'string',
})
.option('limit', {
describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts',
type: 'number',

View File

@ -7,6 +7,8 @@ const dateFns = require('date-fns');
const mime = require('mime-types');
const format = require('template-format');
const args = require('./cli')();
function interpolate(pattern, item = null, content = null, host = null, post = null, user = null, strip = true, dateFormat = config.library.dateFormat) {
const data = {
tags: {},
@ -104,7 +106,7 @@ function interpolate(pattern, item = null, content = null, host = null, post = n
if (typeof value === 'string') {
return {
...acc,
[key]: value && value.toString().replace(/\//g, config.library.slashSubstitute),
[key]: value ? value.toString().replace(/\//g, config.library.slashSubstitute) : '',
};
}
@ -112,20 +114,45 @@ function interpolate(pattern, item = null, content = null, host = null, post = n
...acc,
[key]: Object.entries(value).reduce((subacc, [subkey, subvalue]) => ({
...subacc,
[subkey]: subvalue && subvalue.toString().replace(/\//g, config.library.slashSubstitute),
[subkey]: subvalue ? subvalue.toString().replace(/\//g, config.library.slashSubstitute) : '',
}), {}),
};
}, {})
: data;
const base = {
posts: format(config.library.base.posts, strippedData),
direct: format(config.library.base.direct, strippedData),
const dividers = Object.entries(data).reduce((acc, [key, value]) => {
if (typeof value === 'string') {
return {
...acc,
[key]: value ? config.library.divider : '',
};
}
return {
...acc,
[key]: Object.entries(value).reduce((subacc, [subkey, subvalue]) => ({
...subacc,
[subkey]: subvalue ? config.library.divider : '',
}), {}),
};
}, {});
if (args.label) {
Object.assign(strippedData, {
label: format(args.label, strippedData),
});
}
const interpolated = format(pattern, {
base,
...strippedData,
base: {
posts: format(args.base || config.library.base.posts, strippedData),
direct: format(args.base || config.library.base.direct, strippedData),
},
dividers,
divider: config.library.divider,
divs: dividers,
div: config.library.divider,
});
return interpolated;

View File

@ -1,38 +1,31 @@
'use strict';
const fetch = require('node-fetch');
const youtubedl = require('youtube-dl');
const dateFns = require('date-fns');
async function pornhub(host, post) {
const res = await fetch(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`);
if (res.status !== 200) {
throw new Error(`Could not fetch info PornHub video '${host.id}': '${res.error}'`);
async function pornhub(host) {
const data = await new Promise((resolve, reject) => {
youtubedl.getInfo(`https://www.pornhub.com/view_video.php?viewkey=${host.id}`, null, (error, info) => {
if (error) {
reject(error);
}
const html = await res.text();
const dataString = html.replace(/\s+/g, ' ').match(/var flashvars_.* = (.*); var player_mp4_seek/)[1];
const data = JSON.parse(dataString);
const url = data.mediaDefinitions.sort((sourceA, sourceB) => {
if (sourceA.quality < sourceB.quality) {
return 1;
}
if (sourceA.quality > sourceB.quality) {
return -1;
}
return 0;
})[0].videoUrl;
resolve(info);
});
});
return {
album: null,
items: [{
id: host.id,
url,
title: post ? post.title : null,
type: 'video/mp4',
datetime: post ? post.datetime : null,
}],
items: [
{
id: data.id,
url: data.url,
title: data.fulltitle || data.title,
type: `video/${data.ext}`,
datetime: dateFns.format(data.upload_date, 'YYYYMMDD'),
original: data,
},
],
};
}