From 401a4e21530caee3193d7edb4cbffffadb28f332 Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Fri, 1 Nov 2019 05:55:55 +0100 Subject: [PATCH] Completed Erome module. Added content URL list fetching. --- README.md | 2 ++ src/app.js | 46 ++++++++++++++---------------- src/cli.js | 5 ++++ src/dissectLink.js | 4 +++ src/methods/erome.js | 66 +++++++++++++++++++++++++++++++++++++------- 5 files changed, 87 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 9ad4be8..942bcfa 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Most features are optional and can easily be disabled! * Reddit text/self, images and videos[\*](#reddit-videos) * Imgur (requires API key as of late 2019) * Gfycat +* Erome +* Vidble * Eroshare archive ### Plans and ideas diff --git a/src/app.js b/src/app.js index 465f5cd..e44faf4 100644 --- a/src/app.js +++ b/src/app.js @@ -70,33 +70,27 @@ async function getCompletePosts() { return attachContentInfo(curatedUserPosts); } -async function getDirectContent(link, ep) { - const host = dissectLink(link); - const info = await getInfo(host); +async function getDirectContent(links, ep) { + return Promise.map(links, async (link) => { + const host = dissectLink(link); + const info = await getInfo(host); - console.log(info); + return fetchSaveDirectContent(info, host, ep); + }, { + concurrency: 5, + }); +} - return fetchSaveDirectContent(info, host, ep); +async function getCompleteContents(ep) { + if (args.fetch) { + return getDirectContent([args.fetch], ep); + } - /* - console.log(info); + if (args.fileDirect) { + return getDirectContent(await getFileContents(args.fileDirect, 'direct'), ep); + } - await Promise.all(info.items.map(async (item, index) => { - const stream = await fetchItem(item.url); - - if (info.album) { - const filepath = `./output/${host.label}/${host.id} - ${info.album.title}/${index + 1} - ${item.id}${path.extname(url.parse(item.url).pathname)}`; - console.log(filepath); - - return saveItem(filepath, stream, item); - } - - const filepath = `./output/${host.label}/${item.id}${path.extname(url.parse(item.url).pathname)}`; - console.log(filepath); - - return saveItem(filepath, stream, item); - })); - */ + return null; } function fetchSavePosts(userPosts, ep) { @@ -107,15 +101,15 @@ function fetchSavePosts(userPosts, ep) { async function initApp() { try { const ep = new exiftool.ExiftoolProcess(exiftoolBin); + await ep.open(); - if (args.fetch) { - await getDirectContent(args.fetch, ep); + if (args.fetch || args.fileDirect) { + await getCompleteContents(ep); return; } const userPosts = await getCompletePosts(); - await ep.open(); await fetchSavePosts(userPosts, ep); await ep.close(); diff --git a/src/cli.js b/src/cli.js index 46a449e..7a1bf4b 100644 --- a/src/cli.js +++ b/src/cli.js @@ -29,6 +29,11 @@ function getArgs() { type: 'string', alias: 'fetch', }) + .option('file-direct', { + describe: 'Load direct content URLs from file', + type: 'string', + alias: 'file-fetch', + }) .option('limit', { describe: 'Maximum amount of posts to fetch per supplied user (!), after filtering out ignored, cross- and reposts', type: 'number', diff --git a/src/dissectLink.js b/src/dissectLink.js index 27d6a8f..2f19030 100644 --- a/src/dissectLink.js +++ b/src/dissectLink.js @@ -42,6 +42,10 @@ const hosts = [{ method: 'gfycat', label: 'gfycat', pattern: new UrlPattern('http(s)\\://(:server.)gfycat.com/(gifs/detail/)(:id-mobile)(:id-size_restricted)(:id)(.:ext)(?*)'), +}, { + method: 'erome', + label: 'erome', + pattern: new UrlPattern('http(s)\\://(www.)erome.com/a/:id(?*)'), }, { method: 'eroshareAlbum', label: 'eroshare', diff --git a/src/methods/erome.js b/src/methods/erome.js index 3c1c7e6..a544052 100644 --- a/src/methods/erome.js +++ b/src/methods/erome.js @@ -1,30 +1,76 @@ 'use strict'; +const config = require('config'); const fetch = require('node-fetch'); const cheerio = require('cheerio'); const base = 'https://www.erome.com/'; -async function erome(host) { - const res = await fetch(`${base}a/${host.id}`); +async function erome(host, post) { + const url = `${base}a/${host.id}`; + const res = await fetch(url); - if (res.ok) { + if (!res.ok) { throw new Error(`Unable to retrieve info for Erome album '${host.id}' :(`); } const html = await res.text(); - const $ = cheerio.load(html); - const videoUrls = $('video').toArray().map((videoEl) => { - const sourceHd = $(videoEl).find('source[label="HD"]'); - const sourceSd = $(videoEl).find('source[label="SD"]'); - console.log(sourceHd.attr('src')); + const title = $('meta[property="og:title"]').attr('content') || $('meta[property="twitter:title"]').attr('content'); - return sourceHd ? base + sourceHd.attr('src') : base + sourceSd.attr('src'); + const items = $('.media-group').toArray().map((mediaItem) => { + const mediaElement = $(mediaItem); + const videoElement = mediaElement.find('.video video'); + + const id = mediaElement.attr('id'); + const itemTitle = mediaElement.find('h2.media-title').text(); + + if (videoElement.length) { + const sourceHd = videoElement.find('source[label="HD"]'); + const sourceSd = videoElement.find('source[label="SD"]'); + + return { + id, + title: itemTitle, + url: sourceHd.length ? sourceHd.attr('src') : sourceSd.attr('src'), + type: (sourceHd.length ? sourceHd.attr('type') : sourceSd.attr('type')) || 'video/mp4', + }; + } + + const img = mediaElement.find('.img-front').attr('data-src'); + + return { + id, + title: itemTitle, + url: img, + }; }); - console.log(videoUrls); + const extract = config.library.extractSingleAlbumItem && (items.length === 1); + + if (extract) { + console.log('\x1b[36m%s\x1b[0m', `Extracting single item from album '${url}' (${post ? post.url : 'no post'})`); + } + + return { + album: extract ? null : { + id: host.id, + url, + title, + description: null, + datetime: null, + }, + items: items.map(item => ({ + extracted: extract, + id: item.id, + url: item.url, + title: item.title || title || null, + description: null, + type: item.type || 'image/jpeg', + datetime: null, + })), + }; } module.exports = erome;