Fixed pagination for Killergram, added pagination to actor profiles, added Killergram Platinum. Added experimental m3u8 stream support.

This commit is contained in:
ThePendulum 2020-07-10 02:01:23 +02:00
parent 067c4a62ff
commit 4c551cc15f
7 changed files with 280 additions and 68 deletions

179
package-lock.json generated
View File

@ -5713,12 +5713,28 @@
"minimalistic-assert": "^1.0.1"
}
},
"hashish": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/hashish/-/hashish-0.0.4.tgz",
"integrity": "sha1-bWC8b/r3Ebav1g5CbQd5iAFOZVQ=",
"requires": {
"traverse": ">=0.2.4"
}
},
"he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
"dev": true
},
"hh-mm-ss": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/hh-mm-ss/-/hh-mm-ss-1.2.0.tgz",
"integrity": "sha512-f4I9Hz1dLpX/3mrEs7yq30+FiuO3tt5NWAqAGeBTaoeoBfB8vhcQ3BphuDc5DjZb/K809agqrAaFlP0jhEU/8w==",
"requires": {
"zero-fill": "^2.2.3"
}
},
"hmac-drbg": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/hmac-drbg/-/hmac-drbg-1.0.1.tgz",
@ -5821,6 +5837,11 @@
"debug": "^3.1.0"
}
},
"human-signals": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/human-signals/-/human-signals-1.1.1.tgz",
"integrity": "sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw=="
},
"iconv-lite": {
"version": "0.5.1",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.5.1.tgz",
@ -6906,15 +6927,6 @@
}
}
},
"m3u8stream": {
"version": "0.7.1",
"resolved": "https://registry.npmjs.org/m3u8stream/-/m3u8stream-0.7.1.tgz",
"integrity": "sha512-z6ldnAdhbuWOL6LmMkwptSZGzj+qbRytMKLTbNicwF/bJMjf9U9lqD57RNQUFecvWadEkzy6PDjcNJFFgi19uQ==",
"requires": {
"miniget": "^1.6.1",
"sax": "^1.2.4"
}
},
"make-dir": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/make-dir/-/make-dir-2.1.0.tgz",
@ -7138,6 +7150,11 @@
}
}
},
"merge-stream": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
"integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="
},
"mersenne-twister": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/mersenne-twister/-/mersenne-twister-1.1.0.tgz",
@ -7219,11 +7236,6 @@
"webpack-sources": "^1.1.0"
}
},
"miniget": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/miniget/-/miniget-1.7.2.tgz",
"integrity": "sha512-USPNNK2bnHLOplX8BZVMehUkyQizS/DFpBdoH0TS+fM+hQoLNg9tWg4MeY9wE8gfY0pbzmx5UBEODujt3Lz8AA=="
},
"minimalistic-assert": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz",
@ -10431,6 +10443,14 @@
"mixme": "^0.3.1"
}
},
"streamify": {
"version": "0.2.9",
"resolved": "https://registry.npmjs.org/streamify/-/streamify-0.2.9.tgz",
"integrity": "sha512-8pUxeLEef9UO1FxtTt5iikAiyzGI4SZRnGuJ3sz8axZ5Xk+/7ezEV5kuJQsMEFxw7AKYw3xp0Ow+20mmSaJbQQ==",
"requires": {
"hashish": "~0.0.4"
}
},
"strict-uri-encode": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
@ -10509,6 +10529,11 @@
"integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=",
"dev": true
},
"strip-final-newline": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz",
"integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA=="
},
"strip-indent": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-1.0.1.tgz",
@ -10928,6 +10953,11 @@
"punycode": "^2.1.0"
}
},
"traverse": {
"version": "0.6.6",
"resolved": "https://registry.npmjs.org/traverse/-/traverse-0.6.6.tgz",
"integrity": "sha1-y99WD9e5r2MlAv7UD5GMFX6pcTc="
},
"trim-newlines": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/trim-newlines/-/trim-newlines-1.0.0.tgz",
@ -12062,6 +12092,127 @@
"camelcase": "^5.0.0",
"decamelize": "^1.2.0"
}
},
"youtube-dl": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/youtube-dl/-/youtube-dl-3.0.2.tgz",
"integrity": "sha512-LFFfpsYbRLpqKsnb4gzbnyN7fm190tJw3gJVSvfoEfnb/xYIPNT6i9G3jdzPDp/U5cwB3OSq63nUa7rUwxXAGA==",
"requires": {
"debug": "~4.1.1",
"execa": "~3.2.0",
"hh-mm-ss": "~1.2.0",
"mkdirp": "~0.5.1",
"request": "~2.88.0",
"streamify": "~0.2.9",
"universalify": "~0.1.2"
},
"dependencies": {
"cross-spawn": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
"requires": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
"which": "^2.0.1"
}
},
"debug": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz",
"integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==",
"requires": {
"ms": "^2.1.1"
}
},
"execa": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/execa/-/execa-3.2.0.tgz",
"integrity": "sha512-kJJfVbI/lZE1PZYDI5VPxp8zXPO9rtxOkhpZ0jMKha56AI9y2gGVC6bkukStQf0ka5Rh15BA5m7cCCH4jmHqkw==",
"requires": {
"cross-spawn": "^7.0.0",
"get-stream": "^5.0.0",
"human-signals": "^1.1.1",
"is-stream": "^2.0.0",
"merge-stream": "^2.0.0",
"npm-run-path": "^4.0.0",
"onetime": "^5.1.0",
"p-finally": "^2.0.0",
"signal-exit": "^3.0.2",
"strip-final-newline": "^2.0.0"
}
},
"get-stream": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.1.0.tgz",
"integrity": "sha512-EXr1FOzrzTfGeL0gQdeFEvOMm2mzMOglyiOXSTpPC+iAjAKftbr3jpCMWynogwYnM+eSj9sHGc6wjIcDvYiygw==",
"requires": {
"pump": "^3.0.0"
}
},
"is-stream": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.0.tgz",
"integrity": "sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw=="
},
"mimic-fn": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz",
"integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg=="
},
"npm-run-path": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz",
"integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==",
"requires": {
"path-key": "^3.0.0"
}
},
"onetime": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.0.tgz",
"integrity": "sha512-5NcSkPHhwTVFIQN+TUqXoS5+dlElHXdpAWu9I0HP20YOtIi+aZ0Ct82jdlILDxjLEAWwvm+qj1m6aEtsDVmm6Q==",
"requires": {
"mimic-fn": "^2.1.0"
}
},
"p-finally": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/p-finally/-/p-finally-2.0.1.tgz",
"integrity": "sha512-vpm09aKwq6H9phqRQzecoDpD8TmVyGw70qmWlyq5onxY7tqyTTFVvxMykxQSQKILBSFlbXpypIw2T1Ml7+DDtw=="
},
"path-key": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="
},
"shebang-command": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
"requires": {
"shebang-regex": "^3.0.0"
}
},
"shebang-regex": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="
},
"which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
"requires": {
"isexe": "^2.0.0"
}
}
}
},
"zero-fill": {
"version": "2.2.3",
"resolved": "https://registry.npmjs.org/zero-fill/-/zero-fill-2.2.3.tgz",
"integrity": "sha1-o97wa6XjmuZEhQu0yirUEStIVek="
}
}
}

View File

@ -98,7 +98,6 @@
"knex": "^0.20.10",
"knex-migrate": "^1.7.4",
"longjohn": "^0.2.12",
"m3u8stream": "^0.7.1",
"mime": "^2.4.4",
"moment": "^2.24.0",
"nanoid": "^2.1.11",
@ -125,6 +124,7 @@
"vuex": "^3.1.2",
"winston": "^3.2.1",
"winston-daily-rotate-file": "^4.4.2",
"yargs": "^13.3.0"
"yargs": "^13.3.0",
"youtube-dl": "^3.0.2"
}
}

View File

@ -2937,6 +2937,13 @@ const sites = [
hasLogo: false,
parent: 'killergram',
},
{
name: 'Killergram Platinum',
url: 'http://killergram.com/episodes.asp?page=episodes&ct=site&site=killergram%20platinum',
slug: 'killergramplatinum',
hasLogo: false,
parent: 'killergram',
},
{
name: 'Killergram Cuts',
url: 'http://killergram.com/episodes.asp?page=episodes&ct=site&site=killergram%20cuts',

View File

@ -1,14 +1,16 @@
'use strict';
const config = require('config');
const util = require('util');
const Promise = require('bluebird');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
const { PassThrough } = require('stream');
const stream = require('stream');
const nanoid = require('nanoid/non-secure');
const mime = require('mime');
// const fileType = require('file-type');
const youtubeDl = require('youtube-dl');
const sharp = require('sharp');
const blake2 = require('blake2');
@ -18,6 +20,8 @@ const knex = require('./knex');
const http = require('./utils/http');
const { get } = require('./utils/qu');
const pipeline = util.promisify(stream.pipeline);
function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
// limit media sets, use extras as fallbacks
if (medias.length <= limit) {
@ -77,7 +81,7 @@ function itemsByKey(items, key) {
}
function toBaseSource(rawSource) {
if (rawSource && (rawSource.src || (rawSource.extract && rawSource.url))) {
if (rawSource && (rawSource.src || (rawSource.extract && rawSource.url) || rawSource.stream)) {
const baseSource = {};
if (rawSource.src) baseSource.src = rawSource.src;
@ -87,6 +91,11 @@ function toBaseSource(rawSource) {
if (rawSource.url) baseSource.url = rawSource.url;
if (rawSource.extract) baseSource.extract = rawSource.extract;
if (rawSource.stream) {
baseSource.src = rawSource.stream;
baseSource.stream = rawSource.stream;
}
if (rawSource.referer) baseSource.referer = rawSource.referer;
if (rawSource.host) baseSource.host = rawSource.host;
if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
@ -368,19 +377,57 @@ async function storeFile(media) {
}
}
/**
 * Fetch a media source through the project's http helper.
 *
 * The response is requested in streaming mode, with the hash stream handed
 * to the helper as a transform and the temp file stream as the destination.
 *
 * @param {Object} source - media source; `src` is the URL to request, `referer`, `host` and `queueMethod` are optional
 * @param {Object} tempFileTarget - write stream passed to the http helper as `destination`
 * @param {Object} hashStream - stream passed to the http helper as the only entry of `transforms`
 * @returns {Promise<{ mimetype: * }>} the response's content-type header, or otherwise whatever `mime.getType` yields for the URL path
 * @throws {Error} when the response is not OK
 */
async function fetchHttpSource(source, tempFileTarget, hashStream) {
	// only forward the headers the source actually specifies
	const headers = {};

	if (source.referer) headers.referer = source.referer;
	if (source.host) headers.host = source.host;

	const options = {
		stream: true, // sources are fetched in parallel, don't gobble up memory
		transforms: [hashStream],
		destination: tempFileTarget,
		queueMethod: source.queueMethod || null, // use http module's default
	};

	const res = await http.get(source.src, headers, options);

	if (!res.ok) {
		throw new Error(`Response ${res.status} not OK`);
	}

	const headerType = res.headers['content-type'];

	if (headerType) {
		return { mimetype: headerType };
	}

	// no content-type header, look the type up by the path of the source URL
	return { mimetype: mime.getType(new URL(source.src).pathname) };
}
/**
 * Fetch a stream source through youtube-dl, piping the output through the
 * hash stream into the temp file.
 *
 * @param {Object} source - media source; `stream` is the stream URL handed to youtube-dl
 * @param {Object} tempFileTarget - write stream the download ends up in
 * @param {Object} hashStream - stream the data passes through on its way to the target
 * @returns {Promise<{ mimetype: * }>} type looked up from the file info youtube-dl reports, `null` when nothing was reported
 */
async function fetchStreamSource(source, tempFileTarget, hashStream) {
	const video = youtubeDl(source.stream);
	let mimetype = null;

	video.on('info', (info) => {
		logger.verbose(`Starting fetching stream from ${source.stream}`);

		// The caller splits the returned mimetype, so a hard-coded null made every stream fetch throw.
		// NOTE(review): assumes the info object carries `_filename` and/or `ext` - confirm against the youtube-dl package
		const fileHint = info && (info._filename || info.ext);

		if (fileHint) {
			mimetype = mime.getType(fileHint);
		}
	});

	video.on('end', () => {
		logger.verbose(`Finished fetching stream from ${source.stream}`);
	});

	await pipeline(video, hashStream, tempFileTarget);

	return { mimetype };
}
async function fetchSource(source, baseMedia) {
logger.silly(`Fetching media from ${source.src}`);
// attempts
async function attempt(attempts = 1) {
const hasher = new blake2.Hash('blake2b', { digestLength: 24 });
hasher.setEncoding('hex');
try {
const tempFilePath = path.join(config.media.path, 'temp', `${baseMedia.id}`);
const hasher = new blake2.Hash('blake2b', { digestLength: 24 });
hasher.setEncoding('hex');
const tempFileTarget = fs.createWriteStream(tempFilePath);
const hashStream = new PassThrough();
const hashStream = new stream.PassThrough();
let size = 0;
hashStream.on('data', (chunk) => {
@ -388,28 +435,18 @@ async function fetchSource(source, baseMedia) {
hasher.write(chunk);
});
const res = await http.get(source.src, {
...(source.referer && { referer: source.referer }),
...(source.host && { host: source.host }),
}, {
stream: true, // sources are fetched in parallel, don't gobble up memory
transforms: [hashStream],
destination: tempFileTarget,
queueMethod: source.queueMethod || null, // use http module's default
});
console.log(source);
const { mimetype } = source.stream
? await fetchStreamSource(source, tempFileTarget, hashStream)
: await fetchHttpSource(source, tempFileTarget, hashStream);
hasher.end();
const hash = hasher.read();
const { pathname } = new URL(source.src);
const mimetype = res.headers['content-type'] || mime.getType(pathname);
const [type, subtype] = mimetype.split('/');
const extension = mime.getExtension(mimetype);
if (!res.ok) {
throw new Error(`Response ${res.status} not OK`);
}
return {
...source,
file: {
@ -425,6 +462,7 @@ async function fetchSource(source, baseMedia) {
},
};
} catch (error) {
hasher.end();
const maxAttempts = source.attempts || 3;
logger.warn(`Failed attempt ${attempts}/${maxAttempts} to fetch ${source.src}: ${error.message}`);

View File

@ -36,23 +36,51 @@ function scrapeScene({ query, html }, url) {
release.actors = query.all('.modelstarring a', true);
const duration = html.match(/(\d+) minutes/)?.[1];
const channelUrl = query.url('a[href*="ct=site"]');
if (duration) release.duration = Number(duration) * 60;
if (channelUrl) {
const siteName = new URL(`https://killergram.com/${channelUrl}`).searchParams.get('site');
release.channel = slugify(siteName, '');
}
[release.poster, ...release.photos] = query.imgs('img[src*="/models"]');
return release;
}
function scrapeProfile({ query }, actorName) {
/**
 * Gather an actor's releases from a listing page and the pages following it.
 *
 * The current page is scraped and filtered down to releases crediting the
 * actor. As long as pages remain, the next one is requested by setting the
 * `p` search parameter on the listing URL; gathering stops at the first
 * response that is not OK.
 *
 * @param {Object} context - scrape context of the current page; only `query` is used
 * @param {string} url - listing URL the page URLs are derived from
 * @param {Array} remainingPages - `p` values of the pages still to be fetched, in order
 * @param {string} actorName - name a release's `actors` must include to be kept
 * @param {Array} [accReleases=[]] - releases gathered so far
 * @returns {Promise<Array>} the accumulated releases followed by those of every page visited
 */
async function fetchActorReleases({ query }, url, remainingPages, actorName, accReleases = []) {
	let pageQuery = query;
	let pendingPages = remainingPages;
	let collected = accReleases;

	for (;;) {
		const pageReleases = scrapeAll({ query: pageQuery }).filter(release => release.actors.includes(actorName));

		if (!(pendingPages.length > 0)) {
			return collected.concat(pageReleases);
		}

		const nextUrl = new URL(url);
		nextUrl.searchParams.set('p', pendingPages[0]);

		const res = await qu.get(`${nextUrl.origin}${nextUrl.pathname}?${nextUrl.searchParams}`, '#episodes > table');

		if (!res.ok) {
			// next page unavailable, settle for what has been gathered so far
			return collected.concat(pageReleases);
		}

		collected = collected.concat(pageReleases);
		({ query: pageQuery } = res.item);
		pendingPages = pendingPages.slice(1);
	}
}
async function scrapeProfile({ query, window }, actorName, url, include) {
const profile = {};
profile.releases = scrapeAll({ query }).filter(release => release.actors.includes(actorName));
if (include.releases) {
const availablePages = query.all('.pageboxdropdown option', 'value');
profile.releases = await fetchActorReleases(qu.init(query.q('#episodes > table'), window), url, availablePages.slice(1), actorName);
}
return profile;
}
async function fetchLatest(channel, page = 1) {
const res = await qu.get(`${channel.url}&p=${page}`, '#episodes > table');
const res = await qu.get(`${channel.url}&p=${((page - 1) * 15) + 1}`, '#episodes > table');
return res.ok ? scrapeAll(res.item, channel) : res.status;
}
@ -63,11 +91,11 @@ async function fetchScene(url, channel) {
return res.ok ? scrapeScene(res.item, url, channel) : res.status;
}
async function fetchProfile(actorName) {
async function fetchProfile(actorName, entity, include) {
const url = `http://killergram.com/episodes.asp?page=episodes&model=${encodeURI(actorName)}&ct=model`;
const res = await qu.get(url, '#episodes > table');
const res = await qu.get(url, '#content');
return res.ok ? scrapeProfile(res.item, actorName) : res.status;
return res.ok ? scrapeProfile(res.item, actorName, url, include) : res.status;
}
module.exports = {

View File

@ -9,7 +9,7 @@ const slugify = require('../utils/slugify');
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV)\d+/); // detect studio prefixes
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM)\d+/); // detect studio prefixes
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

View File

@ -1,10 +1,8 @@
'use strict';
const moment = require('moment');
// const fs = require('fs');
// const m3u8stream = require('m3u8stream');
// const logger = require('../logger')(__filename);
const logger = require('../logger')(__filename);
const http = require('../utils/http');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
@ -45,10 +43,11 @@ function scrapeAll(scenes, entity) {
});
}
async function scrapeScene({ query }, url, _entity) {
async function scrapeScene({ query }, url) {
const release = {};
const { pathname, origin, host } = new URL(url);
const entryId = new URL(url).pathname.match(/\/video\/(\d+)/)[1];
const entryId = pathname.match(/\/video\/(\d+)/)[1];
release.entryId = entryId;
const title = query.meta('name=title');
@ -74,30 +73,19 @@ async function scrapeScene({ query }, url, _entity) {
release.poster = query.q('.detail-hero').style['background-image'].match(/url\((.+)\)/)[1];
release.photos = query.imgs('.detail-grabs img');
/*
// example https://video.pictalk.com/5d13d6e5f7533152a61ee20e/2020-07-06/ab92b1e24d1c249e508bf5c73f047baf.m3u8
m3u8stream('https://video.pictalk.com/5d13d6e5f7533152a61ee20e/2020-07-06/ab92b1e24d1c249e508bf5c73f047baf.m3u8')
.pipe(fs.createWriteStream('./test.mp4'));
const streamData = await http.get(`${entity.url}/video/source/${entryId}`, {
host: new URL(entity.url).host,
const streamData = await http.get(`${origin}/video/source/${entryId}`, {
host,
referer: url,
});
}, { queueMethod: '5s' });
if (streamData.ok && streamData.body.status === 'success') {
console.log(streamData.body);
await m3u8stream(streamData.body.link)
.pipe(fs.createWriteStream('./trailer.mp4'))
.on('progress', status => console.log(status))
.on('error', error => console.log(error));
release.trailer = {
stream: streamData.body.link,
};
} else {
logger.warn(`Failed to fetch trailer for ${url}: ${streamData.ok ? streamData.body.status : streamData.status }`);
}
if (streamData.body.status !== 'success') {
logger.warn(`Could not retrieve trailer from ${entity.name} (Teen Core Club): ${streamData.body.status}`);
}
*/
return release;
}