Fixed pagination for Killergram, added pagination to actor profiles, added Killergram Platinum. Added experimental m3u8 stream support.
src/media.js
@@ -1,14 +1,16 @@
 'use strict';

 const config = require('config');
+const util = require('util');
 const Promise = require('bluebird');
 const fs = require('fs');
 const fsPromises = require('fs').promises;
 const path = require('path');
-const { PassThrough } = require('stream');
+const stream = require('stream');
 const nanoid = require('nanoid/non-secure');
 const mime = require('mime');
 // const fileType = require('file-type');
+const youtubeDl = require('youtube-dl');
 const sharp = require('sharp');
 const blake2 = require('blake2');
@@ -18,6 +20,8 @@ const knex = require('./knex');
 const http = require('./utils/http');
 const { get } = require('./utils/qu');

+const pipeline = util.promisify(stream.pipeline);
+
 function sampleMedias(medias, limit = config.media.limit, preferLast = true) {
 	// limit media sets, use extras as fallbacks
 	if (medias.length <= limit) {
@@ -77,7 +81,7 @@ function itemsByKey(items, key) {
 }

 function toBaseSource(rawSource) {
-	if (rawSource && (rawSource.src || (rawSource.extract && rawSource.url))) {
+	if (rawSource && (rawSource.src || (rawSource.extract && rawSource.url) || rawSource.stream)) {
 		const baseSource = {};

 		if (rawSource.src) baseSource.src = rawSource.src;
@@ -87,6 +91,11 @@ function toBaseSource(rawSource) {
 		if (rawSource.url) baseSource.url = rawSource.url;
 		if (rawSource.extract) baseSource.extract = rawSource.extract;

+		if (rawSource.stream) {
+			baseSource.src = rawSource.stream;
+			baseSource.stream = rawSource.stream;
+		}
+
 		if (rawSource.referer) baseSource.referer = rawSource.referer;
 		if (rawSource.host) baseSource.host = rawSource.host;
 		if (rawSource.attempts) baseSource.attempts = rawSource.attempts;
@@ -368,19 +377,57 @@ async function storeFile(media) {
 	}
 }

+async function fetchHttpSource(source, tempFileTarget, hashStream) {
+	const res = await http.get(source.src, {
+		...(source.referer && { referer: source.referer }),
+		...(source.host && { host: source.host }),
+	}, {
+		stream: true, // sources are fetched in parallel, don't gobble up memory
+		transforms: [hashStream],
+		destination: tempFileTarget,
+		queueMethod: source.queueMethod || null, // use http module's default
+	});
+
+	if (!res.ok) {
+		throw new Error(`Response ${res.status} not OK`);
+	}
+
+	return {
+		mimetype: res.headers['content-type'] || mime.getType(new URL(source.src).pathname),
+	};
+}
+
+async function fetchStreamSource(source, tempFileTarget, hashStream) {
+	const video = youtubeDl(source.stream);
+
+	video.on('info', (info) => {
+		console.log(info);
+		logger.verbose(`Starting fetching stream from ${source.stream}`);
+	});
+
+	video.on('end', (info) => {
+		console.log(info);
+		logger.verbose(`Finished fetching stream from ${source.stream}`);
+	});
+
+	await pipeline(video, hashStream, tempFileTarget);
+
+	return { mimetype: null };
+}
+
 async function fetchSource(source, baseMedia) {
 	logger.silly(`Fetching media from ${source.src}`);
 	// attempts

 	async function attempt(attempts = 1) {
+		const hasher = new blake2.Hash('blake2b', { digestLength: 24 });
+		hasher.setEncoding('hex');
+
 		try {
 			const tempFilePath = path.join(config.media.path, 'temp', `${baseMedia.id}`);

-			const hasher = new blake2.Hash('blake2b', { digestLength: 24 });
-			hasher.setEncoding('hex');
-
 			const tempFileTarget = fs.createWriteStream(tempFilePath);
-			const hashStream = new PassThrough();
+			const hashStream = new stream.PassThrough();
 			let size = 0;

 			hashStream.on('data', (chunk) => {
@@ -388,28 +435,18 @@ async function fetchSource(source, baseMedia) {
 				hasher.write(chunk);
 			});

-			const res = await http.get(source.src, {
-				...(source.referer && { referer: source.referer }),
-				...(source.host && { host: source.host }),
-			}, {
-				stream: true, // sources are fetched in parallel, don't gobble up memory
-				transforms: [hashStream],
-				destination: tempFileTarget,
-				queueMethod: source.queueMethod || null, // use http module's default
-			});
+			console.log(source);
+
+			const { mimetype } = source.stream
+				? await fetchStreamSource(source, tempFileTarget, hashStream)
+				: await fetchHttpSource(source, tempFileTarget, hashStream);

 			hasher.end();

 			const hash = hasher.read();
-			const { pathname } = new URL(source.src);
-			const mimetype = res.headers['content-type'] || mime.getType(pathname);
 			const [type, subtype] = mimetype.split('/');
 			const extension = mime.getExtension(mimetype);

-			if (!res.ok) {
-				throw new Error(`Response ${res.status} not OK`);
-			}
-
 			return {
 				...source,
 				file: {
@@ -425,6 +462,7 @@ async function fetchSource(source, baseMedia) {
 				},
 			};
 		} catch (error) {
+			hasher.end();
 			const maxAttempts = source.attempts || 3;

 			logger.warn(`Failed attempt ${attempts}/${maxAttempts} to fetch ${source.src}: ${error.message}`);
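Note on the new stream path: toBaseSource now accepts a `stream` property and mirrors it into `src`, so existing code keyed on `src` keeps working, while fetchSource dispatches stream sources to fetchStreamSource (youtube-dl piped through the hasher into the temp file) instead of fetchHttpSource. A minimal sketch of how a scraper-supplied source flows through, inside media.js's scope and assuming a hypothetical m3u8 URL:

	// Hypothetical source as a scraper would emit it; the URLs are made up.
	const rawSource = {
		stream: 'https://cdn.example.com/trailers/master.m3u8',
		referer: 'https://example.com/video/1234',
	};

	const baseSource = toBaseSource(rawSource);
	// baseSource.src === baseSource.stream, so logging and de-duplication
	// that key on `src` still work for stream-only sources.

	// fetchSource picks the fetcher the same way the diff above does:
	const fetcher = baseSource.stream ? fetchStreamSource : fetchHttpSource;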
@@ -36,23 +36,51 @@ function scrapeScene({ query, html }, url) {
 	release.actors = query.all('.modelstarring a', true);

 	const duration = html.match(/(\d+) minutes/)?.[1];
+	const channelUrl = query.url('a[href*="ct=site"]');

 	if (duration) release.duration = Number(duration) * 60;

+	if (channelUrl) {
+		const siteName = new URL(`https://killergram.com/${channelUrl}`).searchParams.get('site');
+		release.channel = slugify(siteName, '');
+	}
+
 	[release.poster, ...release.photos] = query.imgs('img[src*="/models"]');

 	return release;
 }

-function scrapeProfile({ query }, actorName) {
+async function fetchActorReleases({ query }, url, remainingPages, actorName, accReleases = []) {
+	const releases = scrapeAll({ query }).filter(release => release.actors.includes(actorName));
+
+	if (remainingPages.length > 0) {
+		const { origin, pathname, searchParams } = new URL(url);
+		searchParams.set('p', remainingPages[0]);
+
+		const nextPage = `${origin}${pathname}?${searchParams}`;
+		const res = await qu.get(nextPage, '#episodes > table');
+
+		if (res.ok) {
+			return fetchActorReleases(res.item, url, remainingPages.slice(1), actorName, accReleases.concat(releases));
+		}
+	}
+
+	return accReleases.concat(releases);
+}
+
+async function scrapeProfile({ query, window }, actorName, url, include) {
 	const profile = {};

-	profile.releases = scrapeAll({ query }).filter(release => release.actors.includes(actorName));
+	if (include.releases) {
+		const availablePages = query.all('.pageboxdropdown option', 'value');
+		profile.releases = await fetchActorReleases(qu.init(query.q('#episodes > table'), window), url, availablePages.slice(1), actorName);
+	}

 	return profile;
 }

 async function fetchLatest(channel, page = 1) {
-	const res = await qu.get(`${channel.url}&p=${page}`, '#episodes > table');
+	const res = await qu.get(`${channel.url}&p=${((page - 1) * 15) + 1}`, '#episodes > table');

 	return res.ok ? scrapeAll(res.item, channel) : res.status;
 }
@@ -63,11 +91,11 @@ async function fetchScene(url, channel) {
 	return res.ok ? scrapeScene(res.item, url, channel) : res.status;
 }

-async function fetchProfile(actorName) {
+async function fetchProfile(actorName, entity, include) {
 	const url = `http://killergram.com/episodes.asp?page=episodes&model=${encodeURI(actorName)}&ct=model`;
-	const res = await qu.get(url, '#episodes > table');
+	const res = await qu.get(url, '#content');

-	return res.ok ? scrapeProfile(res.item, actorName) : res.status;
+	return res.ok ? scrapeProfile(res.item, actorName, url, include) : res.status;
 }

 module.exports = {
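The fetchLatest change above is the Killergram pagination fix from the commit message: the site's `p` parameter appears to be the 1-based index of the first episode on a page rather than a page number. Assuming 15 episodes per page (the assumption implied by the formula), the mapping works out as:

	// Assumption taken from the formula in fetchLatest: 15 episodes per page,
	// `p` = 1-based index of the page's first episode.
	const pageToParam = page => ((page - 1) * 15) + 1;

	[1, 2, 3].map(pageToParam); // => [1, 16, 31]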
@@ -9,7 +9,7 @@ const slugify = require('../utils/slugify');

 function extractTitle(originalTitle) {
 	const titleComponents = originalTitle.split(' ');
-	const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV)\d+/); // detect studio prefixes
+	const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM)\d+/); // detect studio prefixes
 	const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
 	const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
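The regex change only appends a CM studio prefix; the behavior is otherwise unchanged. With a made-up title ending in a CM shoot ID:

	// 'Back Alley Encounter CM012' is a hypothetical title for illustration.
	const components = 'Back Alley Encounter CM012'.split(' ');
	const match = components.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM)\d+/);

	match[0];                          // => 'CM012' (shootId)
	components.slice(0, -1).join(' '); // => 'Back Alley Encounter' (title)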
@@ -1,10 +1,8 @@
 'use strict';

 const moment = require('moment');
-// const fs = require('fs');
-// const m3u8stream = require('m3u8stream');

-// const logger = require('../logger')(__filename);
+const logger = require('../logger')(__filename);
 const http = require('../utils/http');
 const qu = require('../utils/qu');
 const slugify = require('../utils/slugify');
@@ -45,10 +43,11 @@ function scrapeAll(scenes, entity) {
 	});
 }

-async function scrapeScene({ query }, url, _entity) {
+async function scrapeScene({ query }, url) {
 	const release = {};
+	const { pathname, origin, host } = new URL(url);

-	const entryId = new URL(url).pathname.match(/\/video\/(\d+)/)[1];
+	const entryId = pathname.match(/\/video\/(\d+)/)[1];
 	release.entryId = entryId;

 	const title = query.meta('name=title');
@@ -74,30 +73,19 @@ async function scrapeScene({ query }, url, _entity) {
 	release.poster = query.q('.detail-hero').style['background-image'].match(/url\((.+)\)/)[1];
 	release.photos = query.imgs('.detail-grabs img');

-	/*
-	// example https://video.pictalk.com/5d13d6e5f7533152a61ee20e/2020-07-06/ab92b1e24d1c249e508bf5c73f047baf.m3u8
-	m3u8stream('https://video.pictalk.com/5d13d6e5f7533152a61ee20e/2020-07-06/ab92b1e24d1c249e508bf5c73f047baf.m3u8')
-		.pipe(fs.createWriteStream('./test.mp4'));
-
-	const streamData = await http.get(`${entity.url}/video/source/${entryId}`, {
-		host: new URL(entity.url).host,
+	const streamData = await http.get(`${origin}/video/source/${entryId}`, {
+		host,
 		referer: url,
-	});
+	}, { queueMethod: '5s' });

 	if (streamData.ok && streamData.body.status === 'success') {
-		console.log(streamData.body);
-
-		await m3u8stream(streamData.body.link)
-			.pipe(fs.createWriteStream('./trailer.mp4'))
-			.on('progress', status => console.log(status))
-			.on('error', error => console.log(error));
+		release.trailer = {
+			stream: streamData.body.link,
+		};
+	} else {
+		logger.warn(`Failed to fetch trailer for ${url}: ${streamData.ok ? streamData.body.status : streamData.status }`);
 	}

-	if (streamData.body.status !== 'success') {
-		logger.warn(`Could not retrieve trailer from ${entity.name} (Teen Core Club): ${streamData.body.status}`);
-	}
-	*/
-
 	return release;
 }
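The rewritten scrapeScene derives origin and host from the scene URL itself rather than from the entity record, and instead of downloading the trailer inline (the removed m3u8stream experiment) it only records the stream URL in `release.trailer`, leaving the actual download to the stream support added in media.js above. What the destructured URL parts hold, for a hypothetical scene URL:

	// The domain is illustrative; only the /video/:id shape comes from the code above.
	const { pathname, origin, host } = new URL('https://www.example.com/video/1234');
	// origin   => 'https://www.example.com' (base for the /video/source/:entryId call)
	// host     => 'www.example.com'         (passed as the `host` option to http.get)
	// pathname => '/video/1234'

	const entryId = pathname.match(/\/video\/(\d+)/)[1]; // => '1234'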