Added m3u8 stream support to media module. Added Elegant Angel. Added regex parameter to qu's number method. Various tags.

This commit is contained in:
DebaucheryLibrarian
2020-07-17 03:39:13 +02:00
parent 66d6322c1d
commit a88c2f0760
27 changed files with 222 additions and 223 deletions

View File

@@ -134,7 +134,7 @@ function toBaseActors(actorsOrNames, release) {
});
}
function curateActor(actor, withDetails = false) {
function curateActor(actor, withDetails = false, isProfile = false) {
if (!actor) {
return null;
}
@@ -174,7 +174,7 @@ function curateActor(actor, withDetails = false) {
hasPiercings: actor.has_piercings,
tattoos: actor.tattoos,
piercings: actor.piercings,
description: actor.description,
...(isProfile && { description: actor.description }),
placeOfBirth: actor.birth_country && {
country: {
alpha2: actor.birth_country.alpha2,
@@ -201,6 +201,7 @@ function curateActor(actor, withDetails = false) {
size: actor.avatar.size,
source: actor.avatar.source,
},
...(actor.profiles && { profiles: actor.profiles?.map(profile => curateActor(profile, true, true)) }),
}),
};
@@ -504,12 +505,14 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
try {
const scraper = scrapers[scraperSlug];
const entity = entitiesBySlug[scraperSlug] || null;
const context = {
...entitiesBySlug[scraperSlug],
...entity,
// legacy
site: entitiesBySlug[scraperSlug] || null,
network: entitiesBySlug[scraperSlug] || null,
entity: entitiesBySlug[scraperSlug] || null,
site: entity,
network: entity?.parent,
entity,
scraper: scraperSlug,
};
@@ -547,7 +550,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
return await curateProfile({
...actor,
...profile,
...context,
entity,
update: existingProfile?.id || false,
});
} catch (error) {
@@ -715,7 +718,8 @@ async function fetchActor(actorId) {
row_to_json(actor_alias) as alias,
row_to_json(birth_country) as birth_country,
row_to_json(residence_country) as residence_country,
row_to_json(media) as avatar
row_to_json(media) as avatar,
json_agg(actors_profiles) as profiles
`))
.modify((queryBuilder) => {
if (Number.isNaN(Number(actorId))) {
@@ -726,10 +730,12 @@ async function fetchActor(actorId) {
queryBuilder.where('actors.id', actorId);
})
.leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
.leftJoin('actors_profiles', 'actors.id', 'actors_profiles.actor_id')
.leftJoin('entities', 'entities.id', 'actors.entity_id')
.leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
.leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
.leftJoin('media', 'media.id', 'actors.avatar_media_id')
.groupBy('actors.id', 'entities.id', 'actor_alias.id', 'birth_country.alpha2', 'residence_country.alpha2', 'media.id')
.first();
return curateActor(actor, true);

View File

@@ -10,7 +10,7 @@ const stream = require('stream');
const nanoid = require('nanoid/non-secure');
const mime = require('mime');
// const fileType = require('file-type');
const youtubeDl = require('youtube-dl');
const ffmpeg = require('fluent-ffmpeg');
const sharp = require('sharp');
const blake2 = require('blake2');
@@ -418,22 +418,21 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
};
}
async function fetchStreamSource(source, tempFileTarget, hashStream) {
const video = youtubeDl(source.stream);
async function fetchStreamSource(source, tempFileTarget, tempFilePath, hashStream) {
const meta = { mimetype: 'video/mp4' };
video.on('info', (info) => {
console.log(info);
logger.verbose(`Starting fetching stream from ${source.stream}`);
});
video.on('end', (info) => {
console.log(info);
logger.verbose(`Finished fetching stream from ${source.stream}`);
});
const video = ffmpeg(source.stream)
.format('mp4')
.outputOptions(['-movflags frag_keyframe+empty_moov'])
.on('start', cmd => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`))
.on('error', error => logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`))
.pipe();
await pipeline(video, hashStream, tempFileTarget);
return { mimetype: null };
logger.verbose(`Finished fetching stream from ${source.stream}`);
return meta;
}
async function fetchSource(source, baseMedia) {
@@ -457,7 +456,7 @@ async function fetchSource(source, baseMedia) {
});
const { mimetype } = source.stream
? await fetchStreamSource(source, tempFileTarget, hashStream)
? await fetchStreamSource(source, tempFileTarget, tempFilePath, hashStream)
: await fetchHttpSource(source, tempFileTarget, hashStream);
hasher.end();

View File

@@ -7,8 +7,6 @@ function curateRelease(release, withMedia = false) {
return null;
}
const network = release.site_network || release.network;
return {
id: release.id,
entryId: release.entry_id,
@@ -18,22 +16,22 @@ function curateRelease(release, withMedia = false) {
date: release.date,
description: release.description,
duration: release.duration,
site: release.site && {
id: release.site.id,
name: release.site.name,
slug: release.site.slug,
},
network: network && {
id: network.id,
name: network.name,
slug: network.slug,
entity: release.entity && {
id: release.entity.id,
name: release.entity.name,
slug: release.entity.slug,
parent: release.entity.parent && {
id: release.entity.parent.id,
name: release.entity.parent.name,
slug: release.entity.parent.slug,
},
},
actors: (release.actors || []).map(actor => ({
id: actor.id,
name: actor.name,
slug: actor.slug,
gender: actor.gender,
networkId: actor.network_id,
entityId: actor.entity_id,
aliasFor: actor.alias_for,
})),
tags: (release.tags || []).map(tag => ({
@@ -67,23 +65,21 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
queryBuilder
.select(knex.raw(`
releases.id, releases.entry_id, releases.shoot_id, releases.title, releases.url, releases.date, releases.description, releases.duration, releases.created_at,
row_to_json(sites) as site,
row_to_json(networks) as network,
row_to_json(site_networks) as site_network,
row_to_json(entities) as entity,
row_to_json(parents) as parent,
COALESCE(json_agg(DISTINCT actors) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
COALESCE(json_agg(DISTINCT tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
`))
.where('type', type)
.leftJoin('sites', 'sites.id', 'releases.site_id')
.leftJoin('networks', 'networks.id', 'releases.network_id')
.leftJoin('networks as site_networks', 'site_networks.id', 'sites.network_id')
.where('releases.type', type)
.leftJoin('entities', 'entities.id', 'releases.entity_id')
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
.leftJoin('releases_actors', 'releases_actors.release_id', 'releases.id')
.leftJoin('actors', 'actors.id', 'releases_actors.actor_id')
.leftJoin('releases_tags', 'releases_tags.release_id', 'releases.id')
.leftJoin('tags', 'tags.id', 'releases_tags.tag_id')
.groupBy(knex.raw(`
releases.id, releases.entry_id, releases.shoot_id, releases.title, releases.url, releases.date, releases.description, releases.duration, releases.created_at,
sites.id, networks.id, site_networks.id
entities.id, parents.id
`));
if (withMedia) {

View File

@@ -0,0 +1,136 @@
'use strict';
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
/**
 * Build release stubs from the scene tiles of an overview page.
 *
 * Tiles that lack a link, numeric ID, title or poster no longer abort the whole
 * page: the affected field is left empty (null / unset) instead of throwing.
 *
 * @param {Array<{ query: Object }>} scenes - initialized tiles exposing the `query` helpers
 * @param {{ url: string }} channel - entity whose `url` is passed as origin for tile links
 * @returns {Object[]} one release per tile (url, entryId, title, date, actors, poster, trailer)
 */
function scrapeAll(scenes, channel) {
  const resolutionPattern = /\/res\/\d+/;

  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url });

    // the entry ID is the first numeric path segment; a tile without a usable link gets no ID
    release.entryId = release.url
      ? new URL(release.url).pathname.match(/\/(\d+)/)?.[1] ?? null
      : null;

    // the title is only available as the thumbnail's alt text, suffixed with "image"
    release.title = query.q('.scene-img-wrapper img', 'alt')?.replace(/\s*image$/i, '') ?? null;

    release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY');
    release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')?.split(/\s*\|\s*/).map(actor => actor.trim());

    const poster = query.img('.scene-img-wrapper img');

    if (poster) {
      // candidate sources at larger /res/ widths first, original thumbnail last
      release.poster = [
        poster.replace(resolutionPattern, '/res/1920'),
        poster.replace(resolutionPattern, '/res/1600'),
        poster,
      ];
    }

    release.trailer = { src: query.video('.scene-img-wrapper source') };

    return release;
  });
}
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: Object, html: string }} item - initialized page with `query` helpers and raw HTML
 * @param {string} url - absolute URL of the scene page; its first numeric path segment is the entry ID
 * @returns {Promise<Object>} release with entryId, title, date, duration (seconds), actors,
 *   tags, poster, photos and, when the preview embed yields a video, a trailer stream
 */
async function scrapeScene({ query, html }, url) {
  const release = {};

  release.entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1] ?? null;
  release.title = query.cnt('.scene-page .description');

  release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);

  // the page lists minutes; a missing or unparsable value must not turn into 0 or NaN seconds
  const minutes = query.number('.release-date:last-child');
  release.duration = Number.isFinite(minutes) ? minutes * 60 : null;

  release.actors = query.all('.video-performer').map((el) => {
    const name = qu.query.cnt(el, 'span');
    const avatar = qu.query.img(el, 'img', 'data-bgsrc');

    // performers without a background image are still valid actors
    if (!avatar) {
      return { name };
    }

    return {
      name,
      avatar: [
        avatar.replace(/\/actor\/\d+/, '/actor/1600'),
        avatar,
      ],
    };
  });

  release.tags = query.cnts('.tags a');

  release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');

  // per frame: larger renditions first, original thumbnail as the final fallback
  release.photos = query.imgs('#dv_frames a > img').map(photo => [
    photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
    photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`),
    photo,
  ]);

  // the numeric item ID embedded in the page source addresses the preview embed
  const trailerId = html?.match(/item: (\d+),/)?.[1];

  if (trailerId) {
    const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`;
    const trailerRes = await qu.get(trailerUrl);

    if (trailerRes.ok) {
      const stream = trailerRes.item.query.video();

      // only register a trailer when the embed actually exposes a video source
      if (stream) {
        release.trailer = { stream };
      }
    }
  }

  return release;
}
/**
 * Scrape an actor page.
 *
 * @param {{ query: Object, el: Object }} item - initialized actor page
 * @param {string} actorName - name the profile was requested for (not used while scraping)
 * @param {{ url: string }} entity - entity the page belongs to; forwarded to scrapeAll as link origin
 * @param {{ releases?: boolean }} [include] - when `releases` is truthy, the scenes listed on the
 *   page are returned instead of the profile
 * @returns {Object|Object[]} the profile (description, birthPlace, avatar), or the scraped scenes
 */
function scrapeProfile({ query, el }, actorName, entity, include) {
  const profile = {};

  profile.description = query.cnt('.bio-text');
  profile.birthPlace = query.cnt('.birth-place span');

  profile.avatar = query.img('.actor-photo img');

  if (include?.releases) {
    // NOTE(review): this returns the scenes in place of the profile; confirm the caller
    // expects that rather than a `releases` property on the profile.
    // scrapeAll reads the entity URL to resolve scene links, so the entity must be forwarded.
    return scrapeAll(qu.initAll(el, '.scene'), entity);
  }

  return profile;
}
/**
 * Fetch one page of the channel's tour listing and scrape its scene tiles.
 *
 * @param {{ url: string }} channel - channel whose `/tour` listing is requested
 * @param {number} [page=1] - listing page number
 * @returns {Promise<Object[]|number>} scraped releases, or the response status when the request failed
 */
async function fetchLatest(channel, page = 1) {
  // invalid certificate
  const requestOptions = { rejectUnauthorized: false };

  const response = await qu.getAll(
    `${channel.url}/tour?page=${page}`,
    '.scene-update',
    null,
    requestOptions,
  );

  return response.ok
    ? scrapeAll(response.items, channel)
    : response.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene page URL
 * @param {Object} channel - channel the scene belongs to; handed on to scrapeScene
 * @returns {Promise<Object|number>} scraped release, or the response status when the request failed
 */
async function fetchScene(url, channel) {
  // invalid certificate
  const requestOptions = { rejectUnauthorized: false };
  const response = await qu.get(url, null, null, requestOptions);

  if (!response.ok) {
    return response.status;
  }

  return scrapeScene(response.item, url, channel);
}
/**
 * Fetch an actor page from the entity's site and scrape it.
 *
 * @param {string} actorName - actor name; slugified with underscores to build the page URL
 * @param {{ url: string }} entity - entity whose site hosts the actor page
 * @param {Object} include - scrape options handed on to scrapeProfile
 * @returns {Promise<Object|Object[]|number>} scrapeProfile's result, or the response status on failure
 */
async function fetchProfile(actorName, entity, include) {
  const url = `${entity.url}/actors/${slugify(actorName, '_')}`;

  const res = await qu.get(url, null, null, {
    // invalid certificate; same request options as the scene and listing fetchers in this file
    rejectUnauthorized: false,
  });

  if (res.ok) {
    return scrapeProfile(res.item, actorName, entity, include);
  }

  return res.status;
}
// Functions exposed by this scraper module: listing, single-scene and actor-profile fetchers.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -3,13 +3,12 @@
const qu = require('../utils/q');
const slugify = require('../utils/slugify');
function scrapeAll(scenes, site) {
function scrapeAll(scenes) {
return scenes.map(({ query }) => {
const release = {};
const pathname = query.url('.title a');
release.entryId = pathname.match(/\/scene\/(\d+)/)[1];
release.url = `${site.url}${pathname}`;
release.url = query.url('.title a');
release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)[1];
release.title = query.cnt('.title a');
release.description = query.cnt('.description');
@@ -17,7 +16,8 @@ function scrapeAll(scenes, site) {
release.date = query.date('.date', 'MMM DD, YYYY');
release.actors = query.cnts('.models a.model');
release.poster = query.q('img.poster');
release.poster = query.img('img.poster');
release.teaser = { src: query.video('.teaser video') };
release.stars = query.number('.rating');
release.likes = query.number('.likes');
@@ -27,16 +27,16 @@ function scrapeAll(scenes, site) {
});
}
function scrapeScene({ query }) {
function scrapeScene({ query }, url) {
const release = {};
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)[1];
release.title = query.cnt('h3.title');
release.description = query.cnt('p.description');
[release.poster, ...release.photos] = query.imgs('.preview-thumb');
const trailer = query.video('.trailer video');
release.trailer = { src: trailer };
release.trailer = { src: query.video('.trailer video') };
console.log(release);
return release;
@@ -73,7 +73,7 @@ async function fetchScene(url, channel) {
const res = await qu.get(url);
if (res.ok) {
return scrapeScene(res.item, channel);
return scrapeScene(res.item, url, channel);
}
return res.status;

View File

@@ -140,10 +140,18 @@ function styles(context, selector, styleAttr) {
return elStyles;
}
/**
 * Read a value through `q` and convert it to a number.
 *
 * @param {Object} context - context handed to `q`
 * @param {string} selector - selector handed to `q`
 * @param {RegExp|null} [match=/\d+/] - pattern whose first match is converted; pass a falsy
 *   value to convert the whole value instead
 * @param {string} [attr='textContent'] - attribute handed to `q`
 * @returns {number|null} the parsed number, or null when there is no value or the pattern
 *   matches nothing (previously that case produced NaN)
 */
function number(context, selector, match = /\d+/, attr = 'textContent') {
  const value = q(context, selector, attr);

  if (!value) {
    return null;
  }

  if (!match) {
    return Number(value);
  }

  const matched = value.match(match)?.[0];

  // Number(undefined) is NaN; report "nothing found" the same way as a missing value
  return matched === undefined ? null : Number(matched);
}
function meta(context, selector, attrArg = 'content', applyTrim = true) {
@@ -280,6 +288,8 @@ const quFuncs = {
date,
dur: duration,
duration,
element: q,
el: q,
exists,
image,
images,