Added m3u8 stream support to media module. Added Elegant Angel. Added regex parameter to qu's number method. Various tags.
This commit is contained in:
@@ -134,7 +134,7 @@ function toBaseActors(actorsOrNames, release) {
|
||||
});
|
||||
}
|
||||
|
||||
function curateActor(actor, withDetails = false) {
|
||||
function curateActor(actor, withDetails = false, isProfile = false) {
|
||||
if (!actor) {
|
||||
return null;
|
||||
}
|
||||
@@ -174,7 +174,7 @@ function curateActor(actor, withDetails = false) {
|
||||
hasPiercings: actor.has_piercings,
|
||||
tattoos: actor.tattoos,
|
||||
piercings: actor.piercings,
|
||||
description: actor.description,
|
||||
...(isProfile && { description: actor.description }),
|
||||
placeOfBirth: actor.birth_country && {
|
||||
country: {
|
||||
alpha2: actor.birth_country.alpha2,
|
||||
@@ -201,6 +201,7 @@ function curateActor(actor, withDetails = false) {
|
||||
size: actor.avatar.size,
|
||||
source: actor.avatar.source,
|
||||
},
|
||||
...(actor.profiles && { profiles: actor.profiles?.map(profile => curateActor(profile, true, true)) }),
|
||||
}),
|
||||
};
|
||||
|
||||
@@ -504,12 +505,14 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
return await [].concat(source).reduce(async (outcome, scraperSlug) => outcome.catch(async () => {
|
||||
try {
|
||||
const scraper = scrapers[scraperSlug];
|
||||
const entity = entitiesBySlug[scraperSlug] || null;
|
||||
|
||||
const context = {
|
||||
...entitiesBySlug[scraperSlug],
|
||||
...entity,
|
||||
// legacy
|
||||
site: entitiesBySlug[scraperSlug] || null,
|
||||
network: entitiesBySlug[scraperSlug] || null,
|
||||
entity: entitiesBySlug[scraperSlug] || null,
|
||||
site: entity,
|
||||
network: entity?.parent,
|
||||
entity,
|
||||
scraper: scraperSlug,
|
||||
};
|
||||
|
||||
@@ -547,7 +550,7 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy
|
||||
return await curateProfile({
|
||||
...actor,
|
||||
...profile,
|
||||
...context,
|
||||
entity,
|
||||
update: existingProfile?.id || false,
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -715,7 +718,8 @@ async function fetchActor(actorId) {
|
||||
row_to_json(actor_alias) as alias,
|
||||
row_to_json(birth_country) as birth_country,
|
||||
row_to_json(residence_country) as residence_country,
|
||||
row_to_json(media) as avatar
|
||||
row_to_json(media) as avatar,
|
||||
json_agg(actors_profiles) as profiles
|
||||
`))
|
||||
.modify((queryBuilder) => {
|
||||
if (Number.isNaN(Number(actorId))) {
|
||||
@@ -726,10 +730,12 @@ async function fetchActor(actorId) {
|
||||
queryBuilder.where('actors.id', actorId);
|
||||
})
|
||||
.leftJoin('actors as actor_alias', 'actor_alias.id', 'actors.alias_for')
|
||||
.leftJoin('actors_profiles', 'actors.id', 'actors_profiles.actor_id')
|
||||
.leftJoin('entities', 'entities.id', 'actors.entity_id')
|
||||
.leftJoin('countries as birth_country', 'birth_country.alpha2', 'actors.birth_country_alpha2')
|
||||
.leftJoin('countries as residence_country', 'residence_country.alpha2', 'actors.residence_country_alpha2')
|
||||
.leftJoin('media', 'media.id', 'actors.avatar_media_id')
|
||||
.groupBy('actors.id', 'entities.id', 'actor_alias.id', 'birth_country.alpha2', 'residence_country.alpha2', 'media.id')
|
||||
.first();
|
||||
|
||||
return curateActor(actor, true);
|
||||
|
||||
27
src/media.js
27
src/media.js
@@ -10,7 +10,7 @@ const stream = require('stream');
|
||||
const nanoid = require('nanoid/non-secure');
|
||||
const mime = require('mime');
|
||||
// const fileType = require('file-type');
|
||||
const youtubeDl = require('youtube-dl');
|
||||
const ffmpeg = require('fluent-ffmpeg');
|
||||
const sharp = require('sharp');
|
||||
const blake2 = require('blake2');
|
||||
|
||||
@@ -418,22 +418,21 @@ async function fetchHttpSource(source, tempFileTarget, hashStream) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Transcode a stream source (e.g. an m3u8 playlist) to fragmented MP4 with
 * ffmpeg, piping the result through the hash stream into the temp file.
 *
 * The call only succeeds when both ffmpeg reports `end` and the pipeline has
 * flushed; an ffmpeg failure rejects instead of merely being logged, so a
 * truncated or empty file is never reported as a finished download.
 *
 * @param {{ stream: string }} source - media source; `stream` is handed to ffmpeg as input
 * @param {object} tempFileTarget - writable stream for the temp file
 * @param {string} tempFilePath - path of the temp file (not used here, kept for the caller's signature)
 * @param {object} hashStream - pass-through stream feeding the hasher
 * @returns {Promise<{ mimetype: string }>} metadata for the stored file
 * @throws {Error} when ffmpeg fails or the pipeline errors
 */
async function fetchStreamSource(source, tempFileTarget, tempFilePath, hashStream) {
	const meta = { mimetype: 'video/mp4' };
	const video = new stream.PassThrough();

	const transcoded = new Promise((resolve, reject) => {
		ffmpeg(source.stream)
			.format('mp4')
			.outputOptions(['-movflags frag_keyframe+empty_moov'])
			.on('start', cmd => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`))
			.on('error', (error) => {
				logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`);

				// unblock the pipeline, which would otherwise wait on output that never ends
				video.destroy(error);
				reject(error);
			})
			.on('end', resolve)
			.pipe(video);
	});

	await Promise.all([
		transcoded,
		pipeline(video, hashStream, tempFileTarget),
	]);

	logger.verbose(`Finished fetching stream from ${source.stream}`);

	return meta;
}
|
||||
|
||||
async function fetchSource(source, baseMedia) {
|
||||
@@ -457,7 +456,7 @@ async function fetchSource(source, baseMedia) {
|
||||
});
|
||||
|
||||
const { mimetype } = source.stream
|
||||
? await fetchStreamSource(source, tempFileTarget, hashStream)
|
||||
? await fetchStreamSource(source, tempFileTarget, tempFilePath, hashStream)
|
||||
: await fetchHttpSource(source, tempFileTarget, hashStream);
|
||||
|
||||
hasher.end();
|
||||
|
||||
@@ -7,8 +7,6 @@ function curateRelease(release, withMedia = false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const network = release.site_network || release.network;
|
||||
|
||||
return {
|
||||
id: release.id,
|
||||
entryId: release.entry_id,
|
||||
@@ -18,22 +16,22 @@ function curateRelease(release, withMedia = false) {
|
||||
date: release.date,
|
||||
description: release.description,
|
||||
duration: release.duration,
|
||||
site: release.site && {
|
||||
id: release.site.id,
|
||||
name: release.site.name,
|
||||
slug: release.site.slug,
|
||||
},
|
||||
network: network && {
|
||||
id: network.id,
|
||||
name: network.name,
|
||||
slug: network.slug,
|
||||
entity: release.entity && {
|
||||
id: release.entity.id,
|
||||
name: release.entity.name,
|
||||
slug: release.entity.slug,
|
||||
parent: release.entity.parent && {
|
||||
id: release.entity.parent.id,
|
||||
name: release.entity.parent.name,
|
||||
slug: release.entity.parent.slug,
|
||||
},
|
||||
},
|
||||
actors: (release.actors || []).map(actor => ({
|
||||
id: actor.id,
|
||||
name: actor.name,
|
||||
slug: actor.slug,
|
||||
gender: actor.gender,
|
||||
networkId: actor.network_id,
|
||||
entityId: actor.entity_id,
|
||||
aliasFor: actor.alias_for,
|
||||
})),
|
||||
tags: (release.tags || []).map(tag => ({
|
||||
@@ -67,23 +65,21 @@ function withRelations(queryBuilder, withMedia = false, type = 'scene') {
|
||||
queryBuilder
|
||||
.select(knex.raw(`
|
||||
releases.id, releases.entry_id, releases.shoot_id, releases.title, releases.url, releases.date, releases.description, releases.duration, releases.created_at,
|
||||
row_to_json(sites) as site,
|
||||
row_to_json(networks) as network,
|
||||
row_to_json(site_networks) as site_network,
|
||||
row_to_json(entities) as entity,
|
||||
row_to_json(parents) as parent,
|
||||
COALESCE(json_agg(DISTINCT actors) FILTER (WHERE actors.id IS NOT NULL), '[]') as actors,
|
||||
COALESCE(json_agg(DISTINCT tags) FILTER (WHERE tags.id IS NOT NULL), '[]') as tags
|
||||
`))
|
||||
.where('type', type)
|
||||
.leftJoin('sites', 'sites.id', 'releases.site_id')
|
||||
.leftJoin('networks', 'networks.id', 'releases.network_id')
|
||||
.leftJoin('networks as site_networks', 'site_networks.id', 'sites.network_id')
|
||||
.where('releases.type', type)
|
||||
.leftJoin('entities', 'entities.id', 'releases.entity_id')
|
||||
.leftJoin('entities as parents', 'parents.id', 'entities.parent_id')
|
||||
.leftJoin('releases_actors', 'releases_actors.release_id', 'releases.id')
|
||||
.leftJoin('actors', 'actors.id', 'releases_actors.actor_id')
|
||||
.leftJoin('releases_tags', 'releases_tags.release_id', 'releases.id')
|
||||
.leftJoin('tags', 'tags.id', 'releases_tags.tag_id')
|
||||
.groupBy(knex.raw(`
|
||||
releases.id, releases.entry_id, releases.shoot_id, releases.title, releases.url, releases.date, releases.description, releases.duration, releases.created_at,
|
||||
sites.id, networks.id, site_networks.id
|
||||
entities.id, parents.id
|
||||
`));
|
||||
|
||||
if (withMedia) {
|
||||
|
||||
136
src/scrapers/elegantangel.js
Normal file
136
src/scrapers/elegantangel.js
Normal file
@@ -0,0 +1,136 @@
|
||||
'use strict';
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Curate the scene tiles of an overview page into base releases.
 *
 * A tile that lacks a link, a numeric ID, alt text or an image yields a
 * release with those fields empty instead of throwing and aborting the
 * whole page.
 *
 * @param {Array<{ query: object }>} scenes - one query context per scene tile
 * @param {{ url: string }} channel - channel whose URL is passed as origin for the scene link
 * @returns {object[]} one release per tile
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url });
		// the first numeric path segment serves as the entry ID
		release.entryId = (release.url && new URL(release.url).pathname.match(/\/(\d+)/)?.[1]) || null;

		// the alt text carries the title followed by the word "image"
		release.title = query.q('.scene-img-wrapper img', 'alt')?.replace(/\s*image$/i, '') ?? null;

		release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY');
		release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')?.split(/\s*\|\s*/).map(actor => actor.trim());

		const poster = query.img('.scene-img-wrapper img');

		if (poster) {
			// larger renditions first, the listed thumbnail as last resort
			release.poster = [
				poster.replace(/\/res\/\d+/, '/res/1920'),
				poster.replace(/\/res\/\d+/, '/res/1600'),
				poster,
			];
		}

		release.trailer = { src: query.video('.scene-img-wrapper source') };

		return release;
	});
}
|
||||
|
||||
/**
 * Curate a scene page into a release, fetching the trailer stream from the
 * embedded player when the page references one.
 *
 * Missing optional data (duration, performer image, trailer stream) is left
 * empty rather than throwing or producing a bogus value.
 *
 * @param {{ query: object, html: string }} item - query context and raw HTML of the scene page
 * @param {string} url - absolute URL of the scene page; its first numeric path segment is the entry ID
 * @returns {Promise<object>} the curated release
 */
async function scrapeScene({ query, html }, url) {
	const release = {};

	release.entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1] ?? null;

	release.title = query.cnt('.scene-page .description');
	release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);

	// the page lists the runtime in minutes; null * 60 would silently become 0
	const minutes = query.number('.release-date:last-child');
	release.duration = Number.isFinite(minutes) ? minutes * 60 : null;

	release.actors = query.all('.video-performer').map((el) => {
		const avatar = qu.query.img(el, 'img', 'data-bgsrc');

		return {
			name: qu.query.cnt(el, 'span'),
			// larger rendition first, the listed image as fallback
			...(avatar && {
				avatar: [
					avatar.replace(/\/actor\/\d+/, '/actor/1600'),
					avatar,
				],
			}),
		};
	});

	release.tags = query.cnts('.tags a');
	release.poster = query.url('link[rel="image_src"]') || query.meta('property="og:image"');

	release.photos = query.imgs('#dv_frames a > img').map(photo => [
		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
		photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`),
		photo,
	]);

	const trailerId = html?.match(/item: (\d+),/)?.[1];

	if (trailerId) {
		const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`;
		const trailerRes = await qu.get(trailerUrl);
		const stream = trailerRes.ok ? trailerRes.item.query.video() : null;

		if (stream) {
			release.trailer = { stream };
		}
	}

	return release;
}
|
||||
|
||||
/**
 * Curate an actor page into a profile.
 *
 * @param {{ query: object, el: object }} item - query context and root element of the actor page
 * @param {string} actorName - name the profile was requested for (not used here)
 * @param {{ url: string }} entity - entity the profile belongs to; passed to scrapeAll as the channel
 * @param {{ releases?: boolean }} [include] - set `releases` to also scrape the scenes listed on the page
 * @returns {object} the profile; scraped scenes are attached as `releases`
 */
function scrapeProfile({ query, el }, actorName, entity, include) {
	const profile = {};

	profile.description = query.cnt('.bio-text');
	profile.birthPlace = query.cnt('.birth-place span');

	profile.avatar = query.img('.actor-photo img');

	if (include?.releases) {
		// NOTE(review): scenes are attached under `releases` so the profile itself
		// is still returned; confirm the key against the profile consumer.
		// scrapeAll reads channel.url, so the entity must be handed along.
		profile.releases = scrapeAll(qu.initAll(el, '.scene'), entity);
	}

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's tour listing and curate its scene tiles.
 *
 * @param {{ url: string }} channel - channel to list
 * @param {number} [page=1] - listing page to request
 * @returns {Promise<object[]|number>} the curated releases, or the response status on failure
 */
async function fetchLatest(channel, page = 1) {
	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };
	const res = await qu.getAll(`${channel.url}/tour?page=${page}`, '.scene-update', null, requestOptions);

	return res.ok
		? scrapeAll(res.items, channel)
		: res.status;
}
|
||||
|
||||
/**
 * Fetch a scene page and curate it into a release.
 *
 * @param {string} url - URL of the scene page
 * @param {object} channel - channel the scene belongs to, forwarded to scrapeScene
 * @returns {Promise<object|number>} the curated release, or the response status on failure
 */
async function fetchScene(url, channel) {
	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };
	const res = await qu.get(url, null, null, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, channel);
}
|
||||
|
||||
/**
 * Fetch an actor page from the entity's site and curate it into a profile.
 *
 * @param {string} actorName - actor name; slugified with underscores to build the page URL
 * @param {{ url: string }} entity - entity whose site hosts the actor page
 * @param {object} include - scrape options forwarded to scrapeProfile
 * @returns {Promise<object|number>} the curated profile, or the response status on failure
 */
async function fetchProfile(actorName, entity, include) {
	const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
	const res = await qu.get(url, null, null, {
		// invalid certificate, same as the listing and scene requests
		rejectUnauthorized: false,
	});

	if (res.ok) {
		return scrapeProfile(res.item, actorName, entity, include);
	}

	return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
@@ -3,13 +3,12 @@
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
function scrapeAll(scenes) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
const pathname = query.url('.title a');
|
||||
|
||||
release.entryId = pathname.match(/\/scene\/(\d+)/)[1];
|
||||
release.url = `${site.url}${pathname}`;
|
||||
release.url = query.url('.title a');
|
||||
release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)[1];
|
||||
|
||||
release.title = query.cnt('.title a');
|
||||
release.description = query.cnt('.description');
|
||||
@@ -17,7 +16,8 @@ function scrapeAll(scenes, site) {
|
||||
release.date = query.date('.date', 'MMM DD, YYYY');
|
||||
release.actors = query.cnts('.models a.model');
|
||||
|
||||
release.poster = query.q('img.poster');
|
||||
release.poster = query.img('img.poster');
|
||||
release.teaser = { src: query.video('.teaser video') };
|
||||
|
||||
release.stars = query.number('.rating');
|
||||
release.likes = query.number('.likes');
|
||||
@@ -27,16 +27,16 @@ function scrapeAll(scenes, site) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ query }) {
|
||||
function scrapeScene({ query }, url) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/scene\/(\d+)/)[1];
|
||||
|
||||
release.title = query.cnt('h3.title');
|
||||
release.description = query.cnt('p.description');
|
||||
|
||||
[release.poster, ...release.photos] = query.imgs('.preview-thumb');
|
||||
|
||||
const trailer = query.video('.trailer video');
|
||||
release.trailer = { src: trailer };
|
||||
release.trailer = { src: query.video('.trailer video') };
|
||||
|
||||
console.log(release);
|
||||
return release;
|
||||
@@ -73,7 +73,7 @@ async function fetchScene(url, channel) {
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, channel);
|
||||
return scrapeScene(res.item, url, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
@@ -140,10 +140,18 @@ function styles(context, selector, styleAttr) {
|
||||
return elStyles;
|
||||
}
|
||||
|
||||
/**
 * Read a number from the element matched by `selector`.
 *
 * @param {object} context - element or document to query
 * @param {string} selector - selector of the element holding the number
 * @param {RegExp|null} [match=/\d+/] - pattern picking the numeric part of the value; pass null to convert the whole value
 * @param {string} [attr='textContent'] - property or attribute to read the value from
 * @returns {number|null} the number, or null when the element has no value or no number can be read from it
 */
function number(context, selector, match = /\d+/, attr = 'textContent') {
	const value = q(context, selector, attr);

	if (!value) {
		return null;
	}

	const numeric = match
		? value.match(match)?.[0]
		: value;

	const parsed = Number(numeric);

	// Number(undefined) is NaN when the pattern did not match; report "no number" consistently
	return Number.isNaN(parsed) ? null : parsed;
}
|
||||
|
||||
function meta(context, selector, attrArg = 'content', applyTrim = true) {
|
||||
@@ -280,6 +288,8 @@ const quFuncs = {
|
||||
date,
|
||||
dur: duration,
|
||||
duration,
|
||||
element: q,
|
||||
el: q,
|
||||
exists,
|
||||
image,
|
||||
images,
|
||||
|
||||
Reference in New Issue
Block a user