Added virtual entity spawning for multi-page updates (e.g. Elegant Angel). Fixed ffmpeg error freezing process. Refactored Adult Empire/Elegant Angel scraper.
This commit is contained in:
parent
958c6d83fa
commit
bca677b0a8
|
@ -189,12 +189,8 @@ module.exports = {
|
|||
'hotcrazymess',
|
||||
'thatsitcomshow',
|
||||
],
|
||||
[
|
||||
// Adult DVD Empire
|
||||
'elegantangel',
|
||||
'westcoastproductions',
|
||||
],
|
||||
'21sextury',
|
||||
'adultempire',
|
||||
'julesjordan',
|
||||
'dorcelclub',
|
||||
'bang',
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
const config = require('config');
|
||||
|
||||
exports.up = async (knex) => {
|
||||
await knex.schema.alterTable('entities', (table) => {
|
||||
// internal options, as opposed to parameters for scraper options
|
||||
table.json('options');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.dropForeign('entity_id');
|
||||
|
||||
table.foreign('entity_id')
|
||||
.references('id')
|
||||
.inTable('entities')
|
||||
.onDelete('cascade');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('releases_caps', (table) => {
|
||||
table.unique(['release_id', 'media_id']);
|
||||
});
|
||||
|
||||
await knex.schema.createTable('movies_tags', (table) => {
|
||||
table.integer('tag_id')
|
||||
.references('id')
|
||||
.inTable('tags');
|
||||
|
||||
table.integer('movie_id')
|
||||
.notNullable()
|
||||
.references('id')
|
||||
.inTable('movies')
|
||||
.onDelete('cascade');
|
||||
|
||||
table.text('original_tag');
|
||||
|
||||
table.text('source')
|
||||
.defaultTo('scraper');
|
||||
|
||||
table.unique(['tag_id', 'movie_id']);
|
||||
});
|
||||
|
||||
await knex.raw('GRANT ALL ON ALL TABLES IN SCHEMA public TO :visitor;', {
|
||||
visitor: knex.raw(config.database.query.user),
|
||||
});
|
||||
};
|
||||
|
||||
exports.down = async (knex) => {
|
||||
await knex.schema.alterTable('entities', (table) => {
|
||||
table.dropColumn('options');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('releases', (table) => {
|
||||
table.dropForeign('entity_id');
|
||||
|
||||
table.foreign('entity_id')
|
||||
.references('id')
|
||||
.inTable('entities')
|
||||
.onDelete('no action');
|
||||
});
|
||||
|
||||
await knex.schema.alterTable('releases_caps', (table) => {
|
||||
table.dropUnique(['release_id', 'media_id']);
|
||||
});
|
||||
|
||||
await knex.schema.dropTable('movies_tags');
|
||||
};
|
|
@ -47,7 +47,7 @@
|
|||
"express-session": "^1.17.3",
|
||||
"face-api.js": "^0.22.2",
|
||||
"file-type": "^18.7.0",
|
||||
"fluent-ffmpeg": "^2.1.2",
|
||||
"fluent-ffmpeg": "^2.1.3",
|
||||
"fs-extra": "^11.1.1",
|
||||
"graphile-build": "^4.14.0",
|
||||
"graphile-utils": "^4.14.0",
|
||||
|
@ -88,7 +88,7 @@
|
|||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.11.5",
|
||||
"unprint": "^0.11.8",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
|
@ -9851,17 +9851,22 @@
|
|||
"integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ=="
|
||||
},
|
||||
"node_modules/fluent-ffmpeg": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz",
|
||||
"integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==",
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.3.tgz",
|
||||
"integrity": "sha512-Be3narBNt2s6bsaqP6Jzq91heDgOEaDCJAXcE3qcma/EJBSy5FB4cvO31XBInuAuKBx8Kptf8dkhjK0IOru39Q==",
|
||||
"dependencies": {
|
||||
"async": ">=0.2.9",
|
||||
"async": "^0.2.9",
|
||||
"which": "^1.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.8.0"
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/fluent-ffmpeg/node_modules/async": {
|
||||
"version": "0.2.10",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-0.2.10.tgz",
|
||||
"integrity": "sha512-eAkdoKxU6/LkKDBzLpT+t6Ff5EtfSF4wx1WfJiPEEV7WNLnDaRXk0oVysiEPm262roaachGexwUv94WhSgN5TQ=="
|
||||
},
|
||||
"node_modules/fluent-ffmpeg/node_modules/which": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz",
|
||||
|
@ -18293,9 +18298,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/unprint": {
|
||||
"version": "0.11.5",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.5.tgz",
|
||||
"integrity": "sha512-tLhiFGeSU40GN12625+9oqmNGDFSToMPME60pB+DSGT9wd9fJM0L/lyZMQeNFmWMSThwa/id/FHAOnN7cE1aOw==",
|
||||
"version": "0.11.8",
|
||||
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.8.tgz",
|
||||
"integrity": "sha512-UCtfdbbHSNS/F0hlFwMa+ZmUqkVdp7V3SZVJjcMNnb0GUKm/7VWjhdvzHe+dIejhRdJykHfXWkI/BCbKwl51Vg==",
|
||||
"dependencies": {
|
||||
"axios": "^0.27.2",
|
||||
"bottleneck": "^2.19.5",
|
||||
|
|
|
@ -106,7 +106,7 @@
|
|||
"express-session": "^1.17.3",
|
||||
"face-api.js": "^0.22.2",
|
||||
"file-type": "^18.7.0",
|
||||
"fluent-ffmpeg": "^2.1.2",
|
||||
"fluent-ffmpeg": "^2.1.3",
|
||||
"fs-extra": "^11.1.1",
|
||||
"graphile-build": "^4.14.0",
|
||||
"graphile-utils": "^4.14.0",
|
||||
|
@ -147,7 +147,7 @@
|
|||
"tunnel": "0.0.6",
|
||||
"ua-parser-js": "^1.0.37",
|
||||
"undici": "^5.28.1",
|
||||
"unprint": "^0.11.5",
|
||||
"unprint": "^0.11.8",
|
||||
"url-pattern": "^1.0.3",
|
||||
"v-tooltip": "^2.1.3",
|
||||
"video.js": "^8.6.1",
|
||||
|
|
|
@ -1251,6 +1251,10 @@ const tags = [
|
|||
name: 'voodoo',
|
||||
slug: 'voodoo',
|
||||
},
|
||||
{
|
||||
name: 'bikini',
|
||||
slug: 'bikini',
|
||||
},
|
||||
];
|
||||
|
||||
const aliases = [
|
||||
|
@ -2545,6 +2549,30 @@ const aliases = [
|
|||
name: 'parasites',
|
||||
for: 'parasite',
|
||||
},
|
||||
{
|
||||
name: 'threesome - fmm',
|
||||
for: 'mfm',
|
||||
},
|
||||
{
|
||||
name: '4k ultra hd',
|
||||
for: '4k',
|
||||
},
|
||||
{
|
||||
name: 'sex toy play',
|
||||
for: 'toys',
|
||||
},
|
||||
{
|
||||
name: 'cumshots',
|
||||
for: 'cumshot',
|
||||
},
|
||||
{
|
||||
name: 'bikini babes',
|
||||
for: 'bikini',
|
||||
},
|
||||
{
|
||||
name: 'threesomes',
|
||||
for: 'threesome',
|
||||
},
|
||||
];
|
||||
|
||||
const priorities = [ // higher index is higher priority
|
||||
|
|
|
@ -104,6 +104,12 @@ const networks = [
|
|||
},
|
||||
parent: '21sextury',
|
||||
},
|
||||
{
|
||||
slug: 'adultempire',
|
||||
name: 'Adult Empire',
|
||||
url: 'https://www.adultempire.com',
|
||||
type: 'info',
|
||||
},
|
||||
{
|
||||
slug: 'adulttime',
|
||||
name: 'Adult Time',
|
||||
|
|
|
@ -3270,6 +3270,15 @@ const sites = [
|
|||
slug: 'elegantangel',
|
||||
name: 'Elegant Angel',
|
||||
url: 'https://www.elegantangel.com',
|
||||
options: {
|
||||
spawn: [
|
||||
{
|
||||
parameters: {
|
||||
latest: 'https://www.elegantangel.com/watch-exclusive-elegant-angel-scenes.html',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
// EVIL ANGEL
|
||||
{
|
||||
|
@ -13478,7 +13487,6 @@ const sites = [
|
|||
tags: ['black-cock'],
|
||||
parameters: {
|
||||
studio: false,
|
||||
layout: 'grid',
|
||||
},
|
||||
},
|
||||
// WHALE MEMBER
|
||||
|
@ -13713,27 +13721,36 @@ exports.seed = (knex) => Promise.resolve()
|
|||
.then(async () => {
|
||||
await Promise.all(sites.map(async (channel) => {
|
||||
if (channel.rename) {
|
||||
return knex('entities')
|
||||
await knex('entities')
|
||||
.where({
|
||||
type: channel.type || 'channel',
|
||||
slug: channel.rename,
|
||||
})
|
||||
.update('slug', channel.slug);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
return null;
|
||||
if (channel.delete) {
|
||||
await knex('entities')
|
||||
.where({
|
||||
type: channel.type || 'channel',
|
||||
slug: channel.slug,
|
||||
})
|
||||
.delete();
|
||||
}
|
||||
}).filter(Boolean));
|
||||
|
||||
const networks = await knex('entities')
|
||||
.where('type', 'network')
|
||||
.orWhereNull('parent_id');
|
||||
|
||||
const networksMap = networks.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {});
|
||||
const networksMap = networks.filter((network) => !network.delete).reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {});
|
||||
|
||||
const tags = await knex('tags').select('*').whereNull('alias_for');
|
||||
const tagsMap = tags.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {});
|
||||
|
||||
const sitesWithNetworks = sites.map((site) => ({
|
||||
const sitesWithNetworks = sites.filter((site) => !site.delete).map((site) => ({
|
||||
slug: site.slug,
|
||||
name: site.name,
|
||||
type: site.type || 'channel',
|
||||
|
@ -13741,6 +13758,7 @@ exports.seed = (knex) => Promise.resolve()
|
|||
description: site.description,
|
||||
url: site.url,
|
||||
parameters: site.parameters,
|
||||
options: site.options,
|
||||
parent_id: networksMap[site.parent],
|
||||
priority: site.priority || 0,
|
||||
independent: !!site.independent,
|
||||
|
|
|
@ -410,7 +410,7 @@ async function curateProfile(profile, actor) {
|
|||
curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null;
|
||||
curatedProfile.hairType = profile.hairType?.trim() || null;
|
||||
curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.toLowerCase().replace('hair', '').trim()] || null;
|
||||
curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null;
|
||||
curatedProfile.eyes = eyeColors[profile.eyes?.replace(/eyes?/i).trim().toLowerCase()] || null;
|
||||
|
||||
curatedProfile.tattoos = profile.tattoos?.trim() || null;
|
||||
curatedProfile.piercings = profile.piercings?.trim() || null;
|
||||
|
@ -878,7 +878,7 @@ async function scrapeActors(argNames) {
|
|||
const entitySlugs = sources.flat();
|
||||
|
||||
const [entitiesBySlug, existingActorEntries] = await Promise.all([
|
||||
fetchEntitiesBySlug(entitySlugs, 'desc'),
|
||||
fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'] }),
|
||||
knex('actors')
|
||||
.select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity'))
|
||||
.whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug))
|
||||
|
|
|
@ -84,7 +84,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc
|
|||
}
|
||||
|
||||
if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) {
|
||||
if (scraper.useUnprint || scraper.scrapeScene?.unprint || scraper.scrapeMovie?.unprint) {
|
||||
if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) {
|
||||
return fetchUnprintScene(scraper, url, entity, baseRelease, options, type);
|
||||
}
|
||||
|
||||
|
|
|
@ -55,7 +55,8 @@ function curateEntity(entity, includeParameters = false) {
|
|||
}
|
||||
|
||||
if (includeParameters) {
|
||||
curatedEntity.parameters = entity.parameters;
|
||||
curatedEntity.options = entity.options; // global internal options
|
||||
curatedEntity.parameters = entity.parameters; // scraper-specific parameters
|
||||
}
|
||||
|
||||
if (entity.children) {
|
||||
|
@ -66,10 +67,25 @@ function curateEntity(entity, includeParameters = false) {
|
|||
}
|
||||
|
||||
if (entity.included_children) {
|
||||
curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({
|
||||
...child,
|
||||
parent: curatedEntity.id ? curatedEntity : null,
|
||||
}, includeParameters));
|
||||
curatedEntity.includedChildren = entity.included_children.flatMap((child) => {
|
||||
const curatedChild = curateEntity({
|
||||
...child,
|
||||
parent: curatedEntity.id ? curatedEntity : null,
|
||||
}, includeParameters);
|
||||
|
||||
// allow entities to 'spawn' virtual copies of themselves, this is useful for sites that use two separate update pages (i.e. Elegant Angel)
|
||||
if (child.options?.spawn) {
|
||||
return [
|
||||
curatedChild,
|
||||
...child.options.spawn.map((spawnEntity) => ({
|
||||
...curatedChild,
|
||||
...spawnEntity,
|
||||
})),
|
||||
];
|
||||
}
|
||||
|
||||
return curatedChild;
|
||||
});
|
||||
}
|
||||
|
||||
const scraper = resolveScraper(curatedEntity);
|
||||
|
@ -199,7 +215,7 @@ async function fetchIncludedEntities() {
|
|||
return curatedNetworks;
|
||||
}
|
||||
|
||||
async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
|
||||
async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' }) {
|
||||
const entities = await knex.raw(`
|
||||
WITH RECURSIVE entity_tree as (
|
||||
SELECT to_jsonb(entities) as entity,
|
||||
|
@ -208,7 +224,7 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
|
|||
FROM entities
|
||||
WHERE (slug = ANY(:entitySlugs)
|
||||
OR url ILIKE ANY(:entityHosts))
|
||||
AND type IN ('channel', 'network')
|
||||
AND type = ANY(:entityTypes)
|
||||
|
||||
UNION ALL
|
||||
|
||||
|
@ -236,7 +252,8 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') {
|
|||
`, {
|
||||
entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')),
|
||||
entityHosts: entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`),
|
||||
sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'),
|
||||
entityTypes: options.types || ['channel', 'network'],
|
||||
sort: knex.raw(options.prefer === 'channel' ? 'asc' : 'desc'),
|
||||
});
|
||||
|
||||
// channel entity will overwrite network entity
|
||||
|
@ -263,7 +280,7 @@ async function fetchReleaseEntities(baseReleases) {
|
|||
.filter(Boolean),
|
||||
));
|
||||
|
||||
return fetchEntitiesBySlug(entitySlugs, argv.prefer || 'network');
|
||||
return fetchEntitiesBySlug(entitySlugs, { prefer: argv.prefer || 'network' });
|
||||
}
|
||||
|
||||
async function fetchEntity(entityId, type) {
|
||||
|
|
|
@ -648,7 +648,12 @@ streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStr
|
|||
.format('mp4')
|
||||
.outputOptions(['-movflags frag_keyframe+empty_moov'])
|
||||
.on('start', (cmd) => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`))
|
||||
.on('error', (error) => logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`))
|
||||
.on('error', (error) => {
|
||||
logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`);
|
||||
|
||||
hashStream.end();
|
||||
tempFileTarget.end();
|
||||
})
|
||||
.pipe();
|
||||
|
||||
// await pipeline(video, hashStream, tempFileTarget);
|
||||
|
|
|
@ -1,97 +1,61 @@
|
|||
'use strict';
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const http = require('../utils/http');
|
||||
const slugify = require('../utils/slugify');
|
||||
const { feetInchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
/**
 * Fetch the screenshot list for a scene from the channel's membership endpoint.
 *
 * @param {string|number} entryId - scene ID, interpolated into the `sceneID=scene_<id>` query
 * @param {{ url: string }} channel - entity whose `url` is used as the request base
 * @returns {Promise<string[]>} non-empty tokens of the response body split on whitespace and commas, or an empty array when the request fails
 */
async function getPhotos(entryId, channel) {
	const screenshotsUrl = `${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}`;
	const res = await http.get(screenshotsUrl);

	if (!res.ok) {
		return [];
	}

	return res.body.split(/[\s,]+/).filter(Boolean);
}
|
||||
|
||||
function scrapeAllTour(scenes, channel) {
|
||||
function scrapeAll(scenes, channel, _options) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url });
|
||||
release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1];
|
||||
release.url = query.url('a.scene-title, a.scene-img', { origin: channel.url });
|
||||
release.entryId = query.attribute('article[data-scene-id]', 'data-scene-id') || new URL(release.url).pathname.match(/^\/(\d+)/)?.[1];
|
||||
|
||||
release.title = query.q('.scene-img-wrapper img', 'alt').replace(/\s*image$/i, '');
|
||||
release.title = query.content('.scene-title')?.trim();
|
||||
release.duration = query.duration('.scene-length');
|
||||
|
||||
release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY');
|
||||
release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')?.split(/\s*\|\s*/).map((actor) => actor.trim());
|
||||
release.actors = query.content('.scene-performer-names')?.split(/[,&]/).map((actor) => actor.trim());
|
||||
|
||||
const poster = query.img('.scene-img-wrapper img');
|
||||
release.poster = [
|
||||
poster.replace(/\/res\/\d+/, '/res/1920'),
|
||||
poster.replace(/\/res\/\d+/, '/res/1600'),
|
||||
poster,
|
||||
];
|
||||
release.poster = query.sourceSet('.screenshot', 'data-srcset');
|
||||
|
||||
release.trailer = { src: query.video('.scene-img-wrapper source') };
|
||||
const sceneId = query.attribute('article[data-scene-id]', 'data-scene-id');
|
||||
const masterId = query.attribute('article[data-master-id]', 'data-master-id');
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeAllGrid(scenes, channel, options) {
|
||||
return Promise.all(scenes.map(async ({ query, el }) => {
|
||||
const release = {};
|
||||
const uri = query.url('.grid-item-title') || query.url('a.animated-screen');
|
||||
|
||||
release.entryId = el.id.match(/\d+/)?.[0] || uri.match(/^(\d+)\//)?.[1];
|
||||
|
||||
release.title = query.cnt('.grid-item-title');
|
||||
release.url = qu.prefixUrl(uri, channel.url);
|
||||
|
||||
release.poster = query.img('.screenshot');
|
||||
|
||||
if (options.includePhotos) {
|
||||
release.photos = await getPhotos(release.entryId, channel);
|
||||
if (sceneId && masterId) {
|
||||
release.teaser = `https://video.adultempire.com/hls/previewscene/${masterId}/${sceneId}/index-f1-v1.m3u8`;
|
||||
}
|
||||
|
||||
return release;
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Scrape the scene tiles listed on a movie page into base releases.
 *
 * @param {Array<{ query: object }>} scenes - initialized query contexts, one per scene tile
 * @returns {Array<object>} one release per tile with title, url, entryId, duration, actors and poster
 */
function scrapeMovieScenes(scenes) {
	return scenes.map(({ query }) => {
		const release = {};

		release.title = query.cnt('.scene-title a');
		release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' });

		// a tile without a link, or a link without a numeric ID, must not abort the whole page
		const entryId = release.url
			? new URL(release.url).pathname.match(/\/(\d+)/)?.[1]
			: null;

		release.entryId = entryId ?? null;

		// the tile value is multiplied by 60, so it is presumably in minutes — TODO confirm
		release.duration = query.number('.scene-length') * 60;
		release.actors = query.cnts('.scene-cast-list a');

		release.poster = query.img('a img');

		return release;
	});
}
|
||||
|
||||
async function scrapeRelease({ query, html }, url, channel, baseRelease, options) {
|
||||
const photoRegex = /(\/\w\/\d+\/)\d+/;
|
||||
|
||||
async function scrapeRelease({ query, html, element }, { url, entity, baseRelease, parameters }) {
|
||||
const release = {};
|
||||
const type = query.exists('.scene-list-header') ? 'movie' : 'scene';
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1];
|
||||
|
||||
release.title = query.cnt('.scene-page .description, .video-page .description');
|
||||
const title = query.content('.scene-page .description, .video-page .description');
|
||||
|
||||
if (/^scene \d+$/i.test(title)) {
|
||||
release.sceneIndex = unprint.extractNumber(title);
|
||||
} else {
|
||||
release.title = title;
|
||||
}
|
||||
|
||||
release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/);
|
||||
release.duration = query.duration('.release-date:last-child');
|
||||
|
||||
release.actors = query.all('.video-performer').map((el) => {
|
||||
const avatar = qu.query.img(el, 'img', 'data-bgsrc');
|
||||
const avatar = unprint.query.img(el, 'img', 'data-bgsrc');
|
||||
|
||||
return {
|
||||
name: qu.query.cnt(el, 'span'),
|
||||
url: qu.query.url(el, 'a', 'href', { origin: channel.url }),
|
||||
name: unprint.query.content(el, 'span').trim(),
|
||||
url: unprint.query.url(el, 'a', { origin: entity.url }),
|
||||
avatar: [
|
||||
avatar.replace(/\/actor\/\d+/, '/actor/1600'),
|
||||
avatar,
|
||||
|
@ -99,8 +63,8 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options
|
|||
};
|
||||
});
|
||||
|
||||
release.tags = query.cnts('.tags a, .categories a');
|
||||
release.studio = options?.parameters.studio === false ? null : slugify(query.cnt('.studio span:last-child'), '');
|
||||
release.tags = query.contents('.tags a, .categories a');
|
||||
release.studio = parameters?.studio === false ? null : slugify(query.content('.studio span:last-child, .studio a'), '');
|
||||
|
||||
if (type === 'scene') {
|
||||
release.director = query.text('.director');
|
||||
|
@ -109,87 +73,44 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options
|
|||
}
|
||||
|
||||
if (type === 'movie') {
|
||||
release.director = query.cnt('.director a');
|
||||
release.covers = query.imgs('.carousel-item > img');
|
||||
release.director = query.content('.director a');
|
||||
release.covers = [query.sourceSet('.carousel-item .boxcover-image', 'data-srcset')];
|
||||
|
||||
release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel);
|
||||
release.scenes = scrapeAll(unprint.initAll(element, '#scenes .grid-item'), entity);
|
||||
}
|
||||
|
||||
if (query.exists('.video-title .movie-title')) {
|
||||
release.movie = {
|
||||
title: query.cnt('#viewLargeBoxcover .modal-title a'),
|
||||
url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: channel.url }),
|
||||
title: query.content('#viewLargeBoxcover .modal-title a'),
|
||||
url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: entity.url }),
|
||||
entryId: query.url('#viewLargeBoxcover .modal-title a')?.match(/(\d+)\//)[1],
|
||||
covers: query.imgs('#viewLargeBoxcover #viewLargeBoxcoverCarousel .carousel-item > img'),
|
||||
};
|
||||
}
|
||||
|
||||
release.photos = query.imgs('#dv_frames a > img').map((photo) => [
|
||||
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`),
|
||||
photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`),
|
||||
release.caps = query.imgs('#dv_frames a > img', { attribute: 'data-src' }).map((photo) => [
|
||||
photo.replace(photoRegex, (match, path) => `${path}1920`),
|
||||
photo.replace(photoRegex, (match, path) => `${path}1280`),
|
||||
photo,
|
||||
]);
|
||||
|
||||
const trailerId = html.match(/item: (\d+),/)?.[1];
|
||||
|
||||
if (trailerId) {
|
||||
const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`;
|
||||
const trailerRes = await qu.get(trailerUrl);
|
||||
release.trailer = `https://trailer.adultempire.com/hls/trailer/${trailerId}/master.m3u8`;
|
||||
}
|
||||
|
||||
if (trailerRes.ok) {
|
||||
const stream = trailerRes.item.query.video();
|
||||
|
||||
release.trailer = { stream };
|
||||
}
|
||||
if (query.exists('.user-actions .btn-4k')) {
|
||||
release.qualities = [2160];
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
/**
 * Scrape movie tiles from a movie overview page into base releases.
 *
 * @param {Array<{ query: object }>} movies - initialized query contexts, one per movie tile
 * @param {{ url: string }} channel - entity whose `url` is used as origin for relative links
 * @returns {Array<object>} one release per tile with url, entryId, title and, when a cover image was found, covers
 */
function scrapeMovies(movies, channel) {
	return movies.map(({ query }) => {
		const release = {};

		release.url = query.url('.boxcover', 'href', { origin: channel.url });

		// a tile without a link, or a link without a numeric ID, must not abort the whole page
		const entryId = release.url
			? new URL(release.url).pathname.match(/\/(\d+)/)?.[1]
			: null;

		release.entryId = entryId ?? null;
		release.title = query.cnt('span');

		const cover = query.img('picture img');

		// only derive front/back covers when the tile actually has an image
		if (cover) {
			release.covers = [
				// filename is ignored, back-cover has suffix after media ID
				cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'),
				cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'),
			];
		}

		return release;
	});
}
|
||||
|
||||
/**
 * Scrape the scene tiles listed for an actor into base releases.
 *
 * @param {Array<{ query: object }>} scenes - initialized query contexts, one per scene tile
 * @param {{ url: string }} channel - entity whose `url` is used as origin for relative links
 * @returns {Array<object>} one release per tile with url, entryId, title and, when an image was found, poster candidates
 */
function scrapeActorScenes(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('a', 'href', { origin: channel.url });

		// a tile without a link, or a link without a numeric ID, must not abort the whole page
		const entryId = release.url
			? new URL(release.url).pathname.match(/\/(\d+)/)?.[1]
			: null;

		release.entryId = entryId ?? null;
		release.title = query.cnt('.grid-item-title');

		const poster = query.img('a img');

		// only build poster candidates when the tile actually has an image
		if (poster) {
			release.poster = [
				// first numeric path segment is swapped for 1600, original kept as fallback
				poster.replace(/\/\d+\//, '/1600/'),
				poster,
			];
		}

		return release;
	});
}
|
||||
|
||||
async function scrapeProfile({ query }, url, channel, include) {
|
||||
async function scrapeProfile({ query }) {
|
||||
const profile = {};
|
||||
|
||||
const bio = query.cnts('.performer-page-header li').reduce((acc, info) => {
|
||||
const bio = query.contents('#profileModal .well li').reduce((acc, info) => {
|
||||
const [key, value] = info.split(':');
|
||||
|
||||
return {
|
||||
|
@ -198,11 +119,14 @@ async function scrapeProfile({ query }, url, channel, include) {
|
|||
};
|
||||
}, {});
|
||||
|
||||
const measurements = bio.meas?.match(/(\d+)(\w+)-(\d+)-(\d+)/);
|
||||
const bioText = query.content('#profileModal .well');
|
||||
|
||||
if (measurements) {
|
||||
[profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1);
|
||||
}
|
||||
profile.description = query.content('#profileModal .modal-body')
|
||||
.slice(bioText.length)
|
||||
.replace(/Biography Text ©Adult DVD Empire/i, '')
|
||||
.trim();
|
||||
|
||||
profile.measurements = bio.measurements?.replace(/["\s]+/g, '');
|
||||
|
||||
profile.hair = bio.hair;
|
||||
profile.eyes = bio.eyes;
|
||||
|
@ -211,79 +135,41 @@ async function scrapeProfile({ query }, url, channel, include) {
|
|||
profile.height = feetInchesToCm(bio.height);
|
||||
profile.weight = lbsToKg(bio.weight);
|
||||
|
||||
profile.avatar = query.img('picture img');
|
||||
const avatar = query.img('picture img, .performer-image-container img');
|
||||
|
||||
if (include) {
|
||||
const actorId = new URL(url).pathname.match(/\/(\d+)/)[1];
|
||||
const res = await qu.getAll(`${channel.url}/www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, {
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
profile.releases = scrapeActorScenes(res.items, channel);
|
||||
}
|
||||
if (avatar) {
|
||||
profile.avatar = [
|
||||
avatar
|
||||
.replace('_bust', '_body')
|
||||
.replace(/\/actor\/\d+\//i, '/actor/1000/'),
|
||||
avatar,
|
||||
];
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatestTour(channel, page = 1) {
|
||||
const url = `${channel.url}/tour?page=${page}`;
|
||||
const res = await qu.getAll(url, '.scene-update', null, {
|
||||
// invalid certificate
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
async function fetchLatest(channel, page, options) {
|
||||
// const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item');
|
||||
const res = await unprint.get(options.parameters?.latest
|
||||
? `${options.parameters.latest}?page=${page}&view=grid`
|
||||
: `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&view=grid`, { selectAll: '.item-grid-scene .grid-item' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAllTour(res.items, channel);
|
||||
return scrapeAll(res.context, channel, options);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the newest scenes in grid layout and scrape its tiles.
 *
 * @param {{ url: string }} channel - entity whose `url` is used as the request base
 * @param {number} page - page number passed in the `page` query parameter
 * @param {object} options - forwarded unchanged to `scrapeAllGrid`
 * @returns {Promise<Array<object>|number>} scraped releases, or the response status when the request fails
 */
async function fetchLatestGrid(channel, page, options) {
	const latestUrl = `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`;
	const res = await qu.getAll(latestUrl, '.item-grid-scene .grid-item');

	return res.ok
		? scrapeAllGrid(res.items, channel, options)
		: res.status;
}
|
||||
|
||||
async function fetchMovie(url, channel, baseRelease, options) {
|
||||
const res = await qu.get(url, null, null, {
|
||||
// invalid certificate
|
||||
async function fetchProfilePage(actorUrl) {
|
||||
const res = await unprint.get(actorUrl, {
|
||||
select: '#content',
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeRelease(res.item, url, channel, baseRelease, options);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the Elegant Angel movie overview and scrape its tiles.
 *
 * @param {{ url: string }} channel - forwarded to `scrapeMovies`; the request URL itself is fixed
 * @param {number} [page=1] - page number passed in the `page` query parameter
 * @returns {Promise<Array<object>|number>} scraped movies, or the response status when the request fails
 */
async function fetchMovies(channel, page = 1) {
	const moviesUrl = `https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`;

	// invalid certificate
	const requestOptions = { rejectUnauthorized: false };

	const res = await qu.getAll(moviesUrl, '.grid-item', null, requestOptions);

	if (!res.ok) {
		return res.status;
	}

	return scrapeMovies(res.items, channel);
}
|
||||
|
||||
async function fetchProfilePage(actorUrl, channel, include) {
|
||||
const res = await qu.get(actorUrl, '.performer-page', null, {
|
||||
rejectUnauthorized: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item, actorUrl, channel, include);
|
||||
return scrapeProfile(res.context);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
@ -298,13 +184,15 @@ async function fetchProfile(baseActor, channel, include) {
|
|||
}
|
||||
}
|
||||
|
||||
const searchRes = await http.get(`${channel.url}/search/SearchAutoComplete_Agg_ByMedia?rows=9&name_startsWith=${slugify(baseActor.name, '+')}`);
|
||||
const searchRes = await http.get(`https://www.adultempire.com/search/SearchAutoComplete_Agg_EmpireDTRank?search_type=Pornstars&rows=9&name_startsWith=${slugify(baseActor.name, '+')}`);
|
||||
|
||||
if (searchRes.ok) {
|
||||
if (searchRes.ok && searchRes.body.Results) {
|
||||
const actorResult = searchRes.body.Results.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype) && new RegExp(baseActor.name, 'i').test(result.BasicResponseGroup?.description));
|
||||
|
||||
if (actorResult) {
|
||||
return fetchProfilePage(`${channel.url}${actorResult.BasicResponseGroup.id}`, channel, include);
|
||||
const url = `https://www.adultempire.com/${actorResult.BasicResponseGroup.id}`;
|
||||
|
||||
return fetchProfilePage(url);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -314,16 +202,15 @@ async function fetchProfile(baseActor, channel, include) {
|
|||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestTour,
|
||||
fetchMovies,
|
||||
fetchMovie,
|
||||
fetchLatest,
|
||||
// fetchMovies,
|
||||
fetchProfile,
|
||||
scrapeScene: scrapeRelease,
|
||||
scrapeMovie: scrapeRelease,
|
||||
grid: {
|
||||
fetchLatest: fetchLatestGrid,
|
||||
scrapeScene: scrapeRelease,
|
||||
fetchMovie,
|
||||
fetchProfile,
|
||||
scrapeScene: {
|
||||
scraper: scrapeRelease,
|
||||
unprint: true,
|
||||
},
|
||||
scrapeMovie: {
|
||||
scraper: scrapeRelease,
|
||||
unprint: true,
|
||||
},
|
||||
};
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
const scrapers = require('./scrapers');
|
||||
|
||||
function resolveScraper(entity) {
|
||||
if (entity.parameters?.useScraper && scrapers.releases[entity.parameters.useScraper]) {
|
||||
return scrapers.releases[entity.parameters.useScraper];
|
||||
}
|
||||
|
||||
if (scrapers.releases[entity.slug]) {
|
||||
return scrapers.releases[entity.slug];
|
||||
}
|
||||
|
|
|
@ -177,6 +177,7 @@ const scrapers = {
|
|||
actors: {
|
||||
'18vr': badoink,
|
||||
'21sextury': gamma,
|
||||
adultempire,
|
||||
allanal: mikeadriano,
|
||||
amateureuro: porndoe,
|
||||
americanpornstar,
|
||||
|
@ -217,7 +218,6 @@ const scrapers = {
|
|||
dorcelclub: dorcel,
|
||||
doubleviewcasting: firstanalquest,
|
||||
dtfsluts: fullpornnetwork,
|
||||
elegantangel: adultempire,
|
||||
evilangel: gamma,
|
||||
exploitedcollegegirls: elevatedx,
|
||||
eyeontheguy: hush,
|
||||
|
@ -323,7 +323,6 @@ const scrapers = {
|
|||
vixen,
|
||||
vrcosplayx: badoink,
|
||||
wankzvr,
|
||||
westcoastproductions: adultempire,
|
||||
wicked: gamma,
|
||||
wildoncam: cherrypimps,
|
||||
xempire: gamma,
|
||||
|
|
|
@ -288,23 +288,28 @@ async function associateMovieScenes(movies, movieScenes) {
|
|||
},
|
||||
}), {});
|
||||
|
||||
const associations = movieScenes.map((scene) => {
|
||||
if (!scene.movie) {
|
||||
const associations = movieScenes
|
||||
.toSorted((sceneA, sceneB) => {
|
||||
return (sceneA.sceneIndex || 1) - (sceneB.sceneIndex || 1);
|
||||
})
|
||||
.map((scene) => {
|
||||
if (!scene.movie) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId]
|
||||
|| moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId];
|
||||
|
||||
if (sceneMovie?.id) {
|
||||
return {
|
||||
movie_id: sceneMovie.id,
|
||||
scene_id: scene.id,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId]
|
||||
|| moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId];
|
||||
|
||||
if (sceneMovie?.id) {
|
||||
return {
|
||||
movie_id: sceneMovie.id,
|
||||
scene_id: scene.id,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}).filter(Boolean);
|
||||
})
|
||||
.filter(Boolean);
|
||||
|
||||
await bulkInsert('movies_scenes', associations, false);
|
||||
}
|
||||
|
@ -354,6 +359,7 @@ async function storeMovies(movies, useBatchId) {
|
|||
|
||||
await updateMovieSearch(moviesWithId.map((movie) => movie.id));
|
||||
await associateReleaseMedia(moviesWithId, 'movie');
|
||||
await associateReleaseTags(moviesWithId, 'movie');
|
||||
|
||||
return moviesWithId;
|
||||
}
|
||||
|
|
|
@ -298,6 +298,8 @@ async function scrapeNetworkParallel(networkEntity) {
|
|||
async function fetchUpdates() {
|
||||
const includedNetworks = await fetchIncludedEntities();
|
||||
|
||||
// console.log(includedNetworks[0]);
|
||||
|
||||
const scrapedNetworks = await Promise.map(
|
||||
includedNetworks,
|
||||
async (networkEntity) => (networkEntity.parameters?.sequential
|
||||
|
|
Loading…
Reference in New Issue