diff --git a/config/default.js b/config/default.js index 6309173b..46b49235 100755 --- a/config/default.js +++ b/config/default.js @@ -189,12 +189,8 @@ module.exports = { 'hotcrazymess', 'thatsitcomshow', ], - [ - // Adult DVD Empire - 'elegantangel', - 'westcoastproductions', - ], '21sextury', + 'adultempire', 'julesjordan', 'dorcelclub', 'bang', diff --git a/migrations/20240815013526_entities_options.js b/migrations/20240815013526_entities_options.js new file mode 100644 index 00000000..f4eb899a --- /dev/null +++ b/migrations/20240815013526_entities_options.js @@ -0,0 +1,65 @@ +const config = require('config'); + +exports.up = async (knex) => { + await knex.schema.alterTable('entities', (table) => { + // internal options, as opposed to parameters for scraper options + table.json('options'); + }); + + await knex.schema.alterTable('releases', (table) => { + table.dropForeign('entity_id'); + + table.foreign('entity_id') + .references('id') + .inTable('entities') + .onDelete('cascade'); + }); + + await knex.schema.alterTable('releases_caps', (table) => { + table.unique(['release_id', 'media_id']); + }); + + await knex.schema.createTable('movies_tags', (table) => { + table.integer('tag_id') + .references('id') + .inTable('tags'); + + table.integer('movie_id') + .notNullable() + .references('id') + .inTable('movies') + .onDelete('cascade'); + + table.text('original_tag'); + + table.text('source') + .defaultTo('scraper'); + + table.unique(['tag_id', 'movie_id']); + }); + + await knex.raw('GRANT ALL ON ALL TABLES IN SCHEMA public TO :visitor;', { + visitor: knex.raw(config.database.query.user), + }); +}; + +exports.down = async (knex) => { + await knex.schema.alterTable('entities', (table) => { + table.dropColumn('options'); + }); + + await knex.schema.alterTable('releases', (table) => { + table.dropForeign('entity_id'); + + table.foreign('entity_id') + .references('id') + .inTable('entities') + .onDelete('no action'); + }); + + await 
knex.schema.alterTable('releases_caps', (table) => { + table.dropUnique(['release_id', 'media_id']); + }); + + await knex.schema.dropTable('movies_tags'); +}; diff --git a/package-lock.json b/package-lock.json index 98a2b262..11906dc5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -47,7 +47,7 @@ "express-session": "^1.17.3", "face-api.js": "^0.22.2", "file-type": "^18.7.0", - "fluent-ffmpeg": "^2.1.2", + "fluent-ffmpeg": "^2.1.3", "fs-extra": "^11.1.1", "graphile-build": "^4.14.0", "graphile-utils": "^4.14.0", @@ -88,7 +88,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.11.5", + "unprint": "^0.11.8", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", @@ -9851,17 +9851,22 @@ "integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ==" }, "node_modules/fluent-ffmpeg": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz", - "integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.3.tgz", + "integrity": "sha512-Be3narBNt2s6bsaqP6Jzq91heDgOEaDCJAXcE3qcma/EJBSy5FB4cvO31XBInuAuKBx8Kptf8dkhjK0IOru39Q==", "dependencies": { - "async": ">=0.2.9", + "async": "^0.2.9", "which": "^1.1.1" }, "engines": { - "node": ">=0.8.0" + "node": ">=18" } }, + "node_modules/fluent-ffmpeg/node_modules/async": { + "version": "0.2.10", + "resolved": "https://registry.npmjs.org/async/-/async-0.2.10.tgz", + "integrity": "sha512-eAkdoKxU6/LkKDBzLpT+t6Ff5EtfSF4wx1WfJiPEEV7WNLnDaRXk0oVysiEPm262roaachGexwUv94WhSgN5TQ==" + }, "node_modules/fluent-ffmpeg/node_modules/which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", @@ -18293,9 +18298,9 @@ } }, "node_modules/unprint": { - "version": "0.11.5", - "resolved": 
"https://registry.npmjs.org/unprint/-/unprint-0.11.5.tgz", - "integrity": "sha512-tLhiFGeSU40GN12625+9oqmNGDFSToMPME60pB+DSGT9wd9fJM0L/lyZMQeNFmWMSThwa/id/FHAOnN7cE1aOw==", + "version": "0.11.8", + "resolved": "https://registry.npmjs.org/unprint/-/unprint-0.11.8.tgz", + "integrity": "sha512-UCtfdbbHSNS/F0hlFwMa+ZmUqkVdp7V3SZVJjcMNnb0GUKm/7VWjhdvzHe+dIejhRdJykHfXWkI/BCbKwl51Vg==", "dependencies": { "axios": "^0.27.2", "bottleneck": "^2.19.5", diff --git a/package.json b/package.json index adec1c03..b4b0a417 100755 --- a/package.json +++ b/package.json @@ -106,7 +106,7 @@ "express-session": "^1.17.3", "face-api.js": "^0.22.2", "file-type": "^18.7.0", - "fluent-ffmpeg": "^2.1.2", + "fluent-ffmpeg": "^2.1.3", "fs-extra": "^11.1.1", "graphile-build": "^4.14.0", "graphile-utils": "^4.14.0", @@ -147,7 +147,7 @@ "tunnel": "0.0.6", "ua-parser-js": "^1.0.37", "undici": "^5.28.1", - "unprint": "^0.11.5", + "unprint": "^0.11.8", "url-pattern": "^1.0.3", "v-tooltip": "^2.1.3", "video.js": "^8.6.1", diff --git a/seeds/00_tags.js b/seeds/00_tags.js index 33d38f66..8fb673df 100755 --- a/seeds/00_tags.js +++ b/seeds/00_tags.js @@ -1251,6 +1251,10 @@ const tags = [ name: 'voodoo', slug: 'voodoo', }, + { + name: 'bikini', + slug: 'bikini', + }, ]; const aliases = [ @@ -2545,6 +2549,30 @@ const aliases = [ name: 'parasites', for: 'parasite', }, + { + name: 'threesome - fmm', + for: 'mfm', + }, + { + name: '4k ultra hd', + for: '4k', + }, + { + name: 'sex toy play', + for: 'toys', + }, + { + name: 'cumshots', + for: 'cumshot', + }, + { + name: 'bikini babes', + for: 'bikini', + }, + { + name: 'threesomes', + for: 'threesome', + }, ]; const priorities = [ // higher index is higher priority diff --git a/seeds/01_networks.js b/seeds/01_networks.js index 4556c0fa..f8102952 100755 --- a/seeds/01_networks.js +++ b/seeds/01_networks.js @@ -104,6 +104,12 @@ const networks = [ }, parent: '21sextury', }, + { + slug: 'adultempire', + name: 'Adult Empire', + url: 'https://www.adultempire.com', + 
type: 'info', + }, { slug: 'adulttime', name: 'Adult Time', diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 480b7436..06896f03 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -3270,6 +3270,15 @@ const sites = [ slug: 'elegantangel', name: 'Elegant Angel', url: 'https://www.elegantangel.com', + options: { + spawn: [ + { + parameters: { + latest: 'https://www.elegantangel.com/watch-exclusive-elegant-angel-scenes.html', + }, + }, + ], + }, }, // EVIL ANGEL { @@ -13478,7 +13487,6 @@ const sites = [ tags: ['black-cock'], parameters: { studio: false, - layout: 'grid', }, }, // WHALE MEMBER @@ -13713,27 +13721,36 @@ exports.seed = (knex) => Promise.resolve() .then(async () => { await Promise.all(sites.map(async (channel) => { if (channel.rename) { - return knex('entities') + await knex('entities') .where({ type: channel.type || 'channel', slug: channel.rename, }) .update('slug', channel.slug); + + return; } - return null; + if (channel.delete) { + await knex('entities') + .where({ + type: channel.type || 'channel', + slug: channel.slug, + }) + .delete(); + } }).filter(Boolean)); const networks = await knex('entities') .where('type', 'network') .orWhereNull('parent_id'); - const networksMap = networks.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); + const networksMap = networks.filter((network) => !network.delete).reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); const tags = await knex('tags').select('*').whereNull('alias_for'); const tagsMap = tags.reduce((acc, { id, slug }) => ({ ...acc, [slug]: id }), {}); - const sitesWithNetworks = sites.map((site) => ({ + const sitesWithNetworks = sites.filter((site) => !site.delete).map((site) => ({ slug: site.slug, name: site.name, type: site.type || 'channel', @@ -13741,6 +13758,7 @@ exports.seed = (knex) => Promise.resolve() description: site.description, url: site.url, parameters: site.parameters, + options: site.options, parent_id: networksMap[site.parent], priority: site.priority || 
0, independent: !!site.independent, diff --git a/src/actors.js b/src/actors.js index b133d18e..13327fff 100755 --- a/src/actors.js +++ b/src/actors.js @@ -410,7 +410,7 @@ async function curateProfile(profile, actor) { curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null; curatedProfile.hairType = profile.hairType?.trim() || null; curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.toLowerCase().replace('hair', '').trim()] || null; - curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null; + curatedProfile.eyes = eyeColors[profile.eyes?.replace(/eyes?/i, '').trim().toLowerCase()] || null; curatedProfile.tattoos = profile.tattoos?.trim() || null; curatedProfile.piercings = profile.piercings?.trim() || null; @@ -878,7 +878,7 @@ async function scrapeActors(argNames) { const entitySlugs = sources.flat(); const [entitiesBySlug, existingActorEntries] = await Promise.all([ - fetchEntitiesBySlug(entitySlugs, 'desc'), + fetchEntitiesBySlug(entitySlugs, { types: ['channel', 'network', 'info'] }), knex('actors') .select(knex.raw('actors.id, actors.name, actors.slug, actors.entry_id, actors.entity_id, row_to_json(entities) as entity')) .whereIn('actors.slug', baseActors.map((baseActor) => baseActor.slug)) diff --git a/src/deep.js b/src/deep.js index 49466739..047d6aae 100755 --- a/src/deep.js +++ b/src/deep.js @@ -84,7 +84,7 @@ async function fetchScene(scraper, url, entity, baseRelease, options, type = 'sc } if ((type === 'scene' && scraper.scrapeScene) || (type === 'movie' && scraper.scrapeMovie)) { - if (scraper.useUnprint || scraper.scrapeScene?.unprint || scraper.scrapeMovie?.unprint) { + if (scraper.useUnprint || (type === 'scene' && scraper.scrapeScene?.unprint) || (type === 'movie' && scraper.scrapeMovie?.unprint)) { return fetchUnprintScene(scraper, url, entity, baseRelease, options, type); } diff --git a/src/entities.js b/src/entities.js index 4945c6da..6801205a 100755 --- a/src/entities.js +++ 
b/src/entities.js @@ -55,7 +55,8 @@ function curateEntity(entity, includeParameters = false) { } if (includeParameters) { - curatedEntity.parameters = entity.parameters; + curatedEntity.options = entity.options; // global internal options + curatedEntity.parameters = entity.parameters; // scraper-specific parameters } if (entity.children) { @@ -66,10 +67,25 @@ function curateEntity(entity, includeParameters = false) { } if (entity.included_children) { - curatedEntity.includedChildren = entity.included_children.map((child) => curateEntity({ - ...child, - parent: curatedEntity.id ? curatedEntity : null, - }, includeParameters)); + curatedEntity.includedChildren = entity.included_children.flatMap((child) => { + const curatedChild = curateEntity({ + ...child, + parent: curatedEntity.id ? curatedEntity : null, + }, includeParameters); + + // allow entities to 'spawn' virtual copies of themselves, this is useful for sites that use two separate update pages (i.e. Elegant Angel) + if (child.options?.spawn) { + return [ + curatedChild, + ...child.options.spawn.map((spawnEntity) => ({ + ...curatedChild, + ...spawnEntity, + })), + ]; + } + + return curatedChild; + }); } const scraper = resolveScraper(curatedEntity); @@ -199,7 +215,7 @@ async function fetchIncludedEntities() { return curatedNetworks; } -async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { +async function fetchEntitiesBySlug(entitySlugs, options = { prefer: 'channel' }) { const entities = await knex.raw(` WITH RECURSIVE entity_tree as ( SELECT to_jsonb(entities) as entity, @@ -208,7 +224,7 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { FROM entities WHERE (slug = ANY(:entitySlugs) OR url ILIKE ANY(:entityHosts)) - AND type IN ('channel', 'network') + AND type = ANY(:entityTypes) UNION ALL @@ -236,7 +252,8 @@ async function fetchEntitiesBySlug(entitySlugs, prefer = 'channel') { `, { entitySlugs: entitySlugs.filter((slug) => !slug.includes('.')), entityHosts: 
entitySlugs.filter((slug) => slug.includes('.')).map((hostname) => `%${hostname}`), - sort: knex.raw(prefer === 'channel' ? 'asc' : 'desc'), + entityTypes: options.types || ['channel', 'network'], + sort: knex.raw(options.prefer === 'channel' ? 'asc' : 'desc'), }); // channel entity will overwrite network entity @@ -263,7 +280,7 @@ async function fetchReleaseEntities(baseReleases) { .filter(Boolean), )); - return fetchEntitiesBySlug(entitySlugs, argv.prefer || 'network'); + return fetchEntitiesBySlug(entitySlugs, { prefer: argv.prefer || 'network' }); } async function fetchEntity(entityId, type) { diff --git a/src/media.js b/src/media.js index 38a3d3bf..90494a15 100755 --- a/src/media.js +++ b/src/media.js @@ -648,7 +648,12 @@ streamQueue.define('fetchStreamSource', async ({ source, tempFileTarget, hashStr .format('mp4') .outputOptions(['-movflags frag_keyframe+empty_moov']) .on('start', (cmd) => logger.verbose(`Fetching stream from ${source.stream} with "${cmd}"`)) - .on('error', (error) => logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`)) + .on('error', (error) => { + logger.error(`Failed to fetch stream from ${source.stream}: ${error.message}`); + + hashStream.end(); + tempFileTarget.end(); + }) .pipe(); // await pipeline(video, hashStream, tempFileTarget); diff --git a/src/scrapers/adultempire.js b/src/scrapers/adultempire.js index b67d9abf..0cdb4ec3 100755 --- a/src/scrapers/adultempire.js +++ b/src/scrapers/adultempire.js @@ -1,97 +1,61 @@ 'use strict'; -const qu = require('../utils/qu'); +const unprint = require('unprint'); + const http = require('../utils/http'); const slugify = require('../utils/slugify'); const { feetInchesToCm, lbsToKg } = require('../utils/convert'); -async function getPhotos(entryId, channel) { - const res = await http.get(`${channel.url}/Membership/GetScreenshots?sceneID=scene_${entryId}`); - - if (res.ok) { - return res.body.split(/[\s,]+/).filter(Boolean); - } - - return []; -} - -function 
scrapeAllTour(scenes, channel) { +function scrapeAll(scenes, channel, _options) { return scenes.map(({ query }) => { const release = {}; - release.url = query.url('.scene-update-details, .feature-update-details', 'href', { origin: channel.url }); - release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; + release.url = query.url('a.scene-title, a.scene-img', { origin: channel.url }); + release.entryId = query.attribute('article[data-scene-id]', 'data-scene-id') || new URL(release.url).pathname.match(/^\/(\d+)/)?.[1]; - release.title = query.q('.scene-img-wrapper img', 'alt').replace(/\s*image$/i, ''); + release.title = query.content('.scene-title')?.trim(); + release.duration = query.duration('.scene-length'); - release.date = query.date('.scene-update-stats span, .feature-update-details span', 'MMM DD, YYYY'); - release.actors = query.cnt('.scene-update-details h3, .feature-update-details h2')?.split(/\s*\|\s*/).map((actor) => actor.trim()); + release.actors = query.content('.scene-performer-names')?.split(/[,&]/).map((actor) => actor.trim()); - const poster = query.img('.scene-img-wrapper img'); - release.poster = [ - poster.replace(/\/res\/\d+/, '/res/1920'), - poster.replace(/\/res\/\d+/, '/res/1600'), - poster, - ]; + release.poster = query.sourceSet('.screenshot', 'data-srcset'); - release.trailer = { src: query.video('.scene-img-wrapper source') }; + const sceneId = query.attribute('article[data-scene-id]', 'data-scene-id'); + const masterId = query.attribute('article[data-master-id]', 'data-master-id'); - return release; - }); -} - -async function scrapeAllGrid(scenes, channel, options) { - return Promise.all(scenes.map(async ({ query, el }) => { - const release = {}; - const uri = query.url('.grid-item-title') || query.url('a.animated-screen'); - - release.entryId = el.id.match(/\d+/)?.[0] || uri.match(/^(\d+)\//)?.[1]; - - release.title = query.cnt('.grid-item-title'); - release.url = qu.prefixUrl(uri, channel.url); - - release.poster = 
query.img('.screenshot'); - - if (options.includePhotos) { - release.photos = await getPhotos(release.entryId, channel); + if (sceneId && masterId) { + release.teaser = `https://video.adultempire.com/hls/previewscene/${masterId}/${sceneId}/index-f1-v1.m3u8`; } - return release; - })); -} - -function scrapeMovieScenes(scenes) { - return scenes.map(({ query }) => { - const release = {}; - - release.title = query.cnt('.scene-title a'); - release.url = query.url('.scene-title a', 'href', { origin: 'https://www.elegantangel.com' }); - release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; - - release.duration = query.number('.scene-length') * 60; - release.actors = query.cnts('.scene-cast-list a'); - - release.poster = query.img('a img'); - return release; }); } -async function scrapeRelease({ query, html }, url, channel, baseRelease, options) { +const photoRegex = /(\/\w\/\d+\/)\d+/; + +async function scrapeRelease({ query, html, element }, { url, entity, baseRelease, parameters }) { const release = {}; const type = query.exists('.scene-list-header') ? 
'movie' : 'scene'; release.entryId = new URL(url).pathname.match(/\/(\d+)/)[1]; - release.title = query.cnt('.scene-page .description, .video-page .description'); + const title = query.content('.scene-page .description, .video-page .description'); + + if (/^scene \d+$/i.test(title)) { + release.sceneIndex = unprint.extractNumber(title); + } else { + release.title = title; + } + release.date = query.date('.release-date:first-child', 'MMM DD, YYYY', /\w{3} \d{2}, \d{4}/); + release.duration = query.duration('.release-date:last-child'); release.actors = query.all('.video-performer').map((el) => { - const avatar = qu.query.img(el, 'img', 'data-bgsrc'); + const avatar = unprint.query.img(el, 'img', 'data-bgsrc'); return { - name: qu.query.cnt(el, 'span'), - url: qu.query.url(el, 'a', 'href', { origin: channel.url }), + name: unprint.query.content(el, 'span').trim(), + url: unprint.query.url(el, 'a', { origin: entity.url }), avatar: [ avatar.replace(/\/actor\/\d+/, '/actor/1600'), avatar, @@ -99,8 +63,8 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options }; }); - release.tags = query.cnts('.tags a, .categories a'); - release.studio = options?.parameters.studio === false ? null : slugify(query.cnt('.studio span:last-child'), ''); + release.tags = query.contents('.tags a, .categories a'); + release.studio = parameters?.studio === false ? 
null : slugify(query.content('.studio span:last-child, .studio a'), ''); if (type === 'scene') { release.director = query.text('.director'); @@ -109,87 +73,44 @@ async function scrapeRelease({ query, html }, url, channel, baseRelease, options } if (type === 'movie') { - release.director = query.cnt('.director a'); - release.covers = query.imgs('.carousel-item > img'); + release.director = query.content('.director a'); + release.covers = [query.sourceSet('.carousel-item .boxcover-image', 'data-srcset')]; - release.scenes = scrapeMovieScenes(qu.initAll(query.all('#scenes .grid-item')), channel); + release.scenes = scrapeAll(unprint.initAll(element, '#scenes .grid-item'), entity); } if (query.exists('.video-title .movie-title')) { release.movie = { - title: query.cnt('#viewLargeBoxcover .modal-title a'), - url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: channel.url }), + title: query.content('#viewLargeBoxcover .modal-title a'), + url: query.url('#viewLargeBoxcover .modal-title a', 'href', { origin: entity.url }), entryId: query.url('#viewLargeBoxcover .modal-title a')?.match(/(\d+)\//)[1], covers: query.imgs('#viewLargeBoxcover #viewLargeBoxcoverCarousel .carousel-item > img'), }; } - release.photos = query.imgs('#dv_frames a > img').map((photo) => [ - photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1920`), - photo.replace(/(\/p\/\d+\/)\d+/, (match, path) => `${path}1600`), + release.caps = query.imgs('#dv_frames a > img', { attribute: 'data-src' }).map((photo) => [ + photo.replace(photoRegex, (match, path) => `${path}1920`), + photo.replace(photoRegex, (match, path) => `${path}1280`), photo, ]); const trailerId = html.match(/item: (\d+),/)?.[1]; if (trailerId) { - const trailerUrl = `https://www.adultempire.com/videoEmbed/${trailerId}?type=preview`; - const trailerRes = await qu.get(trailerUrl); + release.trailer = `https://trailer.adultempire.com/hls/trailer/${trailerId}/master.m3u8`; + } - if (trailerRes.ok) { - const stream = 
trailerRes.item.query.video(); - - release.trailer = { stream }; - } + if (query.exists('.user-actions .btn-4k')) { + release.qualities = [2160]; } return release; } -function scrapeMovies(movies, channel) { - return movies.map(({ query }) => { - const release = {}; - - release.url = query.url('.boxcover', 'href', { origin: channel.url }); - release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; - - release.title = query.cnt('span'); - - const cover = query.img('picture img'); - - release.covers = [ - // filename is ignored, back-cover has suffix after media ID - cover.replace('_sq.jpg', '/front.jpg').replace(/\/product\/\d+/, '/product/500'), - cover.replace('_sq.jpg', 'b/back.jpg').replace(/\/product\/\d+/, '/product/500'), - ]; - - return release; - }); -} - -function scrapeActorScenes(scenes, channel) { - return scenes.map(({ query }) => { - const release = {}; - - release.url = query.url('a', 'href', { origin: channel.url }); - release.entryId = new URL(release.url).pathname.match(/\/(\d+)/)[1]; - - release.title = query.cnt('.grid-item-title'); - - const poster = query.img('a img'); - release.poster = [ - poster.replace(/\/\d+\//, '/1600/'), - poster, - ]; - - return release; - }); -} - -async function scrapeProfile({ query }, url, channel, include) { +async function scrapeProfile({ query }) { const profile = {}; - const bio = query.cnts('.performer-page-header li').reduce((acc, info) => { + const bio = query.contents('#profileModal .well li').reduce((acc, info) => { const [key, value] = info.split(':'); return { @@ -198,11 +119,14 @@ async function scrapeProfile({ query }, url, channel, include) { }; }, {}); - const measurements = bio.meas?.match(/(\d+)(\w+)-(\d+)-(\d+)/); + const bioText = query.content('#profileModal .well'); - if (measurements) { - [profile.bust, profile.cup, profile.waist, profile.hip] = measurements.slice(1); - } + profile.description = query.content('#profileModal .modal-body') + .slice(bioText.length) + 
.replace(/Biography Text ©Adult DVD Empire/i, '') + .trim(); + + profile.measurements = bio.measurements?.replace(/["\s]+/g, ''); profile.hair = bio.hair; profile.eyes = bio.eyes; @@ -211,79 +135,41 @@ async function scrapeProfile({ query }, url, channel, include) { profile.height = feetInchesToCm(bio.height); profile.weight = lbsToKg(bio.weight); - profile.avatar = query.img('picture img'); + const avatar = query.img('picture img, .performer-image-container img'); - if (include) { - const actorId = new URL(url).pathname.match(/\/(\d+)/)[1]; - const res = await qu.getAll(`${channel.url}/www.elegantangel.com/streaming-video-by-scene.html?cast=${actorId}`, '.grid-item', null, { - rejectUnauthorized: false, - }); - - if (res.ok) { - profile.releases = scrapeActorScenes(res.items, channel); - } + if (avatar) { + profile.avatar = [ + avatar + .replace('_bust', '_body') + .replace(/\/actor\/\d+\//i, '/actor/1000/'), + avatar, + ]; } return profile; } -async function fetchLatestTour(channel, page = 1) { - const url = `${channel.url}/tour?page=${page}`; - const res = await qu.getAll(url, '.scene-update', null, { - // invalid certificate - rejectUnauthorized: false, - }); +async function fetchLatest(channel, page, options) { + // const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item'); + const res = await unprint.get(options.parameters?.latest + ? 
`${options.parameters.latest}?page=${page}&view=grid` + : `${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&view=grid`, { selectAll: '.item-grid-scene .grid-item' }); if (res.ok) { - return scrapeAllTour(res.items, channel); + return scrapeAll(res.context, channel, options); } return res.status; } -async function fetchLatestGrid(channel, page, options) { - const res = await qu.getAll(`${channel.url}/watch-newest-clips-and-scenes.html?page=${page}&hybridview=member`, '.item-grid-scene .grid-item'); - - if (res.ok) { - return scrapeAllGrid(res.items, channel, options); - } - - return res.status; -} - -async function fetchMovie(url, channel, baseRelease, options) { - const res = await qu.get(url, null, null, { - // invalid certificate +async function fetchProfilePage(actorUrl) { + const res = await unprint.get(actorUrl, { + select: '#content', rejectUnauthorized: false, }); if (res.ok) { - return scrapeRelease(res.item, url, channel, baseRelease, options); - } - - return res.status; -} - -async function fetchMovies(channel, page = 1) { - const res = await qu.getAll(`https://www.elegantangel.com/streaming-elegant-angel-dvds-on-video.html?page=${page}`, '.grid-item', null, { - // invalid certificate - rejectUnauthorized: false, - }); - - if (res.ok) { - return scrapeMovies(res.items, channel); - } - - return res.status; -} - -async function fetchProfilePage(actorUrl, channel, include) { - const res = await qu.get(actorUrl, '.performer-page', null, { - rejectUnauthorized: false, - }); - - if (res.ok) { - return scrapeProfile(res.item, actorUrl, channel, include); + return scrapeProfile(res.context); } return res.status; @@ -298,13 +184,15 @@ async function fetchProfile(baseActor, channel, include) { } } - const searchRes = await http.get(`${channel.url}/search/SearchAutoComplete_Agg_ByMedia?rows=9&name_startsWith=${slugify(baseActor.name, '+')}`); + const searchRes = await 
http.get(`https://www.adultempire.com/search/SearchAutoComplete_Agg_EmpireDTRank?search_type=Pornstars&rows=9&name_startsWith=${slugify(baseActor.name, '+')}`); - if (searchRes.ok) { + if (searchRes.ok && searchRes.body.Results) { const actorResult = searchRes.body.Results.find((result) => /performer/i.test(result.BasicResponseGroup?.displaytype) && new RegExp(baseActor.name, 'i').test(result.BasicResponseGroup?.description)); if (actorResult) { - return fetchProfilePage(`${channel.url}${actorResult.BasicResponseGroup.id}`, channel, include); + const url = `https://www.adultempire.com/${actorResult.BasicResponseGroup.id}`; + + return fetchProfilePage(url); } return null; @@ -314,16 +202,15 @@ async function fetchProfile(baseActor, channel, include) { } module.exports = { - fetchLatest: fetchLatestTour, - fetchMovies, - fetchMovie, + fetchLatest, + // fetchMovies, fetchProfile, - scrapeScene: scrapeRelease, - scrapeMovie: scrapeRelease, - grid: { - fetchLatest: fetchLatestGrid, - scrapeScene: scrapeRelease, - fetchMovie, - fetchProfile, + scrapeScene: { + scraper: scrapeRelease, + unprint: true, + }, + scrapeMovie: { + scraper: scrapeRelease, + unprint: true, }, }; diff --git a/src/scrapers/resolve.js b/src/scrapers/resolve.js index db72f51a..0e6c3453 100755 --- a/src/scrapers/resolve.js +++ b/src/scrapers/resolve.js @@ -3,6 +3,10 @@ const scrapers = require('./scrapers'); function resolveScraper(entity) { + if (entity.parameters?.useScraper && scrapers.releases[entity.parameters.useScraper]) { + return scrapers.releases[entity.parameters.useScraper]; + } + if (scrapers.releases[entity.slug]) { return scrapers.releases[entity.slug]; } diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index a48c6efb..0a9108e9 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -177,6 +177,7 @@ const scrapers = { actors: { '18vr': badoink, '21sextury': gamma, + adultempire, allanal: mikeadriano, amateureuro: porndoe, americanpornstar, @@ -217,7 
+218,6 @@ const scrapers = { dorcelclub: dorcel, doubleviewcasting: firstanalquest, dtfsluts: fullpornnetwork, - elegantangel: adultempire, evilangel: gamma, exploitedcollegegirls: elevatedx, eyeontheguy: hush, @@ -323,7 +323,6 @@ const scrapers = { vixen, vrcosplayx: badoink, wankzvr, - westcoastproductions: adultempire, wicked: gamma, wildoncam: cherrypimps, xempire: gamma, diff --git a/src/store-releases.js b/src/store-releases.js index 2bd45590..b363b9fe 100755 --- a/src/store-releases.js +++ b/src/store-releases.js @@ -288,23 +288,28 @@ async function associateMovieScenes(movies, movieScenes) { }, }), {}); - const associations = movieScenes.map((scene) => { - if (!scene.movie) { + const associations = movieScenes + .toSorted((sceneA, sceneB) => { + return (sceneA.sceneIndex || 1) - (sceneB.sceneIndex || 1); + }) + .map((scene) => { + if (!scene.movie) { + return null; + } + + const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId] + || moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId]; + + if (sceneMovie?.id) { + return { + movie_id: sceneMovie.id, + scene_id: scene.id, + }; + } + return null; - } - - const sceneMovie = moviesByEntityIdAndEntryId[scene.entity.id]?.[scene.movie.entryId] - || moviesByEntityIdAndEntryId[scene.entity.parent?.id]?.[scene.movie.entryId]; - - if (sceneMovie?.id) { - return { - movie_id: sceneMovie.id, - scene_id: scene.id, - }; - } - - return null; - }).filter(Boolean); + }) + .filter(Boolean); await bulkInsert('movies_scenes', associations, false); } @@ -354,6 +359,7 @@ async function storeMovies(movies, useBatchId) { await updateMovieSearch(moviesWithId.map((movie) => movie.id)); await associateReleaseMedia(moviesWithId, 'movie'); + await associateReleaseTags(moviesWithId, 'movie'); return moviesWithId; } diff --git a/src/updates.js b/src/updates.js index 9b1a58f9..78e0c29e 100755 --- a/src/updates.js +++ b/src/updates.js @@ -298,6 +298,8 @@ async function 
scrapeNetworkParallel(networkEntity) { async function fetchUpdates() { const includedNetworks = await fetchIncludedEntities(); + // console.log(includedNetworks[0]); + const scrapedNetworks = await Promise.map( includedNetworks, async (networkEntity) => (networkEntity.parameters?.sequential