From 44a8ced30c0a64a7ae558138a96505dfd60fd6da Mon Sep 17 00:00:00 2001 From: Niels Simenon Date: Thu, 9 Jul 2020 02:00:54 +0200 Subject: [PATCH] Separated actor expand buttons. Refactored Brazzers scraper. Fixed actor releases not included in shallow scrape. Added number query and data-src default to qu img. Updated README. Removed post-install migrate and seed. --- README.md | 65 ++++----- assets/components/actors/actor.vue | 128 ++++------------- config/default.js | 110 +++++++------- package.json | 1 - seeds/02_sites.js | 2 +- src/actors.js | 2 +- src/app.js | 4 +- src/entities.js | 55 ++++++- src/scrapers/brazzers.js | 221 +++++++++++++---------------- src/utils/qu.js | 27 +++- 10 files changed, 289 insertions(+), 326 deletions(-) diff --git a/README.md b/README.md index 3b208e11..f3bcd570 100644 --- a/README.md +++ b/README.md @@ -7,50 +7,45 @@ Use [nvm](https://github.com/creationix/nvm) to install a recent version of Node `npm install` ### Set up database -Install PostgreSQL, make sure password authentication is enabled (scram-sha-256) and create a database with a privileged user. +Install PostgreSQL, make sure password authentication is enabled (scram-sha-256) and create a database with a privileged user. For optimal search engine performance, copy `traxxx.stop` to your PostgreSQL text search directory, usually `/usr/share/postgresql/tsearch_data/` or `/usr/local/share/postgresql/tsearch_data/`. -For optimal search engine performance, copy `traxxx.stop` to your PostgresQL text search directory, usually `/usr/share/postgresql/tsearch_data/ or `/usr/local/share/postgresql/tsearch_data/`. +### Configuration +Do not modify `config/default.js`, but instead create a copy at `config/local.js` containing the properties you wish to change. If you have set `NODE_ENV`, copy `assets/js/config/default.js` to `assets/js/config/[environment].js`. 
After setting up PostgreSQL and configuring the details, run the following commands to create and populate the tables, and build the project: `npm run migrate` `npm run seed` -`npm start` +You can also use `npm run flush` to run both steps at once, and wipe the database completely later. -### Configuration -Do not modify `config/default.js`, but instead create a copy at `config/local.js` containing the properties you wish to change. If you have set `NODE_ENV`, copy `assets/js/config/default.js` to `assets/js/config/[environment].js`. +### Building +To build traxxx, run the following command: -### Options -`npm start -- --option value` +`npm run build` + +To generate the thumbnails for logos and tag photos, run: + +`npm run logos-thumbs` + +`npm run tags-thumbs` + +### Run +`./traxxx --option value` or `npm start -- --option value` * `--server`: Run the web server -* `--fetch`: Fetch updates instead of running the webserver. Without further arguments, it will use the networks and sites defined in the configuration file. -* `--site [site ID]`: Fetch updates from a specific site. The site ID is typically the site name in lowercase and without cases or special characters. For example, Teens Like It Big is teenslikeitbig. -* `--network [network ID]`: Fetch updates from all sites of a specific network. The network ID is composed similarly to the site ID. -* `--after "[time]"`: Do not fetch scenes older than this. Example values are: `"1 month"`, `"2 weeks"`, `"3 years"`. -* `--scene [URL]`: Try to retrieve scene details from its official site or network URL. -* `--deep`: Follow each release link found running `--site` or `--network` and scrape it for more details. Enabled by default at the moment of writing; use `--no-deep` to only save information found on the overview pages. -* `--copy`: Try to copy relevant results to the clipboard. When used with `--scene`, it will copy the filename as defined in the config with all the details filled in. 
+* `--all`: Fetch updates from the channels and networks in the configuration file. +* `--channel [slug] [slug]`: Fetch updates from specific channels. The slug is the channel's name in lowercase and without spaces or special characters. For example, Teens Like It Big is teenslikeitbig. +* `--network [slug] [slug]`: Fetch updates from all sites of a specific network. The network slug is composed similarly to the channel slug. +* `--after "[time]"`: Do not fetch scenes older than this period or date. Example values are: `"1 month"`, `"3 years"`, `"2019-01-01"`. +* `--scene [URL]`: Try to retrieve scene details from its official channel or network URL. +* `--deep`: Follow each release link found running `--channel` or `--network` and scrape it for more details. Enabled by default; use `--no-deep` to only save information found on the overview pages. -## Developers - -### Options +#### Developers * `--no-save`: Do not store retrieved information in local database, forcing re-fetch. -* `--debug`: Show full error stack trace. - -### Generating thumbnails -Ensure each tag or sfw category directory has a `thumbs` and `lazy` directory: `for dir in \*; do mkdir "$dir/thumbs $dir/lazy"; done` - -Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px lazy pre-loading images: - -* Generate thumbnails within tag or sfw directory: `mogrify -path lazy -resize x240 -quality 90% \*.jpeg` -* Generate lazy loading images within tag or sfw directory: `mogrify -path lazy -resize x90 -quality 90% \*.jpeg` - -* Generate thumbnails for all tags or categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/thumbs" -resize x240 -quality 90% "$dir/\*.jpeg"; done` -* Generate lazy loading images for all tags categories in `tags` or `sfw` directory: `for dir in \*; do mogrify -path "$dir/lazy" -resize x90 -quality 90% "$dir/\*.jpeg"; done` +* `--level`: Change log level to `silly`, `verbose`, `info`, `warn` or `error`. 
## Supported networks & sites -768 sites on 62 networks, continuously expanding! +896 channels on 64 networks, continuously expanding! * 21Naturals * 21Sextreme @@ -60,7 +55,6 @@ Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px la * Assylum * Aziani * Babes -* BAM Visions * Bang! * Bang Bros * Blowpass @@ -75,24 +69,20 @@ Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px la * Fame Digital * Fantasy Massage * Full Porn Network -* Gamma Entertainment * Girlsway -* Hush * Hush Pass * Hussie Pass * Insex * Interracial Pass * JayRock Productions -* Jesse Loads Monster Facials * Jules Jordan -* Kelly Madison Media +* Kelly Madison Media (Teen Fidelity) * Kink * LegalPorno * Men * Metro HD * Mike Adriano * Mile High Media -* Mind Geek * MOFOS * Naughty America * New Sensations @@ -107,10 +97,11 @@ Using ImageMagick's bulk tool `mogrify` to generate 240px thumbnails and 90px la * SCORE * Sexy Hub * Team Skeet +* Teen Core Club * Twistys * Vivid * Vixen * VogoV -* Whale Member +* Whale Member (Holed, POVD) * Wicked * XEmpire diff --git a/assets/components/actors/actor.vue b/assets/components/actors/actor.vue index c1cb69f5..dc95471b 100644 --- a/assets/components/actors/actor.vue +++ b/assets/components/actors/actor.vue @@ -32,7 +32,7 @@
- +
  • {{ actor.origin.city }}{{ actor.origin.city ? `, ${actor.origin.state}` : actor.origin.state }} Lives in {{ actor.residence.city }}{{ actor.residence.city ? `, ${actor.residence.state}` : actor.residence.state }} Updated {{ formatDate(actor.updatedAt, 'YYYY-MM-DD HH:mm') }}, ID: {{ actor.id }}
- -
- +
@@ -319,6 +314,7 @@ import Pagination from '../pagination/pagination.vue'; import FilterBar from '../header/filter-bar.vue'; import Releases from '../releases/releases.vue'; import Photos from './photos.vue'; +import Expand from '../expand/expand.vue'; import Scroll from '../scroll/scroll.vue'; import Gender from './gender.vue'; import Social from './social.vue'; @@ -358,6 +354,7 @@ export default { Pagination, Photos, Scroll, + Expand, Releases, Gender, Social, @@ -369,7 +366,8 @@ export default { totalCount: 0, limit: 10, pageTitle: null, - expanded: false, + bioExpanded: false, + photosExpanded: false, }; }, computed: { @@ -640,57 +638,7 @@ export default { } .expand { - display: none; - justify-content: center; - align-items: center; - padding: .5rem .25rem; - font-weight: bold; - font-size: .9rem; - cursor: pointer; - - .icon { - fill: var(--shadow); - } - - &:hover { - background: var(--shadow-hint); - - .icon { - fill: var(--shadow-strong); - } - } -} - -.expand-sidebar:hover { - background: var(--shadow-hint); -} - -.expand-header { - display: none; - - &:hover { - background: var(--shadow-hint); - } -} - -.collapse-header { display: none; - width: 100%; - justify-content: center; - align-items: center; - padding: 0; - background: var(--profile); - - .icon { - width: 100%; - fill: var(--highlight); - padding: .5rem 0; - } - - &:hover .icon { - background: var(--highlight-hint); - fill: var(--text-contrast); - } } .scroll { @@ -718,7 +666,6 @@ export default { height: auto; max-height: none; flex-direction: column; - padding: 0 0 .5rem 0; &.with-avatar { height: auto; @@ -742,30 +689,13 @@ export default { margin: 0; } - .expand, - .expand-header { - display: flex; + .expanded .bio-value { + white-space: normal; } - /* - .expanded .descriptions-container { - display: block; - max-width: 100%; - max-height: 30rem; - margin: 0; - padding: 0 1rem; - } - */ - - .expanded { - .collapse-header { - display: block; - } - - .bio-value { - white-space: normal; - } - } 
+ .expand { + display: block; + } } @media(max-width: $breakpoint0) { diff --git a/config/default.js b/config/default.js index 9f91c920..f24afa7d 100644 --- a/config/default.js +++ b/config/default.js @@ -12,58 +12,64 @@ module.exports = { sfwPort: 5001, }, // include: [], - // exclude: [], - exclude: [ - // 21sextreme, no longer updated - 'mightymistress', - 'dominatedgirls', - 'homepornreality', - 'peeandblow', - 'cummingmatures', - 'mandyiskinky', - 'speculumplays', - 'creampiereality', - // aziani - 'amberathome', - 'marycarey', - 'racqueldevonshire', - // boobpedia - 'boobpedia', - // blowpass - 'sunlustxxx', - // ddfnetwork - 'fuckinhd', - 'bustylover', - // famedigital - 'daringsex', - 'lowartfilms', - // freeones - 'freeones', - // pornpros - 'milfhumiliation', - 'humiliated', - 'flexiblepositions', - 'publicviolations', - 'amateurviolations', - 'squirtdisgrace', - 'cumdisgrace', - 'webcamhackers', - 'collegeteens', - // score - 'bigboobbundle', - 'milfbundle', - 'pornmegaload', - 'scorelandtv', - 'scoretv', - // teenscoreclub - 'maiko', - 'ncuksinners', - 'ncversocinema', - 'pussybabes', - 'uksinners', - // mindgeek - 'pornhub', - ], + exclude: { + networks: [ + 'gamma', + 'mindgeek', + 'julesjordan', + ], + channels: [ + // 21sextreme, no longer updated + 'mightymistress', + 'dominatedgirls', + 'homepornreality', + 'peeandblow', + 'cummingmatures', + 'mandyiskinky', + 'speculumplays', + 'creampiereality', + // aziani + 'amberathome', + 'marycarey', + 'racqueldevonshire', + // boobpedia + 'boobpedia', + // blowpass + 'sunlustxxx', + // ddfnetwork + 'fuckinhd', + 'bustylover', + // famedigital + 'daringsex', + 'lowartfilms', + // freeones + 'freeones', + // pornpros + 'milfhumiliation', + 'humiliated', + 'flexiblepositions', + 'publicviolations', + 'amateurviolations', + 'squirtdisgrace', + 'cumdisgrace', + 'webcamhackers', + 'collegeteens', + // score + 'bigboobbundle', + 'milfbundle', + 'pornmegaload', + 'scorelandtv', + 'scoretv', + // teenscoreclub + 
'maiko', + 'ncuksinners', + 'ncversocinema', + 'pussybabes', + 'uksinners', + // mindgeek + 'pornhub', + ], + }, profiles: [ [ 'evilangel', diff --git a/package.json b/package.json index 869c7bca..9121d955 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,6 @@ "description": "All the latest porn releases in one place", "main": "src/app.js", "scripts": { - "postinstall": "npm run migrate && npm run seed", "start": "node -r source-map-support/register dist/init.js", "webpack": "webpack --env=production --mode=production", "webpack-dev": "webpack --env=development --mode=development", diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 961b94ba..0b964d4a 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -4825,7 +4825,7 @@ const sites = [ name: 'Lil Humpers', url: 'https://lilhumpers.com', description: '', - parameters: { siteId: 310 }, + parameters: { siteId: 336 }, slug: 'lilhumpers', parent: 'realitykings', }, diff --git a/src/actors.js b/src/actors.js index 292c3ded..b6f31d2f 100644 --- a/src/actors.js +++ b/src/actors.js @@ -281,7 +281,7 @@ async function curateProfile(profile) { curatedProfile.nationality = profile.nationality?.trim() || null; // used to derive country when country not available curatedProfile.ethnicity = ethnicities[profile.ethnicity?.trim().toLowerCase()] || null; - curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.trim().toLowerCase()] || null; + curatedProfile.hairColor = hairColors[(profile.hairColor || profile.hair)?.toLowerCase().replace('hair', '').trim()] || null; curatedProfile.eyes = eyeColors[profile.eyes?.trim().toLowerCase()] || null; curatedProfile.tattoos = profile.tattoos?.trim() || null; diff --git a/src/app.js b/src/app.js index a9474948..d99f374f 100644 --- a/src/app.js +++ b/src/app.js @@ -24,11 +24,11 @@ async function init() { const actors = argv.actors && await scrapeActors(argv.actors); const actorBaseScenes = argv.actors && argv.actorScenes && actors.map(actor => 
actor.releases).flat().filter(Boolean); - const updateBaseScenes = (argv.scrape || argv.channels || argv.networks) && await fetchUpdates(); + const updateBaseScenes = (argv.all || argv.channels || argv.networks) && await fetchUpdates(); const deepScenes = argv.deep ? await fetchScenes([...(argv.scenes || []), ...(updateBaseScenes || []), ...(actorBaseScenes || [])]) - : updateBaseScenes; + : [...(updateBaseScenes || []), ...(actorBaseScenes || [])]; const sceneMovies = deepScenes && argv.sceneMovies && deepScenes.map(scene => scene.movie).filter(Boolean); const deepMovies = await fetchMovies([...(argv.movies || []), ...(sceneMovies || [])]); diff --git a/src/entities.js b/src/entities.js index eb29aa0a..3f762529 100644 --- a/src/entities.js +++ b/src/entities.js @@ -85,7 +85,54 @@ async function fetchChannelsFromArgv() { } async function fetchChannelsFromConfig() { - const rawSites = await knex('entities') + const rawNetworks = await knex.raw(` + WITH RECURSIVE children AS ( + SELECT + id, parent_id, name, slug, type, url, description, parameters + FROM + entities + WHERE + CASE WHEN array_length(?, 1) IS NOT NULL + THEN slug = ANY(?) + ELSE true + END + AND NOT + slug = ANY(?) 
+ AND + entities.type = 'network' + UNION ALL + SELECT + entities.id, entities.parent_id, entities.name, entities.slug, entities.type, entities.url, entities.description, entities.parameters + FROM + entities + INNER JOIN + children ON children.id = entities.parent_id + ) + SELECT + entities.*, row_to_json(parents) as parent, json_agg(children) as children + FROM + children + LEFT JOIN + entities ON entities.id = children.parent_id + LEFT JOIN + entities AS parents ON parents.id = entities.parent_id + WHERE + children.type = 'channel' + GROUP BY + children.parent_id, entities.id, entities.name, parents.id + `, [ + config.include.networks, + config.include.networks, + config.exclude.networks, + ]); + + console.log(rawNetworks.rows); + + /* + const curatedSites = await curateEntities(rawChannels, true); + logger.info(`Found ${curatedSites.length} entities in database`); + + const rawChannels = await knex('entities') .select(knex.raw('entities.*, row_to_json(parents) as parent')) .leftJoin('entities as parents', 'parents.id', 'entities.parent_id') .where((builder) => { @@ -97,10 +144,10 @@ async function fetchChannelsFromConfig() { builder.whereIn('entities.slug', config.exclude || []); }); - const curatedSites = await curateEntities(rawSites, true); - logger.info(`Found ${curatedSites.length} entities in database`); + console.log(rawChannels); + */ - return curatedSites; + // return curatedSites; } async function fetchIncludedEntities() { diff --git a/src/scrapers/brazzers.js b/src/scrapers/brazzers.js index f70d466f..ec59f50b 100644 --- a/src/scrapers/brazzers.js +++ b/src/scrapers/brazzers.js @@ -1,142 +1,111 @@ 'use strict'; /* eslint-disable newline-per-chained-call */ -const bhttp = require('bhttp'); -const cheerio = require('cheerio'); -const { JSDOM } = require('jsdom'); -const moment = require('moment'); - -const { get, ex } = require('../utils/q'); +const qu = require('../utils/qu'); const slugify = require('../utils/slugify'); const { heightToCm, lbsToKg } 
= require('../utils/convert'); -const hairMap = { - Blonde: 'blonde', - Brunette: 'brown', - 'Black Hair': 'black', - Redhead: 'red', -}; - -function scrapeAll(html, site, upcoming) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); - const sceneElements = $('.release-card.scene').toArray(); - - return sceneElements.reduce((acc, element) => { - const isUpcoming = $(element).find('.icon-upcoming.active').length === 1; +function scrapeAll(items, channel, upcoming) { + return items.reduce((acc, { query }) => { + const isUpcoming = query.exists('.icon-upcoming.active'); if ((upcoming && !isUpcoming) || (!upcoming && isUpcoming)) { return acc; } - const sceneLinkElement = $(element).find('a'); + const release = {}; + const pathname = query.url('a'); - const url = `https://www.brazzers.com${sceneLinkElement.attr('href')}`; - const title = sceneLinkElement.attr('title'); - const entryId = url.split('/').slice(-3, -2)[0]; + release.url = `https://www.brazzers.com${pathname}`; + release.entryId = pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2]; - const date = moment.utc($(element).find('time').text(), 'MMMM DD, YYYY').toDate(); - const actors = $(element).find('.model-names a').map((actorIndex, actorElement) => $(actorElement).attr('title')).toArray(); + release.title = query.q('a', 'title'); + release.date = query.date('time', 'MMMM DD, YYYY'); - const likes = Number($(element).find('.label-rating .like-amount').text()); - const dislikes = Number($(element).find('.label-rating .dislike-amount').text()); + release.actors = query.all('.model-names a', 'title'); - const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`; - const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray(); + release.likes = query.number('.label-rating .like-amount'); + release.dislikes = query.number('.label-rating .dislike-amount'); - const channel = 
slugify($(element).find('.collection').attr('title'), ''); + release.poster = query.img('.card-main-img'); + release.photos = query.imgs('.card-overlay .image-under'); - return acc.concat({ - url, - entryId, - title, - actors, - date, - poster, - photos, - rating: { - likes, - dislikes, - }, - channel, - site, - }); + release.channel = slugify(query.q('.collection', 'title'), ''); + + return acc.concat(release); }, []); } -async function scrapeScene(html, url, _site) { - const $ = cheerio.load(html, { normalizeWhitespace: true }); +function getVideoData(html) { + try { + const videoScriptStart = html.indexOf('window.videoUiOptions'); + const videoScript = html.slice(videoScriptStart, html.indexOf('};', videoScriptStart)); + const videoString = videoScript.slice(videoScript.indexOf('{"stream_info"'), videoScript.lastIndexOf('},') + 1); + + return JSON.parse(videoString); + } catch (error) { + return null; + } +} + +async function scrapeScene({ query, html }, url, _site) { const release = {}; - const videoJson = $('script:contains("window.videoUiOptions")').html(); - const videoString = videoJson.slice(videoJson.indexOf('{"stream_info":'), videoJson.lastIndexOf('},') + 1); - const videoData = JSON.parse(videoString); + release.entryId = new URL(url).pathname.match(/(\/view\/id\/|\/episode\/)(\d+)/)[2]; - [release.entryId] = url.split('/').slice(-3, -2); - release.title = $('.scene-title[itemprop="name"]').text(); + release.title = query.q('.scene-title[itemprop="name"]', true); + release.description = query.text('#scene-description p[itemprop="description"]'); - release.description = $('#scene-description p[itemprop="description"]') - .contents() - .first() - .text() - .trim(); + release.date = query.date('.more-scene-info .scene-date', 'MMMM DD, YYYY'); + release.duration = Number(query.q('#trailer-player-container', 'data-duration')) // more accurate + || Number(query.q('.scene-length[itemprop="duration"]', 'content').slice(1, -1) * 60); - release.date = 
moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate(); - release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60; + // actor cards have avatar, but truncated name + const actorImagesByActorId = query.imgs('.featured-model .card-image img').reduce((acc, img) => ({ + ...acc, + [img.match(/\/models\/(\d+)/)[1]]: [ + img.replace('medium', 'large'), + img, + ], + }), {}); - const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => { - const avatar = `https:${$(actorElement).find('img').attr('data-src')}`; + release.actors = query.all('.related-model a').map(actorEl => ({ + name: query.q(actorEl, null, 'title'), + avatar: actorImagesByActorId[query.url(actorEl, null).match(/\/view\/id\/(\d+)/)?.[1]], + })); - return { - name: $(actorElement).attr('title'), - avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar], - }; - }).toArray(); + release.likes = query.number('.label-rating .like'); + release.dislikes = query.number('.label-rating .dislike'); - release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray(); - - release.likes = Number($('.label-rating .like').text()); - release.dislikes = Number($('.label-rating .dislike').text()); - - const siteElement = $('.niche-site-logo'); - // const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`; - const siteName = siteElement.attr('title'); - release.channel = slugify(siteName, ''); - - const tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); - const categories = $('.timeline a[href*="/categories"]').map((tagIndex, categoryElement) => $(categoryElement).attr('title')).toArray(); + const tags = query.all('.tag-card-container a', true); + const categories = query.all('.timeline a[href*="/categories"]', 'title'); release.tags = tags.concat(categories); - release.photos = 
$('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray(); + release.channel = slugify(query.q('.scene-site .label-text', true) || query.q('.niche-site-logo', 'title'), ''); - const posterPath = videoData?.poster || $('meta[itemprop="thumbnailUrl"]').attr('content') || $('#trailer-player-container').attr('data-player-img'); - if (posterPath) release.poster = `https:${posterPath}`; + const videoData = getVideoData(html); + const poster = videoData?.poster || query.meta('itemprop="thumbnailUrl"') || query.q('#trailer-player-container', 'data-player-img'); + + release.poster = qu.prefixUrl(poster); + release.photos = query.urls('.carousel-thumb a'); if (videoData) { release.trailer = Object.entries(videoData.stream_info.http.paths).map(([quality, path]) => ({ - src: `https:${path}`, + src: qu.prefixUrl(path), quality: Number(quality.match(/\d{3,}/)[0]), })); } - console.log(release); - return release; } -function scrapeActorSearch(html, url, actorName) { - const { document } = new JSDOM(html).window; - const actorLink = document.querySelector(`a[title="${actorName}" i]`); - - return actorLink ? 
actorLink.href : null; -} - -async function fetchActorReleases({ qu, html }, accReleases = []) { - const releases = scrapeAll(html); - const next = qu.url('.pagination .next a'); +async function fetchActorReleases({ query }, accReleases = []) { + const releases = scrapeAll(qu.initAll(query.all('.release-card.scene'))); + const next = query.url('.pagination .next a'); if (next) { const url = `https://www.brazzers.com${next}`; - const res = await get(url); + const res = await qu.get(url); if (res.ok) { return fetchActorReleases(res.item, accReleases.concat(releases)); @@ -146,12 +115,9 @@ async function fetchActorReleases({ qu, html }, accReleases = []) { return accReleases.concat(releases); } -async function scrapeProfile(html, url, actorName, include) { - const qProfile = ex(html); - const { q, qa } = qProfile; - - const bioKeys = qa('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim()); - const bioValues = qa('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim()); +async function scrapeProfile({ query }, url, actorName, include) { + const bioKeys = query.all('.profile-spec-list label', true).map(key => key.replace(/\n+|\s{2,}/g, '').trim()); + const bioValues = query.all('.profile-spec-list var', true).map(value => value.replace(/\n+|\s{2,}/g, '').trim()); const bio = bioKeys.reduce((acc, key, index) => ({ ...acc, [key]: bioValues[index] }), {}); @@ -159,17 +125,17 @@ async function scrapeProfile(html, url, actorName, include) { name: actorName, }; - profile.description = q('.model-profile-specs p', true); + profile.description = query.q('.model-profile-specs p', true); if (bio.Ethnicity) profile.ethnicity = bio.Ethnicity; if (bio.Measurements && bio.Measurements.match(/\d+[A-Z]+-\d+-\d+/)) [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-'); - if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = moment.utc(bio['Date of Birth'], 'MMMM DD, 
YYYY').toDate(); + if (bio['Date of Birth'] && bio['Date of Birth'] !== 'Unknown') profile.birthdate = qu.extractDate(bio['Date of Birth'], 'MMMM DD, YYYY'); if (bio['Birth Location']) profile.birthPlace = bio['Birth Location']; if (bio['Pussy Type']) profile.pussy = bio['Pussy Type'].split(',').slice(-1)[0].toLowerCase(); if (bio.Height) profile.height = heightToCm(bio.Height); if (bio.Weight) profile.weight = lbsToKg(bio.Weight.match(/\d+/)[0]); - if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); + if (bio['Hair Color']) profile.hair = bio['Hair Color'].toLowerCase(); if (bio['Tits Type'] && bio['Tits Type'].match('Natural')) profile.naturalBoobs = true; if (bio['Tits Type'] && bio['Tits Type'].match('Enhanced')) profile.naturalBoobs = false; @@ -177,49 +143,60 @@ async function scrapeProfile(html, url, actorName, include) { if (bio['Body Art'] && bio['Body Art'].match('Tattoo')) profile.hasTattoos = true; if (bio['Body Art'] && bio['Body Art'].match('Piercing')) profile.hasPiercings = true; - const avatarEl = q('.big-pic-model-container img'); + const avatarEl = query.q('.big-pic-model-container img'); if (avatarEl) profile.avatar = `https:${avatarEl.src}`; if (include.releases) { - profile.releases = await fetchActorReleases(qProfile); + profile.releases = await fetchActorReleases({ query }); } return profile; } -async function fetchLatest(site, page = 1) { - const res = await bhttp.get(`${site.url}/page/${page}/`); +async function fetchLatest(channel, page = 1) { + const res = await qu.getAll(`${channel.url}/page/${page}/`, '.release-card.scene'); - return scrapeAll(res.body.toString(), site, false); + if (res.ok) { + return scrapeAll(res.items, channel, false); + } + + return res.status; } -async function fetchUpcoming(site) { - const res = await bhttp.get(`${site.url}/`); +async function fetchUpcoming(channel) { + const res = await qu.getAll(`${channel.url}/page/1`, '.release-card.scene'); - return 
scrapeAll(res.body.toString(), site, true); + if (res.ok) { + return scrapeAll(res.items, channel, true); + } + + return res.status; } async function fetchScene(url, site) { - const res = await bhttp.get(url); + const res = await qu.get(url); - return scrapeScene(res.body.toString(), url, site); + if (res.ok) { + return scrapeScene(res.item, url, site); + } + + return res.status; } async function fetchProfile(actorName, context, include) { - const searchUrl = 'https://brazzers.com/pornstars-search/'; - const searchRes = await bhttp.get(searchUrl, { - headers: { - Cookie: `textSearch=${encodeURIComponent(actorName)};`, - }, + const searchRes = await qu.get('https://brazzers.com/pornstars-search/', `a[title="${actorName}" i]`, { + Cookie: `textSearch=${encodeURIComponent(actorName)};`, }); - const actorLink = scrapeActorSearch(searchRes.body.toString(), searchUrl, actorName); + const actorLink = searchRes.ok && searchRes.item.qu.url(null); if (actorLink) { const url = `https://brazzers.com${actorLink}`; - const res = await bhttp.get(url); + const res = await qu.get(url); - return scrapeProfile(res.body.toString(), url, actorName, include); + if (res.ok) { + return scrapeProfile(res.item, url, actorName, include); + } } return null; diff --git a/src/utils/qu.js b/src/utils/qu.js index 58ec062b..b5bc1906 100644 --- a/src/utils/qu.js +++ b/src/utils/qu.js @@ -103,6 +103,12 @@ function text(context, selector, applyTrim = true) { return applyTrim ? trim(textValue) : textValue; } +function number(context, selector, attr = true) { + const value = q(context, selector, attr); + + return value ? 
Number(value) : null; +} + function meta(context, selector, attrArg = 'content', applyTrim = true) { if (/meta\[.*\]/.test(selector)) { return q(context, selector, attrArg, applyTrim); @@ -119,17 +125,22 @@ function date(context, selector, format, match, attr = 'textContent') { return extractDate(dateString, format, match); } -function image(context, selector = 'img', attr = 'src', origin, protocol = 'https') { - const imageEl = q(context, selector, attr); +function image(context, selector = 'img', attr, origin, protocol = 'https') { + const imageEl = (attr && q(context, selector, attr)) + || q(context, selector, 'src') + || q(context, selector, 'data-src'); - // no attribute means q output will be HTML element - return attr ? prefixUrl(imageEl, origin, protocol) : imageEl; + return prefixUrl(imageEl, origin, protocol); } -function images(context, selector = 'img', attr = 'src', origin, protocol = 'https') { - const imageEls = all(context, selector, attr); +function images(context, selector = 'img', attr, origin, protocol = 'https') { + const attribute = attr + || (q(context, selector, 'src') && 'src') + || (q(context, selector, 'data-src') && 'data-src'); - return attr ? imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)) : imageEls; + const imageEls = all(context, selector, attribute); + + return imageEls.map(imageEl => prefixUrl(imageEl, origin, protocol)); } function url(context, selector = 'a', attr = 'href', origin, protocol = 'https') { @@ -225,6 +236,8 @@ const quFuncs = { imgs: images, length: duration, meta, + number, + num: number, poster, q, text,