diff --git a/seeds/02_sites.js b/seeds/02_sites.js index f5daebef..b01b11b2 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -5599,6 +5599,7 @@ const sites = [ parameters: { native: true, preferSpartanId: true, + networkEntryId: true, }, }, { @@ -5894,9 +5895,11 @@ const sites = [ parent: 'kink', }, { - slug: 'brutalsessions', - name: 'Brutal Sessions', - url: 'https://www.kink.com/channel/brutal-sessions', + slug: 'dungeonsex', + rename: 'brutalsessions', + name: 'Dungeon Sex', + url: 'https://www.kink.com/channel/dungeon-sex', + alias: ['brutal sessions'], description: "Hardcore BDSM jam packed with XXX fucking in bondage! We're taking dungeon sex beyond the castle!", parent: 'kink', }, diff --git a/src/entities.js b/src/entities.js index 6801205a..2d2db80b 100755 --- a/src/entities.js +++ b/src/entities.js @@ -36,6 +36,7 @@ function curateEntity(entity, includeParameters = false) { id: entity.id, name: entity.name, url: entity.url, + origin: new URL(entity.url).origin, description: entity.description, slug: entity.slug, type: entity.type, diff --git a/src/scrapers/gamma.js b/src/scrapers/gamma.js index 84137c2e..3af0f137 100755 --- a/src/scrapers/gamma.js +++ b/src/scrapers/gamma.js @@ -299,6 +299,37 @@ async function scrapeApiReleases(json, site, options) { }); } +async function fetchLatestApi(site, page = 1, options, _preData, upcoming = false) { + const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`; + const { apiUrl } = await fetchApiCredentials(referer, site); + const slug = options.parameters.querySlug || site.slug; + + const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]${options.parameters.queryChannel + ? `&filters=channels.id:${options.parameters.queryChannel === true ? slug : options.parameters.queryChannel}` + : `&filters=availableOnSite:${slug}`}`; + + const res = await http.post(apiUrl, { + requests: [ + { + indexName: 'all_scenes', + params, + }, + ], + }, { + headers: { + Referer: referer, + }, + }, { + encodeJSON: true, + }); + + if (res.status === 200 && res.body.results?.[0]?.hits) { + return scrapeApiReleases(res.body.results[0].hits, site, options); + } + + return res.status; +} + function scrapeAll(scenes, site, networkUrl, hasTeaser = true) { return scenes.map(({ query, el }) => { const release = {}; @@ -328,6 +359,54 @@ function scrapeAll(scenes, site, networkUrl, hasTeaser = true) { }); } +function getLatestUrl(site, page) { + if (site.parameters?.latest) { + if (/^http/.test(site.parameters.latest)) { + return /%d/.test(site.parameters.latest) + ? util.format(site.parameters.latest, page) + : `${site.parameters.latest}${page}`; + } + + return /%d/.test(site.parameters.latest) + ? util.format(`${site.url}${site.parameters.latest}`, page) + : `${site.url}${site.parameters.latest}${page}`; + } + + return `${site.url}/en/videos/AllCategories/0/${page}`; +} + +function getUpcomingUrl(site) { + if (site.parameters?.upcoming) { + return /^http/.test(site.parameters.upcoming) + ? `${site.parameters.upcoming}` + : `${site.url}${site.parameters.upcoming}`; + } + + return `${site.url}/en/videos/AllCategories/0/1/upcoming`; +} + +async function fetchLatest(site, page = 1) { + const url = getLatestUrl(site, page); + const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]'); + + if (res.ok) { + return scrapeAll(res.items, site); + } + + return res.status; +} + +async function fetchUpcoming(site) { + const url = getUpcomingUrl(site); + const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]'); + + if (res.ok) { + return scrapeAll(res.items, site, null, false); + } + + return res.status; +} + async function scrapeScene({ query }, url, channel, baseRelease, mobileItem, options) { const release = { query }; // used by XEmpire scraper to resolve channel-specific details @@ -658,37 +737,6 @@ function scrapeApiProfile(data, releases, siteSlug) { return profile; } -async function fetchLatestApi(site, page = 1, options, _preData, upcoming = false) { - const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`; - const { apiUrl } = await fetchApiCredentials(referer, site); - const slug = options.parameters.querySlug || site.slug; - - const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]${options.parameters.queryChannel - ? `&filters=channels.id:${options.parameters.queryChannel === true ? slug : options.parameters.queryChannel}` - : `&filters=availableOnSite:${slug}`}`; - - const res = await http.post(apiUrl, { - requests: [ - { - indexName: 'all_scenes', - params, - }, - ], - }, { - headers: { - Referer: referer, - }, - }, { - encodeJSON: true, - }); - - if (res.status === 200 && res.body.results?.[0]?.hits) { - return scrapeApiReleases(res.body.results[0].hits, site, options); - } - - return res.status; -} - async function fetchUpcomingApi(site, page = 1, options, preData) { return fetchLatestApi(site, page, options, preData, true); } @@ -771,54 +819,6 @@ async function fetchMovieApi(url, site, baseRelease, options) { return res.status; } -function getLatestUrl(site, page) { - if (site.parameters?.latest) { - if (/^http/.test(site.parameters.latest)) { - return /%d/.test(site.parameters.latest) - ? util.format(site.parameters.latest, page) - : `${site.parameters.latest}${page}`; - } - - return /%d/.test(site.parameters.latest) - ? util.format(`${site.url}${site.parameters.latest}`, page) - : `${site.url}${site.parameters.latest}${page}`; - } - - return `${site.url}/en/videos/AllCategories/0/${page}`; -} - -function getUpcomingUrl(site) { - if (site.parameters?.upcoming) { - return /^http/.test(site.parameters.upcoming) - ? `${site.parameters.upcoming}` - : `${site.url}${site.parameters.upcoming}`; - } - - return `${site.url}/en/videos/AllCategories/0/1/upcoming`; -} - -async function fetchLatest(site, page = 1) { - const url = getLatestUrl(site, page); - const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]'); - - if (res.ok) { - return scrapeAll(res.items, site); - } - - return res.status; -} - -async function fetchUpcoming(site) { - const url = getUpcomingUrl(site); - const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]'); - - if (res.ok) { - return scrapeAll(res.items, site, null, false); - } - - return res.status; -} - function getDeepUrl(url, site, baseRelease, mobile) { const filter = new Set(['en', 'video', 'scene', site.slug, site.parent.slug]); const pathname = baseRelease?.path || new URL(url).pathname diff --git a/src/scrapers/porncz.js b/src/scrapers/porncz.js index 5c109900..748b6543 100755 --- a/src/scrapers/porncz.js +++ b/src/scrapers/porncz.js @@ -1,121 +1,123 @@ 'use strict'; -const http = require('../utils/http'); -const qu = require('../utils/qu'); -const slugify = require('../utils/slugify'); -const capitalize = require('../utils/capitalize'); +const unprint = require('unprint'); -function scrapeAll(scenes, channel) { - return scenes.map(({ query }) => { +const slugify = require('../utils/slugify'); + +function scrapeAll(scenes, _channel) { + return scenes.map(({ query, element }) => { const release = {}; - release.url = query.url('h4 a', 'href', { origin: channel.url }); - release.entryId = new URL(release.url).pathname.match(/\d+$/)[0]; + release.url = query.url('.card__link'); + release.entryId = new URL(release.url).pathname.match(/\/en\/(.*)/)[1]; - release.title = query.cnt('h4 a'); - release.duration = query.duration('.product-item-time'); + release.title = query.content('.card__link'); + release.duration = query.duration('.card__img_badge.bottom-right'); - release.poster = query.img('.product-item-image img', 'src', { origin: channel.url }); + release.poster = query.img('.card__img img'); + release.teaser = unprint.query.dataset(element, null, 'video'); return release; }); } +async function fetchLatest(channel, page = 1) { + const res = await unprint.get(`${channel.origin}/en/videos?page=${page}`, { selectAll: '.card--item' }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + function scrapeScene({ query }, url, channel) { const release = {}; - release.entryId = new URL(url).pathname.match(/\d+$/)[0]; + release.entryId = new URL(url).pathname.match(/\/en\/(.*)/)[1]; - release.title = query.cnt('.heading-detail h1'); - release.description = query.cnt('.heading-detail p:nth-child(3)'); + release.title = query.content('h1.h2'); + release.description = query.attribute('meta[property="og:description"]', 'content'); // not usually used, if ever - const details = query.all('.video-info-item').reduce((acc, detailEl) => { - const key = detailEl.textContent.match(/(\w+):/)[1]; + release.date = query.date('meta[property="video:release_date"]', 'YYYY-MM-DD', { attribute: 'content' }); + release.duration = query.number('meta[property="video:duration"]', { attribute: 'content' }); - return { ...acc, [slugify(key, '_')]: detailEl }; - }, {}); + release.actors = query.all('.video-info .mini-avatars a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.origin }), + avatar: [ + unprint.query.img(actorEl, 'img')?.replace('-video_actor_avatar', '-actor_detail'), + unprint.query.img(actorEl, 'img'), + ], + })); - const { date, precision } = query.dateAgo(details.date); + release.tags = query.contents('.video-info a[href*="?category"]').map((tag) => tag.replace('#', '').trim()); + release.qualities = query.numbers('.download-dropdown-menu li div', { attribute: 'data-res' }); - release.date = date; - release.datePrecision = precision; + release.poster = [ + query.img('.video-player', { attribute: 'data-poster' }), + query.img('meta[property="og:image"]', { attribute: 'content' }), + ]; - release.actors = query.cnts(details.actors, 'a').map((actor) => capitalize(actor, { uncapitalize: true })); - release.duration = query.duration(details.duration); - release.tags = query.cnts(details.genres, 'a'); + release.trailer = query.all('.video-player source').map((videoEl) => ({ + src: unprint.query.video(videoEl, null), + quality: unprint.query.number(videoEl, null, { attribute: 'size' }), + })); - release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url }); - release.photos = query.imgs('#gallery .photo-item img', 'data-src', { origin: channel.url }); + release.photos = query.all('.image .gallery-popup').map((imgEl) => [ + unprint.query.img(imgEl, null, { attribute: 'href' }), + unprint.query.img(imgEl, 'img'), + ]); - release.trailer = query.video(); - - release.channel = slugify(query.q('.video-detail-logo img', 'alt'), ''); + release.channel = slugify(query.attribute('meta[property="og:site_name"]', 'content'), ''); return release; } -function scrapeProfile({ query }, entity) { - const profile = {}; +async function fetchScene(url, channel) { + const res = await unprint.get(url); - profile.avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url }); - profile.releases = scrapeAll(qu.initAll(query.all('.product-item')), entity); + if (res.ok) { + return scrapeScene(res.context, url, channel); + } + + return res.status; +} + +function scrapeProfile({ query }, url, _entity) { + const profile = { url }; + const bio = Object.fromEntries(query.all('.model-info__item').map((bioEl) => [ + slugify(unprint.query.content(bioEl, 'span:first-child'), '_'), + unprint.query.content(bioEl, 'span:last-child'), + ])); + + profile.avatar = query.img('.actor-img'); + + profile.gender = bio.gender; + profile.birthCountry = bio.nationality; + profile.ethnicity = bio.ethnicity; + profile.age = bio.age; + + profile.hairColor = bio.hair_color; + + if (!bio.breast_size?.includes('-')) profile.cup = bio.breast_size; // larger than F is defined as F-Z, not too useful + if (/natural/i.test(bio.breast_type)) profile.naturalBoobs = true; + if (/fake/i.test(bio.breast_type)) profile.naturalBoobs = false; + if (/no/i.test(bio.tattoo)) profile.hasTattoos = false; + if (/yes/i.test(bio.tattoo)) profile.hasTattoos = true; return profile; } -async function fetchLatest(channel, page = 1) { - const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`; - - // pagination state is kept in session, and new each 'page' includes all previous pages - const session = http.session(); - const headers = { 'X-Requested-With': 'XMLHttpRequest' }; - - for (let i = 0; i < page - 1; i += 1) { - await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop - } - - const res = await http.get(url, { headers, session }); - - if (res.ok) { - const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item'); - - return scrapeAll(items.slice((page - 1) * 16), channel); - } - - return res.status; -} - -async function fetchScene(url, channel) { - const res = await qu.get(url, 'body > .container'); - - if (res.ok) { - return scrapeScene(res.item, url, channel); - } - - return res.status; -} - async function fetchProfile(baseActor, entity) { - const searchRes = await qu.getAll(`https://www.porncz.com/en/search-results?showModels=1&value=${baseActor.name}`, '.project-item'); + const url = `${new URL(entity.url).origin}/en/pornstars/${baseActor.slug}`; + const res = await unprint.get(`${new URL(entity.url).origin}/en/pornstars/${baseActor.slug}`); - if (searchRes.ok) { - const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name); - - if (model) { - const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' }); - const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos - - if (modelRes.ok) { - return scrapeProfile(modelRes.item, entity); - } - - return modelRes.status; - } - - return null; + if (res.ok) { + return scrapeProfile(res.context, url, entity); } - return searchRes.status; + return res.status; } module.exports = { diff --git a/tests/profiles.js b/tests/profiles.js index a80247b1..b0f9e7ad 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -173,6 +173,7 @@ const actors = [ { entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] }, { entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] }, { entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] }, + { entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] }, ]; const actorScrapers = scrapers.actors;