Added q scraping helper. Added Perfect Gonzo scraper.
| After Width: | Height: | Size: 9.1 KiB | 
| After Width: | Height: | Size: 9.5 KiB | 
| After Width: | Height: | Size: 28 KiB | 
| After Width: | Height: | Size: 788 B | 
| After Width: | Height: | Size: 36 KiB | 
| After Width: | Height: | Size: 9.2 KiB | 
| After Width: | Height: | Size: 36 KiB | 
| After Width: | Height: | Size: 3.4 KiB | 
| After Width: | Height: | Size: 18 KiB | 
| After Width: | Height: | Size: 19 KiB | 
| After Width: | Height: | Size: 25 KiB | 
| After Width: | Height: | Size: 56 KiB | 
| After Width: | Height: | Size: 22 KiB | 
| After Width: | Height: | Size: 23 KiB | 
|  | @ -113,6 +113,12 @@ const networks = [ | |||
|         url: 'https://www.naughtyamerica.com', | ||||
|         description: 'The best porn movies daily at Naughty America! Experience the most seductive porn stars in stunning virtual reality, 4K and HD porn videos!', | ||||
|     }, | ||||
|     { | ||||
|         slug: 'perfectgonzo', | ||||
|         name: 'Perfect Gonzo', | ||||
|         url: 'https://www.perfectgonzo.com', | ||||
|         description: '', | ||||
|     }, | ||||
|     { | ||||
|         slug: 'pervcity', | ||||
|         name: 'Perv City', | ||||
|  |  | |||
|  | @ -2078,6 +2078,67 @@ function getSites(networksMap) { | |||
|             url: 'https://www.naughtyamerica.com/site/live-naughty-nurse', | ||||
|             network_id: networksMap.naughtyamerica, | ||||
|         }, | ||||
|         // PERFECT GONZO
 | ||||
|         { | ||||
|             slug: 'allinternal', | ||||
|             name: 'All Internal', | ||||
|             url: 'https://allinternal.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'asstraffic', | ||||
|             name: 'Ass Traffic', | ||||
|             url: 'https://asstraffic.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'cumforcover', | ||||
|             name: 'Cum For Cover', | ||||
|             url: 'https://cumforcover.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'fistflush', | ||||
|             name: 'Fist Flush', | ||||
|             url: 'https://fistflush.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'givemepink', | ||||
|             name: 'Give Me Pink', | ||||
|             url: 'https://givemepink.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'milfthing', | ||||
|             name: 'MILF Thing', | ||||
|             url: 'https://milfthing.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'primecups', | ||||
|             name: 'Prime Cups', | ||||
|             url: 'https://primecups.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'purepov', | ||||
|             name: 'Pure POV', | ||||
|             url: 'https://purepov.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'spermswap', | ||||
|             name: 'Sperm Swap', | ||||
|             url: 'https://spermswap.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         { | ||||
|             slug: 'tamedteens', | ||||
|             name: 'Tamed Teens', | ||||
|             url: 'https://tamedteens.com', | ||||
|             network_id: networksMap.perfectgonzo, | ||||
|         }, | ||||
|         // PERVCITY
 | ||||
|         { | ||||
|             slug: 'analoverdose', | ||||
|  |  | |||
|  | @ -296,6 +296,11 @@ function getTags(groupsMap) { | |||
|             slug: 'cum-on-boobs', | ||||
|             alias_for: null, | ||||
|         }, | ||||
|         { | ||||
|             name: 'cum swapping', | ||||
|             slug: 'cum-swapping', | ||||
|             alias_for: null, | ||||
|         }, | ||||
|         { | ||||
|             name: 'cumshot', | ||||
|             slug: 'cumshot', | ||||
|  | @ -756,6 +761,11 @@ function getTags(groupsMap) { | |||
|             alias_for: null, | ||||
|             group_id: groupsMap.clothing, | ||||
|         }, | ||||
|         { | ||||
|             name: 'solo', | ||||
|             slug: 'solo', | ||||
|             alias_for: null, | ||||
|         }, | ||||
|         { | ||||
|             name: 'spanking', | ||||
|             slug: 'spanking', | ||||
|  | @ -1120,6 +1130,10 @@ function getTagAliases(tagsMap) { | |||
|             name: 'creampies', | ||||
|             alias_for: tagsMap.creampie, | ||||
|         }, | ||||
|         { | ||||
|             name: 'creampie - anal', | ||||
|             alias_for: tagsMap['anal-creampie'], | ||||
|         }, | ||||
|         { | ||||
|             name: 'crop', // a type of whip, not [sic] short for corporal
 | ||||
|             alias_for: tagsMap['corporal-punishment'], | ||||
|  | @ -1188,6 +1202,10 @@ function getTagAliases(tagsMap) { | |||
|             name: 'doggystyle - standing', | ||||
|             alias_for: tagsMap['standing-doggy-style'], | ||||
|         }, | ||||
|         { | ||||
|             name: 'doggystyle regular', | ||||
|             alias_for: tagsMap['doggy-style'], | ||||
|         }, | ||||
|         { | ||||
|             name: 'dom', | ||||
|             alias_for: tagsMap.bdsm, | ||||
|  | @ -1536,6 +1554,10 @@ function getTagAliases(tagsMap) { | |||
|             name: 'teens', | ||||
|             alias_for: tagsMap.teen, | ||||
|         }, | ||||
|         { | ||||
|             name: 'throat fucking', | ||||
|             alias_for: tagsMap.facefucking, | ||||
|         }, | ||||
|         { | ||||
|             name: 'tiny boobs', | ||||
|             alias_for: tagsMap['small-boobs'], | ||||
|  | @ -1598,12 +1620,14 @@ function getSiteTags() { | |||
|         dpparodies: ['parody'], | ||||
|         eighteenyearsold: ['teen'], | ||||
|         exotic4k: ['4k'], | ||||
|         givemepink: ['solo', 'masturbation'], | ||||
|         lubed: ['oil'], | ||||
|         familystrokes: ['family'], | ||||
|         massagecreep: ['massage'], | ||||
|         menonedge: ['gay'], | ||||
|         povd: ['pov'], | ||||
|         puremature: ['milf'], | ||||
|         spermswap: ['cum-swapping'], | ||||
|         spyfam: ['family'], | ||||
|         submissived: ['bdsm'], | ||||
|         swallowed: ['blowjob', 'deepthroat', 'facefucking'], | ||||
|  |  | |||
|  | @ -52,8 +52,10 @@ async function scrapeScene(scene, site, tokens) { | |||
|         entryId: scene.id, | ||||
|         title: scene.title, | ||||
|         duration: scene.length, | ||||
|         tokens, // attach tokens to reduce number of requests required for deep fetching
 | ||||
|         site, | ||||
|         meta: { | ||||
|             tokens, // attach tokens to reduce number of requests required for deep fetching
 | ||||
|         }, | ||||
|     }; | ||||
| 
 | ||||
|     release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, true)}`; | ||||
|  | @ -93,7 +95,7 @@ async function fetchLatest(site, page = 1) { | |||
| } | ||||
| 
 | ||||
| async function fetchScene(url, site, release) { | ||||
|     const { time, token } = release?.tokens || await fetchToken(site); // use attached tokens when deep fetching
 | ||||
|     const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
 | ||||
|     const { pathname } = new URL(url); | ||||
|     const entryId = pathname.split('/')[2]; | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,149 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const bhttp = require('bhttp'); | ||||
| const blake2 = require('blake2'); | ||||
| const knex = require('../knex'); | ||||
| 
 | ||||
| const { ex, ctxa } = require('../utils/q'); | ||||
| 
 | ||||
/**
 * Query the slugs of all sites whose network slug is 'perfectgonzo'.
 *
 * @returns {Promise<string[]>} Values of the `sites.slug` column, as plucked by knex.
 */
async function getSiteSlugs() {
    // Restrict the sites table to rows joined to the Perfect Gonzo network.
    const networkSites = knex('sites')
        .join('networks', 'networks.id', 'sites.network_id')
        .where('networks.slug', 'perfectgonzo');

    return networkSites.pluck('sites.slug');
}
| 
 | ||||
/**
 * Derive a short hexadecimal digest from an identifier string.
 *
 * @param {string} identifier - Text to hash.
 * @returns {string} Hex-encoded BLAKE2b digest created with `digestLength: 8`.
 */
function getHash(identifier) {
    const hasher = blake2.createHash('blake2b', { digestLength: 8 });
    const input = Buffer.from(identifier);

    hasher.update(input);

    return hasher.digest('hex');
}
| 
 | ||||
/**
 * Collect the entries listed after the 'Male Models' label inside a tag container.
 *
 * The container's child nodes are flattened to `{ type, text }` pairs, whitespace-only
 * nodes are discarded, and everything between the 'Male Models' label and the next
 * text node (nodeType 3, treated as the next label) is returned.
 *
 * @param {?{childNodes: Iterable}} tagContainer - Element holding labels and tags; may be null.
 * @returns {string[]} Trimmed text of each node in the 'Male Models' section, or [] when
 *   the container or the label is missing.
 */
function extractMaleModelsFromTags(tagContainer) {
    if (!tagContainer) {
        return [];
    }

    const nodes = Array.from(tagContainer.childNodes, node => ({
        type: node.nodeType,
        text: node.textContent.trim(),
    })).filter(node => node.text.length > 0);

    const labelIndex = nodes.findIndex(node => node.text === 'Male Models');

    if (labelIndex === -1) {
        return [];
    }

    const nextLabelIndex = nodes.findIndex((node, index) => index > labelIndex && node.type === 3);

    // When 'Male Models' is the last section there is no following label; slicing to -1
    // would silently drop the final model, so run to the end of the list instead.
    const sectionEnd = nextLabelIndex === -1 ? nodes.length : nextLabelIndex;

    return nodes.slice(labelIndex + 1, sectionEnd).map(model => model.text);
}
| 
 | ||||
/**
 * Find which site slug occurs in a photo URL.
 *
 * @param {string} photo - Photo URL to search.
 * @param {string[]} [metaSiteSlugs] - Known site slugs; fetched with getSiteSlugs() when falsy.
 * @returns {Promise<?string>} The first slug found in `photo`, or null when there is no
 *   match, no slugs to match against, or `photo` is not a string.
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
    const siteSlugs = metaSiteSlugs || await getSiteSlugs();

    // An empty alternation compiles to a pattern that matches everything with '',
    // which would be reported as a (blank) channel instead of "no match".
    if (typeof photo !== 'string' || siteSlugs.length === 0) {
        return null;
    }

    const channelMatch = photo.match(new RegExp(siteSlugs.join('|')));

    return channelMatch ? channelMatch[0] : null;
}
| 
 | ||||
/**
 * Build release objects from a listing page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {{url: string, slug: string}} site - Site the listing belongs to.
 * @returns {Promise<Object[]>} One release per '#content-main .itemm' element.
 */
async function scrapeLatest(html, site) {
    const siteSlugs = await getSiteSlugs();
    const { element } = ex(html);
    const items = ctxa(element, '#content-main .itemm');

    return items.map((item) => {
        const link = item.q('a');
        const { title } = link;
        const url = `${site.url}${link.href}`;
        const date = item.qdate('.nm-date', 'MM/DD/YYYY');

        // The entry ID is a hash of site slug, third path segment and ISO date.
        const [, , slug] = new URL(url).pathname.split('/');
        const entryId = getHash(`${site.slug}${slug}${date.toISOString()}`);

        // The title doubles as the actor list, separated by ampersands.
        const actors = title.split('&').map(name => name.trim());

        // First image is used as the poster, the rest as photos.
        const [poster, ...photos] = item.qimages('.bloc-link img');

        // The first dropdown link is skipped.
        const [, ...tags] = item.qa('.dropdown ul a', true);
        const duration = item.qlength('.dropdown p:first-child');

        return {
            site,
            meta: {
                siteSlugs, // passed along so deep fetches can reuse the list
            },
            title,
            url,
            date,
            entryId,
            actors,
            poster,
            photos,
            tags,
            duration,
        };
    });
}
| 
 | ||||
/**
 * Build a release object from a scene page.
 *
 * @param {string} html - Markup of the scene page.
 * @param {Object} site - Site the scene was requested through.
 * @param {string} url - URL the scene page was fetched from.
 * @param {string[]} [metaSiteSlugs] - Site slugs forwarded to extractChannelFromPhoto.
 * @returns {Promise<Object>} The release; `channel`, a channel-specific `url` and `entryId`
 *   are only set when a channel could be derived from the first photo.
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
    const page = ex(html);

    const release = {
        url,
        site,
        title: page.q('#movie-header h2', true),
        date: page.qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/),
        description: page.q('.container .mg-md', true),
        duration: page.qlength('#video-ribbon .container > div > span:nth-child(3)'),
    };

    // Linked actors first, followed by the names from the 'Male Models' tag section.
    const linkedActors = page.qa('#video-info a', true);
    const maleModels = extractMaleModelsFromTags(page.q('.tag-container'));
    release.actors = linkedActors.concat(maleModels);

    // A '4K' marker in the ribbon adds a '4k' tag.
    const tags = page.qa('.tag-container a', true);
    const ribbonLabel = page.q('#video-ribbon .container > div > span:nth-child(2)', true);
    release.tags = /4K/.test(ribbonLabel) ? tags.concat('4k') : tags;

    release.photos = page.qa('.bxslider_pics img').map(img => img.dataset.original || img.src);
    release.poster = page.qposter();

    const trailerSrc = page.qtrailer();
    if (trailerSrc) {
        release.trailer = { src: trailerSrc };
    }

    const [firstPhoto] = release.photos;
    if (release.photos.length > 0) {
        release.channel = await extractChannelFromPhoto(firstPhoto, metaSiteSlugs);
    }

    if (!release.channel) {
        return release;
    }

    // Point the release at the channel's own domain and hash the entry ID from it.
    const { pathname } = new URL(url);
    release.url = `https://${release.channel}.com${pathname}`;

    const [, , slug] = pathname.split('/');
    release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);

    return release;
}
| 
 | ||||
/**
 * Fetch one page of a site's movie listing and scrape it.
 *
 * @param {{url: string}} site - Site whose `/movies/page-N` listing is requested.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<Object[]>} Scraped releases, or [] when the response status is not 200.
 */
async function fetchLatest(site, page = 1) {
    const res = await bhttp.get(`${site.url}/movies/page-${page}`);

    if (res.statusCode !== 200) {
        return [];
    }

    return scrapeLatest(res.body.toString(), site);
}
| 
 | ||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL to request.
 * @param {Object} site - Site the scene belongs to.
 * @param {Object} [release] - Previously scraped release; its `meta.siteSlugs`, when
 *   present, is forwarded so the slug list does not have to be queried again.
 * @returns {Promise<Object|Array>} The scraped release, or [] when the response status is not 200.
 */
async function fetchScene(url, site, release) {
    const res = await bhttp.get(url);

    if (res.statusCode === 200) {
        // A release without a meta object must not crash the fetch; scrapeScene
        // accepts an undefined slug list.
        return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
    }

    return [];
}
| 
 | ||||
| module.exports = { | ||||
|     fetchLatest, | ||||
|     fetchScene, | ||||
| }; | ||||
|  | @ -3,13 +3,12 @@ | |||
| const bhttp = require('bhttp'); | ||||
| const { JSDOM } = require('jsdom'); | ||||
| const moment = require('moment'); | ||||
| const ex = require('../utils/ex'); | ||||
| 
 | ||||
| function scrapeLatest(html, site) { | ||||
|     const s = ex(html); | ||||
|     const { document } = new JSDOM(html).window; | ||||
|     const { origin } = new URL(site.url); | ||||
| 
 | ||||
|     const videos = s.qa('.video-releases-list').slice(-1)[0]; | ||||
|     const videos = document.querySelectorAll('.video-releases-list').slice(-1)[0]; | ||||
| 
 | ||||
|     return Array.from(videos.querySelectorAll('.card'), (scene) => { | ||||
|         const release = { site }; | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ const jayrock = require('./jayrock'); | |||
| const kink = require('./kink'); | ||||
| const mikeadriano = require('./mikeadriano'); | ||||
| const mofos = require('./mofos'); | ||||
| const perfectgonzo = require('./perfectgonzo'); | ||||
| const pervcity = require('./pervcity'); | ||||
| const pornpros = require('./pornpros'); | ||||
| const privateNetwork = require('./private'); // reserved keyword
 | ||||
|  | @ -56,6 +57,7 @@ module.exports = { | |||
|         legalporno, | ||||
|         mikeadriano, | ||||
|         mofos, | ||||
|         perfectgonzo, | ||||
|         pervcity, | ||||
|         pornpros, | ||||
|         private: privateNetwork, | ||||
|  |  | |||
|  | @ -1,23 +0,0 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const { JSDOM } = require('jsdom'); | ||||
| 
 | ||||
/**
 * Return the first element under `context` matching `selector`.
 *
 * @param {{querySelector: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @returns {*} Whatever `context.querySelector` returns.
 */
function q(context, selector) {
    const match = context.querySelector(selector);

    return match;
}
| 
 | ||||
/**
 * Return all elements under `context` matching `selector` as a plain array.
 *
 * @param {{querySelectorAll: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @returns {Array} Array copy of the `querySelectorAll` result.
 */
function qa(context, selector) {
    const matches = context.querySelectorAll(selector);

    return Array.from(matches);
}
| 
 | ||||
/**
 * Parse markup with JSDOM and expose the document with selector helpers bound to it.
 *
 * @param {string} html - Markup to parse.
 * @returns {{document: Object, q: Function, qa: Function}} The parsed document plus
 *   `q(selector)` and `qa(selector)` shortcuts that search that document.
 */
function ex(html) {
    const dom = new JSDOM(html);
    const { document } = dom.window;

    return {
        document,
        q: selector => q(document, selector),
        qa: selector => qa(document, selector),
    };
}
| 
 | ||||
| module.exports = ex; | ||||
|  | @ -0,0 +1,107 @@ | |||
| 'use strict'; | ||||
| 
 | ||||
| const { JSDOM } = require('jsdom'); | ||||
| const moment = require('moment'); | ||||
| 
 | ||||
/**
 * Select the first element matching `selector`, or read one of its properties.
 *
 * @param {{querySelector: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @param {(string|boolean)} [attrArg] - Property to read from the element; `true` is
 *   shorthand for 'textContent'. When falsy the element itself is returned.
 * @param {boolean} [trim=true] - Trim string values before returning them.
 * @returns {*} The element (or null) when no property is requested; otherwise the
 *   property value, or null when the element or the property is missing.
 */
function q(context, selector, attrArg, trim = true) {
    const element = context.querySelector(selector);
    const attr = attrArg === true ? 'textContent' : attrArg;

    if (!attr) {
        return element;
    }

    // A selector that matches nothing yields null rather than a TypeError.
    if (!element) {
        return null;
    }

    const value = element[attr];

    if (value === null || value === undefined) {
        return null;
    }

    // Only strings can be trimmed; other property values are passed through untouched.
    return trim && typeof value === 'string' ? value.trim() : value;
}
| 
 | ||||
/**
 * Select every element matching `selector`, or read one property from each of them.
 *
 * @param {{querySelectorAll: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @param {(string|boolean)} [attrArg] - Property to read from each element; `true` is
 *   shorthand for 'textContent'. When falsy the elements themselves are returned.
 * @param {boolean} [trim=true] - Trim each value (skipped for null/undefined values).
 * @returns {Array} Elements, or one property value per element.
 */
function qall(context, selector, attrArg, trim = true) {
    const elements = Array.from(context.querySelectorAll(selector));
    const attr = attrArg === true ? 'textContent' : attrArg;

    if (!attr) {
        return elements;
    }

    return elements.map((el) => {
        const value = el[attr];

        return trim ? value?.trim() : value;
    });
}
| 
 | ||||
/**
 * Read a date from the first element matching `selector`.
 *
 * @param {{querySelector: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @param {string} format - moment format string used to parse the date text.
 * @param {RegExp} [match] - Optional pattern; when given, only its first match in the
 *   element text is parsed.
 * @param {string} [attr='textContent'] - Element property holding the date text.
 * @returns {?Date} The date parsed as UTC, or null when the element, the text, the
 *   pattern match, or a valid date is missing.
 */
function qdate(context, selector, format, match, attr = 'textContent') {
    const dateString = context.querySelector(selector)?.[attr];

    // No element, or no text on it: nothing to parse.
    if (typeof dateString !== 'string') {
        return null;
    }

    const dateStamp = match ? dateString.match(match)?.[0] : dateString.trim();

    if (!dateStamp) {
        return null;
    }

    const date = moment.utc(dateStamp, format);

    // Report unparseable text as null, like a failed pattern match, instead of
    // handing callers an Invalid Date object.
    return date.isValid() ? date.toDate() : null;
}
| 
 | ||||
/**
 * Collect one property from every image matching `selector`.
 *
 * @param {Object} context - Node to search within.
 * @param {string} [selector='img'] - CSS selector.
 * @param {string} [attr='src'] - Property to read from each element.
 * @returns {Array} Result of `qall(context, selector, attr)`.
 */
function qimages(context, selector = 'img', attr = 'src') {
    const sources = qall(context, selector, attr);

    return sources;
}
| 
 | ||||
/**
 * Read the poster property of the first element matching `selector`.
 *
 * @param {Object} context - Node to search within.
 * @param {string} [selector='video'] - CSS selector.
 * @param {string} [attr='poster'] - Property to read from the element.
 * @returns {*} Result of `q(context, selector, attr)`.
 */
function qposter(context, selector = 'video', attr = 'poster') {
    const poster = q(context, selector, attr);

    return poster;
}
| 
 | ||||
/**
 * Read the source property of the first element matching `selector`.
 *
 * @param {Object} context - Node to search within.
 * @param {string} [selector='source'] - CSS selector.
 * @param {string} [attr='src'] - Property to read from the element.
 * @returns {*} Result of `q(context, selector, attr)`.
 */
function qtrailer(context, selector = 'source', attr = 'src') {
    const trailer = q(context, selector, attr);

    return trailer;
}
| 
 | ||||
/**
 * Read a duration in seconds from the first element matching `selector`.
 *
 * The first `m:s` or `h:m:s` stamp found in the text is used.
 *
 * @param {Object} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @param {string} [attr='textContent'] - Element property holding the duration text.
 * @returns {?number} Duration in seconds, or null when no text or no stamp is found.
 */
function qlength(context, selector, attr = 'textContent') {
    const durationString = q(context, selector, attr);

    // The property may be absent or hold a non-string; there is nothing to match then.
    if (typeof durationString !== 'string') {
        return null;
    }

    const duration = durationString.match(/(\d+:)?\d+:\d+/);

    if (!duration) {
        return null;
    }

    // Left-pad with an hours segment so 'm:s' stamps become 'h:m:s' for moment.
    const segments = ['00'].concat(duration[0].split(':')).slice(-3);

    return moment.duration(segments.join(':')).asSeconds();
}
| 
 | ||||
// Query helpers under their full names.
const baseFuncs = {
    q,
    qall,
    qdate,
    qimages,
    qposter,
    qlength,
    qtrailer,
};

// Every helper that gets bound to a context: full names first, then short aliases.
const funcs = {
    ...baseFuncs,
    qa: baseFuncs.qall,
    qd: baseFuncs.qdate,
    qi: baseFuncs.qimages,
    qp: baseFuncs.qposter,
    ql: baseFuncs.qlength,
    qt: baseFuncs.qtrailer,
};
| 
 | ||||
/**
 * Bind every helper in `funcs` to a single element.
 *
 * @param {Object} element - Node the helpers should search within.
 * @returns {Object} `{ element, ...helpers }`, where each helper takes the same
 *   arguments as its `funcs` counterpart minus the leading context.
 */
function ctx(element) {
    const boundEntries = Object.entries(funcs)
        .map(([key, func]) => [key, (...args) => func(element, ...args)]);

    return {
        element,
        ...Object.fromEntries(boundEntries),
    };
}
| 
 | ||||
/**
 * Create a bound helper context for every element matching `selector`.
 *
 * @param {{querySelectorAll: Function}} context - Node to search within.
 * @param {string} selector - CSS selector.
 * @returns {Object[]} One `ctx(element)` result per matching element.
 */
function ctxa(context, selector) {
    const elements = context.querySelectorAll(selector);

    return Array.from(elements, element => ctx(element));
}
| 
 | ||||
/**
 * Parse markup with JSDOM and return helpers bound to the resulting document.
 *
 * @param {string} html - Markup to parse.
 * @returns {Object} `ctx(document)` for the parsed document.
 */
function ex(html) {
    const dom = new JSDOM(html);

    return ctx(dom.window.document);
}
| 
 | ||||
| module.exports = { | ||||
|     ex, | ||||
|     ctx, | ||||
|     ctxa, | ||||
|     ...funcs, | ||||
| }; | ||||