Added q scraping helper. Added Perfect Gonzo scraper.
After Width: | Height: | Size: 9.1 KiB |
After Width: | Height: | Size: 9.5 KiB |
After Width: | Height: | Size: 28 KiB |
After Width: | Height: | Size: 788 B |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 9.2 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 3.4 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 19 KiB |
After Width: | Height: | Size: 25 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 23 KiB |
|
@ -113,6 +113,12 @@ const networks = [
|
|||
url: 'https://www.naughtyamerica.com',
|
||||
description: 'The best porn movies daily at Naughty America! Experience the most seductive porn stars in stunning virtual reality, 4K and HD porn videos!',
|
||||
},
|
||||
{
|
||||
slug: 'perfectgonzo',
|
||||
name: 'Perfect Gonzo',
|
||||
url: 'https://www.perfectgonzo.com',
|
||||
description: '',
|
||||
},
|
||||
{
|
||||
slug: 'pervcity',
|
||||
name: 'Perv City',
|
||||
|
|
|
@ -2078,6 +2078,67 @@ function getSites(networksMap) {
|
|||
url: 'https://www.naughtyamerica.com/site/live-naughty-nurse',
|
||||
network_id: networksMap.naughtyamerica,
|
||||
},
|
||||
// PERFECT GONZO
|
||||
{
|
||||
slug: 'allinternal',
|
||||
name: 'All Internal',
|
||||
url: 'https://allinternal.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'asstraffic',
|
||||
name: 'Ass Traffic',
|
||||
url: 'https://asstraffic.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'cumforcover',
|
||||
name: 'Cum For Cover',
|
||||
url: 'https://cumforcover.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'fistflush',
|
||||
name: 'Fist Flush',
|
||||
url: 'https://fistflush.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'givemepink',
|
||||
name: 'Give Me Pink',
|
||||
url: 'https://givemepink.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'milfthing',
|
||||
name: 'MILF Thing',
|
||||
url: 'https://milfthing.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'primecups',
|
||||
name: 'Prime Cups',
|
||||
url: 'https://primecups.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'purepov',
|
||||
name: 'Pure POV',
|
||||
url: 'https://purepov.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'spermswap',
|
||||
name: 'Sperm Swap',
|
||||
url: 'https://spermswap.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
{
|
||||
slug: 'tamedteens',
|
||||
name: 'Tamed Teens',
|
||||
url: 'https://tamedteens.com',
|
||||
network_id: networksMap.perfectgonzo,
|
||||
},
|
||||
// PERVCITY
|
||||
{
|
||||
slug: 'analoverdose',
|
||||
|
|
|
@ -296,6 +296,11 @@ function getTags(groupsMap) {
|
|||
slug: 'cum-on-boobs',
|
||||
alias_for: null,
|
||||
},
|
||||
{
|
||||
name: 'cum swapping',
|
||||
slug: 'cum-swapping',
|
||||
alias_for: null,
|
||||
},
|
||||
{
|
||||
name: 'cumshot',
|
||||
slug: 'cumshot',
|
||||
|
@ -756,6 +761,11 @@ function getTags(groupsMap) {
|
|||
alias_for: null,
|
||||
group_id: groupsMap.clothing,
|
||||
},
|
||||
{
|
||||
name: 'solo',
|
||||
slug: 'solo',
|
||||
alias_for: null,
|
||||
},
|
||||
{
|
||||
name: 'spanking',
|
||||
slug: 'spanking',
|
||||
|
@ -1120,6 +1130,10 @@ function getTagAliases(tagsMap) {
|
|||
name: 'creampies',
|
||||
alias_for: tagsMap.creampie,
|
||||
},
|
||||
{
|
||||
name: 'creampie - anal',
|
||||
alias_for: tagsMap['anal-creampie'],
|
||||
},
|
||||
{
|
||||
name: 'crop', // a type of whip, not [sic] short for corporal
|
||||
alias_for: tagsMap['corporal-punishment'],
|
||||
|
@ -1188,6 +1202,10 @@ function getTagAliases(tagsMap) {
|
|||
name: 'doggystyle - standing',
|
||||
alias_for: tagsMap['standing-doggy-style'],
|
||||
},
|
||||
{
|
||||
name: 'doggystyle regular',
|
||||
alias_for: tagsMap['doggy-style'],
|
||||
},
|
||||
{
|
||||
name: 'dom',
|
||||
alias_for: tagsMap.bdsm,
|
||||
|
@ -1536,6 +1554,10 @@ function getTagAliases(tagsMap) {
|
|||
name: 'teens',
|
||||
alias_for: tagsMap.teen,
|
||||
},
|
||||
{
|
||||
name: 'throat fucking',
|
||||
alias_for: tagsMap.facefucking,
|
||||
},
|
||||
{
|
||||
name: 'tiny boobs',
|
||||
alias_for: tagsMap['small-boobs'],
|
||||
|
@ -1598,12 +1620,14 @@ function getSiteTags() {
|
|||
dpparodies: ['parody'],
|
||||
eighteenyearsold: ['teen'],
|
||||
exotic4k: ['4k'],
|
||||
givemepink: ['solo', 'masturbation'],
|
||||
lubed: ['oil'],
|
||||
familystrokes: ['family'],
|
||||
massagecreep: ['massage'],
|
||||
menonedge: ['gay'],
|
||||
povd: ['pov'],
|
||||
puremature: ['milf'],
|
||||
spermswap: ['cum-swapping'],
|
||||
spyfam: ['family'],
|
||||
submissived: ['bdsm'],
|
||||
swallowed: ['blowjob', 'deepthroat', 'facefucking'],
|
||||
|
|
|
@ -52,8 +52,10 @@ async function scrapeScene(scene, site, tokens) {
|
|||
entryId: scene.id,
|
||||
title: scene.title,
|
||||
duration: scene.length,
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
site,
|
||||
meta: {
|
||||
tokens, // attach tokens to reduce number of requests required for deep fetching
|
||||
},
|
||||
};
|
||||
|
||||
release.url = `${site.url}/scene/${release.entryId}/${slugify(release.title, true)}`;
|
||||
|
@ -93,7 +95,7 @@ async function fetchLatest(site, page = 1) {
|
|||
}
|
||||
|
||||
async function fetchScene(url, site, release) {
|
||||
const { time, token } = release?.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { time, token } = release?.meta.tokens || await fetchToken(site); // use attached tokens when deep fetching
|
||||
const { pathname } = new URL(url);
|
||||
const entryId = pathname.split('/')[2];
|
||||
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
'use strict';
|
||||
|
||||
const bhttp = require('bhttp');
|
||||
const blake2 = require('blake2');
|
||||
const knex = require('../knex');
|
||||
|
||||
const { ex, ctxa } = require('../utils/q');
|
||||
|
||||
/**
 * Look up the slugs of every site that belongs to the Perfect Gonzo network.
 *
 * Joins `sites` to `networks` on `sites.network_id` and keeps only rows whose
 * network slug is `perfectgonzo`.
 *
 * @returns {Promise<Array>} resolves to the plucked `sites.slug` values
 */
async function getSiteSlugs() {
  const networkSitesQuery = knex('sites')
    .pluck('sites.slug')
    .join('networks', 'networks.id', 'sites.network_id')
    .where('networks.slug', 'perfectgonzo');

  const slugs = await networkSitesQuery;

  return slugs;
}
|
||||
|
||||
/**
 * Derive a short, deterministic ID from an identifier string.
 *
 * @param {string} identifier - text to hash (the scrapers pass site/channel slug + scene slug + ISO date)
 * @returns {string} hex-encoded BLAKE2b digest created with `digestLength: 8`
 */
function getHash(identifier) {
  const hasher = blake2.createHash('blake2b', { digestLength: 8 });
  const payload = Buffer.from(identifier);

  hasher.update(payload);

  return hasher.digest('hex');
}
|
||||
|
||||
/**
 * Collect the names listed under the "Male Models" label of a tag container.
 *
 * The container's child nodes are flattened to `{ type, text }` pairs and
 * whitespace-only nodes are discarded. Everything between the node whose text
 * is exactly `Male Models` and the next text node (nodeType 3, treated as the
 * next label) is returned. When no further label follows, the section runs to
 * the end of the container.
 *
 * @param {?{childNodes: Iterable}} tagContainer - element holding labels and tag links; may be null
 * @returns {string[]} trimmed names, or an empty array when the container or label is absent
 */
function extractMaleModelsFromTags(tagContainer) {
  if (!tagContainer) {
    return [];
  }

  const nodes = Array.from(tagContainer.childNodes, node => ({ type: node.nodeType, text: node.textContent.trim() }))
    .filter(node => node.text.length > 0);

  const modelLabelIndex = nodes.findIndex(node => node.text === 'Male Models');

  if (modelLabelIndex === -1) {
    return [];
  }

  const nextLabelIndex = nodes.findIndex((node, index) => index > modelLabelIndex && node.type === 3);

  // findIndex yields -1 when "Male Models" is the last label; slicing to -1
  // would silently drop the final model, so fall back to the end of the list.
  const sectionEnd = nextLabelIndex === -1 ? nodes.length : nextLabelIndex;

  return nodes.slice(modelLabelIndex + 1, sectionEnd).map(node => node.text);
}
|
||||
|
||||
/**
 * Work out which network site a scene belongs to by finding a site slug
 * inside one of its photo URLs.
 *
 * @param {string} photo - photo URL to inspect
 * @param {string[]} [metaSiteSlugs] - slugs already fetched by the caller; when
 *   omitted they are loaded through getSiteSlugs()
 * @returns {Promise<?string>} the first slug occurrence found in the URL, or
 *   null when there is no usable input or no match
 */
async function extractChannelFromPhoto(photo, metaSiteSlugs) {
  const siteSlugs = metaSiteSlugs || await getSiteSlugs();

  // Escape regex metacharacters so a slug is always matched literally, and
  // drop empty entries: an empty alternative matches every string.
  const alternatives = siteSlugs
    .filter(Boolean)
    .map(slug => String(slug).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  if (typeof photo !== 'string' || alternatives.length === 0) {
    return null;
  }

  const channelMatch = photo.match(new RegExp(alternatives.join('|')));

  return channelMatch ? channelMatch[0] : null;
}
|
||||
|
||||
/**
 * Turn a "latest movies" listing page into release objects.
 *
 * @param {string} html - markup of the listing page
 * @param {{slug: string, url: string}} site - site being scraped; attached to every release
 * @returns {Promise<Object[]>} one release per `#content-main .itemm` element
 */
async function scrapeLatest(html, site) {
  const siteSlugs = await getSiteSlugs();
  const { element: root } = ex(html);
  const items = ctxa(root, '#content-main .itemm');

  return items.map((item) => {
    const anchor = item.q('a');
    const { title } = anchor;
    const url = `${site.url}${anchor.href}`;
    const date = item.qdate('.nm-date', 'MM/DD/YYYY');

    // The third path segment is used as the scene slug; the entry ID is a
    // hash of site slug, scene slug and release date.
    const slug = new URL(url).pathname.split('/')[2];
    const entryId = getHash(`${site.slug}${slug}${date.toISOString()}`);

    // The link title doubles as the actor list, separated by '&'.
    const actors = title.split('&').map(name => name.trim());

    // First image is the poster, the remainder are photos.
    const [poster, ...photos] = item.qimages('.bloc-link img');

    // The first dropdown link is skipped.
    const tags = item.qa('.dropdown ul a', true).slice(1);
    const duration = item.qlength('.dropdown p:first-child');

    return {
      site,
      meta: {
        siteSlugs, // carried along so the scene scraper can skip the lookup
      },
      title,
      url,
      date,
      entryId,
      actors,
      poster,
      photos,
      tags,
      duration,
    };
  });
}
|
||||
|
||||
/**
 * Turn a scene page into a release object.
 *
 * When a channel (site slug) can be recognised in the first photo URL, the
 * release URL is rewritten to that channel's domain and an entry ID is derived
 * from channel, scene slug and date.
 *
 * @param {string} html - markup of the scene page
 * @param {Object} site - site the scene was requested for
 * @param {string} url - URL the page was fetched from
 * @param {string[]} [metaSiteSlugs] - network site slugs, forwarded to extractChannelFromPhoto
 * @returns {Promise<Object>} the scraped release
 */
async function scrapeScene(html, site, url, metaSiteSlugs) {
  const page = ex(html);

  const title = page.q('#movie-header h2', true);
  const date = page.qdate('#movie-header div span', 'MMMM DD, YYYY', /\w+ \d{1,2}, \d{4}/);

  const description = page.q('.container .mg-md', true);
  const duration = page.qlength('#video-ribbon .container > div > span:nth-child(3)');

  // Linked performers plus the names extracted from the tag container.
  const actors = page.qa('#video-info a', true).concat(extractMaleModelsFromTags(page.q('.tag-container')));
  const listedTags = page.qa('.tag-container a', true);

  // The second ribbon span is checked for "4K", which is turned into a tag.
  const resolution = page.q('#video-ribbon .container > div > span:nth-child(2)', true);
  const tags = /4K/.test(resolution) ? listedTags.concat('4k') : listedTags;

  const photos = page.qa('.bxslider_pics img').map(img => img.dataset.original || img.src);
  const poster = page.qposter();
  const trailerSrc = page.qtrailer();

  const release = {
    url,
    site,
    title,
    date,
    description,
    duration,
    actors,
    tags,
    photos,
    poster,
  };

  if (trailerSrc) release.trailer = { src: trailerSrc };

  if (photos.length > 0) release.channel = await extractChannelFromPhoto(photos[0], metaSiteSlugs);

  if (!release.channel) {
    return release;
  }

  const { pathname } = new URL(url);
  release.url = `https://${release.channel}.com${pathname}`;

  const slug = pathname.split('/')[2];
  // NOTE(review): if qdate yields null for an unmatched date this line throws — confirm intended.
  release.entryId = getHash(`${release.channel}${slug}${release.date.toISOString()}`);

  return release;
}
|
||||
|
||||
/**
 * Fetch one page of a site's movie listing and scrape its releases.
 *
 * @param {{url: string}} site - site whose `/movies/page-N` listing is requested
 * @param {number} [page=1] - listing page number
 * @returns {Promise<Object[]>} scraped releases, or an empty array on any non-200 response
 */
async function fetchLatest(site, page = 1) {
  const res = await bhttp.get(`${site.url}/movies/page-${page}`);

  if (res.statusCode !== 200) {
    return [];
  }

  return scrapeLatest(res.body.toString(), site);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - scene URL to request
 * @param {Object} site - site the scene belongs to
 * @param {Object} [release] - release from a listing scrape; its
 *   `meta.siteSlugs`, when present, is forwarded so the slug lookup can be skipped
 * @returns {Promise<Object|Array>} the scraped release, or an empty array on
 *   any non-200 response (kept as-is for existing callers)
 */
async function fetchScene(url, site, release) {
  const res = await bhttp.get(url);

  if (res.statusCode === 200) {
    // A release without `meta` (e.g. one not produced by scrapeLatest) must
    // not crash the fetch, so both levels are optional.
    return scrapeScene(res.body.toString(), site, url, release?.meta?.siteSlugs);
  }

  return [];
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
};
|
|
@ -3,13 +3,12 @@
|
|||
const bhttp = require('bhttp');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
const ex = require('../utils/ex');
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const s = ex(html);
|
||||
const { document } = new JSDOM(html).window;
|
||||
const { origin } = new URL(site.url);
|
||||
|
||||
const videos = s.qa('.video-releases-list').slice(-1)[0];
|
||||
const videos = document.querySelectorAll('.video-releases-list').slice(-1)[0];
|
||||
|
||||
return Array.from(videos.querySelectorAll('.card'), (scene) => {
|
||||
const release = { site };
|
||||
|
|
|
@ -14,6 +14,7 @@ const jayrock = require('./jayrock');
|
|||
const kink = require('./kink');
|
||||
const mikeadriano = require('./mikeadriano');
|
||||
const mofos = require('./mofos');
|
||||
const perfectgonzo = require('./perfectgonzo');
|
||||
const pervcity = require('./pervcity');
|
||||
const pornpros = require('./pornpros');
|
||||
const privateNetwork = require('./private'); // reserved keyword
|
||||
|
@ -56,6 +57,7 @@ module.exports = {
|
|||
legalporno,
|
||||
mikeadriano,
|
||||
mofos,
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pornpros,
|
||||
private: privateNetwork,
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
|
||||
/**
 * Find the first element matching a selector.
 *
 * @param {{querySelector: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @returns {*} whatever `context.querySelector` returns
 */
function q(context, selector) {
  const match = context.querySelector(selector);

  return match;
}
|
||||
|
||||
/**
 * Find every element matching a selector, as a plain array.
 *
 * @param {{querySelectorAll: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @returns {Array} array copy of the `querySelectorAll` result
 */
function qa(context, selector) {
  const matches = context.querySelectorAll(selector);

  return Array.from(matches);
}
|
||||
|
||||
/**
 * Parse markup and return the document with selector helpers bound to it.
 *
 * @param {string} html - markup handed to JSDOM
 * @returns {{document: Object, q: Function, qa: Function}} the parsed document
 *   plus `q` / `qa` that only need a selector
 */
function ex(html) {
  const dom = new JSDOM(html);
  const { document } = dom.window;

  const select = selector => q(document, selector);
  const selectAll = selector => qa(document, selector);

  return {
    document,
    q: select,
    qa: selectAll,
  };
}
|
||||
|
||||
module.exports = ex;
|
|
@ -0,0 +1,107 @@
|
|||
'use strict';
|
||||
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
/**
 * Query the first element matching a selector, optionally reading one of its
 * properties.
 *
 * @param {{querySelector: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @param {string|boolean} [attrArg] - property to read from the element;
 *   `true` is shorthand for `textContent`; falsy returns the element itself
 * @param {boolean} [trim=true] - trim the value when it is a string
 * @returns {*} the element (or null) when no property is requested; otherwise
 *   the property value, or null when no element matched
 */
function q(context, selector, attrArg, trim = true) {
  const attr = attrArg === true ? 'textContent' : attrArg;
  const element = context.querySelector(selector);

  if (!attr) {
    return element;
  }

  // A missing element is an expected outcome on scraped pages (callers test
  // the result for truthiness), so report it as null instead of throwing.
  if (!element) {
    return null;
  }

  const value = element[attr];

  // Only strings can be trimmed; other values (undefined, numbers, objects)
  // are passed through untouched, matching qall's tolerant behaviour.
  return trim && typeof value === 'string' ? value.trim() : value;
}
|
||||
|
||||
/**
 * Query every element matching a selector, optionally mapping each one to one
 * of its properties.
 *
 * @param {{querySelectorAll: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @param {string|boolean} [attrArg] - property to read from each element;
 *   `true` is shorthand for `textContent`; falsy returns the elements themselves
 * @param {boolean} [trim=true] - trim each value (nullish values are left as undefined)
 * @returns {Array} elements, or their property values
 */
function qall(context, selector, attrArg, trim = true) {
  const elements = Array.from(context.querySelectorAll(selector));
  const attr = attrArg === true ? 'textContent' : attrArg;

  if (!attr) {
    return elements;
  }

  return elements.map(el => (trim ? el[attr]?.trim() : el[attr]));
}
|
||||
|
||||
/**
 * Read a date from the first element matching a selector.
 *
 * @param {{querySelector: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @param {string} format - moment format string used to parse the date (as UTC)
 * @param {RegExp|string} [match] - when given, only the first match of this
 *   pattern inside the text is parsed
 * @param {string} [attr='textContent'] - element property holding the date text
 * @returns {?Date} the parsed date; null when the element or property is
 *   missing, or when `match` is given and finds nothing
 */
function qdate(context, selector, format, match, attr = 'textContent') {
  const dateString = context.querySelector(selector)?.[attr];

  // Missing element or non-text property: report "no date" the same way the
  // no-match path below does, rather than throwing.
  if (typeof dateString !== 'string') {
    return null;
  }

  if (match) {
    const dateStamp = dateString.match(match);

    if (dateStamp) return moment.utc(dateStamp[0], format).toDate();
    return null;
  }

  return moment.utc(dateString.trim(), format).toDate();
}
|
||||
|
||||
/**
 * Collect a property from every matching element; defaults target image sources.
 *
 * @param {Object} context - document or element to search in
 * @param {string} [selector='img'] - CSS selector
 * @param {string} [attr='src'] - property to read from each element
 * @returns {Array} result of `qall(context, selector, attr)`
 */
function qimages(context, selector = 'img', attr = 'src') {
  const sources = qall(context, selector, attr);

  return sources;
}
|
||||
|
||||
/**
 * Read a property from the first matching element; defaults target a video's poster.
 *
 * @param {Object} context - document or element to search in
 * @param {string} [selector='video'] - CSS selector
 * @param {string} [attr='poster'] - property to read
 * @returns {*} result of `q(context, selector, attr)`
 */
function qposter(context, selector = 'video', attr = 'poster') {
  const poster = q(context, selector, attr);

  return poster;
}
|
||||
|
||||
/**
 * Read a property from the first matching element; defaults target a `source` element's src.
 *
 * @param {Object} context - document or element to search in
 * @param {string} [selector='source'] - CSS selector
 * @param {string} [attr='src'] - property to read
 * @returns {*} result of `q(context, selector, attr)`
 */
function qtrailer(context, selector = 'source', attr = 'src') {
  const trailer = q(context, selector, attr);

  return trailer;
}
|
||||
|
||||
/**
 * Read a duration such as `12:34` or `1:02:03` from the first element
 * matching a selector.
 *
 * @param {{querySelector: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @param {string|boolean} [attr='textContent'] - element property holding the
 *   text; `true` is shorthand for `textContent`
 * @returns {?number} duration in seconds; null when the element or property
 *   is missing or the text contains no duration
 */
function qlength(context, selector, attr = 'textContent') {
  const property = attr === true ? 'textContent' : attr;
  const durationString = context.querySelector(selector)?.[property];

  // Missing element or non-text property: same "no duration" result as text
  // without a timestamp, instead of a TypeError.
  if (typeof durationString !== 'string') {
    return null;
  }

  const duration = durationString.match(/(\d+:)?\d+:\d+/);

  if (!duration) {
    return null;
  }

  // Left-pad with an hours segment so `MM:SS` and `H:MM:SS` both reach
  // moment as three segments.
  const segments = ['00'].concat(duration[0].split(':')).slice(-3);

  return moment.duration(segments.join(':')).asSeconds();
}
|
||||
|
||||
// Every query helper under its full name, followed by a short alias for each
// helper except `q`. ctx() binds each entry of this table to an element.
const funcs = {
  // full names
  q,
  qall,
  qdate,
  qimages,
  qposter,
  qlength,
  qtrailer,

  // short aliases
  qa: qall,
  qd: qdate,
  qi: qimages,
  qp: qposter,
  ql: qlength,
  qt: qtrailer,
};
|
||||
|
||||
/**
 * Wrap an element with every helper from `funcs`, pre-bound to that element
 * so callers only pass the remaining arguments.
 *
 * @param {Object} element - document or element the helpers operate on
 * @returns {Object} `{ element, ...helpers }` where each helper forwards to
 *   `funcs[name](element, ...args)`
 */
function ctx(element) {
  const boundFuncs = Object.fromEntries(
    Object.entries(funcs).map(([name, fn]) => [name, (...args) => fn(element, ...args)]),
  );

  return { element, ...boundFuncs };
}
|
||||
|
||||
/**
 * Wrap every element matching a selector with ctx().
 *
 * @param {{querySelectorAll: Function}} context - document or element to search in
 * @param {string} selector - CSS selector
 * @returns {Object[]} one ctx() wrapper per matching element
 */
function ctxa(context, selector) {
  const matches = context.querySelectorAll(selector);

  return Array.from(matches, element => ctx(element));
}
|
||||
|
||||
/**
 * Parse markup with JSDOM and wrap the resulting document with ctx().
 *
 * @param {string} html - markup to parse
 * @returns {Object} ctx() wrapper whose `element` is the parsed document
 */
function ex(html) {
  const dom = new JSDOM(html);

  return ctx(dom.window.document);
}
|
||||
|
||||
module.exports = {
|
||||
ex,
|
||||
ctx,
|
||||
ctxa,
|
||||
...funcs,
|
||||
};
|