Added PornCZ.

This commit is contained in:
DebaucheryLibrarian
2020-07-22 04:12:20 +02:00
parent 9d89a38490
commit 46c6c4dd21
125 changed files with 462 additions and 7 deletions

View File

@@ -9,7 +9,8 @@ const slugify = require('../utils/slugify');
function extractTitle(originalTitle) {
const titleComponents = originalTitle.split(' ');
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM)\d+/); // detect studio prefixes
// const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM|BTG)\d+/); // detect studio prefixes
const sceneIdMatch = titleComponents.slice(-1)[0].match(/\w+\d+\s*$/); // detect studio prefixes
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;

119
src/scrapers/porncz.js Normal file
View File

@@ -0,0 +1,119 @@
'use strict';
const http = require('../utils/http');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
const capitalize = require('../utils/capitalize');
/**
 * Build release stubs from the scene tiles of a listing page.
 *
 * @param {Array<{ query: object }>} scenes - wrapped scene tiles; only each tile's `query` helpers are used
 * @param {{ url: string }} channel - entity whose `url` is passed as the origin for links and images
 * @returns {object[]} one release per tile with `url`, `entryId`, `title`, `duration` and `poster`
 */
function scrapeAll(scenes, channel) {
	return scenes.map(({ query }) => {
		const release = {};

		release.url = query.url('h4 a', 'href', { origin: channel.url });

		// The entry ID is the run of digits that ends the URL path. A tile with a missing link or
		// a path without trailing digits yields a null ID instead of aborting the whole listing.
		release.entryId = (release.url && new URL(release.url).pathname.match(/\d+$/)?.[0]) || null;

		release.title = query.cnt('h4 a');
		release.duration = query.duration('.product-item-time');

		release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

		return release;
	});
}
/**
 * Build a release from a scene detail page.
 *
 * @param {{ query: object }} context - wrapped page; only its `query` helpers are used
 * @param {string} url - absolute URL of the scene page; the digits ending its path become the entry ID
 * @param {{ url: string }} channel - entity whose `url` is passed as the origin for images
 * @returns {object} the scraped release
 */
function scrapeScene({ query }, url, channel) {
	const release = {};

	// null rather than a TypeError when the path does not end in digits
	release.entryId = new URL(url).pathname.match(/\d+$/)?.[0] || null;

	release.title = query.cnt('.heading-detail h1');
	release.description = query.cnt('.heading-detail p:nth-child(3)');

	// Index the info items by their slugified "Label:" prefix; items without a label are skipped.
	const details = query.all('.video-info-item').reduce((acc, detailEl) => {
		const keyMatch = detailEl.textContent.match(/(\w+):/);

		if (keyMatch) {
			acc[slugify(keyMatch[1], '_')] = detailEl;
		}

		return acc;
	}, {});

	// dateAgo returns null when no relative date can be read, so it must not be destructured blindly
	const relativeDate = query.dateAgo(details.date);

	if (relativeDate) {
		release.date = relativeDate.date;
		release.datePrecision = relativeDate.precision;
	}

	release.actors = query.cnts(details.actors, 'a').map(actor => capitalize(actor, { uncapitalize: true }));
	release.duration = query.duration(details.duration);
	release.tags = query.cnts(details.genres, 'a');

	release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url });
	release.photos = query.imgs('#gallery .photo-item img', 'src', { origin: channel.url });

	release.trailer = query.video('.trailer source');

	// the channel slug is taken from the alt text of the logo shown on the scene page
	release.channel = slugify(query.q('.video-detail-logo img', 'alt'), '');

	return release;
}
/**
 * Build an actor profile from a model page.
 *
 * @param {{ query: object }} context - wrapped model page; only its `query` helpers are used
 * @param {{ url: string }} entity - entity whose `url` is the origin for the avatar and is forwarded to scrapeAll
 * @returns {{ avatar: *, releases: * }} the avatar plus whatever scrapeAll returns for the page's scene tiles
 */
function scrapeProfile({ query }, entity) {
	const avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url });

	// the scene tiles on the model page are re-wrapped so they can go through the listing scraper
	const sceneEls = query.all('.product-item');
	const releases = scrapeAll(qu.initAll(sceneEls), entity);

	return { avatar, releases };
}
/**
 * Fetch the channel's newest scenes, sorted by date.
 *
 * @param {{ url: string }} channel - entity whose `url` is the base of the listing URL
 * @param {number} [_page=1] - accepted but never read
 * @returns {Promise<*>} the scraped releases when the listing request is OK, otherwise its status
 */
// NOTE(review): `_page` is unused, so every call requests the same listing regardless of page.
async function fetchLatest(channel, _page = 1) {
	const listingUrl = `${channel.url}/en/new-videos?option=date&do=sort`;

	// TODO: session
	await http.head(listingUrl);

	const listing = await qu.getAll(listingUrl, '.product-item');

	return listing.ok
		? scrapeAll(listing.items, channel)
		: listing.status;
}
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page
 * @param {object} channel - entity forwarded to scrapeScene
 * @returns {Promise<*>} the scraped release when the request is OK, otherwise its status
 */
async function fetchScene(url, channel) {
	const page = await qu.get(url, 'body > .container');

	if (!page.ok) {
		return page.status;
	}

	return scrapeScene(page.item, url, channel);
}
/**
 * Look an actor up through the site's model search and scrape the matching profile page.
 *
 * @param {{ name: string }} baseActor - actor to find; a search result must match `name` exactly
 * @param {object} entity - entity forwarded to scrapeProfile
 * @returns {Promise<*>} the scraped profile, `null` when no search result matches the name,
 *   or the status of whichever request was not OK
 */
async function fetchProfile(baseActor, entity) {
	// The name goes into a query string, so it must be encoded: characters such as '&', '#' or '+'
	// would otherwise truncate or alter the search value.
	const searchUrl = `https://www.porncz.com/en/search-results?showModels=1&value=${encodeURIComponent(baseActor.name)}`;
	const searchRes = await qu.getAll(searchUrl, '.project-item');

	if (!searchRes.ok) {
		return searchRes.status;
	}

	const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name);

	if (!model) {
		return null;
	}

	const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' });
	const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos

	if (!modelRes.ok) {
		return modelRes.status;
	}

	return scrapeProfile(modelRes.item, entity);
}
// Public interface of this scraper: the latest-scenes, single-scene and actor-profile fetchers defined above.
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
};

View File

@@ -47,6 +47,7 @@ const newsensations = require('./newsensations');
const nubiles = require('./nubiles');
const perfectgonzo = require('./perfectgonzo');
const pervcity = require('./pervcity');
const porncz = require('./porncz');
const pornhub = require('./pornhub');
const whalemember = require('./whalemember');
const privateNetwork = require('./private'); // reserved keyword
@@ -128,6 +129,7 @@ module.exports = {
perfectgonzo,
pervcity,
pimpxxx: cherrypimps,
porncz,
pornpros: whalemember,
private: privateNetwork,
puretaboo,
@@ -217,6 +219,7 @@ module.exports = {
pimpxxx: cherrypimps,
letsdoeit: porndoe,
mamacitaz: porndoe,
porncz,
pornhub,
povperverts: fullpornnetwork,
povpornstars: hush,

View File

@@ -1,13 +1,13 @@
'use strict';
function capitalize(string, trim = true) {
function capitalize(string, { trim = true, uncapitalize = false } = {}) {
if (!string) {
return '';
}
const capitalized = string
.split(/\s+/)
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
.map(component => `${component.charAt(0).toUpperCase()}${uncapitalize ? component.slice(1).toLowerCase() : component.slice(1)}`)
.join(' ');
return trim ? capitalized.trim() : capitalized;

View File

@@ -112,6 +112,15 @@ async function get(url, headers, options) {
});
}
/**
 * Queue an HTTP HEAD request.
 *
 * @param {string} url - request URL
 * @param {object} [headers] - passed through unchanged on the queued task
 * @param {object} [options] - passed through unchanged; `options.queueMethod` selects the queue method
 * @returns {Promise<*>} whatever `queue.push` returns for the task
 */
async function head(url, headers, options) {
	// fall back to the module default when the caller does not pick a queue method
	const queueMethod = options?.queueMethod || defaultQueueMethod;
	const task = { method: 'HEAD', url, headers, options };

	return queue.push(queueMethod, task);
}
async function post(url, body, headers, options) {
return queue.push(options?.queueMethod || defaultQueueMethod, {
method: 'POST',
@@ -125,4 +134,5 @@ async function post(url, body, headers, options) {
// Request helpers exposed by this module, one per HTTP method.
module.exports = {
get,
post,
head,
};

View File

@@ -170,6 +170,28 @@ function date(context, selector, format, match, attr = 'textContent') {
return extractDate(dateString, format, match);
}
/**
 * Turn a relative time such as "3 weeks" into a date counted back from the current UTC time.
 *
 * @param {*} context - forwarded to `q`
 * @param {*} selector - forwarded to `q`
 * @param {RegExp} [match] - pattern whose first group is the amount and second group the unit
 * @param {string} [attr='textContent'] - forwarded to `q`
 * @returns {{ date: Date, precision: string } | null} the computed date and the unit with one
 *   trailing "s" removed, or `null` when no text is found or it does not match the pattern
 */
function dateAgo(context, selector, match = /(\d+)\s*(\w+)/, attr = 'textContent') {
	// NOTE(review): the trailing 'textContent' argument looks unintended — confirm against q's signature.
	const text = q(context, selector, attr, 'textContent');
	const parts = text ? text.match(match) : null;

	if (!parts) {
		return null;
	}

	const [, amount, unit] = parts;
	const then = moment.utc().subtract(Number(amount), unit);

	return {
		date: then.toDate(),
		precision: unit.replace(/s$/, ''),
	};
}
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
const imageEl = (attr && q(context, selector, attr))
|| q(context, selector, 'data-src')
@@ -286,6 +308,7 @@ const quFuncs = {
cnt: content,
cnts: contents,
date,
dateAgo,
dur: duration,
duration,
element: q,