Added PornCZ.
This commit is contained in:
@@ -9,7 +9,8 @@ const slugify = require('../utils/slugify');
|
||||
|
||||
function extractTitle(originalTitle) {
|
||||
const titleComponents = originalTitle.split(' ');
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM)\d+/); // detect studio prefixes
|
||||
// const sceneIdMatch = titleComponents.slice(-1)[0].match(/(AB|AF|GP|SZ|IV|GIO|RS|TW|MA|FM|SAL|NR|AA|GL|BZ|FS|KS|OTS|NF|NT|AX|RV|CM|BTG)\d+/); // detect studio prefixes
|
||||
const sceneIdMatch = titleComponents.slice(-1)[0].match(/\w+\d+\s*$/); // detect studio prefixes
|
||||
const shootId = sceneIdMatch ? sceneIdMatch[0] : null;
|
||||
const title = sceneIdMatch ? titleComponents.slice(0, -1).join(' ') : originalTitle;
|
||||
|
||||
|
||||
119
src/scrapers/porncz.js
Normal file
119
src/scrapers/porncz.js
Normal file
@@ -0,0 +1,119 @@
|
||||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
|
||||
/**
 * Turn the scene tiles of a listing page into release objects.
 *
 * @param {Array<{ query: object }>} scenes - one query context per scene tile
 * @param {{ url: string }} channel - entity whose `url` is passed as the origin when resolving links and images
 * @returns {object[]} one release per tile with `url`, `entryId`, `title`, `duration` and `poster`
 */
function scrapeAll(scenes, channel) {
  return scenes.map(({ query }) => {
    const release = {};

    release.url = query.url('h4 a', 'href', { origin: channel.url });

    // The entry ID is the trailing number of the scene path. A tile without a
    // link, or with a path that has no trailing number, gets a null ID instead
    // of throwing and discarding every other tile on the page.
    const entryIdMatch = release.url ? new URL(release.url).pathname.match(/\d+$/) : null;
    release.entryId = entryIdMatch ? entryIdMatch[0] : null;

    release.title = query.cnt('h4 a');
    release.duration = query.duration('.product-item-time');

    release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });

    return release;
  });
}
|
||||
|
||||
/**
 * Scrape a single scene page into a release object.
 *
 * @param {{ query: object }} context - query context of the scene page
 * @param {string} url - absolute URL of the scene; its trailing path number becomes the entry ID
 * @param {{ url: string }} channel - entity whose `url` is passed as the origin when resolving images
 * @returns {object} release with entryId, title, description, actors, duration, tags, poster,
 *   photos, trailer and channel, plus date and datePrecision when a relative date could be parsed
 */
function scrapeScene({ query }, url, channel) {
  const release = {};

  const entryIdMatch = new URL(url).pathname.match(/\d+$/);
  release.entryId = entryIdMatch ? entryIdMatch[0] : null;

  release.title = query.cnt('.heading-detail h1');
  release.description = query.cnt('.heading-detail p:nth-child(3)');

  // Index the info elements by their slugified "Label:" prefix. Items without
  // such a label are skipped rather than crashing the scrape.
  const details = {};

  for (const detailEl of query.all('.video-info-item')) {
    const keyMatch = detailEl.textContent.match(/(\w+):/);

    if (keyMatch) {
      details[slugify(keyMatch[1], '_')] = detailEl;
    }
  }

  // dateAgo returns null when there is no parsable relative date, so the
  // result must not be destructured unconditionally.
  const dateAgo = query.dateAgo(details.date);

  if (dateAgo) {
    release.date = dateAgo.date;
    release.datePrecision = dateAgo.precision;
  }

  release.actors = query.cnts(details.actors, 'a').map(actor => capitalize(actor, { uncapitalize: true }));
  release.duration = query.duration(details.duration);
  release.tags = query.cnts(details.genres, 'a');

  release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url });
  release.photos = query.imgs('#gallery .photo-item img', 'src', { origin: channel.url });

  release.trailer = query.video('.trailer source');

  release.channel = slugify(query.q('.video-detail-logo img', 'alt'), '');

  return release;
}
|
||||
|
||||
/**
 * Build a profile from a model page: the avatar image plus the releases
 * scraped from the page's `.product-item` tiles.
 *
 * @param {{ query: object }} context - query context of the model page
 * @param {{ url: string }} entity - entity whose `url` is passed as the image origin and handed to scrapeAll
 * @returns {{ avatar: *, releases: object[] }} the scraped profile
 */
function scrapeProfile({ query }, entity) {
  const avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url });
  const sceneContexts = qu.initAll(query.all('.product-item'));

  return {
    avatar,
    releases: scrapeAll(sceneContexts, entity),
  };
}
|
||||
|
||||
/**
 * Fetch the channel's new-videos listing and scrape its scene tiles.
 *
 * @param {{ url: string }} channel - entity whose `url` is the base of the listing URL
 * @param {number} [_page=1] - accepted but not used by this function
 * @returns {Promise<object[]|*>} scraped releases, or the response status when the listing request is not ok
 */
async function fetchLatest(channel, _page = 1) {
  const latestUrl = `${channel.url}/en/new-videos?option=date&do=sort`;

  // TODO: session
  // NOTE(review): the HEAD request's result is discarded; presumably it only
  // exists to prime a session before the listing is requested — confirm.
  await http.head(latestUrl);

  const listing = await qu.getAll(latestUrl, '.product-item');

  if (!listing.ok) {
    return listing.status;
  }

  return scrapeAll(listing.items, channel);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - URL of the scene page
 * @param {object} channel - entity forwarded to scrapeScene
 * @returns {Promise<object|*>} the scraped release, or the response status when the request is not ok
 */
async function fetchScene(url, channel) {
  const page = await qu.get(url, 'body > .container');

  return page.ok
    ? scrapeScene(page.item, url, channel)
    : page.status;
}
|
||||
|
||||
/**
 * Look up an actor through the site's model search and scrape their profile page.
 *
 * @param {{ name: string }} baseActor - actor to look up; the name must equal a search result's `h3 a` text exactly
 * @param {object} entity - entity forwarded to scrapeProfile
 * @returns {Promise<object|null|*>} the scraped profile, null when no search result matches the name,
 *   or the response status of whichever request was not ok
 */
async function fetchProfile(baseActor, entity) {
  // Encode the name so spaces, '&', '#', '+' and similar characters cannot
  // truncate or corrupt the query string.
  const searchUrl = `https://www.porncz.com/en/search-results?showModels=1&value=${encodeURIComponent(baseActor.name)}`;

  // NOTE(review): every other listing in this scraper selects '.product-item';
  // confirm '.project-item' is the intended class for model search results.
  const searchRes = await qu.getAll(searchUrl, '.project-item');

  if (!searchRes.ok) {
    return searchRes.status;
  }

  const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name);

  if (!model) {
    return null;
  }

  const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' });
  const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos

  if (modelRes.ok) {
    return scrapeProfile(modelRes.item, entity);
  }

  return modelRes.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchScene,
|
||||
fetchProfile,
|
||||
};
|
||||
@@ -47,6 +47,7 @@ const newsensations = require('./newsensations');
|
||||
const nubiles = require('./nubiles');
|
||||
const perfectgonzo = require('./perfectgonzo');
|
||||
const pervcity = require('./pervcity');
|
||||
const porncz = require('./porncz');
|
||||
const pornhub = require('./pornhub');
|
||||
const whalemember = require('./whalemember');
|
||||
const privateNetwork = require('./private'); // reserved keyword
|
||||
@@ -128,6 +129,7 @@ module.exports = {
|
||||
perfectgonzo,
|
||||
pervcity,
|
||||
pimpxxx: cherrypimps,
|
||||
porncz,
|
||||
pornpros: whalemember,
|
||||
private: privateNetwork,
|
||||
puretaboo,
|
||||
@@ -217,6 +219,7 @@ module.exports = {
|
||||
pimpxxx: cherrypimps,
|
||||
letsdoeit: porndoe,
|
||||
mamacitaz: porndoe,
|
||||
porncz,
|
||||
pornhub,
|
||||
povperverts: fullpornnetwork,
|
||||
povpornstars: hush,
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
'use strict';
|
||||
|
||||
function capitalize(string, trim = true) {
|
||||
function capitalize(string, { trim = true, uncapitalize = false } = {}) {
|
||||
if (!string) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const capitalized = string
|
||||
.split(/\s+/)
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${component.slice(1)}`)
|
||||
.map(component => `${component.charAt(0).toUpperCase()}${uncapitalize ? component.slice(1).toLowerCase() : component.slice(1)}`)
|
||||
.join(' ');
|
||||
|
||||
return trim ? capitalized.trim() : capitalized;
|
||||
|
||||
@@ -112,6 +112,15 @@ async function get(url, headers, options) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Queue a HEAD request.
 *
 * @param {string} url - request URL
 * @param {object} [headers] - request headers, passed through to the queued task
 * @param {object} [options] - request options; `options.queueMethod` selects the queue method,
 *   falling back to the module default when absent or falsy
 * @returns {Promise<*>} whatever the queue resolves the pushed task to
 */
async function head(url, headers, options) {
  const queueMethod = options?.queueMethod || defaultQueueMethod;
  const task = {
    method: 'HEAD',
    url,
    headers,
    options,
  };

  return queue.push(queueMethod, task);
}
|
||||
|
||||
async function post(url, body, headers, options) {
|
||||
return queue.push(options?.queueMethod || defaultQueueMethod, {
|
||||
method: 'POST',
|
||||
@@ -125,4 +134,5 @@ async function post(url, body, headers, options) {
|
||||
module.exports = {
|
||||
get,
|
||||
post,
|
||||
head,
|
||||
};
|
||||
|
||||
@@ -170,6 +170,28 @@ function date(context, selector, format, match, attr = 'textContent') {
|
||||
return extractDate(dateString, format, match);
|
||||
}
|
||||
|
||||
/**
 * Parse a relative time such as "<number> <period>" from an element and
 * convert it into an absolute date by subtracting it from the current UTC time.
 *
 * @param {*} context - query context handed to `q`
 * @param {string} selector - selector handed to `q`
 * @param {RegExp} [match] - pattern whose first capture group is the amount and second the period
 * @param {string} [attr='textContent'] - attribute handed to `q`
 * @returns {{ date: Date, precision: string }|null} the computed date and the period with a
 *   trailing "s" removed, or null when no text was found or it did not match
 */
function dateAgo(context, selector, match = /(\d+)\s*(\w+)/, attr = 'textContent') {
  const timeString = q(context, selector, attr, 'textContent');
  const timeMatch = timeString ? timeString.match(match) : null;

  if (!timeMatch) {
    return null;
  }

  const amount = timeMatch[1];
  const period = timeMatch[2];
  const thenDate = moment.utc().subtract(Number(amount), period);

  return {
    date: thenDate.toDate(),
    precision: period.replace(/s$/, ''),
  };
}
|
||||
|
||||
function image(context, selector = 'img', attr, { origin, protocol = 'https' } = {}) {
|
||||
const imageEl = (attr && q(context, selector, attr))
|
||||
|| q(context, selector, 'data-src')
|
||||
@@ -286,6 +308,7 @@ const quFuncs = {
|
||||
cnt: content,
|
||||
cnts: contents,
|
||||
date,
|
||||
dateAgo,
|
||||
dur: duration,
|
||||
duration,
|
||||
element: q,
|
||||
|
||||
Reference in New Issue
Block a user