forked from DebaucheryLibrarian/traxxx
Refactored PornCZ scraper. Renamed Brutal Sessions to Dungeon Sex.
This commit is contained in:
parent
725087bf1c
commit
c64c4dd694
|
|
@ -5599,6 +5599,7 @@ const sites = [
|
|||
parameters: {
|
||||
native: true,
|
||||
preferSpartanId: true,
|
||||
networkEntryId: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
@ -5894,9 +5895,11 @@ const sites = [
|
|||
parent: 'kink',
|
||||
},
|
||||
{
|
||||
slug: 'brutalsessions',
|
||||
name: 'Brutal Sessions',
|
||||
url: 'https://www.kink.com/channel/brutal-sessions',
|
||||
slug: 'dungeonsex',
|
||||
rename: 'brutalsessions',
|
||||
name: 'Dungeon Sex',
|
||||
url: 'https://www.kink.com/channel/dungeon-sex',
|
||||
alias: ['brutal sessions'],
|
||||
description: "Hardcore BDSM jam packed with XXX fucking in bondage! We're taking dungeon sex beyond the castle!",
|
||||
parent: 'kink',
|
||||
},
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ function curateEntity(entity, includeParameters = false) {
|
|||
id: entity.id,
|
||||
name: entity.name,
|
||||
url: entity.url,
|
||||
origin: new URL(entity.url).origin,
|
||||
description: entity.description,
|
||||
slug: entity.slug,
|
||||
type: entity.type,
|
||||
|
|
|
|||
|
|
@ -299,6 +299,37 @@ async function scrapeApiReleases(json, site, options) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of latest (or upcoming) scenes through the search API.
 *
 * @param {object} site - Channel entity; `slug`, `url` and `parent.url` are read.
 * @param {number} [page=1] - 1-based page number; sent to the API as `page - 1`.
 * @param {object} options - Scraper options; `options.parameters` may supply
 *   `referer`, `networkReferer`, `querySlug` and `queryChannel`.
 * @param {*} _preData - Not referenced by this function.
 * @param {boolean} [upcoming=false] - Sets the `upcoming` facet filter to 1 instead of 0.
 * @returns {Promise<*>} The result of scrapeApiReleases when the response is a 200
 *   with hits, otherwise the response status.
 */
async function fetchLatestApi(site, page = 1, options, _preData, upcoming = false) {
	// parameters is optional: default it once instead of guarding only some of the reads
	const parameters = options.parameters ?? {};
	const referer = parameters.referer || `${parameters.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);
	const slug = parameters.querySlug || site.slug;

	// queryChannel === true means "filter on this channel's own slug"; any other truthy value is used verbatim
	const filter = parameters.queryChannel
		? `channels.id:${parameters.queryChannel === true ? slug : parameters.queryChannel}`
		: `availableOnSite:${slug}`;

	const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=${filter}`;

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	if (res.status === 200 && res.body.results?.[0]?.hits) {
		return scrapeApiReleases(res.body.results[0].hits, site, options);
	}

	return res.status;
}
|
||||
|
||||
function scrapeAll(scenes, site, networkUrl, hasTeaser = true) {
|
||||
return scenes.map(({ query, el }) => {
|
||||
const release = {};
|
||||
|
|
@ -328,6 +359,54 @@ function scrapeAll(scenes, site, networkUrl, hasTeaser = true) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Build the URL of a latest-scenes listing page.
 *
 * `site.parameters.latest` may be an absolute URL (starting with `http`) or a
 * path that is appended to `site.url`. When it contains `%d`, the page number
 * is substituted with util.format; otherwise the page number is appended.
 * Without the parameter the default `/en/videos/AllCategories/0/<page>` path is used.
 *
 * @param {object} site - Entity with `url` and optional `parameters.latest`.
 * @param {number} page - Page number to insert.
 * @returns {string} Listing URL.
 */
function getLatestUrl(site, page) {
	const latest = site.parameters?.latest;

	if (!latest) {
		return `${site.url}/en/videos/AllCategories/0/${page}`;
	}

	// absolute templates are used as-is, relative ones are resolved against the site URL
	const template = /^http/.test(latest) ? latest : `${site.url}${latest}`;

	return /%d/.test(latest)
		? util.format(template, page)
		: `${template}${page}`;
}
|
||||
|
||||
/**
 * Build the URL of the upcoming-scenes listing.
 *
 * `site.parameters.upcoming` may be an absolute URL (starting with `http`) or a
 * path appended to `site.url`. Without it, the default
 * `/en/videos/AllCategories/0/1/upcoming` path is used.
 *
 * @param {object} site - Entity with `url` and optional `parameters.upcoming`.
 * @returns {string} Listing URL.
 */
function getUpcomingUrl(site) {
	const upcoming = site.parameters?.upcoming;

	if (!upcoming) {
		return `${site.url}/en/videos/AllCategories/0/1/upcoming`;
	}

	return /^http/.test(upcoming)
		? `${upcoming}`
		: `${site.url}${upcoming}`;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest-scenes listing.
 *
 * @param {object} site - Entity passed to getLatestUrl and scrapeAll.
 * @param {number} [page=1] - Page number passed to getLatestUrl.
 * @returns {Promise<*>} The result of scrapeAll when the request is ok,
 *   otherwise the response status.
 */
async function fetchLatest(site, page = 1) {
	const url = getLatestUrl(site, page);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetch and scrape the upcoming-scenes listing.
 *
 * @param {object} site - Entity passed to getUpcomingUrl and scrapeAll.
 * @returns {Promise<*>} The result of scrapeAll when the request is ok,
 *   otherwise the response status.
 */
async function fetchUpcoming(site) {
	const url = getUpcomingUrl(site);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]');

	if (!res.ok) {
		return res.status;
	}

	// no network URL, and the hasTeaser argument of scrapeAll is disabled for upcoming scenes
	return scrapeAll(res.items, site, null, false);
}
|
||||
|
||||
async function scrapeScene({ query }, url, channel, baseRelease, mobileItem, options) {
|
||||
const release = { query }; // used by XEmpire scraper to resolve channel-specific details
|
||||
|
||||
|
|
@ -658,37 +737,6 @@ function scrapeApiProfile(data, releases, siteSlug) {
|
|||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of latest (or upcoming) scenes through the search API.
 *
 * @param {object} site - Channel entity; `slug`, `url` and `parent.url` are read.
 * @param {number} [page=1] - 1-based page number; sent to the API as `page - 1`.
 * @param {object} options - Scraper options; `options.parameters` may supply
 *   `referer`, `networkReferer`, `querySlug` and `queryChannel`.
 * @param {*} _preData - Not referenced by this function.
 * @param {boolean} [upcoming=false] - Sets the `upcoming` facet filter to 1 instead of 0.
 * @returns {Promise<*>} The result of scrapeApiReleases when the response is a 200
 *   with hits, otherwise the response status.
 */
async function fetchLatestApi(site, page = 1, options, _preData, upcoming = false) {
	// parameters is optional: default it once instead of guarding only some of the reads
	const parameters = options.parameters ?? {};
	const referer = parameters.referer || `${parameters.networkReferer ? site.parent.url : site.url}/en/videos`;
	const { apiUrl } = await fetchApiCredentials(referer, site);
	const slug = parameters.querySlug || site.slug;

	// queryChannel === true means "filter on this channel's own slug"; any other truthy value is used verbatim
	const filter = parameters.queryChannel
		? `channels.id:${parameters.queryChannel === true ? slug : parameters.queryChannel}`
		: `availableOnSite:${slug}`;

	const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=${filter}`;

	const res = await http.post(apiUrl, {
		requests: [
			{
				indexName: 'all_scenes',
				params,
			},
		],
	}, {
		headers: {
			Referer: referer,
		},
	}, {
		encodeJSON: true,
	});

	if (res.status === 200 && res.body.results?.[0]?.hits) {
		return scrapeApiReleases(res.body.results[0].hits, site, options);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch upcoming scenes by delegating to fetchLatestApi with its `upcoming`
 * argument set to true.
 *
 * @param {object} site - Forwarded unchanged.
 * @param {number} [page=1] - Forwarded unchanged.
 * @param {object} options - Forwarded unchanged.
 * @param {*} preData - Forwarded unchanged.
 * @returns {Promise<*>} Whatever fetchLatestApi resolves to.
 */
async function fetchUpcomingApi(site, page = 1, options, preData) {
	return fetchLatestApi(site, page, options, preData, true);
}
|
||||
|
|
@ -771,54 +819,6 @@ async function fetchMovieApi(url, site, baseRelease, options) {
|
|||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Build the URL of a latest-scenes listing page.
 *
 * `site.parameters.latest` may be an absolute URL (starting with `http`) or a
 * path that is appended to `site.url`. When it contains `%d`, the page number
 * is substituted with util.format; otherwise the page number is appended.
 * Without the parameter the default `/en/videos/AllCategories/0/<page>` path is used.
 *
 * @param {object} site - Entity with `url` and optional `parameters.latest`.
 * @param {number} page - Page number to insert.
 * @returns {string} Listing URL.
 */
function getLatestUrl(site, page) {
	const latest = site.parameters?.latest;

	if (!latest) {
		return `${site.url}/en/videos/AllCategories/0/${page}`;
	}

	// absolute templates are used as-is, relative ones are resolved against the site URL
	const template = /^http/.test(latest) ? latest : `${site.url}${latest}`;

	return /%d/.test(latest)
		? util.format(template, page)
		: `${template}${page}`;
}
|
||||
|
||||
/**
 * Build the URL of the upcoming-scenes listing.
 *
 * `site.parameters.upcoming` may be an absolute URL (starting with `http`) or a
 * path appended to `site.url`. Without it, the default
 * `/en/videos/AllCategories/0/1/upcoming` path is used.
 *
 * @param {object} site - Entity with `url` and optional `parameters.upcoming`.
 * @returns {string} Listing URL.
 */
function getUpcomingUrl(site) {
	const upcoming = site.parameters?.upcoming;

	if (!upcoming) {
		return `${site.url}/en/videos/AllCategories/0/1/upcoming`;
	}

	return /^http/.test(upcoming)
		? `${upcoming}`
		: `${site.url}${upcoming}`;
}
|
||||
|
||||
/**
 * Fetch and scrape one page of the latest-scenes listing.
 *
 * @param {object} site - Entity passed to getLatestUrl and scrapeAll.
 * @param {number} [page=1] - Page number passed to getLatestUrl.
 * @returns {Promise<*>} The result of scrapeAll when the request is ok,
 *   otherwise the response status.
 */
async function fetchLatest(site, page = 1) {
	const url = getLatestUrl(site, page);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]');

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.items, site);
}
|
||||
|
||||
/**
 * Fetch and scrape the upcoming-scenes listing.
 *
 * @param {object} site - Entity passed to getUpcomingUrl and scrapeAll.
 * @returns {Promise<*>} The result of scrapeAll when the request is ok,
 *   otherwise the response status.
 */
async function fetchUpcoming(site) {
	const url = getUpcomingUrl(site);
	const res = await qu.getAll(url, 'li[data-itemtype=scene], div[data-itemtype*=scene]');

	if (!res.ok) {
		return res.status;
	}

	// no network URL, and the hasTeaser argument of scrapeAll is disabled for upcoming scenes
	return scrapeAll(res.items, site, null, false);
}
|
||||
|
||||
function getDeepUrl(url, site, baseRelease, mobile) {
|
||||
const filter = new Set(['en', 'video', 'scene', site.slug, site.parent.slug]);
|
||||
const pathname = baseRelease?.path || new URL(url).pathname
|
||||
|
|
|
|||
|
|
@ -1,121 +1,123 @@
|
|||
'use strict';
|
||||
|
||||
const http = require('../utils/http');
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const capitalize = require('../utils/capitalize');
|
||||
const unprint = require('unprint');
|
||||
|
||||
function scrapeAll(scenes, channel) {
|
||||
return scenes.map(({ query }) => {
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(scenes, _channel) {
|
||||
return scenes.map(({ query, element }) => {
|
||||
const release = {};
|
||||
|
||||
release.url = query.url('h4 a', 'href', { origin: channel.url });
|
||||
release.entryId = new URL(release.url).pathname.match(/\d+$/)[0];
|
||||
release.url = query.url('.card__link');
|
||||
release.entryId = new URL(release.url).pathname.match(/\/en\/(.*)/)[1];
|
||||
|
||||
release.title = query.cnt('h4 a');
|
||||
release.duration = query.duration('.product-item-time');
|
||||
release.title = query.content('.card__link');
|
||||
release.duration = query.duration('.card__img_badge.bottom-right');
|
||||
|
||||
release.poster = query.img('.product-item-image img', 'src', { origin: channel.url });
|
||||
release.poster = query.img('.card__img img');
|
||||
release.teaser = unprint.query.dataset(element, null, 'video');
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Fetch one page of the channel's video listing and scrape its scene cards.
 *
 * @param {object} channel - Entity; `origin` is used to build the listing URL.
 * @param {number} [page=1] - Value of the `page` query parameter.
 * @returns {Promise<*>} The result of scrapeAll when the request is ok,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(`${channel.origin}/en/videos?page=${page}`, { selectAll: '.card--item' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
function scrapeScene({ query }, url, channel) {
|
||||
const release = {};
|
||||
|
||||
release.entryId = new URL(url).pathname.match(/\d+$/)[0];
|
||||
release.entryId = new URL(url).pathname.match(/\/en\/(.*)/)[1];
|
||||
|
||||
release.title = query.cnt('.heading-detail h1');
|
||||
release.description = query.cnt('.heading-detail p:nth-child(3)');
|
||||
release.title = query.content('h1.h2');
|
||||
release.description = query.attribute('meta[property="og:description"]', 'content'); // not usually used, if ever
|
||||
|
||||
const details = query.all('.video-info-item').reduce((acc, detailEl) => {
|
||||
const key = detailEl.textContent.match(/(\w+):/)[1];
|
||||
release.date = query.date('meta[property="video:release_date"]', 'YYYY-MM-DD', { attribute: 'content' });
|
||||
release.duration = query.number('meta[property="video:duration"]', { attribute: 'content' });
|
||||
|
||||
return { ...acc, [slugify(key, '_')]: detailEl };
|
||||
}, {});
|
||||
release.actors = query.all('.video-info .mini-avatars a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null, { origin: channel.origin }),
|
||||
avatar: [
|
||||
unprint.query.img(actorEl, 'img')?.replace('-video_actor_avatar', '-actor_detail'),
|
||||
unprint.query.img(actorEl, 'img'),
|
||||
],
|
||||
}));
|
||||
|
||||
const { date, precision } = query.dateAgo(details.date);
|
||||
release.tags = query.contents('.video-info a[href*="?category"]').map((tag) => tag.replace('#', '').trim());
|
||||
release.qualities = query.numbers('.download-dropdown-menu li div', { attribute: 'data-res' });
|
||||
|
||||
release.date = date;
|
||||
release.datePrecision = precision;
|
||||
release.poster = [
|
||||
query.img('.video-player', { attribute: 'data-poster' }),
|
||||
query.img('meta[property="og:image"]', { attribute: 'content' }),
|
||||
];
|
||||
|
||||
release.actors = query.cnts(details.actors, 'a').map((actor) => capitalize(actor, { uncapitalize: true }));
|
||||
release.duration = query.duration(details.duration);
|
||||
release.tags = query.cnts(details.genres, 'a');
|
||||
release.trailer = query.all('.video-player source').map((videoEl) => ({
|
||||
src: unprint.query.video(videoEl, null),
|
||||
quality: unprint.query.number(videoEl, null, { attribute: 'size' }),
|
||||
}));
|
||||
|
||||
release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url });
|
||||
release.photos = query.imgs('#gallery .photo-item img', 'data-src', { origin: channel.url });
|
||||
release.photos = query.all('.image .gallery-popup').map((imgEl) => [
|
||||
unprint.query.img(imgEl, null, { attribute: 'href' }),
|
||||
unprint.query.img(imgEl, 'img'),
|
||||
]);
|
||||
|
||||
release.trailer = query.video();
|
||||
|
||||
release.channel = slugify(query.q('.video-detail-logo img', 'alt'), '');
|
||||
release.channel = slugify(query.attribute('meta[property="og:site_name"]', 'content'), '');
|
||||
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, entity) {
|
||||
const profile = {};
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await unprint.get(url);
|
||||
|
||||
profile.avatar = query.img('.model-heading-photo img', 'src', { origin: entity.url });
|
||||
profile.releases = scrapeAll(qu.initAll(query.all('.product-item')), entity);
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.context, url, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
/**
 * Build an actor profile from a model page.
 *
 * The `.model-info__item` rows are turned into a key/value map: the first
 * span is slugified with `_` as the key, the last span is the value.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {string} url - Profile URL, stored on the profile as-is.
 * @param {object} _entity - Not referenced by this function.
 * @returns {object} Profile with url, avatar, gender, birthCountry, ethnicity,
 *   age, hairColor and, when derivable, cup, naturalBoobs and hasTattoos.
 */
function scrapeProfile({ query }, url, _entity) {
	const profile = { url };

	const bioEntries = query.all('.model-info__item').map((bioEl) => [
		slugify(unprint.query.content(bioEl, 'span:first-child'), '_'),
		unprint.query.content(bioEl, 'span:last-child'),
	]);

	const bio = Object.fromEntries(bioEntries);

	profile.avatar = query.img('.actor-img');

	profile.gender = bio.gender;
	profile.birthCountry = bio.nationality;
	profile.ethnicity = bio.ethnicity;
	profile.age = bio.age;

	profile.hairColor = bio.hair_color;

	if (!bio.breast_size?.includes('-')) profile.cup = bio.breast_size; // larger than F is defined as F-Z, not too useful
	if (/natural/i.test(bio.breast_type)) profile.naturalBoobs = true;
	if (/fake/i.test(bio.breast_type)) profile.naturalBoobs = false;
	if (/no/i.test(bio.tattoo)) profile.hasTattoos = false;
	if (/yes/i.test(bio.tattoo)) profile.hasTattoos = true;

	return profile;
}
|
||||
|
||||
/**
 * Fetch one page of the channel's new-videos listing.
 *
 * @param {object} channel - Entity; `url` is used to build the listing URL.
 * @param {number} [page=1] - 1-based page number.
 * @returns {Promise<*>} The result of scrapeAll over the items after the first
 *   `(page - 1) * 16`, or the response status when the request is not ok.
 */
async function fetchLatest(channel, page = 1) {
	const url = page === 1 ? `${channel.url}/en/new-videos` : `${channel.url}/en/new-videos?do=next`;

	// pagination state is kept in session, and each new 'page' includes all previous pages
	const session = http.session();
	const headers = { 'X-Requested-With': 'XMLHttpRequest' };

	// requests must run one after another on the same session to advance its page state
	for (let i = 0; i < page - 1; i += 1) {
		await http.get(url, { headers, session }); // eslint-disable-line no-await-in-loop
	}

	const res = await http.get(url, { headers, session });

	if (!res.ok) {
		return res.status;
	}

	const items = qu.extractAll(res.body.snippets?.['snippet--videoItems'] || res.body, '.product-item');

	// drop the items that belong to earlier pages (16 per page)
	return scrapeAll(items.slice((page - 1) * 16), channel);
}
|
||||
|
||||
/**
 * Fetch a scene page and scrape it.
 *
 * @param {string} url - Scene URL.
 * @param {object} channel - Entity forwarded to scrapeScene.
 * @returns {Promise<*>} The result of scrapeScene when the request is ok,
 *   otherwise the response status.
 */
async function fetchScene(url, channel) {
	const res = await qu.get(url, 'body > .container');

	if (!res.ok) {
		return res.status;
	}

	return scrapeScene(res.item, url, channel);
}
|
||||
|
||||
async function fetchProfile(baseActor, entity) {
|
||||
const searchRes = await qu.getAll(`https://www.porncz.com/en/search-results?showModels=1&value=${baseActor.name}`, '.project-item');
|
||||
const url = `${new URL(entity.url).origin}/en/pornstars/${baseActor.slug}`;
|
||||
const res = await unprint.get(`${new URL(entity.url).origin}/en/pornstars/${baseActor.slug}`);
|
||||
|
||||
if (searchRes.ok) {
|
||||
const model = searchRes.items.find(({ query }) => query.cnt('h3 a') === baseActor.name);
|
||||
|
||||
if (model) {
|
||||
const modelUrl = model.query.url('h3 a', 'href', { origin: 'https://www.porncz.com' });
|
||||
const modelRes = await qu.get(`${modelUrl}?do=nextDetail`); // get more videos
|
||||
|
||||
if (modelRes.ok) {
|
||||
return scrapeProfile(modelRes.item, entity);
|
||||
}
|
||||
|
||||
return modelRes.status;
|
||||
}
|
||||
|
||||
return null;
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, url, entity);
|
||||
}
|
||||
|
||||
return searchRes.status;
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@ const actors = [
|
|||
{ entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] },
|
||||
{ entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] },
|
||||
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
|
||||
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
|
||||
];
|
||||
|
||||
const actorScrapers = scrapers.actors;
|
||||
|
|
|
|||
Loading…
Reference in New Issue