Added DarkkoTV scraper. Removed some obsolete web components.
This commit is contained in:
@@ -11,6 +11,7 @@ const bradmontana = require('./bradmontana');
|
||||
const cherrypimps = require('./cherrypimps');
|
||||
const cumlouder = require('./cumlouder');
|
||||
const modelmedia = require('./modelmedia');
|
||||
const darkkotv = require('./darkkotv');
|
||||
const dorcel = require('./dorcel');
|
||||
// const famedigital = require('./famedigital');
|
||||
const firstanalquest = require('./firstanalquest');
|
||||
@@ -223,6 +224,7 @@ module.exports = {
|
||||
bradmontana,
|
||||
cherrypimps,
|
||||
cumlouder,
|
||||
darkkotv,
|
||||
dorcelclub: dorcel,
|
||||
freeones,
|
||||
hitzefrei,
|
||||
|
||||
157
src/scrapers/darkkotv.js
Executable file
157
src/scrapers/darkkotv.js
Executable file
@@ -0,0 +1,157 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
|
||||
const slugify = require('../utils/slugify');
|
||||
const tryUrls = require('../utils/try-urls');
|
||||
const { convert } = require('../utils/convert');
|
||||
|
||||
/**
 * Derives a scene's entry ID from its URL.
 *
 * Takes the part of the URL path between `/scenes/` and `.html`, drops an
 * optional `_vids` suffix, and passes the remainder through `slugify`.
 *
 * @param {string} url - Absolute scene URL.
 * @returns {string|null} The slugified identifier, or null when the URL is
 *   missing, cannot be parsed, or its path is not a scene page.
 */
function getEntryId(url) {
	if (!url) {
		return null;
	}

	let pathname;

	try {
		({ pathname } = new URL(url));
	} catch (_error) {
		// relative or malformed URL; there is no scene path to extract
		return null;
	}

	// the dot is escaped so that only a literal `.html` extension matches
	const match = pathname.match(/\/scenes\/(.*?)(_vids)?\.html/);

	return match ? slugify(match[1]) : null;
}
|
||||
|
||||
/**
 * Builds one release object per scene tile of a listing page.
 *
 * @param {Array<{ query: object }>} scenes - Scene contexts; each must expose
 *   a `query` helper scoped to a single tile.
 * @param {{ origin: string }} channel - Channel whose `origin` is passed to
 *   the poster image lookups.
 * @returns {object[]} Releases with url, entryId, title, date, duration,
 *   actors and poster.
 */
function scrapeAll(scenes, channel) {
	// poster candidates are requested from the 4x attribute down to the 1x attribute
	const posterDensities = [4, 3, 2, 1];

	return scenes.map(({ query }) => {
		const url = query.url('.videoPic a, h4 a');
		const entryId = getEntryId(url);

		const title = query.content('h4 a');

		const date = query.date('.videoInfo li:first-child ', 'MM-DD-YYYY');
		// the listed number is multiplied by 60 (presumably minutes to seconds); 0 or NaN becomes null
		const duration = query.number('.videoInfo li:nth-child(2)') * 60 || null;

		const actors = query.all('a[href*="models/"]').map((actorEl) => {
			const name = unprint.query.content(actorEl);
			const actorUrl = unprint.query.url(actorEl, null);

			return { name, url: actorUrl };
		});

		const poster = posterDensities.map((density) => query.img('.videoPic img', {
			attribute: `src0_${density}x`,
			origin: channel.origin,
		}));

		return {
			url,
			entryId,
			title,
			date,
			duration,
			actors,
			poster,
		};
	});
}
|
||||
|
||||
/**
 * Fetches one page of the channel's movie listing and scrapes its scenes.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the site base.
 * @param {number} [page=1] - Listing page number.
 * @returns {Promise<object[]|*>} The scraped releases when the request
 *   succeeded, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(`${channel.url}/categories/movies_${page}.html`, {
		selectAll: '.latestUpdateB',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
/**
 * Fetches a caps page and collects its images.
 *
 * @param {string} [url] - URL of the caps page.
 * @returns {Promise<*|null>} The result of the `.photoDPic img` image query,
 *   or null when no URL was given or the request failed.
 */
async function fetchCaps(url) {
	if (url) {
		const res = await unprint.get(url, { select: '.photoDetailsArea' });

		if (res.ok) {
			return res.context.query.imgs('.photoDPic img');
		}
	}

	return null;
}
|
||||
|
||||
/**
 * Scrapes a single scene page into a release object.
 *
 * @param {{ query: object, html: string }} context - Page-wide query helper
 *   and the raw page HTML.
 * @param {{ url: string, entity: { url: string }, include: { photos: * } }} options -
 *   Scene URL, the entity whose `url` prefixes poster paths, and the include
 *   flags; caps are only fetched when `include.photos` is truthy.
 * @returns {Promise<object>} Release with entryId, title, description, date,
 *   duration, actors, tags, trailer and, when available, poster and caps.
 */
async function scrapeScene({ query: pageQuery, html }, { url, entity, include }) {
	// most details are read through a query scoped to the info panel
	const infoContext = unprint.init(pageQuery.element('.latestUpdateBinfo'));
	const infoQuery = infoContext.query;

	const release = {
		entryId: getEntryId(url),
		title: pageQuery.content('.vidImgTitle h4'),
		description: infoQuery.content('.vidImgContent p'),
		date: infoQuery.date('.videoInfo li:first-child ', 'MM-DD-YYYY'),
		// the listed number is multiplied by 60 (presumably minutes to seconds); 0 or NaN becomes null
		duration: infoQuery.number('.videoInfo li:nth-child(2)') * 60 || null,
		actors: infoQuery.all('a[href*="models/"]').map((actorEl) => {
			const name = unprint.query.content(actorEl);
			const actorUrl = unprint.query.url(actorEl, null);

			return { name, url: actorUrl };
		}),
		tags: infoQuery.contents('.blogTags a'),
	};

	// the poster path is taken from a `useimage = "..."` assignment in the raw HTML
	const posterMatch = html.match(/useimage\s*=\s*"(.*?)"/i);
	const posterPath = posterMatch?.[1];
	const capsUrl = pageQuery.url('a[href*="_caps"]');

	if (posterPath) {
		// candidates from the -4x path down to -1x
		release.poster = [4, 3, 2, 1].map((density) => {
			const densityPath = posterPath.replace('-4x', `-${density}x`);

			return unprint.prefixUrl(densityPath, entity.url);
		});
	}

	if (include.photos && capsUrl) {
		release.caps = await fetchCaps(capsUrl);
	}

	release.trailer = pageQuery.video('#download_select option[value*=".mp4"]', { attribute: 'value' });

	return release;
}
|
||||
|
||||
function scrapeProfile({ query }, { url, actorName }) {
|
||||
const profile = { url };
|
||||
|
||||
const bio = Object.fromEntries(query.contents('.vitalStats li').map((entry) => {
|
||||
const [key, value] = entry.split(':');
|
||||
|
||||
if (!key || !value) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [slugify(key, '_'), value?.trim()];
|
||||
}).filter(Boolean));
|
||||
|
||||
profile.description = `${query.content('.modelBioInfo')?.replace(new RegExp(`professional bio of ${actorName}`, 'i'), '')}${bio.awards ? ` Awards: ${bio.awards}` : ''}`;
|
||||
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'MMMM D, YYYY');
|
||||
profile.birthPlace = bio.birthplace;
|
||||
profile.ethnicity = bio.ethnicity;
|
||||
|
||||
profile.height = unprint.extractNumber(bio.height, { match: /(\d+)\s*cm/i, matchIndex: 1 })
|
||||
|| convert(bio.height?.match(/\d+\s*ft \d+\s*in/)?.[0], 'cm');
|
||||
|
||||
profile.weight = unprint.extractNumber(bio.weight, { match: /(\d+)\s*kg/i, matchIndex: 1 })
|
||||
|| convert(bio.weight?.match(/\d+\s*lbs/)[0], 'lb', 'kg');
|
||||
|
||||
profile.measurements = bio.measurements;
|
||||
|
||||
if (/yes/i.test(bio.natural_breasts)) profile.naturalBoobs = true;
|
||||
if (/no/i.test(bio.natural_breasts)) profile.naturalBoobs = false;
|
||||
|
||||
if (/yes/i.test(bio.tattoos)) profile.hasTattoos = true;
|
||||
if (/no/i.test(bio.tattoos)) profile.hasTattoos = false;
|
||||
|
||||
if (/yes/i.test(bio.piercings)) profile.hasPiercings = true;
|
||||
if (/no/i.test(bio.piercings)) profile.hasPiercings = false;
|
||||
|
||||
profile.socials = query.urls('.vitalStats a[href*="onlyfans"], .vitalStats a[href*="twitter"], .vitalStats a[href*="instagram"]');
|
||||
profile.avatar = Array.from({ length: 4 }, (_value, index) => query.img('.modelBioPic img', { attribute: `src0_${4 - index}x` }));
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Looks up an actor's model page and scrapes it into a profile.
 *
 * Candidate URLs are handed to `tryUrls` in this order: the actor's own URL,
 * then `/models/<slug>.html` with the name slugified using `-`, no delimiter,
 * and `_`.
 *
 * @param {{ name: string, url: string }} actor - Actor name and known URL.
 * @param {{ url: string }} entity - Entity whose `url` is the site base.
 * @returns {Promise<object|*>} The scraped profile when a request succeeded,
 *   otherwise the response status.
 */
async function fetchProfile({ name: actorName, url: actorUrl }, entity) {
	const slugDelimiters = ['-', '', '_'];

	const candidateUrls = [
		actorUrl,
		...slugDelimiters.map((delimiter) => `${entity.url}/models/${slugify(actorName, delimiter)}.html`),
	];

	const { res, url } = await tryUrls(candidateUrls);

	if (!res.ok) {
		return res.status;
	}

	return scrapeProfile(res.context, { url, entity, actorName });
}
|
||||
|
||||
// Scraper entry points exposed by this module.
module.exports = { fetchLatest, fetchProfile, scrapeScene };
|
||||
@@ -16,6 +16,7 @@ const cherrypimps = require('./cherrypimps');
|
||||
const cliffmedia = require('./cliffmedia');
|
||||
const cumlouder = require('./cumlouder');
|
||||
const czechav = require('./czechav');
|
||||
const darkkotv = require('./darkkotv');
|
||||
const modelmedia = require('./modelmedia');
|
||||
const dorcel = require('./dorcel');
|
||||
const fabulouscash = require('./fabulouscash');
|
||||
@@ -118,6 +119,7 @@ module.exports = {
|
||||
cumlouder,
|
||||
czechav,
|
||||
pornworld,
|
||||
darkkotv,
|
||||
delphine: modelmedia,
|
||||
dorcel,
|
||||
elegantangel: adultempire,
|
||||
|
||||
@@ -30,6 +30,17 @@ function scrapeAll(scenes) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Fetches one listing page of the channel and scrapes its scenes.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the listing base.
 * @param {number} [page=1] - Page number appended to the channel URL.
 * @returns {Promise<object[]|*>} The scraped releases when the request
 *   succeeded, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(`${channel.url}/${page}`, { selectAll: '.scene' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
function scrapeScene({ query }, { url }) {
|
||||
const release = {};
|
||||
|
||||
@@ -62,17 +73,6 @@ function scrapeProfile({ query }) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
/**
 * Fetches one listing page of the channel and scrapes its scenes.
 *
 * @param {{ url: string }} channel - Channel whose `url` is the listing base.
 * @param {number} [page=1] - Page number appended to the channel URL.
 * @returns {Promise<object[]|*>} The scraped releases when the request
 *   succeeded, otherwise the response status.
 */
async function fetchLatest(channel, page = 1) {
	const res = await unprint.get(`${channel.url}/${page}`, { selectAll: '.scene' });

	if (!res.ok) {
		return res.status;
	}

	return scrapeAll(res.context, channel);
}
|
||||
|
||||
async function fetchProfile({ name: actorName }, entity) {
|
||||
const url = `${entity.url}/actors/${slugify(actorName, '_')}`;
|
||||
const res = await unprint.get(url);
|
||||
|
||||
Reference in New Issue
Block a user