From 668221dafd1f4d64ff2aaca423a6c331ab41c44c Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 1 Sep 2024 00:00:40 +0200 Subject: [PATCH] Added Porn World to replace DDF scraper. --- seeds/02_sites.js | 8 +- src/scrapers/ddfnetwork.js | 196 ------------------------------------- src/scrapers/pornworld.js | 113 +++++++++++++++++++++ src/scrapers/scrapers.js | 14 +-- src/scrapers/template.js | 10 +- 5 files changed, 134 insertions(+), 207 deletions(-) delete mode 100755 src/scrapers/ddfnetwork.js create mode 100755 src/scrapers/pornworld.js diff --git a/seeds/02_sites.js b/seeds/02_sites.js index da54d53c..48a7122c 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -2934,7 +2934,13 @@ const sites = [ description: "Do you love blowjob scenes? Welcome to the place with the best outdoors cock-eating techniques you've ever seen. The randiest females sucking cocks right there.", parent: 'cumlouder', }, - // DDF NETWORK + // PORN WORLD / DDF NETWORK + { + slug: 'pornworld', + name: 'Porn World', + url: 'https://pornworld.com', + parent: 'pornworld', + }, { slug: 'ddfbusty', name: 'DDF Busty', diff --git a/src/scrapers/ddfnetwork.js b/src/scrapers/ddfnetwork.js deleted file mode 100755 index 0ade26e6..00000000 --- a/src/scrapers/ddfnetwork.js +++ /dev/null @@ -1,196 +0,0 @@ -'use strict'; - -const qu = require('../utils/qu'); -const slugify = require('../utils/slugify'); -const http = require('../utils/http'); - -function scrapeAll(scenes, site, origin) { - return scenes.map(({ query }) => { - const release = {}; - - release.title = query.q('.card-title a, .videoContent h4 a', 'title'); - release.url = `${site?.url || origin || 'https://pornworld.com'}${query.q('a', 'href')}`; - release.entryId = release.url.match(/\/(\d+)$/)[1]; - - release.date = query.date('small[datetime]', 'YYYY-MM-DD HH:mm:ss', null, 'datetime'); - release.actors = query.all('.card-subtitle a, .featuring a', true).filter(Boolean); - - release.description = query.q('h4 + p', true); - - const duration = parseInt(query.q('.card-info div:nth-child(2) .card-text', true), 10) * 60; - if (duration) release.duration = duration; - else release.duration = query.dur('.time'); - - release.poster = query.img(); - - return release; - }); -} - -async function scrapeScene({ query }, url, channel) { - const release = {}; - - release.entryId = url.match(/\/(\d+)$/)[1]; - - release.title = query.meta('itemprop=name') || query.cnt('.video-title h1, .about-text .story-title, .video-specs h1') || query.cnt('h3'); - release.description = query.cnt('.descr-box p') || query.cnt('.about-text p:not(.story-title)') || query.text('.description p'); - - release.date = query.date('meta[itemprop=uploadDate]', 'YYYY-MM-DD', null, 'content') - || query.date('.actors time', 'MMMM DD, YYYY') - || query.date('.title-border:nth-child(2) p', 'MM.DD.YYYY') - || query.date('.length', 'MMMM DD, YYYY', /\w+ \d{2}, \d{4}/); - - if (query.exists('.pornstar-card > a')) release.actors = query.all('.pornstar-card > a', 'title'); - else if (query.exists('.actors a')) release.actors = query.cnts('.actors a'); - - if (query.exists('.tags-tab')) release.tags = query.cnts('.tags-tab .tags a'); - else if (query.exists('.tags-box')) release.tags = query.cnts('.tags-box .tags li'); - - release.duration = parseInt(query.cnt('.icon-video-red + span'), 10) * 60 || query.dur('.length') || null; - release.likes = Number(query.cnt('.icon-like-red + span')) || null; - - release.poster = query.poster() || query.poster('dl8-video') || query.img('#videoBlock img'); - release.photos = query.urls('.photo-slider-guest .card a'); - - release.trailer = query.all('source[type="video/mp4"]').map((trailer) => ({ - src: trailer.src, - quality: Number(trailer.attributes.res?.value || trailer.attributes.quality?.value.slice(0, -1)) || null, - vr: channel.tags?.some((tag) => tag.slug === 'vr'), - })); - - return release; -} - -async function fetchActorReleases(urls) { - // DDF Network and DDF Network Stream list all scenes, exclude - const sources = urls.filter((url) => !/ddfnetwork/.test(url)); - - const releases = await Promise.all(sources.map(async (url) => { - const res = await qu.getAll(url, '.card.m-1:not(.pornstar-card)'); - - return res.ok ? scrapeAll(res.items, null, new URL(url).origin) : null; - })); - - // DDF cross-releases scenes between sites, filter duplicates by entryId - return Object.values(releases - .flat() - .sort((releaseA, releaseB) => releaseB.date - releaseA.date) // sort by date so earliest scene remains - .reduce((acc, release) => ({ ...acc, [release.entryId]: release }), {})); -} - -async function scrapeProfile({ query }, _url, actorName) { - const keys = query.all('.about-title', true).map((key) => slugify(key, '_')); - const values = query.all('.about-info').map((el) => { - if (el.children.length > 0) { - return Array.from(el.children, (child) => child.textContent.trim()).join(', '); - } - - return el.textContent.trim(); - }); - - const bio = keys.reduce((acc, key, index) => { - if (values[index] === '-') return acc; - - return { - ...acc, - [key]: values[index], - }; - }, {}); - - const profile = { - name: actorName, - }; - - profile.description = query.q('.description-box', true); - profile.birthdate = qu.extractDate(bio.birthday, 'MMMM DD, YYYY'); - - if (bio.nationality) profile.nationality = bio.nationality; - - if (bio.bra_size) [profile.bust] = bio.bra_size.match(/\d+\w+/); - if (bio.waist) profile.waist = Number(bio.waist.match(/\d+/)[0]); - if (bio.hips) profile.hip = Number(bio.hips.match(/\d+/)[0]); - - if (bio.height) profile.height = Number(bio.height.match(/\d{2,}/)[0]); - - if (bio.tit_style && /Enhanced/.test(bio.tit_style)) profile.naturalBoobs = false; - if (bio.tit_style && /Natural/.test(bio.tit_style)) profile.naturalBoobs = true; - - if (bio.body_art && /Tattoo/.test(bio.body_art)) profile.hasTattoos = true; - if (bio.body_art && /Piercing/.test(bio.body_art)) profile.hasPiercings = true; - - if (bio.hair_style) profile.hair = bio.hair_style.split(',')[0].trim().toLowerCase(); - if (bio.eye_color) profile.eyes = bio.eye_color.match(/\w+/)[0].toLowerCase(); - - if (bio.shoe_size) profile.shoes = Number(bio.shoe_size.split('|')[1]); - - const avatarEl = query.q('.pornstar-details .card-img-top'); - if (avatarEl && avatarEl.dataset.src.match('^//')) profile.avatar = `https:${avatarEl.dataset.src}`; - - profile.releases = await fetchActorReleases(query.urls('.find-me-tab li a')); - - return profile; -} - -async function fetchLatest(channel, page = 1) { - /* ddfnetwork.com redirects to pornworld.com - const url = site.parameters?.native - ? `${site.url}/videos/search/latest/ever/allsite/-/${page}` - : `https://ddfnetwork.com/videos/search/latest/ever/${new URL(site.url).hostname}/-/${page}`; - */ - - const url = channel.parameters?.latest || `${channel.url}/videos/search/latest/ever/allsite/-/${page}`; - const res = await qu.getAll(url, '.card.m-1:not(.pornstar-card), .allVideos .videoBlock'); - - if (res.ok) { - return scrapeAll(res.items, channel); - } - - return res.status; -} - -async function fetchScene(url, site) { - // DDF's main site moved to Porn World - // const res = await http.get(`https://ddfnetwork.com${new URL(url).pathname}`); - const res = await qu.get(url, '.content, #content, .taspVideoPage'); - - return res.ok ? scrapeScene(res.item, url, site) : res.status; -} - -async function fetchProfile({ name: actorName }) { - const resSearch = await http.post('https://ddfnetwork.com/search/ajax', - { - type: 'hints', - word: actorName, - }, - { - decodeJSON: false, - headers: { - 'x-requested-with': 'XMLHttpRequest', - }, - }); - - if (resSearch.statusCode !== 200 || Array.isArray(resSearch.body.list)) { - return null; - } - - if (!resSearch.body.list.pornstarsName || resSearch.body.list.pornstarsName.length === 0) { - return null; - } - - const [actor] = resSearch.body.list.pornstarsName; - const url = `https://ddfnetwork.com${actor.href}`; - - const resActor = await http.get(url); - - if (resActor.statusCode !== 200) { - return null; - } - - return scrapeProfile(resActor.body.toString(), url, actorName); -} - -module.exports = { - fetchLatest, - fetchProfile, - fetchScene, -}; diff --git a/src/scrapers/pornworld.js b/src/scrapers/pornworld.js new file mode 100755 index 00000000..1d71fd29 --- /dev/null +++ b/src/scrapers/pornworld.js @@ -0,0 +1,113 @@ +'use strict'; + +const unprint = require('unprint'); + +function scrapeAll(scenes) { + return scenes.map(({ query }) => { + const release = {}; + + release.url = query.url('.card-title a'); + release.entryId = new URL(release.url).pathname.match(/\/watch\/([\d-]+)/)[1]; + + release.title = query.content('.card-title a'); + + release.date = query.date('.release-date', 'YYYY MMMM, DD', { match: /\d{4} \w+, \d{1,2}/i }); + release.duration = query.duration('.video-duration'); + + release.actors = query.all('.starring a').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); + + const poster = query.img('.thumbnail-pic .card-img'); + + if (poster) { + const { origin, pathname } = new URL(poster); + + release.poster = [ + `${origin}${pathname}`, + poster, + ]; + } + + release.trailer = query.video('.thumbnail-pic', { attribute: 'data-video-src' }); // actually the full trailer with audio, not a teaser + + return release; + }); +} + +function scrapeScene({ query }, { url }) { + const release = {}; + + release.entryId = new URL(url).pathname.match(/\/watch\/([\d-]+)/)[1]; + + release.title = query.content('.scene__title'); + release.description = query.text('//p[span[contains(text(), "Description:")]]'); + + release.date = query.date('//p[strong[contains(text(), "Publication date:")]]/span', 'YYYY, MMMM D', { match: /\d{4}, \w+ \d{1,2}/i }); + release.duration = query.duration('//p[i[contains(@class, "bi-clock-fill")]]'); + + release.actors = query.all('p a[href*="model/"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); + + release.tags = query.contents('p a[href*="?tags"]'); + + release.poster = query.poster('.video-player'); + release.trailer = query.video('.video-player source'); + + return release; +} + +function scrapeProfile({ query }) { + const profile = {}; + + profile.nationality = query.content('//h3[contains(text(), "Nationality:")]/span') || null; + profile.age = query.number('//h3[contains(text(), "Age:")]/span'); + + profile.avatar = query.img(); + + return profile; +} + +async function fetchLatest(channel, page = 1) { + const url = `${channel.url}/videos?page=${page}`; + const res = await unprint.get(url, { selectAll: '.card.scene' }); + + if (res.ok) { + return scrapeAll(res.context, channel); + } + + return res.status; +} + +async function fetchProfile({ name: actorName }, entity) { + const searchUrl = `${entity.url}/models?name=${actorName}&sort=popularity`; + const searchRes = await unprint.get(searchUrl); + + if (searchRes.ok) { + const actorEl = searchRes.context.query.all('.pagination-items .model a').find((resultEl) => unprint.query.attribute(resultEl, null, 'title') === actorName); + const actorUrl = unprint.query.url(actorEl, null); + + if (actorUrl) { + const res = await unprint.get(actorUrl, { select: '.model-detail-card' }); + + if (res.ok) { + return scrapeProfile(res.context, actorName, entity); + } + + return res.status; + } + + return null; + } + + return searchRes.status; +} + +module.exports = { + fetchLatest, + fetchProfile, + scrapeScene, +}; diff --git a/src/scrapers/scrapers.js b/src/scrapers/scrapers.js index 3d8882ad..1b7db161 100755 --- a/src/scrapers/scrapers.js +++ b/src/scrapers/scrapers.js @@ -15,7 +15,6 @@ const cherrypimps = require('./cherrypimps'); const cliffmedia = require('./cliffmedia'); const cumlouder = require('./cumlouder'); const czechav = require('./czechav'); -const ddfnetwork = require('./ddfnetwork'); const modelmedia = require('./modelmedia'); const dorcel = require('./dorcel'); const fabulouscash = require('./fabulouscash'); @@ -48,16 +47,17 @@ const missax = require('./missax'); const naughtyamerica = require('./naughtyamerica'); const newsensations = require('./newsensations'); const nubiles = require('./nubiles'); +const pascalssubsluts = require('./pascalssubsluts'); const perfectgonzo = require('./perfectgonzo'); const pervcity = require('./pervcity'); +const pierrewoodman = require('./pierrewoodman'); +const pinkyxxx = require('./pinkyxxx'); const porndoe = require('./porndoe'); const porncz = require('./porncz'); const pornhub = require('./pornhub'); -const pascalssubsluts = require('./pascalssubsluts'); // reserved keyword -const pierrewoodman = require('./pierrewoodman'); -const pinkyxxx = require('./pinkyxxx'); +const pornworld = require('./pornworld'); const privateNetwork = require('./private'); // reserved keyword -const purgatoryx = require('./purgatoryx'); // reserved keyword +const purgatoryx = require('./purgatoryx'); const radical = require('./radical'); const rickysroom = require('./rickysroom'); const score = require('./score'); @@ -101,7 +101,7 @@ const scrapers = { cliffmedia, cumlouder, czechav, - pornworld: ddfnetwork, + pornworld, delphine: modelmedia, dorcel, elegantangel: adultempire, @@ -292,7 +292,7 @@ const scrapers = { pimpxxx: cherrypimps, porncz, pornhub, - pornworld: ddfnetwork, + pornworld, povperverts: fullpornnetwork, povpornstars: hush, private: privateNetwork, diff --git a/src/scrapers/template.js b/src/scrapers/template.js index 15e94554..e47c65f4 100755 --- a/src/scrapers/template.js +++ b/src/scrapers/template.js @@ -12,13 +12,17 @@ function scrapeAll(scenes) { release.entryId = new URL(release.url).pathname.match(/\/scene\/(\d+)/)[1]; release.title = query.content('.title a'); - release.description = query.content('.description'); release.date = query.date('.date', 'MMM DD, YYYY'); - release.actors = query.contents('.models a.model'); + release.duration = query.duration('.duration'); + + release.actors = query.all('.models a.model').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null), + })); release.poster = query.img('img.poster'); - release.teaser = { src: query.video('.teaser video') }; + release.teaser = query.video('.teaser video'); release.stars = query.number('.rating'); release.likes = query.number('.likes');