From 2a4dce106e9589c6a2af8c743256f5f679fec1f5 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 20 Jan 2026 04:28:49 +0100 Subject: [PATCH] Moved Arch Angel to Full Porn Network and adapted scraper. --- seeds/02_sites.js | 2 + src/scrapers/actors.js | 30 +++---- src/scrapers/archangel.js | 107 ----------------------- src/scrapers/fullpornnetwork.js | 147 ++++++++++++++++---------------- src/scrapers/mariskax.js | 2 +- src/scrapers/pornhub.js | 38 ++++----- src/scrapers/releases.js | 3 +- tests/profiles.js | 3 + 8 files changed, 113 insertions(+), 219 deletions(-) delete mode 100755 src/scrapers/archangel.js diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 7e8cbf17..d23414d8 100755 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -817,6 +817,8 @@ const sites = [ { slug: 'archangel', name: 'ArchAngel', + parent: 'fullpornnetwork', + independent: true, url: 'https://www.archangelvideo.com', }, // ASSYLUM diff --git a/src/scrapers/actors.js b/src/scrapers/actors.js index cf141913..342e4572 100644 --- a/src/scrapers/actors.js +++ b/src/scrapers/actors.js @@ -2,7 +2,6 @@ const adultempire = require('./adultempire'); const angelogodshackoriginal = require('./angelogodshackoriginal'); -const archangel = require('./archangel'); const americanpornstar = require('./americanpornstar'); const aziani = require('./aziani'); const badoink = require('./badoink'); @@ -178,20 +177,30 @@ module.exports = { // naughty america naughtyamerica, tonightsgirlfriend: naughtyamerica, + // full porn network + analbbc: fullpornnetwork, + analized: fullpornnetwork, + analviolation: fullpornnetwork, + archangel: fullpornnetwork, + baddaddypov: fullpornnetwork, + dtfsluts: fullpornnetwork, + girlfaction: fullpornnetwork, + hergape: fullpornnetwork, + homemadeanalwhores: fullpornnetwork, + jamesdeen: fullpornnetwork, + mugfucked: fullpornnetwork, + onlyprince: fullpornnetwork, + pervertgallery: fullpornnetwork, + povperverts: fullpornnetwork, // etc '18vr': badoink, theflourishxxx: theflourish, adultempire, - archangel, allherluv: missax, americanpornstar, - analbbc: fullpornnetwork, - analized: fullpornnetwork, - analviolation: fullpornnetwork, angelogodshackoriginal, asiam: modelmedia, babevr: badoink, - baddaddypov: fullpornnetwork, badoinkvr: badoink, bamvisions, bang, @@ -205,17 +214,12 @@ module.exports = { cumlouder, dorcelclub: dorcel, doubleviewcasting: firstanalquest, - dtfsluts: fullpornnetwork, exploitedx, // only from known URL that will specify site firstanalquest, freeones, - girlfaction: fullpornnetwork, - hergape: fullpornnetwork, hitzefrei, - homemadeanalwhores: fullpornnetwork, hookuphotshot, inthecrack, - jamesdeen: fullpornnetwork, jerkaoke: modelmedia, julesjordan, karups, @@ -233,17 +237,13 @@ module.exports = { mariskax, missax, mylf: teamskeet, - mugfucked: fullpornnetwork, nebraskacoeds: elevatedx, - onlyprince: fullpornnetwork, pascalssubsluts, pervcity, dpdiva: pervcity, - pervertgallery: fullpornnetwork, porncz, pornhub, pornworld, - povperverts: fullpornnetwork, private: privateNetwork, realvr: badoink, rickysroom, diff --git a/src/scrapers/archangel.js b/src/scrapers/archangel.js deleted file mode 100755 index 3961e7ae..00000000 --- a/src/scrapers/archangel.js +++ /dev/null @@ -1,107 +0,0 @@ -'use strict'; - -const unprint = require('unprint'); - -const slugify = require('../utils/slugify'); -const { convert } = require('../utils/convert'); -const tryUrls = require('../utils/try-urls'); - -function scrapeAll(scenes, channel) { - return scenes.map(({ query }) => { - const release = {}; - - release.url = query.url('a'); - release.entryId = slugify(new URL(release.url).pathname.match(/trailers\/(.*)/)[1]); - - release.title = query.content('h2 a'); - - release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({ - name: unprint.query.content(actorEl), - url: unprint.query.url(actorEl, null, { origin: channel.url }), - })); - - release.poster = query.img('.thumbnail img'); - release.teaser = query.video('.thumbnail img', { attribute: 'data-vid' }); // not a mistake, video source is on img tag - - return release; - }); -} - -async function fetchLatest(channel, page = 1) { - const url = `${channel.url}/porn-categories/movies/?sort=most-recent&page=${page}`; - const res = await unprint.get(url, { selectAll: '.content div[data-setid]' }); - - if (res.ok) { - return scrapeAll(res.context, channel); - } - - return res.status; -} - -function scrapeScene({ query }, { url }) { - const release = {}; - - release.entryId = slugify(new URL(url).pathname.match(/trailers\/(.*)/)[1]); - - release.title = query.content('h1.title_bar'); - release.description = query.content('.description-text'); - - release.date = query.date('//label[contains(text(), \'Date\')]/following-sibling::p[1]', 'YYYY-MM-DD'); - - release.actors = query.all('.text a[href*="/models"]').map((actorEl) => ({ - name: unprint.query.content(actorEl), - url: unprint.query.url(actorEl, null), - })); - - release.tags = query.contents('.text a[href*="categories/"]'); - - release.poster = query.poster('#preview video'); - release.trailer = query.video('#preview video source'); - - return release; -} - -function scrapeProfile({ query }, { url }) { - const profile = { url }; - - const bio = Object.fromEntries(query.all('.model-details > div').map((bioEl) => [ - slugify(unprint.query.content(bioEl, 'h2'), '_'), - unprint.query.text(bioEl), - ])); - - profile.avatar = [ - query.img('.model_bio_thumb', { attribute: 'src0_3x' }), - query.img('.model_bio_thumb', { attribute: 'src0_2x' }), - query.img('.model_bio_thumb', { attribute: 'src0_1x' }), - ]; - - profile.description = [query.content('.model-bio-text'), bio.funfact].filter(Boolean).join(' '); - profile.aliases = bio.alias?.split(/[,\n]/).map((alias) => alias.trim()); - - profile.age = parseInt(bio.age, 10) || null; - profile.dateOfBirth = unprint.extractDate(bio.age, 'MM/DD/YYYY'); - profile.measurements = bio.measurements; - profile.height = Number(bio.height.match(/(\d+)\s*cm/)?.[1]) || convert(bio.height, 'cm'); - - return profile; -} - -async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) { - const { res, url } = await tryUrls([ - actorUrl, - `${entity.url}/models/${slugify(actorName, '')}.html`, - `${entity.url}/models/${slugify(actorName, '-')}.html`, - ]); - - if (res.ok) { - return scrapeProfile(res.context, { entity, include, url }); - } - - return res.status; -} - -module.exports = { - fetchLatest, - fetchProfile, - scrapeScene, -}; diff --git a/src/scrapers/fullpornnetwork.js b/src/scrapers/fullpornnetwork.js index c61dab87..a2d3a077 100755 --- a/src/scrapers/fullpornnetwork.js +++ b/src/scrapers/fullpornnetwork.js @@ -1,112 +1,115 @@ 'use strict'; -const { get, geta, ctxa } = require('../utils/q'); -const slugify = require('../utils/slugify'); +const unprint = require('unprint'); -function scrapeAll(scenes, site) { - return scenes.map(({ _el, qu }) => { +const slugify = require('../utils/slugify'); +const { convert } = require('../utils/convert'); +const tryUrls = require('../utils/try-urls'); + +function scrapeAll(scenes, channel) { + return scenes.map(({ query }) => { const release = {}; - // release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id').match(/\w+-\w+-(\d+)-\d+/)[1]; - release.url = `${site.url}${qu.url('.scene-title a')}`; - release.entryId = new URL(release.url).pathname - .toLowerCase() - .replace(/\/$/, '') - .split('/') - .slice(-1)[0]; + release.url = query.url('a'); + release.entryId = slugify(new URL(release.url).pathname.match(/trailers\/(.*)/)[1]); - release.title = qu.q('.scene-title', true); - // release.description = qu.q('.title', 'title'); + release.title = query.content('h2 a'); + release.duration = query.duration('.video-data'); - // release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD'); - const minutes = qu.q('.scene-details', true).match(/(\d+) minutes/)[1]; - release.duration = Number(minutes) * 60; + release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: channel.url }), + })); - release.actors = qu.text('.update-models').trim().split(/\s*,\s*/g); + release.poster = query.img('.thumbnail img'); - const poster = qu.img('.scene-thumb img'); + const teaser = query.video('.thumbnail img', { attribute: 'data-vid' }); // not a mistake, video source is on img tag - if (poster) { - release.poster = [ - poster.replace('-1x', '-2x'), - poster, - ]; + if (!teaser?.includes('blur')) { // seemingly global SFW + release.teaser = teaser; } return release; }); } -function scrapeScene({ qu }, url, site) { - const release = { url }; +async function fetchLatest(channel, page = 1) { + const url = `${channel.url}/porn-categories/movies/?page=${page}&sort=most-recent`; // parameter order matters for some reason! + const res = await unprint.get(url, { selectAll: '.content div[data-setid]' }); - release.entryId = new URL(url).pathname - .toLowerCase() - .replace(/\/$/, '') - .split('/') - .slice(-1)[0]; + if (res.ok) { + return scrapeAll(res.context, channel); + } - release.title = qu.q('h4.text-center', true); - release.description = qu.q('p.hide-for-small-only', true); + return res.status; +} - release.actors = qu.all('a[href*="/model"]', true); - release.tags = qu.all('a[href*="/category"]', true); +function scrapeScene({ query }, { url, entity }) { + const release = {}; - const trailer = qu.video('source'); - if (trailer) release.trailer = { src: `${site.url}${trailer}` }; + release.entryId = slugify(new URL(url).pathname.match(/trailers\/(.*)/)[1]); + + release.title = query.content('h1.title_bar'); + release.description = query.content('.description-text, #description'); + + release.date = query.date('//label[contains(text(), \'Date\')]/following-sibling::p[1]', 'YYYY-MM-DD') + || query.date('//label[contains(text(), \'Date Added\')]/following-sibling::text()[1]', 'YYYY-MM-DD'); + + release.actors = query.all('#preview a[href*="/models"]').map((actorEl) => ({ + name: unprint.query.content(actorEl), + url: unprint.query.url(actorEl, null, { origin: entity.origin }), + })); + + release.tags = query.contents('#preview a[href*="categories/"]'); + + release.poster = query.poster('#preview video'); + release.trailer = query.video('#preview video source'); return release; } -function scrapeProfile({ el, qu }, actorName) { - if (slugify(qu.q('h1', true)) !== slugify(actorName)) { - // no 404 when actor is not found - return null; - } +function scrapeProfile({ query }, { url }) { + const profile = { url }; - const profile = {}; + const bio = Object.fromEntries(query.all('.model-details > div').map((bioEl) => [ + slugify(unprint.query.content(bioEl, 'h2'), '_'), + unprint.query.text(bioEl), + ])); - const description = qu.q('h4 + p', true); - if (description) profile.description = description; + profile.avatar = [ + query.img('.model_bio_thumb', { attribute: 'src0_3x' }), + query.img('.model_bio_thumb', { attribute: 'src0_2x' }), + query.img('.model_bio_thumb', { attribute: 'src0_1x' }), + query.img('.model_bio_thumb'), + ].filter(Boolean); - const avatar = qu.img('main img'); + profile.description = [query.content('.model-bio-text, #performer-description'), bio.funfact].filter(Boolean).join(' '); + profile.aliases = bio.alias?.split(/[,\n]/).map((alias) => alias.trim()); - if (avatar) { - profile.avatar = [ - avatar.replace('set-1x', 'set-2x'), - avatar, - ]; - } - - profile.releases = scrapeAll(ctxa(el, '.update, .scene-update')); + profile.age = parseInt(bio.age, 10) || null; + profile.dateOfBirth = unprint.extractDate(bio.age, 'MM/DD/YYYY'); + profile.measurements = bio.measurements; + profile.height = Number(bio.height?.match(/(\d+)\s*cm/)?.[1]) || convert(bio.height, 'cm'); return profile; } -async function fetchLatest(site, page = 1) { - const url = `${site.url}/1/scenes/recent/${page}/`; - const res = await geta(url, '.latest-updates .update, .scene-update'); +async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) { + const { res, url } = await tryUrls([ + actorUrl, + `${entity.url}/models/${slugify(actorName, '')}.html`, + `${entity.url}/models/${slugify(actorName, '-')}.html`, + ]); - return res.ok ? scrapeAll(res.items, site) : res.status; -} + if (res.ok) { + return scrapeProfile(res.context, { entity, include, url }); + } -async function fetchScene(url, site) { - const res = await get(url, 'main'); - - return res.ok && res.item ? scrapeScene(res.item, url, site) : res.status; -} - -async function fetchProfile({ name: actorName }, { site }) { - const actorSlug = slugify(actorName, ''); - - const url = `${site.url}/1/model/${actorSlug}`; - const res = await get(url); - - return res.ok ? scrapeProfile(res.item, actorName) : res.status; + return res.status; } module.exports = { fetchLatest, - fetchScene, fetchProfile, + scrapeScene, }; diff --git a/src/scrapers/mariskax.js b/src/scrapers/mariskax.js index e323fc30..46aef32e 100644 --- a/src/scrapers/mariskax.js +++ b/src/scrapers/mariskax.js @@ -94,7 +94,7 @@ function scrapeProfile(data) { profile.gender = bio.gender; - profile.dateOfBirth = bio.birthdate; + profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD'); profile.age = bio.age; profile.placeOfBirth = bio.born; diff --git a/src/scrapers/pornhub.js b/src/scrapers/pornhub.js index 61f0edd4..f9486013 100755 --- a/src/scrapers/pornhub.js +++ b/src/scrapers/pornhub.js @@ -4,20 +4,13 @@ const { JSDOM } = require('jsdom'); const moment = require('moment'); const http = require('../utils/http'); - -const ethnicityMap = { - White: 'Caucasian', -}; - -const hairMap = { - Brunette: 'brown', -}; +const slugify = require('../utils/slugify'); async function scrapeProfile(html, _url, actorName) { const { document } = new JSDOM(html).window; const entries = Array.from(document.querySelectorAll('.infoPiece'), (el) => el.textContent.replace(/\n|\t/g, '').split(':')); - const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [key.trim()]: value.trim() } : acc), {}); + const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [slugify(key, '_')]: value.trim() } : acc), {}); const profile = { name: actorName, @@ -26,25 +19,26 @@ async function scrapeProfile(html, _url, actorName) { const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio'); const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img'); - if (bio.Gender) profile.gender = bio.Gender.toLowerCase(); - if (bio.ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity; + if (bio.gender) profile.gender = bio.gender; + if (bio.ethnicity) profile.ethnicity = bio.ethnicity; if (descriptionString) profile.description = descriptionString.textContent; - if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020 - if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate(); + if (bio.birthday && !/-0001/.test(bio.birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020 + if (bio.born) profile.birthdate = moment.utc(bio.born, 'YYYY-MM-DD').toDate(); - profile.birthPlace = bio['Birth Place'] || bio.Birthplace; - profile.residencePlace = bio['City and Country']; + profile.birthPlace = bio.birth_place || bio.birthplace; + profile.residencePlace = bio.city_and_country; - if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-'); - if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No'; + if (bio.measurements && bio.measurements !== '--') profile.measurements = bio.measurements; + if (bio.fake_boobs) profile.naturalBoobs = bio.fake_boobs.toLowerCase() === 'no'; - if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1)); - if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1)); - if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase(); - if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes'; - if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes'; + if (bio.height) profile.height = Number(bio.height.match(/\(\d+/)[0].slice(1)); + if (bio.weight) profile.weight = Number(bio.weight.match(/\(\d+/)[0].slice(1)); + if (bio.hair_color) profile.hairColor = bio.hair_color; + if (bio.eyes) profile.eyeColor = bio.eye_color; + if (bio.piercings) profile.hasPiercings = bio.piercings.toLowerCase() === 'yes'; + if (bio.tattoos) profile.hasTattoos = bio.tattoos.toLowerCase() === 'yes'; if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src; profile.social = Array.from(document.querySelectorAll('.socialList a'), (el) => el.href).filter((link) => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason diff --git a/src/scrapers/releases.js b/src/scrapers/releases.js index 29ec6443..38f2ca2a 100644 --- a/src/scrapers/releases.js +++ b/src/scrapers/releases.js @@ -2,7 +2,7 @@ const adultempire = require('./adultempire'); const angelogodshackoriginal = require('./angelogodshackoriginal'); -const archangel = require('./archangel'); +// const archangel = require('./archangel'); const assylum = require('./assylum'); const amateurallure = require('./amateurallure'); const americanpornstar = require('./americanpornstar'); @@ -92,7 +92,6 @@ module.exports = { sexyhub: aylo, // daringsex, // arch angel - archangel, // etc amateurallure, americanpornstar, diff --git a/tests/profiles.js b/tests/profiles.js index e491989b..0bb1d2b3 100644 --- a/tests/profiles.js +++ b/tests/profiles.js @@ -182,6 +182,8 @@ const actors = [ // missax { entity: 'missax', name: 'Alexis Fawx', fields: ['avatar', 'description'] }, { entity: 'allherluv', name: 'Krissy Lynn', fields: ['avatar', 'description'] }, + // full porn network + { entity: 'povperverts', name: 'Krissy Lynn', fields: ['avatar', 'description'] }, // etc. { entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] }, { entity: 'archangel', name: 'Summer Brielle', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'measurements', 'height', 'aliases'] }, @@ -205,6 +207,7 @@ const actors = [ { entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] }, { entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] }, { entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] }, + { entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] }, ]; const actorScrapers = scrapers.actors;