Moved Arch Angel to Full Porn Network and adapted scraper.

DebaucheryLibrarian 2026-01-20 04:28:49 +01:00
parent b2116f728f
commit 2a4dce106e
8 changed files with 113 additions and 219 deletions

View File

@@ -817,6 +817,8 @@ const sites = [
{
slug: 'archangel',
name: 'ArchAngel',
parent: 'fullpornnetwork',
independent: true,
url: 'https://www.archangelvideo.com',
},
// ASSYLUM
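The new `parent: 'fullpornnetwork'` and `independent: true` pair registers ArchAngel as a standalone brand under the Full Porn Network umbrella. A minimal sketch of how such an entry could resolve to a scraper, assuming a slug-keyed index like the one changed below; the resolver name and shape are illustrative, not from the repo:

// Prefer a scraper registered under the site's own slug; otherwise fall
// back to the parent network's scraper.
function resolveScraper(site, scrapers) {
  return scrapers[site.slug] || (site.parent && scrapers[site.parent]) || null;
}

// With the entry above, both paths land on the Full Porn Network scraper
// once the index maps archangel to it:
// resolveScraper({ slug: 'archangel', parent: 'fullpornnetwork' }, scrapers);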

View File

@@ -2,7 +2,6 @@
const adultempire = require('./adultempire');
const angelogodshackoriginal = require('./angelogodshackoriginal');
const archangel = require('./archangel');
const americanpornstar = require('./americanpornstar');
const aziani = require('./aziani');
const badoink = require('./badoink');
@@ -178,20 +177,30 @@ module.exports = {
// naughty america
naughtyamerica,
tonightsgirlfriend: naughtyamerica,
// full porn network
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
archangel: fullpornnetwork,
baddaddypov: fullpornnetwork,
dtfsluts: fullpornnetwork,
girlfaction: fullpornnetwork,
hergape: fullpornnetwork,
homemadeanalwhores: fullpornnetwork,
jamesdeen: fullpornnetwork,
mugfucked: fullpornnetwork,
onlyprince: fullpornnetwork,
pervertgallery: fullpornnetwork,
povperverts: fullpornnetwork,
// etc
'18vr': badoink,
theflourishxxx: theflourish,
adultempire,
archangel,
allherluv: missax,
americanpornstar,
analbbc: fullpornnetwork,
analized: fullpornnetwork,
analviolation: fullpornnetwork,
angelogodshackoriginal,
asiam: modelmedia,
babevr: badoink,
baddaddypov: fullpornnetwork,
badoinkvr: badoink,
bamvisions,
bang,
@@ -205,17 +214,12 @@ module.exports = {
cumlouder,
dorcelclub: dorcel,
doubleviewcasting: firstanalquest,
dtfsluts: fullpornnetwork,
exploitedx, // only from known URL that will specify site
firstanalquest,
freeones,
girlfaction: fullpornnetwork,
hergape: fullpornnetwork,
hitzefrei,
homemadeanalwhores: fullpornnetwork,
hookuphotshot,
inthecrack,
jamesdeen: fullpornnetwork,
jerkaoke: modelmedia,
julesjordan,
karups,
@@ -233,17 +237,13 @@ module.exports = {
mariskax,
missax,
mylf: teamskeet,
mugfucked: fullpornnetwork,
nebraskacoeds: elevatedx,
onlyprince: fullpornnetwork,
pascalssubsluts,
pervcity,
dpdiva: pervcity,
pervertgallery: fullpornnetwork,
porncz,
pornhub,
pornworld,
povperverts: fullpornnetwork,
private: privateNetwork,
realvr: badoink,
rickysroom,
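Because the index is a plain object keyed by channel slug, moving ArchAngel amounts to re-pointing its key at the shared network module. A self-contained sketch of the pattern; the stub object stands in for require('./fullpornnetwork'):

// Stand-in for the real module; in the index above it comes from require().
const fullpornnetwork = { fetchLatest: async () => [] };

const scrapers = {
  archangel: fullpornnetwork,
  analized: fullpornnetwork,
};

// One module instance serves many slugs:
console.log(scrapers.archangel === scrapers.analized); // true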

View File

@@ -1,107 +0,0 @@
'use strict';
const unprint = require('unprint');
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
const tryUrls = require('../utils/try-urls');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
release.url = query.url('a');
release.entryId = slugify(new URL(release.url).pathname.match(/trailers\/(.*)/)[1]);
release.title = query.content('h2 a');
release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null, { origin: channel.url }),
}));
release.poster = query.img('.thumbnail img');
release.teaser = query.video('.thumbnail img', { attribute: 'data-vid' }); // not a mistake, video source is on img tag
return release;
});
}
async function fetchLatest(channel, page = 1) {
const url = `${channel.url}/porn-categories/movies/?sort=most-recent&page=${page}`;
const res = await unprint.get(url, { selectAll: '.content div[data-setid]' });
if (res.ok) {
return scrapeAll(res.context, channel);
}
return res.status;
}
function scrapeScene({ query }, { url }) {
const release = {};
release.entryId = slugify(new URL(url).pathname.match(/trailers\/(.*)/)[1]);
release.title = query.content('h1.title_bar');
release.description = query.content('.description-text');
release.date = query.date('//label[contains(text(), \'Date\')]/following-sibling::p[1]', 'YYYY-MM-DD');
release.actors = query.all('.text a[href*="/models"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.tags = query.contents('.text a[href*="categories/"]');
release.poster = query.poster('#preview video');
release.trailer = query.video('#preview video source');
return release;
}
function scrapeProfile({ query }, { url }) {
const profile = { url };
const bio = Object.fromEntries(query.all('.model-details > div').map((bioEl) => [
slugify(unprint.query.content(bioEl, 'h2'), '_'),
unprint.query.text(bioEl),
]));
profile.avatar = [
query.img('.model_bio_thumb', { attribute: 'src0_3x' }),
query.img('.model_bio_thumb', { attribute: 'src0_2x' }),
query.img('.model_bio_thumb', { attribute: 'src0_1x' }),
];
profile.description = [query.content('.model-bio-text'), bio.funfact].filter(Boolean).join(' ');
profile.aliases = bio.alias?.split(/[,\n]/).map((alias) => alias.trim());
profile.age = parseInt(bio.age, 10) || null;
profile.dateOfBirth = unprint.extractDate(bio.age, 'MM/DD/YYYY');
profile.measurements = bio.measurements;
profile.height = Number(bio.height.match(/(\d+)\s*cm/)?.[1]) || convert(bio.height, 'cm');
return profile;
}
async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) {
const { res, url } = await tryUrls([
actorUrl,
`${entity.url}/models/${slugify(actorName, '')}.html`,
`${entity.url}/models/${slugify(actorName, '-')}.html`,
]);
if (res.ok) {
return scrapeProfile(res.context, { entity, include, url });
}
return res.status;
}
module.exports = {
fetchLatest,
fetchProfile,
scrapeScene,
};
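The deleted file's fetchProfile already probed several candidate profile URLs through the tryUrls helper, whose implementation is not part of this diff. A minimal sketch of what such a helper could look like, assuming unprint.get resolves with an ok flag as used above; the real util may differ:

const unprint = require('unprint');

// Fetch candidate URLs in order; return the first successful response along
// with the URL that produced it, or the last failed response otherwise.
async function tryUrls(urls, options) {
  let res = null;
  let url = null;

  for (const candidate of urls.filter(Boolean)) {
    res = await unprint.get(candidate, options);
    url = candidate;

    if (res.ok) {
      break;
    }
  }

  return { res, url };
}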

View File

@@ -1,112 +1,115 @@
'use strict';
const { get, geta, ctxa } = require('../utils/q');
const slugify = require('../utils/slugify');
const unprint = require('unprint');
function scrapeAll(scenes, site) {
return scenes.map(({ _el, qu }) => {
const slugify = require('../utils/slugify');
const { convert } = require('../utils/convert');
const tryUrls = require('../utils/try-urls');
function scrapeAll(scenes, channel) {
return scenes.map(({ query }) => {
const release = {};
// release.entryId = el.dataset.setid || qu.q('.update_thumb', 'id').match(/\w+-\w+-(\d+)-\d+/)[1];
release.url = `${site.url}${qu.url('.scene-title a')}`;
release.entryId = new URL(release.url).pathname
.toLowerCase()
.replace(/\/$/, '')
.split('/')
.slice(-1)[0];
release.url = query.url('a');
release.entryId = slugify(new URL(release.url).pathname.match(/trailers\/(.*)/)[1]);
release.title = qu.q('.scene-title', true);
// release.description = qu.q('.title', 'title');
release.title = query.content('h2 a');
release.duration = query.duration('.video-data');
// release.date = qu.date('.video-data > span:last-child', 'YYYY-MM-DD');
const minutes = qu.q('.scene-details', true).match(/(\d+) minutes/)[1];
release.duration = Number(minutes) * 60;
release.actors = query.all('a[href*="models/"], a[href*="sets.php"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null, { origin: channel.url }),
}));
release.actors = qu.text('.update-models').trim().split(/\s*,\s*/g);
release.poster = query.img('.thumbnail img');
const poster = qu.img('.scene-thumb img');
const teaser = query.video('.thumbnail img', { attribute: 'data-vid' }); // not a mistake, video source is on img tag
if (poster) {
release.poster = [
poster.replace('-1x', '-2x'),
poster,
];
if (!teaser?.includes('blur')) { // blurred teaser appears to be a site-wide SFW placeholder
release.teaser = teaser;
}
return release;
});
}
function scrapeScene({ qu }, url, site) {
const release = { url };
async function fetchLatest(channel, page = 1) {
const url = `${channel.url}/porn-categories/movies/?page=${page}&sort=most-recent`; // parameter order matters for some reason!
const res = await unprint.get(url, { selectAll: '.content div[data-setid]' });
release.entryId = new URL(url).pathname
.toLowerCase()
.replace(/\/$/, '')
.split('/')
.slice(-1)[0];
if (res.ok) {
return scrapeAll(res.context, channel);
}
release.title = qu.q('h4.text-center', true);
release.description = qu.q('p.hide-for-small-only', true);
return res.status;
}
release.actors = qu.all('a[href*="/model"]', true);
release.tags = qu.all('a[href*="/category"]', true);
function scrapeScene({ query }, { url, entity }) {
const release = {};
const trailer = qu.video('source');
if (trailer) release.trailer = { src: `${site.url}${trailer}` };
release.entryId = slugify(new URL(url).pathname.match(/trailers\/(.*)/)[1]);
release.title = query.content('h1.title_bar');
release.description = query.content('.description-text, #description');
release.date = query.date('//label[contains(text(), \'Date\')]/following-sibling::p[1]', 'YYYY-MM-DD')
|| query.date('//label[contains(text(), \'Date Added\')]/following-sibling::text()[1]', 'YYYY-MM-DD');
release.actors = query.all('#preview a[href*="/models"]').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null, { origin: entity.origin }),
}));
release.tags = query.contents('#preview a[href*="categories/"]');
release.poster = query.poster('#preview video');
release.trailer = query.video('#preview video source');
return release;
}
function scrapeProfile({ el, qu }, actorName) {
if (slugify(qu.q('h1', true)) !== slugify(actorName)) {
// no 404 when actor is not found
return null;
}
function scrapeProfile({ query }, { url }) {
const profile = { url };
const profile = {};
const bio = Object.fromEntries(query.all('.model-details > div').map((bioEl) => [
slugify(unprint.query.content(bioEl, 'h2'), '_'),
unprint.query.text(bioEl),
]));
const description = qu.q('h4 + p', true);
if (description) profile.description = description;
profile.avatar = [
query.img('.model_bio_thumb', { attribute: 'src0_3x' }),
query.img('.model_bio_thumb', { attribute: 'src0_2x' }),
query.img('.model_bio_thumb', { attribute: 'src0_1x' }),
query.img('.model_bio_thumb'),
].filter(Boolean);
const avatar = qu.img('main img');
profile.description = [query.content('.model-bio-text, #performer-description'), bio.funfact].filter(Boolean).join(' ');
profile.aliases = bio.alias?.split(/[,\n]/).map((alias) => alias.trim());
if (avatar) {
profile.avatar = [
avatar.replace('set-1x', 'set-2x'),
avatar,
];
}
profile.releases = scrapeAll(ctxa(el, '.update, .scene-update'));
profile.age = parseInt(bio.age, 10) || null;
profile.dateOfBirth = unprint.extractDate(bio.age, 'MM/DD/YYYY');
profile.measurements = bio.measurements;
profile.height = Number(bio.height?.match(/(\d+)\s*cm/)?.[1]) || convert(bio.height, 'cm');
return profile;
}
async function fetchLatest(site, page = 1) {
const url = `${site.url}/1/scenes/recent/${page}/`;
const res = await geta(url, '.latest-updates .update, .scene-update');
async function fetchProfile({ name: actorName, url: actorUrl }, { entity, include }) {
const { res, url } = await tryUrls([
actorUrl,
`${entity.url}/models/${slugify(actorName, '')}.html`,
`${entity.url}/models/${slugify(actorName, '-')}.html`,
]);
return res.ok ? scrapeAll(res.items, site) : res.status;
}
if (res.ok) {
return scrapeProfile(res.context, { entity, include, url });
}
async function fetchScene(url, site) {
const res = await get(url, 'main');
return res.ok && res.item ? scrapeScene(res.item, url, site) : res.status;
}
async function fetchProfile({ name: actorName }, { site }) {
const actorSlug = slugify(actorName, '');
const url = `${site.url}/1/model/${actorSlug}`;
const res = await get(url);
return res.ok ? scrapeProfile(res.item, actorName) : res.status;
return res.status;
}
module.exports = {
fetchLatest,
fetchScene,
fetchProfile,
scrapeScene,
};
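The rewritten scrapeProfile turns the .model-details blocks into a keyed bio object by slugifying each heading. The same pattern reduced to plain data so it runs standalone; the inline slug helper is only a stand-in for the project's slugify util:

// Stand-in for ../utils/slugify with '_' as the delimiter.
const slug = (value, delimiter = '_') => value.trim().toLowerCase().replace(/\W+/g, delimiter);

const blocks = [
  ['Age', '08/15/1987'],
  ['Measurements', '34C-26-36'],
  ['Height', "5'7\" (170 cm)"],
];

// [heading, text] pairs become a lookup object, mirroring the scraper.
const bio = Object.fromEntries(blocks.map(([heading, text]) => [slug(heading), text]));

console.log(bio.measurements); // '34C-26-36'
console.log(Number(bio.height.match(/(\d+)\s*cm/)?.[1])); // 170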

View File

@@ -94,7 +94,7 @@ function scrapeProfile(data) {
profile.gender = bio.gender;
profile.dateOfBirth = bio.birthdate;
profile.dateOfBirth = unprint.extractDate(bio.birthdate, 'YYYY-MM-DD');
profile.age = bio.age;
profile.placeOfBirth = bio.born;

View File

@@ -4,20 +4,13 @@ const { JSDOM } = require('jsdom');
const moment = require('moment');
const http = require('../utils/http');
const ethnicityMap = {
White: 'Caucasian',
};
const hairMap = {
Brunette: 'brown',
};
const slugify = require('../utils/slugify');
async function scrapeProfile(html, _url, actorName) {
const { document } = new JSDOM(html).window;
const entries = Array.from(document.querySelectorAll('.infoPiece'), (el) => el.textContent.replace(/\n|\t/g, '').split(':'));
const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [key.trim()]: value.trim() } : acc), {});
const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [slugify(key, '_')]: value.trim() } : acc), {});
const profile = {
name: actorName,
@@ -26,25 +19,26 @@ async function scrapeProfile(html, _url, actorName) {
const descriptionString = document.querySelector('div[itemprop="description"]') || document.querySelector('.longBio');
const avatarEl = document.querySelector('#getAvatar') || document.querySelector('.thumbImage img');
if (bio.Gender) profile.gender = bio.Gender.toLowerCase();
if (bio.Ethnicity) profile.ethnicity = ethnicityMap[bio.Ethnicity] || bio.Ethnicity;
if (bio.gender) profile.gender = bio.gender;
if (bio.ethnicity) profile.ethnicity = bio.ethnicity;
if (descriptionString) profile.description = descriptionString.textContent;
if (bio.Birthday && !/-0001/.test(bio.Birthday)) profile.birthdate = moment.utc(bio.Birthday, 'MMM D, YYYY').toDate(); // birthyear sometimes -0001, see Spencer Bradley as of january 2020
if (bio.Born) profile.birthdate = moment.utc(bio.Born, 'YYYY-MM-DD').toDate();
if (bio.birthday && !/-0001/.test(bio.birthday)) profile.birthdate = moment.utc(bio.birthday, 'MMM D, YYYY').toDate(); // birth year sometimes -0001, see Spencer Bradley as of January 2020
if (bio.born) profile.birthdate = moment.utc(bio.born, 'YYYY-MM-DD').toDate();
profile.birthPlace = bio['Birth Place'] || bio.Birthplace;
profile.residencePlace = bio['City and Country'];
profile.birthPlace = bio.birth_place || bio.birthplace;
profile.residencePlace = bio.city_and_country;
if (bio.Measurements && bio.Measurements !== '--') [profile.bust, profile.waist, profile.hip] = bio.Measurements.split('-');
if (bio['Fake Boobs']) profile.naturalBoobs = bio['Fake Boobs'] === 'No';
if (bio.measurements && bio.measurements !== '--') profile.measurements = bio.measurements;
if (bio.fake_boobs) profile.naturalBoobs = bio.fake_boobs.toLowerCase() === 'no';
if (bio.Height) profile.height = Number(bio.Height.match(/\(\d+/)[0].slice(1));
if (bio.Weight) profile.weight = Number(bio.Weight.match(/\(\d+/)[0].slice(1));
if (bio['Hair Color']) profile.hair = hairMap[bio['Hair Color']] || bio['Hair Color'].toLowerCase();
if (bio.Piercings) profile.hasPiercings = bio.Piercings === 'Yes';
if (bio.Tattoos) profile.hasTattoos = bio.Tattoos === 'Yes';
if (bio.height) profile.height = Number(bio.height.match(/\(\d+/)[0].slice(1));
if (bio.weight) profile.weight = Number(bio.weight.match(/\(\d+/)[0].slice(1));
if (bio.hair_color) profile.hairColor = bio.hair_color;
if (bio.eye_color) profile.eyeColor = bio.eye_color;
if (bio.piercings) profile.hasPiercings = bio.piercings.toLowerCase() === 'yes';
if (bio.tattoos) profile.hasTattoos = bio.tattoos.toLowerCase() === 'yes';
if (avatarEl && !/default\//.test(avatarEl.src)) profile.avatar = avatarEl.src;
profile.social = Array.from(document.querySelectorAll('.socialList a'), (el) => el.href).filter((link) => link !== 'https://www.twitter.com/'); // PH links to Twitter itself for some reason
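The PornHub rewrite routes every .infoPiece label through slugify(key, '_'), so lookups like bio.hair_color no longer depend on the page's capitalization or spacing. The reduce, replayed on plain data so it runs standalone; the inline slug helper is again a stand-in:

const slug = (value, delimiter = '_') => value.trim().toLowerCase().replace(/\W+/g, delimiter);

const entries = [
  ['Hair Color', ' Brunette'],
  ['Fake Boobs', ' No'],
  [''], // rows without a key are skipped, as in the scraper
];

const bio = entries.reduce((acc, [key, value]) => (key ? { ...acc, [slug(key)]: value.trim() } : acc), {});

console.log(bio.hair_color); // 'Brunette'
console.log(bio.fake_boobs.toLowerCase() === 'no'); // true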

View File

@@ -2,7 +2,7 @@
const adultempire = require('./adultempire');
const angelogodshackoriginal = require('./angelogodshackoriginal');
const archangel = require('./archangel');
// const archangel = require('./archangel');
const assylum = require('./assylum');
const amateurallure = require('./amateurallure');
const americanpornstar = require('./americanpornstar');
@@ -92,7 +92,6 @@ module.exports = {
sexyhub: aylo,
// daringsex,
// arch angel
archangel,
// etc
amateurallure,
americanpornstar,

View File

@@ -182,6 +182,8 @@ const actors = [
// missax
{ entity: 'missax', name: 'Alexis Fawx', fields: ['avatar', 'description'] },
{ entity: 'allherluv', name: 'Krissy Lynn', fields: ['avatar', 'description'] },
// full porn network
{ entity: 'povperverts', name: 'Krissy Lynn', fields: ['avatar', 'description'] },
// etc.
{ entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases'] },
{ entity: 'archangel', name: 'Summer Brielle', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'measurements', 'height', 'aliases'] },
@@ -205,6 +207,7 @@ const actors = [
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
];
const actorScrapers = scrapers.actors;
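A hedged sketch of how a fixture list like this could drive a profile smoke test, assuming each module in scrapers.actors exposes fetchProfile as in the files above; the runner and the simplified entity object are hypothetical:

// Fetch each actor's profile and report which expected fields came back empty.
async function checkActors(actors, actorScrapers) {
  for (const { entity, name, fields } of actors) {
    const scraper = actorScrapers[entity];
    // In the real pipeline the entity object would also carry the channel's URL.
    const profile = await scraper.fetchProfile({ name }, { entity: { slug: entity }, include: {} });
    const missing = fields.filter((field) => !profile?.[field]);

    console.log(entity, name, missing.length === 0 ? 'ok' : `missing: ${missing.join(', ')}`);
  }
}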