Refactored Cum Louder scraper with unprint, changed entry ID to URL slug due to unreliable ID.

This commit is contained in:
DebaucheryLibrarian
2026-02-08 00:58:49 +01:00
parent 79ff9eb58b
commit 4843f0ebc7
2 changed files with 75 additions and 67 deletions

View File

@@ -1,8 +1,8 @@
'use strict';
const unprint = require('unprint');
const { decode } = require('html-entities');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeAll(items, _channel) {
@@ -12,13 +12,13 @@ function scrapeAll(items, _channel) {
const { date, precision } = query.dateAgo('.fecha');
const poster = query.img('.thumb');
release.entryId = query.number(null, /\d+/, 'onclick');
release.url = query.url(null, 'href', { origin: 'https://www.cumlouder.com' });
release.url = query.url(null, { origin: 'https://www.cumlouder.com' });
release.entryId = new URL(release.url).pathname.match(/video\/([\w-]+)/)?.[1];
release.date = date;
release.datePrecision = precision;
release.title = query.cnt('h2');
release.title = query.content('h2');
release.duration = query.duration('.minutos');
release.poster = [
@@ -30,26 +30,36 @@ function scrapeAll(items, _channel) {
});
}
function scrapeScene({ query }, channel, html) {
/**
 * Fetch one listing page for a channel and scrape every scene tile on it.
 *
 * @param {object} channel - Entity whose `url` is the base of the listing URL.
 * @param {*} page - Page value interpolated into the listing URL.
 * @returns {Promise<*>} Whatever scrapeAll returns when the request succeeded,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page) {
	const listingUrl = `${channel.url}/${page}/`;
	const response = await unprint.get(listingUrl, { selectAll: '.muestra-escena' });

	// Failed requests surface their status to the caller instead of throwing.
	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.context, channel);
}
function scrapeScene({ query, html }, url) {
const release = {};
const { date, precision } = query.dateAgo('.sub-video .added');
release.entryId = html.match(/cumlouder_(\d+)/)?.[1];
release.entryId = new URL(url).pathname.match(/video\/([\w-]+)/)?.[1];
release.title = query.cnt('.video-top h1');
release.title = query.content('.video-top h1');
release.description = query.text('.sub-video p');
release.date = date;
release.datePrecision = precision;
release.actors = query.all('.sub-video .pornstar-link').map((el) => ({
name: query.cnt(el, null),
url: query.url(el, null, 'href', { origin: 'https://www.cumlouder.com' }),
name: unprint.query.content(el, null),
url: unprint.query.url(el, null, { origin: 'https://www.cumlouder.com' }),
}));
release.duration = query.duration('.video-top .duracion');
release.tags = query.cnts('.video-top .tag-link');
release.tags = query.contents('.video-top .tag-link');
release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*)';/)?.[1];
release.video = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]); // no trailers but full-length videos
@@ -59,55 +69,47 @@ function scrapeScene({ query }, channel, html) {
return release;
}
function scrapeProfile({ query, el }, channel) {
/**
 * Fetch a single scene page and hand its context to scrapeScene.
 *
 * @param {string} url - Scene page URL; also forwarded to scrapeScene.
 * @param {object} channel - Entity the scene belongs to; forwarded as a third argument.
 * @returns {Promise<*>} Whatever scrapeScene returns when the request succeeded,
 *   otherwise the response status.
 */
async function fetchScene(url, channel) {
	const response = await unprint.get(url);

	// Failed requests surface their status to the caller instead of throwing.
	if (!response.ok) {
		return response.status;
	}

	// NOTE(review): the scrapeScene signature visible alongside this function
	// declares only (context, url); `channel` is passed through unchanged.
	return scrapeScene(response.context, url, channel);
}
function scrapeProfile({ query }, channel) {
const profile = {};
const bio = query.all('.data-bio li').reduce((acc, bioEl) => ({
...acc,
[slugify(query.cnt(bioEl, 'strong'), '_')]: query.text(bioEl),
[slugify(unprint.query.content(bioEl, 'strong'), '_')]: unprint.query.text(bioEl),
}), {});
profile.nationality = bio.nationality;
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
profile.height = Number(bio.height) * 100;
profile.weight = parseInt(bio.weight, 10);
profile.eyes = bio.eye_color;
profile.hairColor = bio.hair_color;
profile.description = query.cnt('.data-bio p:last-of-type');
profile.description = query.content('.data-bio p:last-of-type');
profile.avatar = query.img('.thumb-bio');
profile.scenes = scrapeAll(qu.initAll(el, '.muestra-escena'), channel);
profile.socials = query.urls('a.twitter-timeline');
profile.scenes = scrapeAll(unprint.initAll(query.all('.muestra-escena')), channel);
return profile;
}
/**
 * Fetch one listing page for a channel through `qu` and scrape every scene tile on it.
 *
 * @param {object} channel - Entity whose `url` is the base of the listing URL.
 * @param {*} page - Page value interpolated into the listing URL.
 * @returns {Promise<*>} Whatever scrapeAll returns when the request succeeded,
 *   otherwise the response status.
 */
async function fetchLatest(channel, page) {
	const listingUrl = `${channel.url}/${page}/`;
	const response = await qu.getAll(listingUrl, '.muestra-escena');

	// Failed requests surface their status to the caller instead of throwing.
	if (!response.ok) {
		return response.status;
	}

	return scrapeAll(response.items, channel);
}
/**
 * Fetch a single scene page through `qu` and hand the result to scrapeScene.
 *
 * @param {string} url - Scene page URL.
 * @param {object} channel - Entity the scene belongs to; forwarded to scrapeScene.
 * @returns {Promise<*>} Whatever scrapeScene returns when the request succeeded,
 *   otherwise the response status.
 */
async function fetchScene(url, channel) {
	const response = await qu.get(url);

	// Failed requests surface their status to the caller instead of throwing.
	if (!response.ok) {
		return response.status;
	}

	// The raw HTML is passed separately from the queried item.
	return scrapeScene(response.item, channel, response.html);
}
async function fetchProfile(actor, channel) {
const res = await qu.get(`https://www.cumlouder.com/girl/${actor.slug}/`, '.listado-escenas');
const res = await unprint.get(`https://www.cumlouder.com/girl/${actor.slug}/`, { select: '.listado-escenas' });
if (res.ok) {
return scrapeProfile(res.item, channel);
return scrapeProfile(res.context, channel);
}
return res.status;

View File

@@ -217,45 +217,47 @@ const actors = [
{ entity: 'modelmediaasia', name: 'Li WeiWei', fields: ['avatar', 'entryId', 'gender', 'alias', 'height', 'weight', 'bust', 'waist', 'hip', 'socials'] },
{ entity: 'delphine', name: 'Bridgette B', fields: ['avatar', 'measurements', 'birthPlace'] },
// etc.
{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
{ entity: 'analvids', name: 'Veronica Leal', fields: ['avatar', 'gender', 'birthCountry', 'nationality', 'age', 'aliases', 'nationality'] },
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
{ entity: 'bangbros', name: 'Kira Perez', fields: ['avatar', 'gender', 'ethnicity', 'hairColor'] },
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
{ entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] },
{ entity: 'cumlouder', name: 'Valentina Nappi', fields: ['avatar', 'nationality', 'dateOfBirth', 'height', 'weight', 'eyes', 'hairColor', 'description', 'socials'] },
{ entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] },
{ entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
{ entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
{ entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] },
{ entity: 'hookuphotshot', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
{ entity: 'inthecrack', name: 'Vicki Chase', fields: ['dateOfBirth', 'height', 'weight', 'ethnicity', 'birthPlace'] },
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
{ entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] },
{ entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] },
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
{ entity: 'pornworld', name: 'Veronica Leal', fields: ['avatar', 'nationality', 'age'] },
{ entity: 'private', name: 'Cherry Kiss', fields: ['avatar', 'description', 'nationality', 'measurements', 'height', 'weight', 'hairColor', 'eye', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings'] },
{ entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
{ entity: 'teenmegaworld', name: 'Sheri Vi', fields: ['avatar', 'description', 'hairColor', 'eyes'] },
{ entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] },
{ entity: 'theflourishxxx', name: 'XWifeKaren', fields: ['avatar', 'description'] },
{ entity: 'tokyohot', name: 'Mai Kawana', url: 'https://my.tokyo-hot.com/cast/2099/', fields: ['avatar', 'birthPlace', 'height', 'cup', 'bust', 'waist', 'hip', 'hairStyle', 'shoeSize', 'bloodType'] },
{ entity: 'rickysroom', name: 'Liz Jordan', fields: ['avatar', 'description', 'birthPlace', 'dateOfBirth', 'measurements', 'height', 'weight', 'eyes', 'hairColor'] },
{ entity: 'cherrypimps', name: 'Andi Avalon', fields: ['avatar', 'height', 'weight', 'dateOfBirth', 'birthPlace', 'ethnicity', 'measurements', 'hair', 'eyes', 'hasTattoos', 'age'] },
{ entity: 'testedefudelidade', name: 'May Akemi', fields: ['avatar'] },
{ entity: 'sexlikereal', name: 'Agatha Vega', fields: ['avatar', 'birthPlace', 'height', 'weight', 'description'] },
{ entity: 'porncz', name: 'Kama Oxi', fields: ['avatar', 'gender', 'birthCountry', 'ethnicity', 'age', 'hairColor', 'cup', 'naturalBoobs', 'hasTattoos'] },
{ entity: 'score', name: 'Vanessa Blue', fields: ['avatar', 'gender', 'placeOfResidence', 'ethnicity', 'height', 'weight', 'measurements', 'hairColor', 'dateOfBirth'] },
{ entity: 'pierrewoodman', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
{ entity: 'wakeupnfuck', name: 'Abby Lee Brazil', fields: ['avatar', 'nationality'] },
{ entity: 'dorcelclub', name: 'Clea Gaultier', fields: ['avatar'] },
{ entity: 'hitzefrei', name: 'Jolee Love', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'measurements', 'height', 'weight', 'eyes', 'hair', 'description'] },
{ entity: 'mariskax', name: 'Honey Demon', fields: ['avatar', 'gender', 'dateOfBirth', 'placeOfBirth', 'measurements', 'height', 'weight', 'hairColor', 'eyes'] },
{ entity: 'pornhub', name: 'Lexi Luna', fields: ['avatar', 'gender', 'ethnicity', 'description', 'birthPlace', 'measurements', 'naturalBoobs', 'height', 'weight', 'hairColor', 'hasPiercings', 'hasTattoos'] },
{ entity: 'fullpornnetwork', name: 'Kenzie Reeves', fields: ['avatar', 'description'] },
{ entity: 'meidenvanholland', name: 'Izzy Bizzy Bang Bang', fields: ['avatar', 'description'] },
{ entity: 'karups', name: 'Peach Lollypop', fields: ['avatar'] },
{ entity: 'boyfun', name: 'Amahd Passer', fields: ['avatar', 'age', 'height', 'weight', 'penisLength', 'isCircumcised'] },
{ entity: 'bang', name: 'Riley Reid', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'ethnicity', 'hairColor', 'eyes'] },
{ entity: 'littlecapricedreams', name: 'Littlecaprice', fields: ['avatar', 'nationality', 'cup', 'measurements', 'height', 'description'] }, // sic
{ entity: 'pascalssubsluts', name: 'Zlata Shine', fields: ['avatar', 'gender', 'nationality', 'hairColor', 'height', 'description'] },
{ entity: 'nebraskacoeds', name: 'Mary Beth Haglin', fields: ['avatar'] },
{ entity: 'firstanalquest', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
{ entity: 'doubleviewcasting', name: 'Abigaile Johnson', fields: ['avatar', 'dateOfBirth', 'birthPlace', 'weight', 'height', 'measurements'] },
{ entity: 'boobpedia', name: 'Paige British', fields: ['avatar'] },
{ entity: 'angelogodshackoriginal', name: 'Emily Pink', fields: ['avatar'] },
{ entity: 'bradmontana', name: 'Alicia Ribeiro', fields: ['avatar', 'gender'] },
{ entity: 'adultempire', name: 'Abella Danger', fields: ['avatar', 'description', 'measurements', 'eyes', 'height', 'weight'] },
{ entity: 'freeones', name: 'Sophia Locke', fields: ['avatar', 'description', 'dateOfBirth', 'age', 'birthPlace', 'nationality', 'ethnicity', 'eyes', 'hairColor', 'bust', 'cup', 'waist', 'hip', 'height', 'weight', 'foot', 'socials', 'hasTattoos', 'tattoos', 'hasPiercings', 'piercings', 'naturalBoobs'] },
];
const actorScrapers = scrapers.actors;
const source = argv.source?.[0] || null;
const sources = argv.sources || null;
async function validateUrl(url, mime = 'image/') {
if (!url) {
@@ -336,7 +338,7 @@ async function init() {
return;
}
if (source && source !== entitySlug) {
if (sources && !sources.includes(entitySlug)) {
// console.log('____', entitySlug);
return;
}
@@ -363,8 +365,10 @@ async function init() {
assert.fail('profile not found');
}
console.log(omit(profile, ['scenes']));
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
if (argv.inspect) {
console.log(omit(profile, ['scenes']));
console.log('Untested fields', Object.entries(profile).filter(([field, value]) => !actor.fields.includes(field) && typeof value !== 'undefined' && value !== null).map(([field]) => `'${field}'`).join(', '));
}
await Promise.all(actor.fields.map(async (field) => {
assert.ok(
@@ -378,6 +382,8 @@ async function init() {
});
}, Promise.resolve());
console.log(actors.length);
await knex.destroy();
}