Refactored the Cum Louder scraper to use unprint; changed the entry ID to the URL slug because the numeric ID is unreliable.
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
'use strict';
|
||||
|
||||
const unprint = require('unprint');
|
||||
const { decode } = require('html-entities');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
function scrapeAll(items, _channel) {
|
||||
@@ -12,13 +12,13 @@ function scrapeAll(items, _channel) {
|
||||
const { date, precision } = query.dateAgo('.fecha');
|
||||
const poster = query.img('.thumb');
|
||||
|
||||
release.entryId = query.number(null, /\d+/, 'onclick');
|
||||
release.url = query.url(null, 'href', { origin: 'https://www.cumlouder.com' });
|
||||
release.url = query.url(null, { origin: 'https://www.cumlouder.com' });
|
||||
release.entryId = new URL(release.url).pathname.match(/video\/([\w-]+)/)?.[1];
|
||||
|
||||
release.date = date;
|
||||
release.datePrecision = precision;
|
||||
|
||||
release.title = query.cnt('h2');
|
||||
release.title = query.content('h2');
|
||||
release.duration = query.duration('.minutos');
|
||||
|
||||
release.poster = [
|
||||
@@ -30,26 +30,36 @@ function scrapeAll(items, _channel) {
|
||||
});
|
||||
}
|
||||
|
||||
function scrapeScene({ query }, channel, html) {
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await unprint.get(`${channel.url}/${page}/`, { selectAll: '.muestra-escena' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeScene({ query, html }, url) {
|
||||
const release = {};
|
||||
|
||||
const { date, precision } = query.dateAgo('.sub-video .added');
|
||||
|
||||
release.entryId = html.match(/cumlouder_(\d+)/)?.[1];
|
||||
release.entryId = new URL(url).pathname.match(/video\/([\w-]+)/)?.[1];
|
||||
|
||||
release.title = query.cnt('.video-top h1');
|
||||
release.title = query.content('.video-top h1');
|
||||
release.description = query.text('.sub-video p');
|
||||
|
||||
release.date = date;
|
||||
release.datePrecision = precision;
|
||||
|
||||
release.actors = query.all('.sub-video .pornstar-link').map((el) => ({
|
||||
name: query.cnt(el, null),
|
||||
url: query.url(el, null, 'href', { origin: 'https://www.cumlouder.com' }),
|
||||
name: unprint.query.content(el, null),
|
||||
url: unprint.query.url(el, null, { origin: 'https://www.cumlouder.com' }),
|
||||
}));
|
||||
|
||||
release.duration = query.duration('.video-top .duracion');
|
||||
release.tags = query.cnts('.video-top .tag-link');
|
||||
release.tags = query.contents('.video-top .tag-link');
|
||||
|
||||
release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*)';/)?.[1];
|
||||
release.video = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]); // no trailers but full-length videos
|
||||
@@ -59,55 +69,47 @@ function scrapeScene({ query }, channel, html) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query, el }, channel) {
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await unprint.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.context, url, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeProfile({ query }, channel) {
|
||||
const profile = {};
|
||||
|
||||
const bio = query.all('.data-bio li').reduce((acc, bioEl) => ({
|
||||
...acc,
|
||||
[slugify(query.cnt(bioEl, 'strong'), '_')]: query.text(bioEl),
|
||||
[slugify(unprint.query.content(bioEl, 'strong'), '_')]: unprint.query.text(bioEl),
|
||||
}), {});
|
||||
|
||||
profile.nationality = bio.nationality;
|
||||
profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
|
||||
profile.dateOfBirth = unprint.extractDate(bio.date_of_birth, 'DD-MM-YYYY');
|
||||
|
||||
profile.height = Number(bio.height) * 100;
|
||||
profile.weight = parseInt(bio.weight, 10);
|
||||
profile.eyes = bio.eye_color;
|
||||
profile.hairColor = bio.hair_color;
|
||||
|
||||
profile.description = query.cnt('.data-bio p:last-of-type');
|
||||
profile.description = query.content('.data-bio p:last-of-type');
|
||||
profile.avatar = query.img('.thumb-bio');
|
||||
|
||||
profile.scenes = scrapeAll(qu.initAll(el, '.muestra-escena'), channel);
|
||||
profile.socials = query.urls('a.twitter-timeline');
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.muestra-escena')), channel);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(channel, page) {
|
||||
const res = await qu.getAll(`${channel.url}/${page}/`, '.muestra-escena');
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.items, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchScene(url, channel) {
|
||||
const res = await qu.get(url);
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeScene(res.item, channel, res.html);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile(actor, channel) {
|
||||
const res = await qu.get(`https://www.cumlouder.com/girl/${actor.slug}/`, '.listado-escenas');
|
||||
const res = await unprint.get(`https://www.cumlouder.com/girl/${actor.slug}/`, { select: '.listado-escenas' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.item, channel);
|
||||
return scrapeProfile(res.context, channel);
|
||||
}
|
||||
|
||||
return res.status;
|
||||
|
||||
Reference in New Issue
Block a user