Finished Cum Louder scraper, updated Vixen scraper. Added tag posters.

This commit is contained in:
DebaucheryLibrarian
2021-08-17 19:25:10 +02:00
parent 715e44cf21
commit f00e37490c
30 changed files with 34 additions and 22 deletions

View File

@@ -218,7 +218,7 @@ async function fetchEntitiesBySlug(entitySlugs, sort = 'asc') {
const entitiesBySlug = entities.rows.reduce((accEntities, { entity }) => ({
...accEntities,
[entity.slug]: accEntities[entity.slug] || curateEntity(entity, true),
[urlToSiteSlug(entity.url)]: accEntities[entity.slug] || curateEntity(entity, true),
[urlToSiteSlug(entity.url)]: accEntities[urlToSiteSlug(entity.url)] || curateEntity(entity, true),
}), {});
return entitiesBySlug;

View File

@@ -3,6 +3,7 @@
const { decode } = require('html-entities');
const qu = require('../utils/qu');
const slugify = require('../utils/slugify');
function scrapeAll(items, _channel) {
return items.map(({ query }) => {
@@ -25,8 +26,6 @@ function scrapeAll(items, _channel) {
poster,
];
console.log(release);
return release;
});
}
@@ -53,21 +52,33 @@ function scrapeScene({ query }, channel, html) {
release.tags = query.cnts('.video-top .tag-link');
release.poster = query.poster() || html.match(/urlImg\s*=\s*'(.*)';/)?.[1];
release.trailer = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]);
release.video = query.video() || decode(html.match(/urlVideo\s*=\s*'(.*)';/)?.[1]); // no trailers but full-length videos
release.shootId = release.poster?.match(/\/rc(\d+)/)?.[1] || release.trailer?.match(/\/episodio_(\d+)/)?.[1];
release.shootId = release.poster?.match(/\/rc(\d+)/)?.[1] || release.video?.match(/\/episodio_(\d+)/)?.[1];
console.log(release);
return release;
}
/**
 * Build an actor profile from a performer page.
 *
 * Collects the `.data-bio li` rows into a lookup keyed by the slugified
 * `<strong>` label of each row, maps the known rows onto profile fields and
 * attaches the scenes found in the `.muestra-escena` items of the page.
 *
 * @param {Object} context - Page context.
 * @param {Object} context.query - Query helpers for the page; `all`, `cnt`, `text` and `img` are used.
 * @param {Object} context.el - Root element handed to `qu.initAll` to locate the scene items.
 * @param {Object} channel - Forwarded unchanged to `scrapeAll`.
 * @returns {Object} The profile. `height` and `weight` are only set when the
 *   page yields a usable number, so callers never receive `NaN`.
 */
function scrapeProfile({ query, el }, channel) {
  const profile = {};

  // One entry per bio row; when two rows share a label the later one wins.
  const bio = Object.fromEntries(query.all('.data-bio li').map((bioEl) => [
    slugify(query.cnt(bioEl, 'strong'), '_'),
    query.text(bioEl),
  ]));

  profile.nationality = bio.nationality;
  profile.dateOfBirth = qu.extractDate(bio.date_of_birth, 'DD-MM-YYYY');

  // The value is scaled by 100, so it is presumably listed in metres - TODO confirm against the site.
  // Rounding removes float artefacts (1.15 * 100 === 114.99999999999999).
  const height = Number(bio.height);

  if (Number.isFinite(height) && height > 0) {
    profile.height = Math.round(height * 100);
  }

  // parseInt keeps the leading integer and ignores any trailing unit text.
  const weight = parseInt(bio.weight, 10);

  if (!Number.isNaN(weight)) {
    profile.weight = weight;
  }

  profile.eyes = bio.eye_color;
  profile.hairColor = bio.hair_color;

  profile.description = query.cnt('.data-bio p:last-of-type');
  profile.avatar = query.img('.thumb-bio');

  profile.scenes = scrapeAll(qu.initAll(el, '.muestra-escena'), channel);

  return profile;
}
@@ -92,11 +103,11 @@ async function fetchScene(url, channel) {
return res.status;
}
async function fetchProfile(actor) {
async function fetchProfile(actor, channel) {
const res = await qu.get(`https://www.cumlouder.com/girl/${actor.slug}/`, '.listado-escenas');
if (res.ok) {
return scrapeProfile(res.item);
return scrapeProfile(res.item, channel);
}
return res.status;

View File

@@ -204,7 +204,7 @@ function scrapeUpcoming(scene, site) {
release.poster = getPosterFallbacks(scene.images.poster);
release.teaser = getTeaserFallbacks(scene.previews.poster);
release.entryId = (release.poster[0] || release.teaser[0])?.match(/\/(\d+)/)?.[1];
release.entryId = (release.poster[0] || release.teaser[0])?.src?.match(/\/(\d+)/)?.[1];
return [release];
}