Improved Little Caprice Dreams scraper.

This commit is contained in:
DebaucheryLibrarian 2020-11-28 00:46:30 +01:00
parent 9a183c7ffb
commit 71c884fe48
5 changed files with 82 additions and 11 deletions

View File

@ -166,6 +166,7 @@ module.exports = {
'hitzefrei', 'hitzefrei',
'porncz', 'porncz',
'czechav', 'czechav',
'littlecapricedreams',
'gangbangcreampie', 'gangbangcreampie',
'gloryholesecrets', 'gloryholesecrets',
'aziani', 'aziani',

View File

@ -661,7 +661,7 @@ async function scrapeActors(argNames) {
logger.info(`Scraping profiles for ${actorNames.length} actors`); logger.info(`Scraping profiles for ${actorNames.length} actors`);
const sources = argv.actorsSources || config.profiles || Object.keys(scrapers.actors); const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
const entitySlugs = sources.flat(); const entitySlugs = sources.flat();
const [entities, existingActorEntries] = await Promise.all([ const [entities, existingActorEntries] = await Promise.all([

View File

@ -72,7 +72,7 @@ const { argv } = yargs
.option('actor-sources', { .option('actor-sources', {
describe: 'Use these scrapers for actor data', describe: 'Use these scrapers for actor data',
type: 'array', type: 'array',
alias: ['actor-source', 'profile-sources', 'profile-source', 'source'], alias: ['actor-source', 'profile-sources', 'profile-source', 'source', 'sources'],
}) })
.option('movie-scenes', { .option('movie-scenes', {
describe: 'Fetch all scenes for a movie', describe: 'Fetch all scenes for a movie',

View File

@ -17,13 +17,21 @@ function matchChannel(release, channel) {
serieNames.superprivate = serieNames.superprivatex; serieNames.superprivate = serieNames.superprivatex;
serieNames.nasst = serieNames.nassty; serieNames.nasst = serieNames.nassty;
serieNames.sexlesson = serieNames.sexlessons; serieNames.sexlesson = serieNames.sexlessons;
serieNames['sex lesson'] = serieNames.sexlessons;
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0]; // ensure longest key matches first
const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);
const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
const serie = serieName && serieNames[slugify(serieName, '')]; const serie = serieName && serieNames[slugify(serieName, '')];
return serie?.slug || null; if (serie) {
// title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})[\\s:-]*`, 'i'), ''), return {
slug: serie.slug,
title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-:/]+\\s*`, 'ig'), ''),
};
}
return null;
} }
function scrapeAll(scenes, channel) { function scrapeAll(scenes, channel) {
@ -41,9 +49,10 @@ function scrapeAll(scenes, channel) {
referer: channel.url, referer: channel.url,
}; };
release.channel = matchChannel(release, channel); return {
...release,
return release; ...matchChannel(release, channel),
};
}); });
} }
@ -100,9 +109,32 @@ async function scrapeScene({ query }, url, channel, include) {
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]')); release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
} }
release.channel = matchChannel(release, channel); return {
...release,
...matchChannel(release, channel),
};
}
return release; function scrapeProfile({ query }, url) {
const profile = {};
const bio = query.cnts('div p').reduce((acc, item) => {
const [key, value] = item.split(/\s*:\s*/);
return {
...acc,
[slugify(key, '_')]: value.trim(),
};
}, {});
profile.avatar = {
src: query.img('.model-page'),
referer: url,
};
console.log(bio);
console.log(profile);
return profile;
} }
async function fetchLatest(channel) { async function fetchLatest(channel) {
@ -128,7 +160,44 @@ async function fetchScene(url, channel, baseRelease, include) {
return res.status; return res.status;
} }
/**
 * Resolve the profile page URL for an actor.
 *
 * When the actor already carries a URL it is returned as-is. Otherwise the
 * site's pornstar overview page is fetched and searched for an entry whose
 * image title slugifies to the actor's slug.
 *
 * @param {{ url?: string, slug?: string }} baseActor - actor to look up
 * @returns {Promise<string|number|null>} the profile URL, the HTTP status of
 *   the overview request when that request was not ok, or null when no
 *   overview entry matches the actor's slug
 */
async function getActorUrl(baseActor) {
	const { url: knownUrl, slug: wantedSlug } = baseActor;

	if (knownUrl) {
		return knownUrl;
	}

	const overview = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');

	if (overview.ok) {
		// each overview item is matched on the slugified title of its image
		const match = overview.items.find((item) => slugify(item.query.q('img', 'title')) === wantedSlug);

		return match ? match.query.url('a') : null;
	}

	return overview.status;
}
/**
 * Fetch and scrape the profile page for an actor.
 *
 * @param {object} baseActor - actor to look up; passed to getActorUrl
 * @param {object} entity - forwarded to scrapeProfile
 * @returns {Promise<object|number|null>} the scraped profile, an HTTP status
 *   when either the URL lookup or the profile request failed, or null when
 *   no profile URL could be found for the actor
 */
async function fetchProfile(baseActor, entity) {
	const actorUrl = await getActorUrl(baseActor);

	if (!actorUrl) {
		return null;
	}

	// getActorUrl returns the HTTP status (a number) when its overview request
	// fails; pass that on as a status instead of requesting it as if it were a URL
	if (typeof actorUrl !== 'string') {
		return actorUrl;
	}

	const actorRes = await qu.get(actorUrl, '#main-content');

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, entity);
	}

	return actorRes.status;
}
module.exports = { module.exports = {
fetchLatest, fetchLatest,
fetchScene, fetchScene,
fetchProfile,
}; };

View File

@ -223,6 +223,7 @@ const scrapers = {
killergram, killergram,
kink, kink,
legalporno, legalporno,
littlecapricedreams,
men, men,
metrohd, metrohd,
milehighmedia, milehighmedia,