Improved Little Caprice Dreams scraper.
This commit is contained in:
parent
9a183c7ffb
commit
71c884fe48
|
@ -166,6 +166,7 @@ module.exports = {
|
||||||
'hitzefrei',
|
'hitzefrei',
|
||||||
'porncz',
|
'porncz',
|
||||||
'czechav',
|
'czechav',
|
||||||
|
'littlecapricedreams',
|
||||||
'gangbangcreampie',
|
'gangbangcreampie',
|
||||||
'gloryholesecrets',
|
'gloryholesecrets',
|
||||||
'aziani',
|
'aziani',
|
||||||
|
|
|
@ -661,7 +661,7 @@ async function scrapeActors(argNames) {
|
||||||
|
|
||||||
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
logger.info(`Scraping profiles for ${actorNames.length} actors`);
|
||||||
|
|
||||||
const sources = argv.actorsSources || config.profiles || Object.keys(scrapers.actors);
|
const sources = argv.profileSources || config.profiles || Object.keys(scrapers.actors);
|
||||||
const entitySlugs = sources.flat();
|
const entitySlugs = sources.flat();
|
||||||
|
|
||||||
const [entities, existingActorEntries] = await Promise.all([
|
const [entities, existingActorEntries] = await Promise.all([
|
||||||
|
|
|
@ -72,7 +72,7 @@ const { argv } = yargs
|
||||||
.option('actor-sources', {
|
.option('actor-sources', {
|
||||||
describe: 'Use these scrapers for actor data',
|
describe: 'Use these scrapers for actor data',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
alias: ['actor-source', 'profile-sources', 'profile-source', 'source'],
|
alias: ['actor-source', 'profile-sources', 'profile-source', 'source', 'sources'],
|
||||||
})
|
})
|
||||||
.option('movie-scenes', {
|
.option('movie-scenes', {
|
||||||
describe: 'Fetch all scenes for a movie',
|
describe: 'Fetch all scenes for a movie',
|
||||||
|
|
|
@ -17,13 +17,21 @@ function matchChannel(release, channel) {
|
||||||
serieNames.superprivate = serieNames.superprivatex;
|
serieNames.superprivate = serieNames.superprivatex;
|
||||||
serieNames.nasst = serieNames.nassty;
|
serieNames.nasst = serieNames.nassty;
|
||||||
serieNames.sexlesson = serieNames.sexlessons;
|
serieNames.sexlesson = serieNames.sexlessons;
|
||||||
serieNames['sex lesson'] = serieNames.sexlessons;
|
|
||||||
|
|
||||||
const serieName = release.title.match(new RegExp(Object.keys(serieNames).join('|'), 'i'))?.[0];
|
// ensure longest key matches first
|
||||||
|
const serieKeys = Object.keys(serieNames).sort((nameA, nameB) => nameB.length - nameA.length);
|
||||||
|
|
||||||
|
const serieName = release.title.match(new RegExp(serieKeys.join('|'), 'i'))?.[0];
|
||||||
const serie = serieName && serieNames[slugify(serieName, '')];
|
const serie = serieName && serieNames[slugify(serieName, '')];
|
||||||
|
|
||||||
return serie?.slug || null;
|
if (serie) {
|
||||||
// title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})[\\s:–-]*`, 'i'), ''),
|
return {
|
||||||
|
slug: serie.slug,
|
||||||
|
title: release.title.replace(new RegExp(`(${serieName}|${serie.name}|${serie.slug})\\s*[-–:/]+\\s*`, 'ig'), ''),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeAll(scenes, channel) {
|
function scrapeAll(scenes, channel) {
|
||||||
|
@ -41,9 +49,10 @@ function scrapeAll(scenes, channel) {
|
||||||
referer: channel.url,
|
referer: channel.url,
|
||||||
};
|
};
|
||||||
|
|
||||||
release.channel = matchChannel(release, channel);
|
return {
|
||||||
|
...release,
|
||||||
return release;
|
...matchChannel(release, channel),
|
||||||
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,9 +109,32 @@ async function scrapeScene({ query }, url, channel, include) {
|
||||||
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
|
release.photos = await fetchPhotos(query.url('.vid_buttons a[href*="project/"]'));
|
||||||
}
|
}
|
||||||
|
|
||||||
release.channel = matchChannel(release, channel);
|
return {
|
||||||
|
...release,
|
||||||
|
...matchChannel(release, channel),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return release;
|
/**
 * Scrape an actor profile from a model page.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {string} url - URL of the model page, used as the avatar referer.
 * @returns {object} Profile containing the avatar source and referer.
 */
function scrapeProfile({ query }, url) {
	const profile = {};

	// Bio paragraphs are expected to look like 'Key: value'. Split on the FIRST
	// colon only, so values that contain colons themselves (URLs, times) are kept
	// whole, and skip paragraphs without a key or colon instead of crashing on them.
	// TODO: bio is parsed but not yet mapped onto profile fields.
	const bio = Object.fromEntries(query.cnts('div p')
		.map((item) => {
			const separatorIndex = item.indexOf(':');

			if (separatorIndex < 1) {
				return null;
			}

			return [
				slugify(item.slice(0, separatorIndex), '_'),
				item.slice(separatorIndex + 1).trim(),
			];
		})
		.filter(Boolean));

	profile.avatar = {
		src: query.img('.model-page'),
		referer: url,
	};

	return profile;
}
|
||||||
|
|
||||||
async function fetchLatest(channel) {
|
async function fetchLatest(channel) {
|
||||||
|
@ -128,7 +160,44 @@ async function fetchScene(url, channel, baseRelease, include) {
|
||||||
return res.status;
|
return res.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve the profile page URL for an actor.
 *
 * Uses the URL already present on the actor when there is one; otherwise
 * looks the actor up on the site's model overview page by comparing the
 * slugified image title against the actor slug.
 *
 * @param {object} baseActor - Actor with an optional `url` and a `slug`.
 * @returns {Promise<string|number|null>} The profile URL, the status of the
 *   overview response when that request was not ok, or null when no overview
 *   item matches the actor.
 */
async function getActorUrl(baseActor) {
	const knownUrl = baseActor.url;

	if (knownUrl) {
		return knownUrl;
	}

	const overview = await qu.getAll('https://www.littlecaprice-dreams.com/pornstars', '.models');

	if (!overview.ok) {
		return overview.status;
	}

	const match = overview.items.find((item) => slugify(item.query.q('img', 'title')) === baseActor.slug);

	return match ? match.query.url('a') : null;
}
|
||||||
|
|
||||||
|
/**
 * Fetch and scrape the profile of an actor.
 *
 * @param {object} baseActor - Actor to look up; passed to getActorUrl.
 * @param {object} entity - Entity the profile is scraped for; forwarded to scrapeProfile.
 * @returns {Promise<object|number|null>} The scraped profile, null when no
 *   profile URL could be found, or the status of whichever request failed.
 */
async function fetchProfile(baseActor, entity) {
	const actorUrl = await getActorUrl(baseActor);

	if (!actorUrl) {
		return null;
	}

	// getActorUrl returns the response status (a number) when the overview page
	// could not be loaded; pass it through rather than requesting it as a URL
	if (typeof actorUrl === 'number') {
		return actorUrl;
	}

	const actorRes = await qu.get(actorUrl, '#main-content');

	if (actorRes.ok) {
		return scrapeProfile(actorRes.item, actorUrl, entity);
	}

	return actorRes.status;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
fetchProfile,
|
||||||
};
|
};
|
||||||
|
|
|
@ -223,6 +223,7 @@ const scrapers = {
|
||||||
killergram,
|
killergram,
|
||||||
kink,
|
kink,
|
||||||
legalporno,
|
legalporno,
|
||||||
|
littlecapricedreams,
|
||||||
men,
|
men,
|
||||||
metrohd,
|
metrohd,
|
||||||
milehighmedia,
|
milehighmedia,
|
||||||
|
|
Loading…
Reference in New Issue