forked from DebaucheryLibrarian/traxxx
Refactoring deep scrape. Added tag posters.
This commit is contained in:
@@ -90,7 +90,7 @@ async function scrapeProfile({ qu }, site, withScenes) {
|
||||
|
||||
const bio = qu.all('.stats li', true).reduce((acc, row) => {
|
||||
const [key, value] = row.split(':');
|
||||
return { ...acc, [slugify(key, { delimiter: '_' })]: value.trim() };
|
||||
return { ...acc, [slugify(key, '_')]: value.trim() };
|
||||
}, {});
|
||||
|
||||
if (bio.height) profile.height = feetInchesToCm(bio.height);
|
||||
@@ -133,7 +133,7 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site, include) {
|
||||
const actorSlugA = slugify(actorName, { delimiter: '' });
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const resA = await get(`${site.url}/models/${actorSlugA}.html`);
|
||||
|
||||
@@ -43,7 +43,7 @@ function scrapeAll(html, site, upcoming) {
|
||||
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
|
||||
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
|
||||
|
||||
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
|
||||
const channel = slugify($(element).find('.collection').attr('title'), '');
|
||||
|
||||
return acc.concat({
|
||||
url,
|
||||
|
||||
@@ -61,7 +61,7 @@ function scrapeProfile({ q, qa, qtx }) {
|
||||
|
||||
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
|
||||
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
|
||||
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
|
||||
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
|
||||
@@ -122,7 +122,7 @@ async function fetchScene(url, site, release) {
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const actorSlug2 = slugify(actorName, { delimiter: '' });
|
||||
const actorSlug2 = slugify(actorName, '');
|
||||
|
||||
const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
|
||||
? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]
|
||||
|
||||
@@ -74,7 +74,7 @@ async function fetchActorReleases(urls) {
|
||||
async function scrapeProfile(html, _url, actorName) {
|
||||
const { qu } = ex(html);
|
||||
|
||||
const keys = qu.all('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
|
||||
const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
|
||||
const values = qu.all('.about-info').map((el) => {
|
||||
if (el.children.length > 0) {
|
||||
return Array.from(el.children, child => child.textContent.trim()).join(', ');
|
||||
|
||||
@@ -79,7 +79,7 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug) {
|
||||
const actorSlug = slugify(actorName, { delimiter: '' });
|
||||
const actorSlug = slugify(actorName, '');
|
||||
const url = scraperSlug === 'povperverts'
|
||||
? `https://povperverts.net/models/${actorSlug}.html`
|
||||
: `https://${scraperSlug}.com/models/${actorSlug}.html`;
|
||||
|
||||
@@ -233,7 +233,7 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml) {
|
||||
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
|
||||
|
||||
const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim() || $('.siteNameSpan').text()?.trim().toLowerCase().replace('.com', '');
|
||||
if (channel) release.channel = slugify(channel, { delimiter: '' });
|
||||
if (channel) release.channel = slugify(channel, '');
|
||||
|
||||
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/
|
||||
|
||||
|
||||
@@ -193,7 +193,7 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
|
||||
if (channel) {
|
||||
release.channel = {
|
||||
force: true,
|
||||
slug: slugify(channel, { delimiter: '' }),
|
||||
slug: slugify(channel, ''),
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -239,7 +239,7 @@ function scrapeProfile({ el, qu }, site) {
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, { delimiter: '_' })]: value.trim(),
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
|
||||
@@ -272,7 +272,7 @@ function scrapeProfileT1({ el, qu }, site) {
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, { delimiter: '_' })]: value.trim(),
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
|
||||
@@ -308,7 +308,7 @@ function scrapeProfileTour({ el, qu }, site) {
|
||||
|
||||
return {
|
||||
...acc,
|
||||
[slugify(key, { delimiter: '_' })]: value.trim(),
|
||||
[slugify(key, '_')]: value.trim(),
|
||||
};
|
||||
}, {});
|
||||
|
||||
@@ -382,7 +382,7 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName, scraperSlug, site) {
|
||||
const actorSlugA = slugify(actorName, { delimiter: '' });
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName);
|
||||
|
||||
const t1 = site.parameters?.t1 ? 't1/' : '';
|
||||
|
||||
@@ -384,8 +384,8 @@ async function fetchMovie(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSlugA = slugify(actorName, { delimiter: '-' });
|
||||
const actorSlugB = slugify(actorName, { delimiter: '' });
|
||||
const actorSlugA = slugify(actorName, '-');
|
||||
const actorSlugB = slugify(actorName, '');
|
||||
|
||||
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
|
||||
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
|
||||
|
||||
@@ -98,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
|
||||
}
|
||||
|
||||
const siteName = data.collections[0]?.name || data.brand;
|
||||
release.channel = slugify(siteName, { delimiter: '' });
|
||||
release.channel = slugify(siteName, '');
|
||||
|
||||
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ function scrapeProfile({ qu }, _actorName, origin) {
|
||||
const keys = qu.all('.model-profile h5', true);
|
||||
const values = qu.all('.model-profile h5 + p', true);
|
||||
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
|
||||
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
|
||||
|
||||
profile.age = Number(bio.age);
|
||||
profile.description = qu.q('.model-bio', true);
|
||||
|
||||
@@ -95,7 +95,7 @@ async function scrapeScene(html, url, site) {
|
||||
release.movie = $('a[data-track="FULL MOVIE"]').attr('href');
|
||||
|
||||
const siteElement = $('.content-wrapper .logos-sites a');
|
||||
if (siteElement) release.channel = slugify(siteElement.text(), { delimiter: '' });
|
||||
if (siteElement) release.channel = slugify(siteElement.text(), '');
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -108,7 +108,7 @@ function scrapeProfile({ html, q, qa, qtx }) {
|
||||
const trimmedValue = value.trim();
|
||||
|
||||
if (trimmedValue.length === 0 || trimmedValue === '-') return acc;
|
||||
return { ...acc, [slugify(key, { delimiter: '_' })]: trimmedValue };
|
||||
return { ...acc, [slugify(key, '_')]: trimmedValue };
|
||||
}, {});
|
||||
|
||||
const description = q('.model-facts-long', true);
|
||||
@@ -176,7 +176,7 @@ async function fetchScene(url, site) {
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
const actorSearchSlug = slugify(actorName, { delimiter: '+' });
|
||||
const actorSearchSlug = slugify(actorName, '+');
|
||||
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
|
||||
const modelRes = await geta(url, '.model h3 a');
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ async function scrapeProfile(html, actorUrl, withReleases) {
|
||||
|
||||
const bio = qa('.stat').reduce((acc, el) => {
|
||||
const prop = q(el, '.label', true).slice(0, -1);
|
||||
const key = slugify(prop, { delimiter: '_' });
|
||||
const key = slugify(prop, '_');
|
||||
const value = q(el, '.value', true);
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user