Refactoring deep scrape. Added tag posters.

This commit is contained in:
2020-03-16 04:10:52 +01:00
parent c8ebe7892a
commit 0f09fd53eb
31 changed files with 851 additions and 589 deletions

View File

@@ -90,7 +90,7 @@ async function scrapeProfile({ qu }, site, withScenes) {
const bio = qu.all('.stats li', true).reduce((acc, row) => {
const [key, value] = row.split(':');
return { ...acc, [slugify(key, { delimiter: '_' })]: value.trim() };
return { ...acc, [slugify(key, '_')]: value.trim() };
}, {});
if (bio.height) profile.height = feetInchesToCm(bio.height);
@@ -133,7 +133,7 @@ async function fetchScene(url, site) {
}
async function fetchProfile(actorName, scraperSlug, site, include) {
const actorSlugA = slugify(actorName, { delimiter: '' });
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);
const resA = await get(`${site.url}/models/${actorSlugA}.html`);

View File

@@ -43,7 +43,7 @@ function scrapeAll(html, site, upcoming) {
const poster = `https:${$(element).find('.card-main-img').attr('data-src')}`;
const photos = $(element).find('.card-overlay .image-under').map((photoIndex, photoElement) => `https:${$(photoElement).attr('data-src')}`).toArray();
const channel = slugify($(element).find('.collection').attr('title'), { delimiter: '' });
const channel = slugify($(element).find('.collection').attr('title'), '');
return acc.concat({
url,

View File

@@ -61,7 +61,7 @@ function scrapeProfile({ q, qa, qtx }) {
const keys = qa('.model-descr_line:not(.model-descr_rait) p.text span', true);
const values = qa('.model-descr_line:not(.model-descr_rait) p.text').map(el => qtx(el));
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
if (bio.height) profile.height = Number(bio.height.match(/\((\d+)cm\)/)[1]);
if (bio.weight) profile.weight = Number(bio.weight.match(/\((\d+)kg\)/)[1]);
@@ -122,7 +122,7 @@ async function fetchScene(url, site, release) {
async function fetchProfile(actorName, scraperSlug) {
const actorSlug = slugify(actorName);
const actorSlug2 = slugify(actorName, { delimiter: '' });
const actorSlug2 = slugify(actorName, '');
const [url, url2] = ['cherrypimps', 'wildoncam'].includes(scraperSlug)
? [`https://${scraperSlug}.com/models/${actorSlug}.html`, `https://${scraperSlug}.com/models/${actorSlug2}.html`]

View File

@@ -74,7 +74,7 @@ async function fetchActorReleases(urls) {
async function scrapeProfile(html, _url, actorName) {
const { qu } = ex(html);
const keys = qu.all('.about-title', true).map(key => slugify(key, { delimiter: '_' }));
const keys = qu.all('.about-title', true).map(key => slugify(key, '_'));
const values = qu.all('.about-info').map((el) => {
if (el.children.length > 0) {
return Array.from(el.children, child => child.textContent.trim()).join(', ');

View File

@@ -79,7 +79,7 @@ async function fetchScene(url, site) {
}
async function fetchProfile(actorName, scraperSlug) {
const actorSlug = slugify(actorName, { delimiter: '' });
const actorSlug = slugify(actorName, '');
const url = scraperSlug === 'povperverts'
? `https://povperverts.net/models/${actorSlug}.html`
: `https://${scraperSlug}.com/models/${actorSlug}.html`;

View File

@@ -233,7 +233,7 @@ async function scrapeScene(html, url, site, baseRelease, mobileHtml) {
release.tags = hasTrans ? [...rawTags, 'transsexual'] : rawTags;
const channel = data?.productionCompany?.name || $('.studioLink a, .siteLink a').attr('title')?.trim() || $('.siteNameSpan').text()?.trim().toLowerCase().replace('.com', '');
if (channel) release.channel = slugify(channel, { delimiter: '' });
if (channel) release.channel = slugify(channel, '');
if (videoData.picPreview && new URL(videoData.picPreview).pathname.length > 1) release.poster = videoData.picPreview; // sometimes links to just https://images02-fame.gammacdn.com/

View File

@@ -193,7 +193,7 @@ function scrapeSceneT1({ html, qu }, site, url, baseRelease, channelRegExp) {
if (channel) {
release.channel = {
force: true,
slug: slugify(channel, { delimiter: '' }),
slug: slugify(channel, ''),
};
}
}
@@ -239,7 +239,7 @@ function scrapeProfile({ el, qu }, site) {
return {
...acc,
[slugify(key, { delimiter: '_' })]: value.trim(),
[slugify(key, '_')]: value.trim(),
};
}, {});
@@ -272,7 +272,7 @@ function scrapeProfileT1({ el, qu }, site) {
return {
...acc,
[slugify(key, { delimiter: '_' })]: value.trim(),
[slugify(key, '_')]: value.trim(),
};
}, {});
@@ -308,7 +308,7 @@ function scrapeProfileTour({ el, qu }, site) {
return {
...acc,
[slugify(key, { delimiter: '_' })]: value.trim(),
[slugify(key, '_')]: value.trim(),
};
}, {});
@@ -382,7 +382,7 @@ async function fetchScene(url, site, baseRelease, beforeFetchLatest) {
}
async function fetchProfile(actorName, scraperSlug, site) {
const actorSlugA = slugify(actorName, { delimiter: '' });
const actorSlugA = slugify(actorName, '');
const actorSlugB = slugify(actorName);
const t1 = site.parameters?.t1 ? 't1/' : '';

View File

@@ -384,8 +384,8 @@ async function fetchMovie(url, site) {
}
async function fetchProfile(actorName) {
const actorSlugA = slugify(actorName, { delimiter: '-' });
const actorSlugB = slugify(actorName, { delimiter: '' });
const actorSlugA = slugify(actorName, '-');
const actorSlugB = slugify(actorName, '');
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;

View File

@@ -98,7 +98,7 @@ function scrapeScene(data, url, _site, networkName) {
}
const siteName = data.collections[0]?.name || data.brand;
release.channel = slugify(siteName, { delimiter: '' });
release.channel = slugify(siteName, '');
release.url = url || `https://www.${networkName || data.brand}.com/scene/${entryId}/`;

View File

@@ -94,7 +94,7 @@ function scrapeProfile({ qu }, _actorName, origin) {
const keys = qu.all('.model-profile h5', true);
const values = qu.all('.model-profile h5 + p', true);
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, { delimiter: '_' })]: values[index] }), {});
const bio = keys.reduce((acc, key, index) => ({ ...acc, [slugify(key, '_')]: values[index] }), {});
profile.age = Number(bio.age);
profile.description = qu.q('.model-bio', true);

View File

@@ -95,7 +95,7 @@ async function scrapeScene(html, url, site) {
release.movie = $('a[data-track="FULL MOVIE"]').attr('href');
const siteElement = $('.content-wrapper .logos-sites a');
if (siteElement) release.channel = slugify(siteElement.text(), { delimiter: '' });
if (siteElement) release.channel = slugify(siteElement.text(), '');
return release;
}
@@ -108,7 +108,7 @@ function scrapeProfile({ html, q, qa, qtx }) {
const trimmedValue = value.trim();
if (trimmedValue.length === 0 || trimmedValue === '-') return acc;
return { ...acc, [slugify(key, { delimiter: '_' })]: trimmedValue };
return { ...acc, [slugify(key, '_')]: trimmedValue };
}, {});
const description = q('.model-facts-long', true);
@@ -176,7 +176,7 @@ async function fetchScene(url, site) {
}
async function fetchProfile(actorName) {
const actorSearchSlug = slugify(actorName, { delimiter: '+' });
const actorSearchSlug = slugify(actorName, '+');
const url = `https://www.private.com/search.php?query=${actorSearchSlug}`;
const modelRes = await geta(url, '.model h3 a');

View File

@@ -155,7 +155,7 @@ async function scrapeProfile(html, actorUrl, withReleases) {
const bio = qa('.stat').reduce((acc, el) => {
const prop = q(el, '.label', true).slice(0, -1);
const key = slugify(prop, { delimiter: '_' });
const key = slugify(prop, '_');
const value = q(el, '.value', true);
return {