Enabled pagination on network page.
This commit is contained in:
@@ -16,7 +16,7 @@ function scrape(html, site) {
|
||||
return sceneElements.map((element) => {
|
||||
const sceneLinkElement = $(element).find('.thmb_lnk');
|
||||
const title = sceneLinkElement.attr('title');
|
||||
const url = site.legacy
|
||||
const url = site.parameters?.legacy
|
||||
? `https://${site.url}{sceneLinkElement.attr('href')}`
|
||||
: `https://bangbros.com${sceneLinkElement.attr('href')}`;
|
||||
const shootId = sceneLinkElement.attr('id') && sceneLinkElement.attr('id').split('-')[1];
|
||||
@@ -150,14 +150,14 @@ function scrapeSceneLegacy({ qu }, url) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function scrapeProfile(html) {
|
||||
function scrapeProfile(html, scope) {
|
||||
const { q } = ex(html);
|
||||
const profile = {};
|
||||
|
||||
const avatar = q('.profilePic img', 'src');
|
||||
if (avatar) profile.avatar = `https:${avatar}`;
|
||||
|
||||
profile.releases = scrape(html);
|
||||
profile.releases = scrape(html, scope.network);
|
||||
|
||||
return profile;
|
||||
}
|
||||
@@ -221,7 +221,7 @@ async function fetchScene(url, site, release) {
|
||||
return scrapeScene(res.item.html, url, site);
|
||||
}
|
||||
|
||||
async function fetchProfile(actorName) {
|
||||
async function fetchProfile(actorName, scope) {
|
||||
const actorSlug = slugify(actorName);
|
||||
const url = `https://bangbros.com/search/${actorSlug}`;
|
||||
const res = await bhttp.get(url);
|
||||
@@ -233,7 +233,7 @@ async function fetchProfile(actorName) {
|
||||
const actorRes = await bhttp.get(actorUrl);
|
||||
|
||||
if (actorRes.statusCode === 200) {
|
||||
return scrapeProfile(actorRes.body.toString());
|
||||
return scrapeProfile(actorRes.body.toString(), scope);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ async function scrapeScene(html, url, _site) {
|
||||
const siteElement = $('.niche-site-logo');
|
||||
// const siteUrl = `https://www.brazzers.com${siteElement.attr('href').slice(0, -1)}`;
|
||||
const siteName = siteElement.attr('title');
|
||||
release.channel = siteName.replace(/\s+/g, '').toLowerCase();
|
||||
release.channel = slugify(siteName, '');
|
||||
|
||||
release.tags = $('.tag-card-container a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
release.photos = $('.carousel-thumb a').map((photoIndex, photoElement) => `https:${$(photoElement).attr('href')}`).toArray();
|
||||
|
||||
@@ -59,16 +59,17 @@ function scrapeScene(html, url, site) {
|
||||
const originalUrl = `${protocol}//${hostname}${pathname}`;
|
||||
|
||||
const entryId = originalUrl.split('-').slice(-1)[0];
|
||||
const title = sceneElement.find('h1.scene-title.grey-text').text();
|
||||
const title = sceneElement.find('h1.scene-title').text();
|
||||
const description = sceneElement.find('.synopsis').contents().slice(2).text().replace(/[\s\n]+/g, ' ').trim();
|
||||
|
||||
const date = moment.utc(sceneElement.find('span.entry-date').text(), 'MMM D, YYYY').toDate();
|
||||
const actors = $('a.scene-title.grey-text.link').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
const date = moment.utc(sceneElement.find('span.entry-date').text()?.match(/\w+ \d{1,2}, \d{4}/), 'MMM D, YYYY').toDate();
|
||||
const actors = $('.performer-list a, h1 a.scene-title').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
|
||||
const duration = Number(sceneElement.find('.duration-ratings .duration').text().slice(10, -4)) * 60;
|
||||
|
||||
const poster = `https:${$('video, dl8-video').attr('poster')}`;
|
||||
const photos = $('.contain-scene-images.desktop-only a').map((index, el) => `https:${$(el).attr('href')}`).toArray();
|
||||
const posterPath = $('video, dl8-video').attr('poster') || $('img.start-card').attr('src');
|
||||
const poster = posterPath && `https:${posterPath}`;
|
||||
const photos = $('.contain-scene-images.desktop-only a').map((index, el) => $(el).attr('href')).toArray().filter(Boolean).map(photo => `https:${photo}`);
|
||||
|
||||
const trailerEl = $('source');
|
||||
const trailerSrc = trailerEl.attr('src');
|
||||
@@ -90,10 +91,10 @@ function scrapeScene(html, url, site) {
|
||||
tags,
|
||||
photos,
|
||||
poster,
|
||||
trailer: {
|
||||
trailer: trailerSrc ? {
|
||||
src: trailerSrc,
|
||||
type: trailerType,
|
||||
},
|
||||
} : null,
|
||||
rating: null,
|
||||
site,
|
||||
channel,
|
||||
|
||||
@@ -25,7 +25,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
|
||||
shoot_id: release.shootId || null,
|
||||
studio_id: release.studio?.id || null,
|
||||
url: release.url,
|
||||
date: release.date,
|
||||
date: Number(release.date) ? release.date : null,
|
||||
slug,
|
||||
description: release.description,
|
||||
duration: release.duration,
|
||||
@@ -47,7 +47,7 @@ function curateReleaseEntry(release, batchId, existingRelease) {
|
||||
}
|
||||
|
||||
async function attachChannelSites(releases) {
|
||||
const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork));
|
||||
const releasesWithoutSite = releases.filter(release => release.channel && (!release.site || release.site.isNetwork || release.site.slug !== release.channel));
|
||||
|
||||
const channelSites = await knex('sites')
|
||||
.leftJoin('networks', 'networks.id', 'sites.network_id')
|
||||
@@ -58,10 +58,6 @@ async function attachChannelSites(releases) {
|
||||
|
||||
const releasesWithChannelSite = await Promise.all(releases
|
||||
.map(async (release) => {
|
||||
if (release.site && !release.site.isNetwork) {
|
||||
return release;
|
||||
}
|
||||
|
||||
if (release.channel && channelSitesBySlug[release.channel]) {
|
||||
const curatedSite = await curateSite(channelSitesBySlug[release.channel]);
|
||||
|
||||
@@ -71,6 +67,11 @@ async function attachChannelSites(releases) {
|
||||
};
|
||||
}
|
||||
|
||||
if (release.site && !release.site.isNetwork) {
|
||||
return release;
|
||||
}
|
||||
|
||||
|
||||
if (release.site && release.site.isNetwork) {
|
||||
return {
|
||||
...release,
|
||||
@@ -129,6 +130,10 @@ function attachReleaseIds(releases, storedReleases) {
|
||||
|
||||
function filterInternalDuplicateReleases(releases) {
|
||||
const releasesBySiteIdAndEntryId = releases.reduce((acc, release) => {
|
||||
if (!release.site) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
if (!acc[release.site.id]) {
|
||||
acc[release.site.id] = {};
|
||||
}
|
||||
@@ -221,7 +226,7 @@ async function storeReleases(releases) {
|
||||
|
||||
const curatedNewReleaseEntries = uniqueReleases.map(release => curateReleaseEntry(release, batchId));
|
||||
|
||||
const storedReleases = await knex('releases').insert(curatedNewReleaseEntries).returning('*');
|
||||
const storedReleases = await knex.batchInsert('releases', curatedNewReleaseEntries).returning('*');
|
||||
// TODO: update duplicate releases
|
||||
|
||||
const storedReleaseEntries = Array.isArray(storedReleases) ? storedReleases : [];
|
||||
|
||||
@@ -25,7 +25,7 @@ async function actorPosters(actorNames) {
|
||||
const source = path.join(config.media.path, poster.path);
|
||||
|
||||
const directory = path.join(config.media.path, 'extracted', poster.actor_name);
|
||||
const target = path.join(directory, `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_')} (${moment.utc(poster.date).format('YYYY-MM-DD')})-${poster.index}.jpeg`);
|
||||
const target = path.join(directory, `${poster.actor_name} - ${poster.network_name}: ${poster.site_name} - ${poster.title.replace(/[/.]/g, '_') || poster.actor_name} (${moment.utc(poster.date).format('YYYY-MM-DD')})-${poster.index}.jpeg`);
|
||||
await fs.mkdir(path.join(directory), { recursive: true });
|
||||
|
||||
const file = await fs.readFile(source);
|
||||
|
||||
@@ -1,14 +1,45 @@
|
||||
'use strict';
|
||||
|
||||
const substitutes = {
|
||||
à: 'a',
|
||||
á: 'a',
|
||||
ä: 'a',
|
||||
å: 'a',
|
||||
æ: 'ae',
|
||||
ç: 'c',
|
||||
è: 'e',
|
||||
é: 'e',
|
||||
ë: 'e',
|
||||
ì: 'i',
|
||||
í: 'i',
|
||||
ï: 'i',
|
||||
ǹ: 'n',
|
||||
ń: 'n',
|
||||
ñ: 'n',
|
||||
ò: 'o',
|
||||
ó: 'o',
|
||||
ö: 'o',
|
||||
ø: 'o',
|
||||
œ: 'oe',
|
||||
ß: 'ss',
|
||||
ù: 'u',
|
||||
ú: 'u',
|
||||
ü: 'u',
|
||||
ỳ: 'y',
|
||||
ý: 'y',
|
||||
ÿ: 'y',
|
||||
};
|
||||
|
||||
function slugify(string, delimiter = '-', {
|
||||
encode = false,
|
||||
removeAccents = true,
|
||||
limit = 1000,
|
||||
} = {}) {
|
||||
if (!string || typeof string !== 'string') {
|
||||
return string;
|
||||
}
|
||||
|
||||
const slugComponents = string.trim().toLowerCase().match(/\w+/g);
|
||||
const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ]+/g);
|
||||
|
||||
if (!slugComponents) {
|
||||
return '';
|
||||
@@ -18,6 +49,12 @@ function slugify(string, delimiter = '-', {
|
||||
const accSlug = `${acc}${index > 0 ? delimiter : ''}${component}`;
|
||||
|
||||
if (accSlug.length < limit) {
|
||||
if (removeAccents) {
|
||||
return accSlug.replace(/[à-ÿ]/g, (match) => {
|
||||
return substitutes[match] || '';
|
||||
});
|
||||
}
|
||||
|
||||
return accSlug;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user