Added actor photos to Brazzers scene scrape. Added no-video poster to Score. Not flattening actor avatar fallbacks.

ThePendulum 2020-03-04 17:21:40 +01:00
parent 6733777f63
commit 6c3cba1b87
5 changed files with 31 additions and 15 deletions

View File

@@ -93,7 +93,7 @@ async function extractItem(source) {
   return null;
 }
-async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) {
+async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
   if (!source) return null;
   try {
@@ -106,7 +106,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
       // fallbacks provided
       return source.reduce(
-        (outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)),
+        (outcome, sourceX, sourceIndexX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource, sourceIndexX)),
         Promise.reject(new Error()),
       );
     }
@@ -164,7 +164,11 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
     if (attempt < 3) {
       await Promise.delay(5000);
-      return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1);
+      return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource);
     }
+    if (originalSource && sourceIndex < originalSource.length) {
+      throw error;
+    }
     return null;
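
The reduce call above is what makes the fallback list work: the chain is seeded with an already rejected promise, so the first .catch() immediately tries the first source, and each later source only runs if the previous attempt threw. The new sourceIndex argument lets a failed attempt keep propagating while fallbacks remain instead of settling on null too early. A minimal standalone sketch of that pattern, with fetchOne as a made-up stand-in for the real download logic:

// Sketch of the fallback pattern used in fetchItem (fetchOne is a stand-in,
// not part of the repository). Starting from a rejected promise means the
// first .catch() attempts sources[0] right away.
async function fetchFirstAvailable(sources, fetchOne) {
  return sources.reduce(
    (outcome, source, sourceIndex) => outcome.catch(async () => {
      try {
        return await fetchOne(source);
      } catch (error) {
        // Keep propagating while fallbacks remain; only the last failure
        // resolves to null.
        if (sourceIndex < sources.length - 1) throw error;
        return null;
      }
    }),
    Promise.reject(new Error('no source succeeded')),
  );
}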

View File

@@ -308,7 +308,7 @@ function accumulateActors(releases) {
       if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
       if (actor.avatar) {
-        acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
+        acc[actorSlug].avatars = acc[actorSlug].avatars.concat([actor.avatar]); // don't flatten fallbacks
       }
     });
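
The wrapped argument matters because Array.prototype.concat spreads any array it is given directly, which would merge an avatar's fallback URLs into the flat avatars list; wrapping it keeps each avatar as one group of fallbacks. A quick illustration:

const avatars = [];
const avatar = ['avatar_large.jpg', 'avatar_medium.jpg']; // preferred URL plus fallback

// concat(avatar) flattens one level, losing the grouping:
avatars.concat(avatar);   // ['avatar_large.jpg', 'avatar_medium.jpg']

// concat([avatar]) keeps the fallback set together as a single entry:
avatars.concat([avatar]); // [['avatar_large.jpg', 'avatar_medium.jpg']]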

View File

@@ -53,38 +53,38 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft
     return accReleases;
   }
+  const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site }));
   const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
   const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);
-  const uniqueReleases = latestReleases
+  const uniqueReleases = latestReleasesWithSite
     .filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
       && (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
   logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
-  const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
   if (
     uniqueReleases.length > 0
     // && (oldestReleaseOnPage || page < argv.pages)
     && ((oldestReleaseOnPage
       ? moment(oldestReleaseOnPage).isAfter(afterDate)
-      : accReleases.length + uniqueReleases.length < argv.nullDateLimit)
+      : accReleases.length + uniqueReleases.length <= argv.nullDateLimit)
       || (argv.last && accReleases.length + uniqueReleases.length < argv.last))
   ) {
     // oldest release on page is newer than specified date range, or latest count has not yet been met, fetch next page
-    return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
+    return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleases), page + 1);
   }
-  if (argv.latest && uniqueReleases.length >= argv.latest) {
-    return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
+  if (argv.last && uniqueReleases.length >= argv.last) {
+    return accReleases.concat(uniqueReleases).slice(0, argv.last);
   }
   if (oldestReleaseOnPage) {
-    return accReleases.concat(uniqueReleasesWithSite);
+    return accReleases.concat(uniqueReleases);
   }
-  return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
+  return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit);
 }
 async function scrapeUpcomingReleases(scraper, site, preflight) {
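
Moving the site mapping up into latestReleasesWithSite means every return path below already works with site-annotated releases, so the separate uniqueReleasesWithSite copy becomes unnecessary. The surrounding recursion keeps requesting pages while the oldest release on a page is still newer than the date cutoff, or until the requested count is reached. A simplified sketch of that paging loop, with fetchPage and isAfterCutoff as hypothetical stand-ins for the scraper call and the date/limit checks:

// Generic sketch of the recursive paging above: keep requesting pages until
// the oldest item on a page falls outside the date window or the requested
// count is reached. fetchPage and isAfterCutoff are assumed helpers.
async function scrapePages(fetchPage, isAfterCutoff, limit, acc = [], page = 1) {
  const items = await fetchPage(page);
  if (items.length === 0) return acc;

  const fresh = items.filter(isAfterCutoff);
  const collected = acc.concat(fresh);

  // The oldest item on the page decides whether the next page can still
  // contain anything inside the date window.
  const oldestOnPage = items[items.length - 1];
  if (isAfterCutoff(oldestOnPage) && collected.length < limit) {
    return scrapePages(fetchPage, isAfterCutoff, limit, collected, page + 1);
  }

  return collected.slice(0, limit);
}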

View File

@@ -81,9 +81,19 @@ async function scrapeScene(html, url, _site) {
     .trim();
   release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
-  release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
   release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
+  const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
+    const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
+    return {
+      name: $(actorElement).attr('title'),
+      avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
+    };
+  }).toArray();
+  release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
   release.likes = Number($('.label-rating .like').text());
   release.dislikes = Number($('.label-rating .dislike').text());
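
The new actor block reads each featured-model card, takes the performer name from the link's title attribute, and turns the data-src thumbnail into a two-entry avatar: the URL renamed to the large variant first, with the original medium image kept as a fallback. A self-contained sketch of that extraction, assuming the $ above is cheerio; the markup below is invented for illustration and does not come from the scraped site:

const cheerio = require('cheerio'); // assumed, matching the $ helpers in the scraper above

// Invented markup for illustration only; the real pages differ.
const html = `
  <div class="featured-model">
    <div class="card-image">
      <a title="Example Actor"><img data-src="//cdn.example.com/example_medium.jpg"></a>
    </div>
  </div>
`;

const $ = cheerio.load(html);

const actors = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
  const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;

  return {
    name: $(actorElement).attr('title'),
    // Prefer the large rename, keep the original data-src URL as a fallback.
    avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
  };
}).toArray();

console.log(actors);
// [ { name: 'Example Actor',
//     avatar: [ 'https://cdn.example.com/example_large.jpg',
//               'https://cdn.example.com/example_medium.jpg' ] } ]

One thing to keep in mind with this shape: cheerio's .toArray() returns an empty array rather than a falsy value when nothing matches, so a length check is the usual way to decide whether a text-only selector fallback should kick in.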

View File

@@ -95,9 +95,11 @@ async function scrapeScene(html, url, site) {
   const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
   release.duration = ql(durationEl);
-  release.poster = qp('video') || qi('.flowplayer img'); // _800.jpg is larger than _xl.jpg in landscape
+  release.poster = qp('video') || qi('.flowplayer img') || qi('img'); // _800.jpg is larger than _xl.jpg in landscape
   const photosUrl = qu('.stat a[href*=photos]');
+  console.log(release.poster);
   if (photosUrl) {
     release.photos = await fetchPhotos(photosUrl);
   } else {
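
The extra qi('img') gives scenes without a video player some poster rather than none. Since qp, qi and qu are the project's own query helpers, the sketch below re-expresses the same fallback chain with plain cheerio lookups and illustrative selectors rather than the real helper API:

// Same idea as release.poster above: try increasingly generic sources and
// take the first one that yields a URL. Selectors are illustrative, and $ is
// assumed to be a cheerio handle for the scene page.
function getPoster($) {
  const videoPoster = $('video').attr('poster');         // poster declared on the player
  const playerImage = $('.flowplayer img').attr('src');  // player thumbnail
  const anyImage = $('img').first().attr('src');         // last resort when there is no video

  return videoPoster || playerImage || anyImage || null;
}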