Added actor photos to Brazzers scene scrape. Added no-video poster to Score. Not flattening actor avatar fallbacks.
This commit is contained in:
parent
6733777f63
commit
6c3cba1b87
10
src/media.js
10
src/media.js
|
@ -93,7 +93,7 @@ async function extractItem(source) {
|
|||
return null;
|
||||
}
|
||||
|
||||
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) {
|
||||
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
|
||||
if (!source) return null;
|
||||
|
||||
try {
|
||||
|
@ -106,7 +106,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
|||
|
||||
// fallbacks provided
|
||||
return source.reduce(
|
||||
(outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)),
|
||||
(outcome, sourceX, sourceIndexX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource, sourceIndexX)),
|
||||
Promise.reject(new Error()),
|
||||
);
|
||||
}
|
||||
|
@ -164,7 +164,11 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
|||
|
||||
if (attempt < 3) {
|
||||
await Promise.delay(5000);
|
||||
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1);
|
||||
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource);
|
||||
}
|
||||
|
||||
if (originalSource && sourceIndex < originalSource.length) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
@ -308,7 +308,7 @@ function accumulateActors(releases) {
|
|||
|
||||
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
|
||||
if (actor.avatar) {
|
||||
acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
|
||||
acc[actorSlug].avatars = acc[actorSlug].avatars.concat([actor.avatar]); // don't flatten fallbacks
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
@ -53,38 +53,38 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft
|
|||
return accReleases;
|
||||
}
|
||||
|
||||
const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site }));
|
||||
|
||||
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
|
||||
const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);
|
||||
|
||||
const uniqueReleases = latestReleases
|
||||
const uniqueReleases = latestReleasesWithSite
|
||||
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
|
||||
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
|
||||
|
||||
logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
|
||||
|
||||
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
|
||||
|
||||
if (
|
||||
uniqueReleases.length > 0
|
||||
// && (oldestReleaseOnPage || page < argv.pages)
|
||||
&& ((oldestReleaseOnPage
|
||||
? moment(oldestReleaseOnPage).isAfter(afterDate)
|
||||
: accReleases.length + uniqueReleases.length < argv.nullDateLimit)
|
||||
: accReleases.length + uniqueReleases.length <= argv.nullDateLimit)
|
||||
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
||||
) {
|
||||
// oldest release on page is newer than specified date range, or latest count has not yet been met, fetch next page
|
||||
return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
|
||||
return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleases), page + 1);
|
||||
}
|
||||
|
||||
if (argv.latest && uniqueReleases.length >= argv.latest) {
|
||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
|
||||
if (argv.last && uniqueReleases.length >= argv.last) {
|
||||
return accReleases.concat(uniqueReleases).slice(0, argv.last);
|
||||
}
|
||||
|
||||
if (oldestReleaseOnPage) {
|
||||
return accReleases.concat(uniqueReleasesWithSite);
|
||||
return accReleases.concat(uniqueReleases);
|
||||
}
|
||||
|
||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
|
||||
return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit);
|
||||
}
|
||||
|
||||
async function scrapeUpcomingReleases(scraper, site, preflight) {
|
||||
|
|
|
@ -81,9 +81,19 @@ async function scrapeScene(html, url, _site) {
|
|||
.trim();
|
||||
|
||||
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||
release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||
|
||||
const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
|
||||
const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
|
||||
|
||||
return {
|
||||
name: $(actorElement).attr('title'),
|
||||
avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
|
||||
};
|
||||
}).toArray();
|
||||
|
||||
release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||
|
||||
release.likes = Number($('.label-rating .like').text());
|
||||
release.dislikes = Number($('.label-rating .dislike').text());
|
||||
|
||||
|
|
|
@ -95,9 +95,11 @@ async function scrapeScene(html, url, site) {
|
|||
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||
release.duration = ql(durationEl);
|
||||
|
||||
release.poster = qp('video') || qi('.flowplayer img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||
release.poster = qp('video') || qi('.flowplayer img') || qi('img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||
const photosUrl = qu('.stat a[href*=photos]');
|
||||
|
||||
console.log(release.poster);
|
||||
|
||||
if (photosUrl) {
|
||||
release.photos = await fetchPhotos(photosUrl);
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue