Added actor photos to Brazzers scene scrape. Added no-video poster to Score. Not flattening actor avatar fallbacks.
This commit is contained in:
parent
6733777f63
commit
6c3cba1b87
10
src/media.js
10
src/media.js
|
@ -93,7 +93,7 @@ async function extractItem(source) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null) {
|
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
|
||||||
if (!source) return null;
|
if (!source) return null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -106,7 +106,7 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
||||||
|
|
||||||
// fallbacks provided
|
// fallbacks provided
|
||||||
return source.reduce(
|
return source.reduce(
|
||||||
(outcome, sourceX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource)),
|
(outcome, sourceX, sourceIndexX) => outcome.catch(async () => fetchItem(sourceX, index, existingItemsBySource, domain, role, attempt, originalSource, sourceIndexX)),
|
||||||
Promise.reject(new Error()),
|
Promise.reject(new Error()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -164,7 +164,11 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
|
||||||
|
|
||||||
if (attempt < 3) {
|
if (attempt < 3) {
|
||||||
await Promise.delay(5000);
|
await Promise.delay(5000);
|
||||||
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1);
|
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (originalSource && sourceIndex < originalSource.length) {
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -308,7 +308,7 @@ function accumulateActors(releases) {
|
||||||
|
|
||||||
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
|
if (actor.name) acc[actorSlug] = { ...acc[actorSlug], ...actor }; // actor input contains profile info
|
||||||
if (actor.avatar) {
|
if (actor.avatar) {
|
||||||
acc[actorSlug].avatars = acc[actorSlug].avatars.concat(actor.avatar);
|
acc[actorSlug].avatars = acc[actorSlug].avatars.concat([actor.avatar]); // don't flatten fallbacks
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -53,38 +53,38 @@ async function scrapeUniqueReleases(scraper, site, preflight, afterDate = getAft
|
||||||
return accReleases;
|
return accReleases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const latestReleasesWithSite = latestReleases.map(release => ({ ...release, site }));
|
||||||
|
|
||||||
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
|
const oldestReleaseOnPage = latestReleases.slice(-1)[0].date;
|
||||||
const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);
|
const duplicateReleaseIds = argv.redownload ? new Set() : await findDuplicateReleaseIds(latestReleases, accReleases);
|
||||||
|
|
||||||
const uniqueReleases = latestReleases
|
const uniqueReleases = latestReleasesWithSite
|
||||||
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
|
.filter(release => !duplicateReleaseIds.has(String(release.entryId)) // release is already in database
|
||||||
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
|
&& (argv.last || !release.date || moment(release.date).isAfter(afterDate))); // release is older than specified date limit
|
||||||
|
|
||||||
logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
|
logger.verbose(`${site.name}: Scraped page ${page}, ${uniqueReleases.length} unique recent releases`);
|
||||||
|
|
||||||
const uniqueReleasesWithSite = uniqueReleases.map(release => ({ ...release, site }));
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
uniqueReleases.length > 0
|
uniqueReleases.length > 0
|
||||||
// && (oldestReleaseOnPage || page < argv.pages)
|
// && (oldestReleaseOnPage || page < argv.pages)
|
||||||
&& ((oldestReleaseOnPage
|
&& ((oldestReleaseOnPage
|
||||||
? moment(oldestReleaseOnPage).isAfter(afterDate)
|
? moment(oldestReleaseOnPage).isAfter(afterDate)
|
||||||
: accReleases.length + uniqueReleases.length < argv.nullDateLimit)
|
: accReleases.length + uniqueReleases.length <= argv.nullDateLimit)
|
||||||
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
|| (argv.last && accReleases.length + uniqueReleases.length < argv.last))
|
||||||
) {
|
) {
|
||||||
// oldest release on page is newer than the specified date range, or latest count has not yet been met, fetch next page
|
// oldest release on page is newer than the specified date range, or latest count has not yet been met, fetch next page
|
||||||
return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleasesWithSite), page + 1);
|
return scrapeUniqueReleases(scraper, site, preflight, afterDate, accReleases.concat(uniqueReleases), page + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argv.latest && uniqueReleases.length >= argv.latest) {
|
if (argv.last && uniqueReleases.length >= argv.last) {
|
||||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.last);
|
return accReleases.concat(uniqueReleases).slice(0, argv.last);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oldestReleaseOnPage) {
|
if (oldestReleaseOnPage) {
|
||||||
return accReleases.concat(uniqueReleasesWithSite);
|
return accReleases.concat(uniqueReleases);
|
||||||
}
|
}
|
||||||
|
|
||||||
return accReleases.concat(uniqueReleasesWithSite).slice(0, argv.nullDateLimit);
|
return accReleases.concat(uniqueReleases).slice(0, argv.nullDateLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeUpcomingReleases(scraper, site, preflight) {
|
async function scrapeUpcomingReleases(scraper, site, preflight) {
|
||||||
|
|
|
@ -81,9 +81,19 @@ async function scrapeScene(html, url, _site) {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
release.date = moment.utc($('.more-scene-info .scene-date').text(), 'MMMM DD, YYYY').toDate();
|
||||||
release.actors = $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
|
||||||
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
release.duration = Number($('.scene-length[itemprop="duration"]').attr('content').slice(1, -1)) * 60;
|
||||||
|
|
||||||
|
const actorsFromCards = $('.featured-model .card-image a').map((actorIndex, actorElement) => {
|
||||||
|
const avatar = `https:${$(actorElement).find('img').attr('data-src')}`;
|
||||||
|
|
||||||
|
return {
|
||||||
|
name: $(actorElement).attr('title'),
|
||||||
|
avatar: [avatar.replace('medium.jpg', 'large.jpg'), avatar],
|
||||||
|
};
|
||||||
|
}).toArray();
|
||||||
|
|
||||||
|
release.actors = actorsFromCards || $('.related-model a').map((actorIndex, actorElement) => $(actorElement).text()).toArray();
|
||||||
|
|
||||||
release.likes = Number($('.label-rating .like').text());
|
release.likes = Number($('.label-rating .like').text());
|
||||||
release.dislikes = Number($('.label-rating .dislike').text());
|
release.dislikes = Number($('.label-rating .dislike').text());
|
||||||
|
|
||||||
|
|
|
@ -95,9 +95,11 @@ async function scrapeScene(html, url, site) {
|
||||||
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
const durationEl = qa('value').find(el => /\d{1,3}:\d{2}/.test(el.textContent));
|
||||||
release.duration = ql(durationEl);
|
release.duration = ql(durationEl);
|
||||||
|
|
||||||
release.poster = qp('video') || qi('.flowplayer img'); // _800.jpg is larger than _xl.jpg in landscape
|
release.poster = qp('video') || qi('.flowplayer img') || qi('img'); // _800.jpg is larger than _xl.jpg in landscape
|
||||||
const photosUrl = qu('.stat a[href*=photos]');
|
const photosUrl = qu('.stat a[href*=photos]');
|
||||||
|
|
||||||
|
console.log(release.poster);
|
||||||
|
|
||||||
if (photosUrl) {
|
if (photosUrl) {
|
||||||
release.photos = await fetchPhotos(photosUrl);
|
release.photos = await fetchPhotos(photosUrl);
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue