Photo plucker will use discarded photos as a fallback. Returning high-res photo sources from LegalPorno.

This commit is contained in:
2020-03-10 04:42:15 +01:00
parent 6bfc5e4378
commit db63be8f92
2 changed files with 100 additions and 48 deletions

View File

@@ -76,53 +76,63 @@ async function scrapeScene(html, url, site, useGallery) {
const playerObject = $('script:contains("new VideoPlayer")').html();
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
const release = { url };
const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle);
const entryId = new URL(url).pathname.split('/')[2];
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
release.shootId = shootId;
release.entryId = new URL(url).pathname.split('/')[2];
release.title = title;
release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();
const actors = $(actorsElement)
release.description = $('meta[name="description"]')?.attr('content')?.trim()
|| (descriptionElement && $(descriptionElement).find('dd').text().trim());
release.actors = $(actorsElement)
.find('a[href*="com/model"]')
.map((actorIndex, actorElement) => $(actorElement).text()).toArray();
const description = $('meta[name="description"]')?.attr('content')?.trim() || (descriptionElement && $(descriptionElement).find('dd').text().trim());
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const photos = useGallery
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
release.photos = photos.map((source) => {
// source without parameters sometimes serves larger preview photo
const { origin, pathname } = new URL(source);
return `${origin}${pathname}`;
/* disable thumbnail as fallback, usually enough high res photos available
return [
`${origin}${pathname}`,
source,
];
*/
});
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
release.trailer = {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
};
const studioName = $('.watchpage-studioname').first().text().trim();
const studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
const tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
return {
url,
shootId,
entryId,
title,
description,
date,
actors,
duration,
poster,
photos,
trailer: {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
},
tags,
site,
studio,
};
return release;
}
async function scrapeProfile(html, _url, actorName) {