Added media to LegalPorno scraper.

This commit is contained in:
ThePendulum 2019-10-29 01:47:16 +01:00
parent 5b7880a37d
commit a4d936523b
4 changed files with 67 additions and 10 deletions

View File

@ -1,4 +1,5 @@
$primary: #ff886c;
/* $primary: #ff886c; */
$primary: #ff6c88;
$text: #222;
$text-contrast: #fff;

View File

@ -1,3 +1,4 @@
/* $primary: #ff886c; */
.filters-bar[data-v-5533e378] {
display: block;
background: rgba(0, 0, 0, 0.1);
@ -27,7 +28,7 @@
}
.filters .toggle.active[data-v-5533e378] {
color: #fff;
background: #ff886c;
background: #ff6c88;
}
.filter[data-v-5533e378] {
display: inline-block;
@ -142,6 +143,7 @@
width: 300px;
}
/* $primary: #ff886c; */
.banner[data-v-2bc41e74] {
background: #222;
white-space: nowrap;
@ -182,6 +184,7 @@
width: .6rem;
}
/* $primary: #ff886c; */
.noselect {
user-select: none;
-webkit-user-select: none;
@ -206,12 +209,13 @@ body {
display: inline-block; }
.heading {
color: #ff886c;
color: #ff6c88;
margin: 0 0 1rem 0; }
/* $primary: #ff886c; */
.header[data-v-10b7ec04] {
color: #fff;
background: #ff886c;
background: #ff6c88;
padding: 1rem;
}
.logo-link[data-v-10b7ec04] {
@ -241,6 +245,7 @@ body {
overflow-y: auto;
}
/* $primary: #ff886c; */
.icon {
fill: #222;
display: inline-block;

View File

@ -142,10 +142,11 @@ async function storePhotos(release, releaseEntry) {
async function storePoster(release, releaseEntry) {
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
const { pathname } = new URL(release.poster);
const mimetype = mime.getType(pathname);
const res = await bhttp.get(release.poster);
const { pathname } = new URL(release.poster);
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);
await fs.writeFile(path.join(config.photoPath, filepath), res.body);

View File

@ -15,6 +15,28 @@ function extractTitle(originalTitle) {
return { shootId, title };
}
/**
 * Resolves the poster image URL for a scene thumbnail.
 *
 * The inline `style` attribute is preferred: the text between its first "("
 * and its final character is returned. When there is no style, a casting
 * frame URL is built from one randomly chosen entry of the JSON array held
 * in the `data-casting` attribute. An entry is either a single number or a
 * "<bound>-<bound>" string, in which case a random integer between the two
 * bounds (inclusive, in either order) is used.
 *
 * @param {object} posterElement - selection exposing `attr(name)` (cheerio-style).
 * @param {string|number} sceneId - identifier interpolated into the casting URL.
 * @returns {?string} the poster URL, or `null` when neither a style nor any
 *   usable casting data is available.
 * @throws {SyntaxError} when `data-casting` is present but is not valid JSON.
 */
function getPoster(posterElement, sceneId) {
  const posterStyle = posterElement.attr('style');

  if (posterStyle) {
    return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
  }

  const posterRange = posterElement.attr('data-casting');
  const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

  // Without casting data there is nothing to pick from; report "no poster"
  // instead of crashing on a null/empty list.
  if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
    return null;
  }

  const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

  if (typeof posterTimeRange === 'number') {
    // poster time is already a single time value
    return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
  }

  // Normalise the bounds so the result is uniform over [lower, upper]
  // regardless of the order in which the range lists them; a dash-less
  // string collapses to a single value.
  const bounds = String(posterTimeRange).split('-').map(Number);
  const lower = Math.min(...bounds);
  const upper = Math.max(...bounds);

  if (Number.isNaN(lower) || Number.isNaN(upper)) {
    return null;
  }

  const posterTime = Math.floor(Math.random() * (upper - lower + 1)) + lower;

  return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
function scrapeLatest(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.thumbnails > div').toArray();
@ -29,19 +51,27 @@ function scrapeLatest(html, site) {
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
const sceneId = $(element).attr('data-content');
const posterElement = $(element).find('.thumbnail-avatar');
const poster = getPoster(posterElement, sceneId);
return {
url,
shootId,
entryId,
title,
date,
poster,
site,
};
});
}
async function scrapeScene(html, url, site) {
async function scrapeScene(html, url, site, useGallery) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const playerObject = $('script:contains("new VideoPlayer")').html();
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle);
@ -56,6 +86,15 @@ async function scrapeScene(html, url, site) {
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
const photos = useGallery
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const tags = await matchTags(rawTags);
@ -67,6 +106,13 @@ async function scrapeScene(html, url, site) {
date,
actors,
duration,
poster,
photos,
trailer: {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
},
tags,
site,
};
@ -79,9 +125,13 @@ async function fetchLatest(site, page = 1) {
}
/**
 * Downloads a scene page and passes its HTML to `scrapeScene`.
 *
 * The page variant requested depends on `useGallery`: the `/gallery` page
 * when true, the `/screenshots` page otherwise. The same flag is forwarded
 * to `scrapeScene` as its fourth argument.
 *
 * @param {string} url - base URL of the scene.
 * @param {object} site - site descriptor forwarded untouched to `scrapeScene`.
 * @returns {Promise<object>} whatever `scrapeScene` resolves to.
 */
async function fetchScene(url, site) {
  const useGallery = true;

  // A single request and a single return: `res` must only be declared once,
  // and the flag has to reach scrapeScene.
  const res = useGallery
    ? await bhttp.get(`${url}/gallery#gallery`)
    : await bhttp.get(`${url}/screenshots#screenshots`);

  return scrapeScene(res.body.toString(), url, site, useGallery);
}
module.exports = {