Added media to LegalPorno scraper.

This commit is contained in:
ThePendulum 2019-10-29 01:47:16 +01:00
parent 5b7880a37d
commit a4d936523b
4 changed files with 67 additions and 10 deletions

View File

@ -1,4 +1,5 @@
$primary: #ff886c; /* $primary: #ff886c; */
$primary: #ff6c88;
$text: #222; $text: #222;
$text-contrast: #fff; $text-contrast: #fff;

View File

@ -1,3 +1,4 @@
/* $primary: #ff886c; */
.filters-bar[data-v-5533e378] { .filters-bar[data-v-5533e378] {
display: block; display: block;
background: rgba(0, 0, 0, 0.1); background: rgba(0, 0, 0, 0.1);
@ -27,7 +28,7 @@
} }
.filters .toggle.active[data-v-5533e378] { .filters .toggle.active[data-v-5533e378] {
color: #fff; color: #fff;
background: #ff886c; background: #ff6c88;
} }
.filter[data-v-5533e378] { .filter[data-v-5533e378] {
display: inline-block; display: inline-block;
@ -142,6 +143,7 @@
width: 300px; width: 300px;
} }
/* $primary: #ff886c; */
.banner[data-v-2bc41e74] { .banner[data-v-2bc41e74] {
background: #222; background: #222;
white-space: nowrap; white-space: nowrap;
@ -182,6 +184,7 @@
width: .6rem; width: .6rem;
} }
/* $primary: #ff886c; */
.noselect { .noselect {
user-select: none; user-select: none;
-webkit-user-select: none; -webkit-user-select: none;
@ -206,12 +209,13 @@ body {
display: inline-block; } display: inline-block; }
.heading { .heading {
color: #ff886c; color: #ff6c88;
margin: 0 0 1rem 0; } margin: 0 0 1rem 0; }
/* $primary: #ff886c; */
.header[data-v-10b7ec04] { .header[data-v-10b7ec04] {
color: #fff; color: #fff;
background: #ff886c; background: #ff6c88;
padding: 1rem; padding: 1rem;
} }
.logo-link[data-v-10b7ec04] { .logo-link[data-v-10b7ec04] {
@ -241,6 +245,7 @@ body {
overflow-y: auto; overflow-y: auto;
} }
/* $primary: #ff886c; */
.icon { .icon {
fill: #222; fill: #222;
display: inline-block; display: inline-block;

View File

@ -142,10 +142,11 @@ async function storePhotos(release, releaseEntry) {
async function storePoster(release, releaseEntry) { async function storePoster(release, releaseEntry) {
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`); console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
const { pathname } = new URL(release.poster);
const mimetype = mime.getType(pathname);
const res = await bhttp.get(release.poster); const res = await bhttp.get(release.poster);
const { pathname } = new URL(release.poster);
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`); const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);
await fs.writeFile(path.join(config.photoPath, filepath), res.body); await fs.writeFile(path.join(config.photoPath, filepath), res.body);

View File

@ -15,6 +15,28 @@ function extractTitle(originalTitle) {
return { shootId, title }; return { shootId, title };
} }
/**
 * Resolve the poster URL for a scene thumbnail.
 *
 * The URL is taken from the element's inline `style` attribute when present.
 * Otherwise a frame is picked at random from the `data-casting` attribute,
 * which is parsed as a JSON array whose entries are either a single time
 * value (number) or a `"<bound>-<bound>"` string describing an inclusive range.
 *
 * @param {object} posterElement - Element wrapper exposing `.attr(name)` (a cheerio selection in this file).
 * @param {string|number} sceneId - Scene identifier interpolated into the casting URL.
 * @returns {string|null} The poster URL, or `null` when the element carries no usable poster data.
 */
function getPoster(posterElement, sceneId) {
  const posterStyle = posterElement.attr('style');

  if (posterStyle) {
    // Take everything after the first '(' and drop the final character;
    // assumes the style value ends with the ')' closing `url(...)` - TODO confirm.
    return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
  }

  const posterRange = posterElement.attr('data-casting');
  const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

  // Missing attribute or empty list: nothing to pick from. Previously this
  // dereferenced null / undefined and threw a TypeError.
  if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
    return null;
  }

  const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

  if (typeof posterTimeRange === 'number') {
    // poster time is already a single time value
    return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
  }

  // Accept the bounds in either order (and a lone value without '-'), so the
  // pick is always uniform over the inclusive range regardless of how the
  // site orders the two numbers.
  const bounds = String(posterTimeRange).split('-').map(Number);

  if (bounds.some(Number.isNaN)) {
    return null;
  }

  const min = Math.min(...bounds);
  const max = Math.max(...bounds);
  const posterTime = Math.floor(Math.random() * (max - min + 1)) + min;

  return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
function scrapeLatest(html, site) { function scrapeLatest(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('.thumbnails > div').toArray(); const scenesElements = $('.thumbnails > div').toArray();
@ -29,19 +51,27 @@ function scrapeLatest(html, site) {
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate(); const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
const sceneId = $(element).attr('data-content');
const posterElement = $(element).find('.thumbnail-avatar');
const poster = getPoster(posterElement, sceneId);
return { return {
url, url,
shootId, shootId,
entryId, entryId,
title, title,
date, date,
poster,
site, site,
}; };
}); });
} }
async function scrapeScene(html, url, site) { async function scrapeScene(html, url, site, useGallery) {
const $ = cheerio.load(html, { normalizeWhitespace: true }); const $ = cheerio.load(html, { normalizeWhitespace: true });
const playerObject = $('script:contains("new VideoPlayer")').html();
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
const originalTitle = $('h1.watchpage-title').text().trim(); const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle); const { shootId, title } = extractTitle(originalTitle);
@ -56,6 +86,15 @@ async function scrapeScene(html, url, site) {
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds(); const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
const photos = useGallery
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray(); const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const tags = await matchTags(rawTags); const tags = await matchTags(rawTags);
@ -67,6 +106,13 @@ async function scrapeScene(html, url, site) {
date, date,
actors, actors,
duration, duration,
poster,
photos,
trailer: {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
},
tags, tags,
site, site,
}; };
@ -79,9 +125,13 @@ async function fetchLatest(site, page = 1) {
} }
async function fetchScene(url, site) { async function fetchScene(url, site) {
const res = await bhttp.get(url); const useGallery = true;
return scrapeScene(res.body.toString(), url, site); const res = useGallery
? await bhttp.get(`${url}/gallery#gallery`)
: await bhttp.get(`${url}/screenshots#screenshots`);
return scrapeScene(res.body.toString(), url, site, useGallery);
} }
module.exports = { module.exports = {