Added media to LegalPorno scraper.
This commit is contained in:
parent
5b7880a37d
commit
a4d936523b
|
@ -1,4 +1,5 @@
|
|||
$primary: #ff886c;
|
||||
/* $primary: #ff886c; */
|
||||
$primary: #ff6c88;
|
||||
|
||||
$text: #222;
|
||||
$text-contrast: #fff;
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
/* $primary: #ff886c; */
|
||||
.filters-bar[data-v-5533e378] {
|
||||
display: block;
|
||||
background: rgba(0, 0, 0, 0.1);
|
||||
|
@ -27,7 +28,7 @@
|
|||
}
|
||||
.filters .toggle.active[data-v-5533e378] {
|
||||
color: #fff;
|
||||
background: #ff886c;
|
||||
background: #ff6c88;
|
||||
}
|
||||
.filter[data-v-5533e378] {
|
||||
display: inline-block;
|
||||
|
@ -142,6 +143,7 @@
|
|||
width: 300px;
|
||||
}
|
||||
|
||||
/* $primary: #ff886c; */
|
||||
.banner[data-v-2bc41e74] {
|
||||
background: #222;
|
||||
white-space: nowrap;
|
||||
|
@ -182,6 +184,7 @@
|
|||
width: .6rem;
|
||||
}
|
||||
|
||||
/* $primary: #ff886c; */
|
||||
.noselect {
|
||||
user-select: none;
|
||||
-webkit-user-select: none;
|
||||
|
@ -206,12 +209,13 @@ body {
|
|||
display: inline-block; }
|
||||
|
||||
.heading {
|
||||
color: #ff886c;
|
||||
color: #ff6c88;
|
||||
margin: 0 0 1rem 0; }
|
||||
|
||||
/* $primary: #ff886c; */
|
||||
.header[data-v-10b7ec04] {
|
||||
color: #fff;
|
||||
background: #ff886c;
|
||||
background: #ff6c88;
|
||||
padding: 1rem;
|
||||
}
|
||||
.logo-link[data-v-10b7ec04] {
|
||||
|
@ -241,6 +245,7 @@ body {
|
|||
overflow-y: auto;
|
||||
}
|
||||
|
||||
/* $primary: #ff886c; */
|
||||
.icon {
|
||||
fill: #222;
|
||||
display: inline-block;
|
||||
|
|
|
@ -142,10 +142,11 @@ async function storePhotos(release, releaseEntry) {
|
|||
async function storePoster(release, releaseEntry) {
|
||||
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
|
||||
|
||||
const { pathname } = new URL(release.poster);
|
||||
const mimetype = mime.getType(pathname);
|
||||
|
||||
const res = await bhttp.get(release.poster);
|
||||
|
||||
const { pathname } = new URL(release.poster);
|
||||
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
|
||||
|
||||
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);
|
||||
await fs.writeFile(path.join(config.photoPath, filepath), res.body);
|
||||
|
||||
|
|
|
@ -15,6 +15,28 @@ function extractTitle(originalTitle) {
|
|||
return { shootId, title };
|
||||
}
|
||||
|
||||
/**
 * Resolve a poster image URL for a scene thumbnail element.
 *
 * Two sources are tried, in order:
 *   1. The element's inline `style` attribute: the text between the first
 *      '(' and the final character is returned (presumably a
 *      `background-image: url(...)` declaration — confirm against the markup).
 *   2. The element's `data-casting` attribute: a JSON array whose entries are
 *      either a single number or a "start-end" string. One entry is picked at
 *      random; for a range, a random integer inside it (inclusive) is picked,
 *      and the result is appended to the casting URL for the scene.
 *
 * @param {object} posterElement - Wrapped element exposing `.attr(name)` (as used with cheerio here).
 * @param {string|number} sceneId - Scene identifier interpolated into the casting URL.
 * @returns {string|null} Poster URL, or `null` when neither attribute yields a usable value.
 * @throws {SyntaxError} If `data-casting` is present but is not valid JSON.
 */
function getPoster(posterElement, sceneId) {
    const posterStyle = posterElement.attr('style');

    if (posterStyle) {
        return posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
    }

    const posterRange = posterElement.attr('data-casting');
    const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

    // no style and no (usable) casting data: report "no poster" rather than
    // throwing, so one thumbnail without media cannot abort a whole listing
    if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
        return null;
    }

    const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

    if (typeof posterTimeRange === 'number') {
        // poster time is already a single time value
        return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
    }

    // normalise the bounds so the pick is uniform and inclusive regardless of
    // whether the range is written ascending or descending
    const bounds = String(posterTimeRange).split('-').map(Number);
    const min = Math.min(...bounds);
    const max = Math.max(...bounds);

    if (!Number.isFinite(min) || !Number.isFinite(max)) {
        return null;
    }

    const posterTime = Math.floor(Math.random() * (max - min + 1)) + min;

    return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
|
||||
|
||||
function scrapeLatest(html, site) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const scenesElements = $('.thumbnails > div').toArray();
|
||||
|
@ -29,19 +51,27 @@ function scrapeLatest(html, site) {
|
|||
|
||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||
|
||||
const sceneId = $(element).attr('data-content');
|
||||
const posterElement = $(element).find('.thumbnail-avatar');
|
||||
|
||||
const poster = getPoster(posterElement, sceneId);
|
||||
|
||||
return {
|
||||
url,
|
||||
shootId,
|
||||
entryId,
|
||||
title,
|
||||
date,
|
||||
poster,
|
||||
site,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapeScene(html, url, site) {
|
||||
async function scrapeScene(html, url, site, useGallery) {
|
||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||
const playerObject = $('script:contains("new VideoPlayer")').html();
|
||||
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
|
||||
|
||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||
const { shootId, title } = extractTitle(originalTitle);
|
||||
|
@ -56,6 +86,15 @@ async function scrapeScene(html, url, site) {
|
|||
|
||||
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
|
||||
|
||||
const posterStyle = $('#player').attr('style');
|
||||
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||
|
||||
const photos = useGallery
|
||||
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
|
||||
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
|
||||
|
||||
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
|
||||
|
||||
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||
const tags = await matchTags(rawTags);
|
||||
|
||||
|
@ -67,6 +106,13 @@ async function scrapeScene(html, url, site) {
|
|||
date,
|
||||
actors,
|
||||
duration,
|
||||
poster,
|
||||
photos,
|
||||
trailer: {
|
||||
src: trailer.src,
|
||||
type: trailer.type,
|
||||
quality: trailer.quality === 'vga' ? 480 : 720,
|
||||
},
|
||||
tags,
|
||||
site,
|
||||
};
|
||||
|
@ -79,9 +125,13 @@ async function fetchLatest(site, page = 1) {
|
|||
}
|
||||
|
||||
async function fetchScene(url, site) {
|
||||
const res = await bhttp.get(url);
|
||||
const useGallery = true;
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site);
|
||||
const res = useGallery
|
||||
? await bhttp.get(`${url}/gallery#gallery`)
|
||||
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||
|
||||
return scrapeScene(res.body.toString(), url, site, useGallery);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
Loading…
Reference in New Issue