Added media to LegalPorno scraper.
This commit is contained in:
parent
5b7880a37d
commit
a4d936523b
|
@ -1,4 +1,5 @@
|
||||||
$primary: #ff886c;
|
/* $primary: #ff886c; */
|
||||||
|
$primary: #ff6c88;
|
||||||
|
|
||||||
$text: #222;
|
$text: #222;
|
||||||
$text-contrast: #fff;
|
$text-contrast: #fff;
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
/* $primary: #ff886c; */
|
||||||
.filters-bar[data-v-5533e378] {
|
.filters-bar[data-v-5533e378] {
|
||||||
display: block;
|
display: block;
|
||||||
background: rgba(0, 0, 0, 0.1);
|
background: rgba(0, 0, 0, 0.1);
|
||||||
|
@ -27,7 +28,7 @@
|
||||||
}
|
}
|
||||||
.filters .toggle.active[data-v-5533e378] {
|
.filters .toggle.active[data-v-5533e378] {
|
||||||
color: #fff;
|
color: #fff;
|
||||||
background: #ff886c;
|
background: #ff6c88;
|
||||||
}
|
}
|
||||||
.filter[data-v-5533e378] {
|
.filter[data-v-5533e378] {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
|
@ -142,6 +143,7 @@
|
||||||
width: 300px;
|
width: 300px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* $primary: #ff886c; */
|
||||||
.banner[data-v-2bc41e74] {
|
.banner[data-v-2bc41e74] {
|
||||||
background: #222;
|
background: #222;
|
||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
|
@ -182,6 +184,7 @@
|
||||||
width: .6rem;
|
width: .6rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* $primary: #ff886c; */
|
||||||
.noselect {
|
.noselect {
|
||||||
user-select: none;
|
user-select: none;
|
||||||
-webkit-user-select: none;
|
-webkit-user-select: none;
|
||||||
|
@ -206,12 +209,13 @@ body {
|
||||||
display: inline-block; }
|
display: inline-block; }
|
||||||
|
|
||||||
.heading {
|
.heading {
|
||||||
color: #ff886c;
|
color: #ff6c88;
|
||||||
margin: 0 0 1rem 0; }
|
margin: 0 0 1rem 0; }
|
||||||
|
|
||||||
|
/* $primary: #ff886c; */
|
||||||
.header[data-v-10b7ec04] {
|
.header[data-v-10b7ec04] {
|
||||||
color: #fff;
|
color: #fff;
|
||||||
background: #ff886c;
|
background: #ff6c88;
|
||||||
padding: 1rem;
|
padding: 1rem;
|
||||||
}
|
}
|
||||||
.logo-link[data-v-10b7ec04] {
|
.logo-link[data-v-10b7ec04] {
|
||||||
|
@ -241,6 +245,7 @@ body {
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* $primary: #ff886c; */
|
||||||
.icon {
|
.icon {
|
||||||
fill: #222;
|
fill: #222;
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
|
|
|
@ -142,10 +142,11 @@ async function storePhotos(release, releaseEntry) {
|
||||||
async function storePoster(release, releaseEntry) {
|
async function storePoster(release, releaseEntry) {
|
||||||
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
|
console.log(`Storing poster for (${release.site.name}, ${releaseEntry.id}) "${release.title}"`);
|
||||||
|
|
||||||
const { pathname } = new URL(release.poster);
|
|
||||||
const mimetype = mime.getType(pathname);
|
|
||||||
|
|
||||||
const res = await bhttp.get(release.poster);
|
const res = await bhttp.get(release.poster);
|
||||||
|
|
||||||
|
const { pathname } = new URL(release.poster);
|
||||||
|
const mimetype = res.headers['content-type'] || mime.getType(pathname) || 'image/jpeg';
|
||||||
|
|
||||||
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);
|
const filepath = path.join(release.site.slug, releaseEntry.id.toString(), `poster.${mime.getExtension(mimetype)}`);
|
||||||
await fs.writeFile(path.join(config.photoPath, filepath), res.body);
|
await fs.writeFile(path.join(config.photoPath, filepath), res.body);
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,28 @@ function extractTitle(originalTitle) {
|
||||||
return { shootId, title };
|
return { shootId, title };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve a poster image URL for a scene thumbnail.
 *
 * Two sources are tried, in order:
 *   1. The element's inline `style` attribute: the text between the first `(`
 *      and the last `)` is returned (presumably a CSS `url(...)` value —
 *      surrounding quotes and whitespace are stripped).
 *   2. The element's `data-casting` attribute: parsed as a JSON array whose
 *      entries are either a number or a `"a-b"` string range. One entry is
 *      picked at random; for a range, a random integer between both bounds
 *      (inclusive, in either order) is picked. The value is appended to the
 *      casting URL for `sceneId`.
 *      NOTE(review): the exact `data-casting` schema is inferred from how this
 *      function consumes it — confirm against the live markup.
 *
 * @param {{ attr: function(string): (string|undefined) }} posterElement
 *   Wrapped element exposing an `attr(name)` accessor (e.g. a cheerio selection).
 * @param {string|number} sceneId Identifier interpolated into the casting URL.
 * @returns {string|null} Poster URL, or `null` when neither source yields one.
 * @throws {SyntaxError} When `data-casting` is present but is not valid JSON.
 */
function getPoster(posterElement, sceneId) {
    const posterStyle = posterElement.attr('style');

    if (posterStyle) {
        const openIndex = posterStyle.indexOf('(');
        const closeIndex = posterStyle.lastIndexOf(')');

        // Slice up to the closing parenthesis rather than "everything but the
        // last character", so a trailing `;` or whitespace does not leak into the URL.
        if (openIndex !== -1 && closeIndex > openIndex) {
            return posterStyle
                .slice(openIndex + 1, closeIndex)
                .trim()
                .replace(/^['"]|['"]$/g, '');
        }
    }

    const posterRange = posterElement.attr('data-casting');
    const posterRangeData = posterRange ? JSON.parse(posterRange) : null;

    // Missing or empty casting data: report "no poster" instead of throwing,
    // so one thumbnail without media does not abort scraping the whole page.
    if (!Array.isArray(posterRangeData) || posterRangeData.length === 0) {
        return null;
    }

    const posterTimeRange = posterRangeData[Math.floor(Math.random() * posterRangeData.length)];

    if (typeof posterTimeRange === 'number') {
        // poster time is already a single time value
        return `https://legalporno.com/casting/${sceneId}/${posterTimeRange}`;
    }

    const bounds = String(posterTimeRange)
        .split('-')
        .map(part => part.trim())
        .filter(part => part !== '')
        .map(Number)
        .filter(Number.isFinite);

    if (bounds.length === 0) {
        return null;
    }

    // Order-agnostic bounds: works for "3-7", "7-3" and a lone "5".
    const min = Math.min(...bounds);
    const max = Math.max(...bounds);
    const posterTime = Math.floor(Math.random() * (max - min + 1)) + min;

    return `https://legalporno.com/casting/${sceneId}/${posterTime}`;
}
|
||||||
|
|
||||||
function scrapeLatest(html, site) {
|
function scrapeLatest(html, site) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
const scenesElements = $('.thumbnails > div').toArray();
|
const scenesElements = $('.thumbnails > div').toArray();
|
||||||
|
@ -29,19 +51,27 @@ function scrapeLatest(html, site) {
|
||||||
|
|
||||||
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
const date = moment.utc($(element).attr('release'), 'YYYY/MM/DD').toDate();
|
||||||
|
|
||||||
|
const sceneId = $(element).attr('data-content');
|
||||||
|
const posterElement = $(element).find('.thumbnail-avatar');
|
||||||
|
|
||||||
|
const poster = getPoster(posterElement, sceneId);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
shootId,
|
shootId,
|
||||||
entryId,
|
entryId,
|
||||||
title,
|
title,
|
||||||
date,
|
date,
|
||||||
|
poster,
|
||||||
site,
|
site,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeScene(html, url, site) {
|
async function scrapeScene(html, url, site, useGallery) {
|
||||||
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
const $ = cheerio.load(html, { normalizeWhitespace: true });
|
||||||
|
const playerObject = $('script:contains("new VideoPlayer")').html();
|
||||||
|
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
|
||||||
|
|
||||||
const originalTitle = $('h1.watchpage-title').text().trim();
|
const originalTitle = $('h1.watchpage-title').text().trim();
|
||||||
const { shootId, title } = extractTitle(originalTitle);
|
const { shootId, title } = extractTitle(originalTitle);
|
||||||
|
@ -56,6 +86,15 @@ async function scrapeScene(html, url, site) {
|
||||||
|
|
||||||
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
|
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
|
||||||
|
|
||||||
|
const posterStyle = $('#player').attr('style');
|
||||||
|
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
|
||||||
|
|
||||||
|
const photos = useGallery
|
||||||
|
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
|
||||||
|
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
|
||||||
|
|
||||||
|
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
|
||||||
|
|
||||||
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
const rawTags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
|
||||||
const tags = await matchTags(rawTags);
|
const tags = await matchTags(rawTags);
|
||||||
|
|
||||||
|
@ -67,6 +106,13 @@ async function scrapeScene(html, url, site) {
|
||||||
date,
|
date,
|
||||||
actors,
|
actors,
|
||||||
duration,
|
duration,
|
||||||
|
poster,
|
||||||
|
photos,
|
||||||
|
trailer: {
|
||||||
|
src: trailer.src,
|
||||||
|
type: trailer.type,
|
||||||
|
quality: trailer.quality === 'vga' ? 480 : 720,
|
||||||
|
},
|
||||||
tags,
|
tags,
|
||||||
site,
|
site,
|
||||||
};
|
};
|
||||||
|
@ -79,9 +125,13 @@ async function fetchLatest(site, page = 1) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const res = await bhttp.get(url);
|
const useGallery = true;
|
||||||
|
|
||||||
return scrapeScene(res.body.toString(), url, site);
|
const res = useGallery
|
||||||
|
? await bhttp.get(`${url}/gallery#gallery`)
|
||||||
|
: await bhttp.get(`${url}/screenshots#screenshots`);
|
||||||
|
|
||||||
|
return scrapeScene(res.body.toString(), url, site, useGallery);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
Loading…
Reference in New Issue