Added American Pornstar. Improved Jules Jordan scraper to accommodate American Pornstar. Changed entity logo mogrify settings to ensure both minimum height and width.

This commit is contained in:
DebaucheryLibrarian
2020-09-05 01:56:54 +02:00
parent 3ddba0816e
commit e90bb63a8f
2508 changed files with 139 additions and 34 deletions
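
The logo mogrify change itself is not part of the file shown below. As a sketch of the relevant ImageMagick behavior: the `^` geometry modifier turns the given width and height into minimums rather than maximums, preserving aspect ratio. The `500x500` value and the direct `child_process` invocation are illustrative assumptions, not the project's actual settings:

const { execFile } = require('child_process');

// '^' makes 500x500 a lower bound for both dimensions (aspect ratio preserved),
// so the result is at least 500px wide *and* at least 500px tall.
execFile('mogrify', ['-resize', '500x500^', 'logo.png'], (error) => {
	if (error) throw error;
});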


@@ -7,7 +7,7 @@ const cheerio = require('cheerio');
 const { JSDOM } = require('jsdom');
 const moment = require('moment');
-const { get, geta, ctxa, parseDate } = require('../utils/q');
+const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
 const http = require('../utils/http');
 const { heightToCm } = require('../utils/convert');
 const slugify = require('../utils/slugify');
@@ -134,16 +134,16 @@ function getEntryId(html) {
 	return null;
 }
 
-function scrapeAll(scenes, site) {
+function scrapeAll(scenes, site, entryIdFromTitle) {
 	return scenes.map(({ el, qu }) => {
 		const release = {};
 
-		release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;
-
 		release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
 		release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
 		release.date = qu.date('.update_date', 'MM/DD/YYYY');
 
+		release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
+
 		release.actors = qu.all('.update_models a', true);
 
 		const dvdPhotos = qu.imgs('.dvd_preview_thumb');
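
The new `entryIdFromTitle` path derives the entry ID from the slugified scene title, for sites (such as American Pornstar) whose markup exposes neither a `data-setid` attribute nor a `.rating_box` ID. The slugification below is an assumed behavior, inferred from `fetchProfile` further down where the second argument appears to select the delimiter; the titles are made up:

const slugify = require('../utils/slugify');

// Assumed behavior of the project's slugify helper:
slugify('An Example Scene Title');     // 'an-example-scene-title' (assuming '-' is the default delimiter)
slugify('An Example Scene Title', ''); // 'anexamplescenetitle'
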
@@ -153,11 +153,22 @@ function scrapeAll(scenes, site) {
 			? dvdPhotos
 			: Array.from({ length: photoCount }).map((value, index) => {
 				const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
+				const prefixedSrc = prefixUrl(src, site.url);
 
-				return src ? {
-					src: /^http/.test(src) ? src : `${site.url}${src}`,
-					referer: site.url,
-				} : null;
+				if (src) {
+					return [
+						{
+							src: prefixedSrc.replace(/\.jpg$/, '-full.jpg'),
+							referer: site.url,
+						},
+						{
+							src: prefixedSrc,
+							referer: site.url,
+						},
+					];
+				}
+
+				return null;
 			}).filter(Boolean);
 
 		const teaserScript = qu.html('script');
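
`prefixUrl` is newly imported from `../utils/q` but its implementation is not part of this diff. A minimal sketch of what it presumably does, inferred from the inline expression it replaces:

// Assumed implementation, mirroring the replaced check
// (/^http/.test(src) ? src : `${site.url}${src}`); the real helper lives in ../utils/q.
function prefixUrl(url, origin) {
	if (!url) return null; // tolerate a missing source, as scrapeAll may pass null
	return /^http/.test(url) ? url : `${origin}${url}`;
}
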
@@ -301,11 +312,11 @@ function scrapeMovie({ el, query }, url, site) {
 	};
 }
 
-function scrapeProfile(html, url, actorName) {
+function scrapeProfile(html, url, actorName, entity) {
 	const { document } = new JSDOM(html).window;
 
 	const bio = document.querySelector('.model_bio').textContent;
-	const avatarEl = document.querySelector('.model_bio_pic img');
+	const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb');
 
 	const profile = {
 		name: actorName,
@@ -336,7 +347,9 @@ function scrapeProfile(html, url, actorName) {
 			avatarEl.getAttribute('src0_1x'),
 			avatarEl.getAttribute('src0'),
 			avatarEl.getAttribute('src'),
-		].filter(Boolean);
+		]
+			.filter(avatar => avatar && !/p\d+\.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
+			.map(avatar => prefixUrl(avatar, entity.url));
 
 		if (avatarSources.length) profile.avatar = avatarSources;
 	}
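
The new filter drops both missing attributes and placeholder images whose filenames are `p` followed by digits. An illustration with made-up paths:

const sources = ['/models/photos/jane01.jpg', '/images/p12.jpg', null];

sources.filter(avatar => avatar && !/p\d+\.jpe?g/.test(avatar));
// => ['/models/photos/jane01.jpg']
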
@@ -346,7 +359,7 @@ function scrapeProfile(html, url, actorName) {
 	return profile;
 }
 
-async function fetchLatest(site, page = 1) {
+async function fetchLatest(site, page = 1, entryIdFromTitle = false) {
 	const url = site.parameters?.latest
 		? util.format(site.parameters.latest, page)
 		: `${site.url}/trial/categories/movies_${page}_d.html`;
@@ -354,7 +367,7 @@ async function fetchLatest(site, page = 1) {
 	// const res = await bhttp.get(url);
 	const res = await geta(url, '.update_details');
 
-	return res.ok ? scrapeAll(res.items, site) : res.status;
+	return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
 }
 
 async function fetchUpcoming(site) {
@@ -382,30 +395,35 @@ async function fetchMovie(url, site) {
 	return res.ok ? scrapeMovie(res.item, url, site) : res.status;
 }
 
-async function fetchProfile({ name: actorName }) {
-	const actorSlugA = slugify(actorName, '-');
-	const actorSlugB = slugify(actorName, '');
+async function fetchProfile({ name: actorName, url }, entity) {
+	const actorSlugA = slugify(actorName, '');
+	const actorSlugB = slugify(actorName, '-');
 
-	const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
-	const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
+	const urls = [
+		url,
+		`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugA}.html`,
+		`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugB}.html`,
+	];
 
-	const resA = await bhttp.get(urlA);
+	return urls.reduce(async (chain, profileUrl) => {
+		const profile = await chain;
 
-	if (resA.statusCode === 200) {
-		const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
+		if (profile) {
+			return profile;
+		}
 
-		return profile;
-	}
+		if (!profileUrl) {
+			return null;
+		}
 
-	const resB = await bhttp.get(urlB);
+		const res = await http.get(profileUrl);
 
-	if (resB.statusCode === 200) {
-		const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
+		if (res.statusCode === 200) {
+			return scrapeProfile(res.body.toString(), profileUrl, actorName, entity);
+		}
 
-		return profile;
-	}
-
-	return null;
+		return null;
+	}, Promise.resolve());
 }
 
 module.exports = {
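
The `reduce` over `urls` chains the lookups sequentially: each candidate URL is only fetched once every earlier candidate has failed to produce a profile, and a falsy candidate (such as the optional `url` argument being undefined) is skipped. A standalone sketch of the same pattern; `firstSuccessful` and `attempt` are hypothetical names, not part of the codebase:

// Tries candidates in order, resolving to the first truthy result from attempt().
async function firstSuccessful(candidates, attempt) {
	return candidates.reduce(async (chain, candidate) => {
		const result = await chain;

		if (result) return result; // an earlier candidate already succeeded
		if (!candidate) return null; // skip empty slots

		return attempt(candidate); // stand-in for the HTTP fetch + scrape
	}, Promise.resolve());
}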