Added American Pornstar. Improved Jules Jordan scraper to accommodate American Pornstar. Changed entity logo mogrify settings to ensure both minimum height and width.
This commit is contained in:
65
src/scrapers/americanpornstar.js
Normal file
65
src/scrapers/americanpornstar.js
Normal file
@@ -0,0 +1,65 @@
|
||||
'use strict';
|
||||
|
||||
const { fetchLatest, fetchProfile } = require('./julesjordan');
|
||||
|
||||
const qu = require('../utils/q');
|
||||
const slugify = require('../utils/slugify');
|
||||
|
||||
/**
 * Builds a release object from a scene page.
 *
 * Every field is read through the `query` helper supplied by the caller; a field whose
 * source element is missing is skipped instead of throwing, so a partially populated
 * page still yields a usable release.
 *
 * @param {object} context - Page context; only its `query` helper is used.
 * @param {object} channel - Channel entity; `channel.url` is used as the origin for media URLs.
 * @returns {object} The release with title, description, date, actors, entryId, photos and
 *   tags, plus duration, poster and trailer when they could be extracted.
 */
function scrapeScene({ query }, channel) {
  const release = {};
  const title = query.cnt('.update_title', false);

  // The title may be absent; only trim when there is actually a string to trim.
  release.title = typeof title === 'string' ? title.trim() : null;
  release.description = query.cnt('.latest_update_description');

  release.date = query.date('.update_date', 'MM/DD/YYYY');
  release.actors = query.all('.tour_update_models a').map((actorEl) => ({
    name: query.cnt(actorEl),
    url: query.url(actorEl, null),
  }));

  // The entry ID is derived from the title, matching the latest-page scraper when it is
  // called with entryIdFromTitle enabled.
  release.entryId = slugify(release.title);

  // Duration is listed in minutes inside the counts table; leave it unset when not found.
  const minutes = query.cnt('.update_counts_preview_table')?.match(/(\d+) min/)?.[1];

  if (minutes) {
    release.duration = Number(minutes) * 60;
  }

  const poster = query.img('.large_update_thumb', 'src', { origin: channel.url });
  const trailer = query.q('.update_image a', 'onclick')?.match(/'(.+)'/)?.[1];

  if (poster) {
    // Try the '-full' variant first, then fall back to the thumbnail itself.
    release.poster = [
      poster.replace(/\.jpg$/, '-full.jpg'),
      poster,
    ];
  }

  release.photos = query.imgs('.small_update_thumb', 'src', { origin: channel.url }).map((img) => [
    img.replace(/\.jpg$/, '-full.jpg'),
    img,
  ]);

  if (trailer) {
    // The path is taken from an inline onclick handler and may contain spaces, hence encodeURI.
    release.trailer = qu.prefixUrl(encodeURI(trailer), channel.url);
  }

  release.tags = query.cnts('.tour_update_tags a');

  return release;
}
|
||||
|
||||
/**
 * Fetches the latest releases through the shared Jules Jordan scraper.
 *
 * Passes `true` as the third argument (`entryIdFromTitle`) so entry IDs on the listing
 * are slugified titles, consistent with the entry IDs produced by this module's scene scraper.
 *
 * @param {object} channel - Channel entity forwarded to the shared scraper.
 * @param {number} [page] - Page number forwarded as-is; the shared scraper applies its own default.
 * @returns {Promise<*>} Whatever the shared `fetchLatest` resolves to.
 */
async function fetchLatestLocal(channel, page) {
  return fetchLatest(channel, page, true);
}
|
||||
|
||||
/**
 * Fetches a scene page and scrapes it into a release.
 *
 * @param {string} url - Scene page URL.
 * @param {object} channel - Channel entity passed on to the scene scraper.
 * @returns {Promise<object|*>} The scraped release when the request succeeded, otherwise
 *   the response's `status` value.
 */
async function fetchScene(url, channel) {
  const res = await qu.get(url);

  if (res.ok) {
    return scrapeScene(res.item, channel);
  }

  // On failure, callers receive the status instead of a release.
  return res.status;
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest: fetchLatestLocal,
|
||||
fetchScene,
|
||||
fetchProfile, // don't scrape scenes, no URL
|
||||
};
|
||||
@@ -7,7 +7,7 @@ const cheerio = require('cheerio');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const moment = require('moment');
|
||||
|
||||
const { get, geta, ctxa, parseDate } = require('../utils/q');
|
||||
const { get, geta, ctxa, parseDate, prefixUrl } = require('../utils/q');
|
||||
const http = require('../utils/http');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
@@ -134,16 +134,16 @@ function getEntryId(html) {
|
||||
return null;
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site) {
|
||||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
return scenes.map(({ el, qu }) => {
|
||||
const release = {};
|
||||
|
||||
release.entryId = el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
||||
|
||||
release.url = qu.url('.update_title a, .dvd_info > a, a ~ a');
|
||||
release.title = qu.q('.update_title a, .dvd_info > a, a ~ a', true);
|
||||
release.date = qu.date('.update_date', 'MM/DD/YYYY');
|
||||
|
||||
release.entryId = (entryIdFromTitle && slugify(release.title)) || el.dataset.setid || qu.q('.rating_box')?.dataset.id;
|
||||
|
||||
release.actors = qu.all('.update_models a', true);
|
||||
|
||||
const dvdPhotos = qu.imgs('.dvd_preview_thumb');
|
||||
@@ -153,11 +153,22 @@ function scrapeAll(scenes, site) {
|
||||
? dvdPhotos
|
||||
: Array.from({ length: photoCount }).map((value, index) => {
|
||||
const src = qu.img('a img.thumbs', `src${index}_1x`) || qu.img('a img.thumbs', `src${index}`) || qu.img('a img.thumbs');
|
||||
const prefixedSrc = prefixUrl(src, site.url);
|
||||
|
||||
return src ? {
|
||||
src: /^http/.test(src) ? src : `${site.url}${src}`,
|
||||
referer: site.url,
|
||||
} : null;
|
||||
if (src) {
|
||||
return [
|
||||
{
|
||||
src: prefixedSrc.replace(/.jpg$/, '-full.jpg'),
|
||||
referer: site.url,
|
||||
},
|
||||
{
|
||||
src: prefixedSrc,
|
||||
referer: site.url,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
return null;
|
||||
}).filter(Boolean);
|
||||
|
||||
const teaserScript = qu.html('script');
|
||||
@@ -301,11 +312,11 @@ function scrapeMovie({ el, query }, url, site) {
|
||||
};
|
||||
}
|
||||
|
||||
function scrapeProfile(html, url, actorName) {
|
||||
function scrapeProfile(html, url, actorName, entity) {
|
||||
const { document } = new JSDOM(html).window;
|
||||
|
||||
const bio = document.querySelector('.model_bio').textContent;
|
||||
const avatarEl = document.querySelector('.model_bio_pic img');
|
||||
const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb');
|
||||
|
||||
const profile = {
|
||||
name: actorName,
|
||||
@@ -336,7 +347,9 @@ function scrapeProfile(html, url, actorName) {
|
||||
avatarEl.getAttribute('src0_1x'),
|
||||
avatarEl.getAttribute('src0'),
|
||||
avatarEl.getAttribute('src'),
|
||||
].filter(Boolean);
|
||||
]
|
||||
.filter(avatar => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
|
||||
.map(avatar => prefixUrl(avatar, entity.url));
|
||||
|
||||
if (avatarSources.length) profile.avatar = avatarSources;
|
||||
}
|
||||
@@ -346,7 +359,7 @@ function scrapeProfile(html, url, actorName) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1) {
|
||||
async function fetchLatest(site, page = 1, entryIdFromTitle = false) {
|
||||
const url = site.parameters?.latest
|
||||
? util.format(site.parameters.latest, page)
|
||||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||
@@ -354,7 +367,7 @@ async function fetchLatest(site, page = 1) {
|
||||
// const res = await bhttp.get(url);
|
||||
const res = await geta(url, '.update_details');
|
||||
|
||||
return res.ok ? scrapeAll(res.items, site) : res.status;
|
||||
return res.ok ? scrapeAll(res.items, site, entryIdFromTitle) : res.status;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site) {
|
||||
@@ -382,30 +395,35 @@ async function fetchMovie(url, site) {
|
||||
return res.ok ? scrapeMovie(res.item, url, site) : res.status;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName }) {
|
||||
const actorSlugA = slugify(actorName, '-');
|
||||
const actorSlugB = slugify(actorName, '');
|
||||
async function fetchProfile({ name: actorName, url }, entity) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName, '-');
|
||||
|
||||
const urlA = `https://julesjordan.com/trial/models/${actorSlugA}.html`;
|
||||
const urlB = `https://julesjordan.com/trial/models/${actorSlugB}.html`;
|
||||
const urls = [
|
||||
url,
|
||||
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugA}.html`,
|
||||
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugB}.html`,
|
||||
];
|
||||
|
||||
const resA = await bhttp.get(urlA);
|
||||
return urls.reduce(async (chain, profileUrl) => {
|
||||
const profile = await chain;
|
||||
|
||||
if (resA.statusCode === 200) {
|
||||
const profile = scrapeProfile(resA.body.toString(), urlA, actorName);
|
||||
if (profile) {
|
||||
return profile;
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
if (!profileUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const resB = await bhttp.get(urlB);
|
||||
const res = await http.get(profileUrl);
|
||||
|
||||
if (resB.statusCode === 200) {
|
||||
const profile = scrapeProfile(resB.body.toString(), urlB, actorName);
|
||||
if (res.statusCode === 200) {
|
||||
return scrapeProfile(res.body.toString(), profileUrl, actorName, entity);
|
||||
}
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;
|
||||
}, Promise.resolve());
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -4,6 +4,7 @@ const adulttime = require('./adulttime');
|
||||
const assylum = require('./assylum');
|
||||
const aziani = require('./aziani');
|
||||
const amateurallure = require('./amateurallure');
|
||||
const americanpornstar = require('./americanpornstar');
|
||||
const babes = require('./babes');
|
||||
const bamvisions = require('./bamvisions');
|
||||
const bang = require('./bang');
|
||||
@@ -80,6 +81,7 @@ module.exports = {
|
||||
'21sextury': sextury,
|
||||
adulttime,
|
||||
amateurallure,
|
||||
americanpornstar,
|
||||
amateureuro: porndoe,
|
||||
assylum,
|
||||
aziani,
|
||||
@@ -158,6 +160,7 @@ module.exports = {
|
||||
'21sextury': sextury,
|
||||
allanal: mikeadriano,
|
||||
amateureuro: porndoe,
|
||||
americanpornstar,
|
||||
analbbc: fullpornnetwork,
|
||||
analized: fullpornnetwork,
|
||||
analviolation: fullpornnetwork,
|
||||
|
||||
@@ -40,6 +40,10 @@ function formatDate(dateValue, format, inputFormat) {
|
||||
}
|
||||
|
||||
function prefixUrl(urlValue, origin, protocol = 'https') {
|
||||
if (/^http/.test(urlValue)) {
|
||||
return urlValue;
|
||||
}
|
||||
|
||||
if (protocol && /^\/\//.test(urlValue)) {
|
||||
return `${protocol}:${urlValue}`;
|
||||
}
|
||||
@@ -48,7 +52,7 @@ function prefixUrl(urlValue, origin, protocol = 'https') {
|
||||
return `${origin}${urlValue}`;
|
||||
}
|
||||
|
||||
return urlValue;
|
||||
return `${origin}/${urlValue}`;
|
||||
}
|
||||
|
||||
function q(context, selector, attrArg, applyTrim = true) {
|
||||
|
||||
@@ -39,13 +39,18 @@ const substitutes = {
|
||||
function slugify(string, delimiter = '-', {
|
||||
encode = false,
|
||||
removeAccents = true,
|
||||
removePunctuation = false,
|
||||
limit = 1000,
|
||||
} = {}) {
|
||||
if (!string || typeof string !== 'string') {
|
||||
return string;
|
||||
}
|
||||
|
||||
const slugComponents = string.trim().toLowerCase().match(/[A-Za-zÀ-ÖØ-öø-ÿ0-9]+/g);
|
||||
const slugComponents = string
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(removePunctuation && /[.,:;'"]/g, '')
|
||||
.match(/[A-Za-zÀ-ÖØ-öø-ÿ0-9]+/g);
|
||||
|
||||
if (!slugComponents) {
|
||||
return '';
|
||||
|
||||
Reference in New Issue
Block a user