368 lines
13 KiB
JavaScript
Executable File
368 lines
13 KiB
JavaScript
Executable File
'use strict';
|
|
|
|
const util = require('util');
|
|
const unprint = require('unprint');
|
|
|
|
const argv = require('../argv');
|
|
const { heightToCm } = require('../utils/convert');
|
|
const slugify = require('../utils/slugify');
|
|
const tryUrls = require('../utils/try-urls');
|
|
|
|
/**
 * Derives a scene entry ID from a scene URL.
 *
 * The ID is the slugified part of the URL's last path segment that precedes `_vids.html`.
 *
 * @param {string|null|undefined} url - Scene URL; absolute or relative.
 * @returns {string|null} The slugified entry ID, or null when the URL is missing, cannot be
 *   parsed, or its last path segment does not contain `_vids.html`.
 */
function getEntryId(url) {
  if (!url) {
    return null;
  }

  let pathname;

  try {
    // only the path is inspected, so a placeholder base lets relative hrefs parse instead of throwing
    ({ pathname } = new URL(url, 'http://localhost'));
  } catch {
    // an unparsable URL should not abort the scrape of every other scene on the page
    return null;
  }

  const entryId = pathname.split('/').at(-1).match(/(.*?)_vids\.html/);

  if (entryId) {
    return slugify(entryId[1]);
  }

  return null;
}
|
|
|
|
/**
 * Builds base releases from the scene cards of a listing or profile page.
 *
 * @param {Array<{query: object}>} scenes - Card contexts; only the `query` helper of each is used.
 * @param {{url: string}} site - Entity the cards belong to; `site.url` prefixes thumbnail
 *   paths and is sent as referer.
 * @returns {object[]} One release per card with title, url, date, entryId, actors, poster,
 *   photos and teaser. Poster and photos are lists of fallback sources, best guess first.
 */
function scrapeAll(scenes, site) {
  return scenes.map(({ query }) => {
    const release = {};

    const title = query.content('.jj-card-title, .content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
      || query.content('a[title*=" "]');

    // cut the title off where a "Starring:" credit begins
    release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
    release.url = query.url('.jj-card-thumb, .content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
    release.date = query.date('.jj-card-date, .update_date', ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);

    release.entryId = getEntryId(release.url);

    release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
      name: unprint.query.content(actorEl),
      url: unprint.query.url(actorEl, null),
    }));

    const dvdPhotos = query.imgs('.dvd_preview_thumb');
    const photoCount = Number(query.attribute('a img.thumbs', 'cnt')) || 1;

    // the first image becomes the poster, the remainder the photos
    [release.poster, ...release.photos] = dvdPhotos.length
      ? dvdPhotos
      : Array.from({ length: photoCount }, (_value, index) => {
        const src = query.img('a img.thumbs', { attribute: `src${index}_1x` })
          || query.img('a img.thumbs', { attribute: `src${index}` })
          || query.img('a img.thumbs');

        if (!src) {
          return null;
        }

        const prefixedSrc = unprint.prefixUrl(src, site.url);

        // guessed larger variants first, the thumbnail itself last; the Set drops guesses that did not change the URL
        return Array.from(new Set([
          prefixedSrc.replace(/\.jpg$/, '-full.jpg'),
          prefixedSrc.replace(/-1x\.jpg$/, '-4x.jpg'),
          prefixedSrc.replace(/-1x\.jpg$/, '-2x.jpg'),
          prefixedSrc,
        ])).map((source) => ({
          src: source,
          referer: site.url,
          verifyType: 'image',
        }));
      }).filter(Boolean);

    release.teaser = query.video('.jj-card-video', { attribute: 'data-src' });

    if (!release.teaser) {
      const teaserScript = query.html('script');

      if (teaserScript) {
        const start = teaserScript.indexOf('http');
        const end = start === -1 ? -1 : teaserScript.indexOf('.mp4', start);

        // only slice when both markers are present; an unchecked -1 used to produce a garbage substring
        if (end !== -1) {
          release.teaser = teaserScript.slice(start, end + 4);
        }
      }
    }

    return release;
  });
}
|
|
|
|
/**
 * Fetches one page of the latest-scenes listing and scrapes its scene cards.
 *
 * @param {object} site - Entity to fetch; `site.parameters.latest` is an optional util.format
 *   template that receives the page number, otherwise a default path under `site.url` is used.
 * @param {number} [page=1] - Listing page number.
 * @param {*} include - Not used by this function.
 * @param {*} preData - Not used by this function.
 * @param {boolean} [entryIdFromTitle=false] - Fallback for `site.parameters.entryIdFromTitle`;
 *   forwarded to scrapeAll.
 * @returns {Promise<object[]|number>} Scraped releases, or the response status when the request failed.
 */
async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) {
  const { parameters } = site;

  const listingUrl = parameters?.latest
    ? util.format(parameters.latest, page)
    : `${site.url}/trial/categories/movies_${page}_d.html`;

  const res = await unprint.get(listingUrl, { selectAll: '.scenes-listing-grid .jj-content-card' });

  if (!res.ok) {
    return res.status;
  }

  // an explicit boolean in the site parameters wins over the argument
  const useTitleForEntryId = typeof parameters?.entryIdFromTitle === 'boolean'
    ? parameters.entryIdFromTitle
    : entryIdFromTitle;

  return scrapeAll(res.context, site, useTitleForEntryId);
}
|
|
|
|
/* disable until we have entryId solution
|
|
function scrapeUpcoming(scenes, _channel) {
|
|
return scenes.map(({ query, html }) => {
|
|
const release = {};
|
|
|
|
release.title = query.text('.overlay-text', { join: false })?.[0];
|
|
release.date = query.date('.overlay-text', ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
|
|
|
release.actors = query.all('.update_models a').map((actorEl) => ({
|
|
name: unprint.query.content(actorEl),
|
|
url: unprint.query.url(actorEl, null),
|
|
}));
|
|
|
|
release.poster = query.img('img') || query.img('img', { attribute: 'src0_1x' });
|
|
release.teaser = html.match(/src=['"](https:\/\/.*\.mp4)['"]/)?.[1];
|
|
|
|
release.entryId = channel.parameters?.entryIdFromTitle
|
|
? getEntryIdFromTitle(release)
|
|
: getEntryId(html);
|
|
|
|
return release;
|
|
});
|
|
}
|
|
|
|
async function fetchUpcoming(site) {
|
|
if (site.parameters?.upcoming === false) return null;
|
|
|
|
const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
|
|
const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div' });
|
|
|
|
if (res.ok) {
|
|
return scrapeUpcoming(res.context, site);
|
|
}
|
|
|
|
return res.status;
|
|
}
|
|
*/
|
|
|
|
/**
 * Extracts trailer sources from inline `movie["trailer..."][...]` script lines.
 *
 * @param {string} html - Page HTML, scanned line by line.
 * @param {{entity: {url: string}}} context - `context.entity.url` is prepended to paths that
 *   do not start with `http`.
 * @returns {Array<{src: string, quality: (number|undefined)}>|null} One entry per trailer line
 *   that carries a path, or null when the page has no trailer lines at all.
 */
function extractLegacyTrailer(html, context) {
  const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));

  if (trailerLines.length === 0) {
    return null;
  }

  return trailerLines.map((trailerLine) => {
    // stop at the first closing quote; a greedy match runs on to the last double quote on the line
    const src = trailerLine.match(/path:"([^"]+)"/)?.[1];
    const quality = trailerLine.match(/movie_height:'(\d+)/)?.[1];

    return src && {
      src: /^http/.test(src) ? src : `${context.entity.url}${src}`,
      // a reported height of 558 is recorded as 540
      quality: quality && Number(quality.replace('558', '540')),
    };
  }).filter(Boolean);
}
|
|
|
|
// Photo directory names that getPhotos expands into candidate URLs for every photo index, in this order.
const qualities = ['photos', '1600watermarked', '1280watermarked', '1024watermarked', 'thumbs'];
|
|
|
|
/**
 * Guesses full-size photo URLs for a scene from its actor names and photo count.
 *
 * @param {object} query - Page query helper; provides the photo count and, when available,
 *   the trailer slug.
 * @param {{actors: Array<{name: string}|string>}} release - Release whose actors seed the slugs.
 * @param {{entity: {slug: string}}} context - `context.entity.slug` is used as host and directory name.
 * @returns {Array<Array<{src: string, attempts: number}>>|null} Per photo a list of candidate
 *   sources covering every entry in `qualities`, or null without actors or a photo count.
 */
function getPhotos(query, release, context) {
  // https://thumbs.julesjordan.com/members/content//upload/dl03/julesjordan/whitney_wright_dredd/1024watermarked/whitney_wright_julesjordan.com-20.jpg
  // https://thumbs.julesjordan.com/members/content//upload/dl03/julesjordan/bambi_barton_manuel_ferrara/1024watermarked/bambi_barton_julesjordan_com-13.jpg
  if (!release.actors?.length) {
    return null;
  }

  const photoCount = query.number('//div[contains(@class, "title-heading-content")][contains(text(), "Photos")]');

  if (!photoCount) {
    return null;
  }

  const actorSlugs = release.actors.map((actor) => slugify(actor.name || actor, '_'));

  // slug actor order is not always the same as actor list order, prefer trailer slug if available
  const path = query.dataset('.movieformat_button', 'src')?.match(/:(.*)_trailer/)?.[1] || actorSlugs.join('_');

  // strip the co-stars from the path to get the lead actor slug; with a single actor there is nothing to strip,
  // and replacing a bare '_' would wrongly remove the first underscore from the actor's own name
  const costarSlug = actorSlugs.slice(1).join('_');
  const derivedActorSlug = costarSlug ? path.replace(`_${costarSlug}`, '') : path;

  const actorSlug = derivedActorSlug === path // no replacement took place, so the slug is likely invalid
    ? actorSlugs[0]
    : derivedActorSlug;

  const { slug } = context.entity;

  return Array.from({ length: photoCount }, (_value, index) => qualities
    .flatMap((quality) => {
      const base = `https://thumbs.${slug}.com/trial/content//upload/dl03/${slug}/${path}/${quality}/${actorSlug}_${slug}`;

      return [
        `${base}_com-${index + 1}.jpg`,
        `${base}.com-${index + 1}.jpg`, // .com instead of _com
      ];
    })
    .map((src) => ({ src, attempts: 1 })));
}
|
|
|
|
/**
 * Scrapes a scene page into a release.
 *
 * @param {{html: string, query: object}} page - Raw page HTML and its query helper.
 * @param {object} context - Scrape context. Reads `url`, `entity.url`, `entity.slug`,
 *   `include.trailers` and, when present, `baseRelease.photos`.
 * @returns {Promise<object>} Release with entryId, title, description, date, actors, tags,
 *   director, poster, trailer, photos, movie, stars and qualities where found.
 */
async function scrapeScene({ html, query }, context) {
  const release = {};

  release.entryId = getEntryId(context.url);

  release.title = query.content('.scene-title, .title_bar_hilite, .movie_title');
  release.description = query.content('.scene-desc, .update_description') || query.text('//div[./span[contains(text(), "Description")]]');

  release.date = query.date(['.meta-item:nth-child(2) .val, .update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);

  release.actors = query.all('.meta-item .update_models a, .backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
    name: unprint.query.content(actorEl),
    url: unprint.query.url(actorEl, null),
  }));

  release.tags = query.contents('.scene-cats a, .update_tags a, .player-scene-description a[href*="/categories"]');
  // these directors are listed among the tags
  release.director = release.tags?.find((tag) => ['mike john', 'van styles'].includes(tag?.trim().toLowerCase()));

  const posterPath = query.poster('#video-player', { forceGetAttribute: true }) // without getAttribute, missing poster is returned as page URL
    || html.match(/useimage = "(.*)"/)?.[1];

  if (posterPath) {
    release.poster = {
      src: /^http/.test(posterPath) ? posterPath : `${context.entity.url}${posterPath}`,
      referer: context.entity.url,
    };
  }

  if (query.exists('source[data-bitrate="trailer"]')) {
    release.trailer = [
      query.video('source[data-bitrate="trailer_1080" i]'),
      query.video('source[data-bitrate="trailer_720" i]'),
      query.video('source[data-bitrate="trailer" i]'), // also seems to be 720p
      query.video('source[data-bitrate="trailer_mobile" i]'), // also seems to be 720p
    ].filter(Boolean);
  } else if (context.include.trailers && context.entity.slug !== 'manuelferrara') {
    release.trailer = extractLegacyTrailer(html, context);
  }

  // the trailer is a list of URL strings or { src } objects, so the marker must be looked for inside each source;
  // Array#includes only compares whole elements and never matched
  const isSfwTrailer = (trailer) => String(trailer?.src ?? trailer).includes('_sfw');

  if (release.trailer?.some(isSfwTrailer)) {
    release.trailer = null;
  }

  if (argv.jjFullPhotos) {
    release.photos = getPhotos(query, release, context);
  } else {
    // base release photos are usually better, but deep photos have additional thumbs
    // the filenames are not chronological, so sorting after appending only worsens the mix.
    // #images img selects a list of images that is present on every page; the JJ website removes the ones that failed to load with JS (lol)
    release.photos = [
      ...(context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || []),
      ...query.imgs('.tp-photos-strip img, #images img'),
      ...query.imgs('img.update_thumb', { attribute: 'src0_1x' }),
    ].filter(Boolean).map((source) => Array.from(new Set([
      source.replace(/\.jpg$/, '-full.jpg'),
      source.replace(/-1x\.jpg$/, '-4x.jpg'),
      source.replace(/-1x\.jpg$/, '-2x.jpg'),
      source,
    ])).map((fallbackSource) => ({
      src: fallbackSource,
      referer: context.entity.url,
      verifyType: 'image',
    })));
  }

  if (query.exists('.player-scene-description a[href*="/dvd"]')) {
    release.movie = {
      url: query.url('.player-scene-description a[href*="/dvd"]'),
      title: query.content('.player-scene-description a[href*="/dvd"]'),
    };

    release.movie.entryId = new URL(release.movie.url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase();
  }

  release.stars = query.number('.avg_rating');

  release.qualities = query.contents('.res-item .res-lbl').map((resolution) => Number(resolution.match(/\d+$/)?.[0])).filter(Boolean);

  return release;
}
|
|
|
|
/**
 * Scrapes a movie (DVD) page, including the scenes listed on it.
 *
 * @param {{query: object}} page - Query helper for the movie page.
 * @param {{url: string}} context - `url` is the movie page URL; its last path segment becomes the entry ID.
 * @returns {object} Movie with entryId, title, covers, scenes sorted by date, date and datePrecision.
 */
function scrapeMovie({ query }, { url }) {
  const movie = {};

  movie.entryId = new URL(url).pathname.split('/').slice(-1)[0]?.replace('.html', '').toLowerCase();
  movie.title = query.attribute('meta[property="og:title"]', 'content');

  movie.covers = [query.img('img.dvd_box')]; // -2x etc is likely upscaled

  const sceneTitles = query.contents('.title-heading-content-black-dvd');

  const scenes = query.all('.grid-container-scene').map((sceneEl, index) => {
    const scene = {};

    scene.url = unprint.query.url(sceneEl, 'a[href*="/scenes"]');
    scene.title = sceneTitles[index]; // titles are matched to scene containers by position

    scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY');
    scene.actors = unprint.query.contents(sceneEl, '.update_models a');

    scene.entryId = getEntryId(scene.url);

    return scene;
  });

  // undated scenes count as 0 and sort first; without the fallback an undefined date makes the comparator return NaN
  movie.scenes = scenes.sort((sceneA, sceneB) => (sceneA.date ?? 0) - (sceneB.date ?? 0));

  // earliest known scene date; an undated scene sorted to the front must not blank out the movie date
  movie.date = movie.scenes.find((scene) => scene.date)?.date ?? movie.scenes[0]?.date;
  movie.datePrecision = 'month';

  return movie;
}
|
|
|
|
/**
 * Scrapes an actor profile page, including the scene cards listed on it.
 *
 * @param {{query: object}} page - Query helper for the profile page.
 * @param {string} url - URL the profile was fetched from; stored on the profile.
 * @param {{url: string}} entity - Entity the profile belongs to; `entity.url` is sent as avatar referer.
 * @returns {object} Profile with url, description, height, measurements, dateOfBirth or age,
 *   avatar fallback sources and scenes.
 */
function scrapeProfile({ query }, url, entity) {
  const avatarSelector = '.model_bio_pic img, .model_bio_thumb';
  // largest variant first, plain src last
  const avatarAttributes = ['src0_3x', 'src0_2x', 'src0_1x', 'src0', 'src'];

  const profile = { url };

  // the spaces are important to avoid selecting a similar comment
  const bioElement = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span');

  profile.description = bioElement || query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::text()');

  const heightText = query.content('//span[contains(text(), "Height")]/following-sibling::span');

  profile.height = heightToCm(heightText);
  profile.measurements = query.content('//span[contains(text(), "Measurements")]/following-sibling::span');

  // the age field holds either a date of birth or a plain number
  const ageText = query.content('//span[contains(text(), "Age")]/following-sibling::span')?.trim();
  const holdsBirthDate = Boolean(ageText) && /\w+ \d+, \d{4}/.test(ageText);

  if (!holdsBirthDate) {
    profile.age = Number(ageText) || null;
  } else {
    profile.dateOfBirth = unprint.extractDate(ageText, 'MMMM D, YYYY');
  }

  const avatarSources = [];

  for (const attribute of avatarAttributes) {
    const src = query.img(avatarSelector, { attribute });

    if (src) {
      avatarSources.push({
        src,
        referer: entity.url,
        verifyType: 'image',
      });
    }
  }

  profile.avatar = avatarSources;

  const sceneCards = unprint.initAll(query.all('.mbp-scenes-grid .jj-content-card, .grid-item'));

  profile.scenes = scrapeAll(sceneCards, entity, true);

  return profile;
}
|
|
|
|
/**
 * Locates and scrapes an actor's profile page.
 *
 * Candidates are passed to tryUrls in this order: the known actor URL, then the profile base
 * with the name slugified without a delimiter, then with a dash delimiter.
 *
 * @param {{name: string, url: (string|undefined)}} actor - Actor name and, if known, profile URL.
 * @param {object} entity - Entity to search; `entity.parameters.profile` overrides the
 *   default `<entity.url>/trial/models` base.
 * @returns {Promise<object|number>} Scraped profile, or the response status when no candidate succeeded.
 */
async function fetchProfile({ name: actorName, url: actorUrl }, entity) {
  const profileBase = entity.parameters?.profile || `${entity.url}/trial/models`;

  const candidateUrls = [
    actorUrl,
    ...['', '-'].map((delimiter) => `${profileBase}/${slugify(actorName, delimiter)}.html`),
  ];

  const { res, url } = await tryUrls(candidateUrls);

  return res.ok
    ? scrapeProfile(res.context, url, entity)
    : res.status;
}
|
|
|
|
module.exports = {
|
|
fetchLatest,
|
|
fetchProfile,
|
|
// fetchUpcoming,
|
|
scrapeScene,
|
|
scrapeMovie,
|
|
};
|