Compare commits
4 Commits
f96e938417
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
818669011e | ||
|
|
f962d71d10 | ||
|
|
04278284af | ||
|
|
946afcb229 |
4
package-lock.json
generated
4
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "traxxx",
|
||||
"version": "1.252.5",
|
||||
"version": "1.252.7",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "traxxx",
|
||||
"version": "1.252.5",
|
||||
"version": "1.252.7",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "^3.458.0",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "traxxx",
|
||||
"version": "1.252.5",
|
||||
"version": "1.252.7",
|
||||
"description": "All the latest porn releases in one place",
|
||||
"main": "src/app.js",
|
||||
"scripts": {
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
'use strict';
|
||||
|
||||
/* eslint-disable newline-per-chained-call */
|
||||
const Promise = require('bluebird');
|
||||
const { CookieJar } = Promise.promisifyAll(require('tough-cookie'));
|
||||
const cookie = require('cookie');
|
||||
const moment = require('moment');
|
||||
// const unprint = require('unprint');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const qu = require('../utils/qu');
|
||||
const slugify = require('../utils/slugify');
|
||||
const http = require('../utils/http');
|
||||
const { inchesToCm, lbsToKg } = require('../utils/convert');
|
||||
|
||||
function getBasePath(parameters, channel, path = '/scene') {
|
||||
@@ -126,6 +121,119 @@ async function scrapeLatest(items, site, filterChannel, options) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolve the scene listing URL for an Aylo site.
 *
 * Resolution order:
 *  1. `site.url` already identifies a collection, through a `site` query
 *     parameter or a `/site/<digits>` path segment: returned unchanged.
 *  2. `site.parameters.native` or `site.parameters.extract` is set:
 *     `<site.url>/scenes`.
 *  3. `site.parameters.siteId` is set and the site has a parent with a URL:
 *     `<site.parent.url>/scenes?site=<siteId>`.
 *
 * @param {object} site - entity with `url` and `name`, and optionally `parameters` and `parent`
 * @returns {string} the listing URL
 * @throws {Error} when none of the rules above apply
 * @throws {TypeError} when `site.url` is not a valid absolute URL (raised by `new URL`)
 */
function getUrl(site) {
	const { searchParams, pathname } = new URL(site.url);

	if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) {
		return site.url;
	}

	// native and extract sites both list their scenes directly under their own URL
	if (site.parameters?.native || site.parameters?.extract) {
		return `${site.url}/scenes`;
	}

	// a siteId is only meaningful relative to the parent's listing; without a parent URL,
	// fall through to the descriptive error below instead of crashing on site.parent.url
	if (site.parameters?.siteId && site.parent?.url) {
		return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
	}

	throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
}
|
||||
|
||||
/**
 * Acquire an Aylo session by requesting a site page and reading the
 * `instance_token` cookie from the response.
 *
 * The page requested is the parent's URL when the site has a `siteId` and none
 * of the own-session flags are set (`parameters.native`,
 * `parameters.childSession`, the parent's `parameters.childSession`, or
 * `parameters.parentSession === false`). Otherwise it is `url`, falling back
 * to `site.url`.
 *
 * @param {object} site - entity with `slug` and `url`, and optionally `parameters` and `parent`
 * @param {object} [_parameters] - unused; kept so existing call sites stay valid
 * @param {string} [url] - page to request when the site's own session is used
 * @returns {Promise<{ instanceToken: string } | null>} the token, or null for the `aylo` entity itself
 * @throws {Error} when the response is not a 200 or carries no `instance_token` cookie
 */
async function getSession(site, _parameters, url) {
	if (site.slug === 'aylo') {
		// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
		return null;
	}

	const usesOwnSession = site.parameters?.native
		|| site.parameters?.childSession
		|| site.parent?.parameters?.childSession
		|| site.parameters?.parentSession === false;

	const sessionUrl = site.parameters?.siteId && !usesOwnSession
		? site.parent.url
		: (url || site.url);

	const res = await unprint.get(sessionUrl, {
		headers: {
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
			Connection: 'keep-alive',
		},
	});

	if (res.status === 200) {
		// tolerate a response without a cookie map so the failure is reported by the error below
		const instanceToken = res.cookies?.instance_token;

		if (instanceToken) {
			return { instanceToken };
		}
	}

	// report res.status, the same property checked above; res.statusCode was a leftover
	// from the previous HTTP helper and always rendered as "undefined"
	throw new Error(`Failed to acquire Aylo session (${res.status})`);
}
|
||||
|
||||
/**
 * Fetch one page of the latest scenes for an Aylo site from the releases API.
 *
 * The collection ID is read from the listing URL returned by getUrl(), either
 * the `site` query parameter or a `/site/<digits>/` path segment. Sites flagged
 * `native` or `extract` are queried without a collection filter.
 *
 * The instance token from `options.beforeNetwork` is reused unless
 * `options.parameters` has `native`, `childSession` or `parentSession === false`
 * set, in which case a session is acquired through getSession().
 *
 * @param {object} site - entity passed to getUrl(), getSession() and scrapeLatest()
 * @param {number} [page=1] - 1-based page number; 24 releases per page
 * @param {object} options - reads `beforeNetwork` and `parameters` (`interval`, `concurrency`, session flags)
 * @returns {Promise<*>} the scrapeLatest() result on a 200 response with `data.result`,
 *   the response status otherwise, or null when no collection ID applies
 */
async function fetchLatest(site, page = 1, options) {
	const url = getUrl(site);
	const { searchParams, pathname } = new URL(url);
	const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);
	const isUnfiltered = site.parameters?.native || site.parameters?.extract;

	if (!siteId && !isUnfiltered) {
		return null;
	}

	const needsOwnSession = options.parameters?.native
		|| options.parameters?.childSession
		|| options.parameters?.parentSession === false;

	const session = options.beforeNetwork?.instanceToken && !needsOwnSession
		? options.beforeNetwork
		: await getSession(site, options.parameters, url);

	// getSession() returns null for the aylo entity; read the token defensively instead of
	// destructuring null, and let the API response decide the outcome
	const instanceToken = session?.instanceToken;

	const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
	const limit = 24;
	const collectionFilter = isUnfiltered ? '' : `collectionId=${siteId}&`;
	const apiUrl = `https://site-api.project1service.com/v2/releases?${collectionFilter}dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;

	const res = await unprint.get(apiUrl, {
		interval: options.parameters?.interval,
		concurrency: options.parameters?.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status === 200 && res.data.result) {
		return scrapeLatest(res.data.result, site, false, options);
	}

	return res.status;
}
|
||||
|
||||
/**
 * Fetch upcoming releases for an Aylo site from the upcoming-releases API.
 *
 * A session is always acquired through getSession(); the listing URL from
 * getUrl() is only used as the Referer header.
 *
 * @param {object} site - entity passed to getUrl(), getSession() and scrapeLatest()
 * @param {number} _page - unused; the endpoint is requested without pagination
 * @param {object} options - reads `parameters` (`interval`, `concurrency`)
 * @returns {Promise<*>} the scrapeLatest() result on a 200 response with `data.result`,
 *   the response status otherwise
 */
async function fetchUpcoming(site, _page, options) {
	const url = getUrl(site);

	// getSession() returns null for the aylo entity; read the token defensively instead of
	// destructuring null, and let the API response decide the outcome
	const session = await getSession(site, options.parameters);
	const instanceToken = session?.instanceToken;

	const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';

	const res = await unprint.get(apiUrl, {
		interval: options.parameters?.interval,
		concurrency: options.parameters?.concurrency,
		headers: {
			Instance: instanceToken,
			Origin: site.url,
			Referer: url,
			'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
		},
	});

	if (res.status === 200 && res.data.result) {
		return scrapeLatest(res.data.result, site, true, options);
	}

	return res.status;
}
|
||||
|
||||
function scrapeRelease(data, url, channel, networkName, options) {
|
||||
if (Array.isArray(data)) {
|
||||
return null;
|
||||
@@ -192,139 +300,6 @@ function scrapeRelease(data, url, channel, networkName, options) {
|
||||
return release;
|
||||
}
|
||||
|
||||
function getUrl(site) {
|
||||
const { searchParams, pathname } = new URL(site.url);
|
||||
|
||||
// if (search.match(/\?site=\d+/)) {
|
||||
if (searchParams.has('site') || /\/site\/\d+/.test(pathname)) {
|
||||
return site.url;
|
||||
}
|
||||
|
||||
if (site.parameters?.native) {
|
||||
return `${site.url}/scenes`;
|
||||
}
|
||||
|
||||
if (site.parameters?.extract) {
|
||||
return `${site.url}/scenes`;
|
||||
}
|
||||
|
||||
if (site.parameters?.siteId) {
|
||||
return `${site.parent.url}/scenes?site=${site.parameters.siteId}`;
|
||||
}
|
||||
|
||||
throw new Error(`Aylo site '${site.name}' (${site.url}) not supported`);
|
||||
}
|
||||
|
||||
async function getSession(site, parameters, url) {
|
||||
// if (site.slug === 'aylo' || site.parameters?.parentSession === false) {
|
||||
if (site.slug === 'aylo') {
|
||||
// most MG sites have a parent network to acquire a session from, don't try to acquire session from mindgeek.com for independent channels
|
||||
return null;
|
||||
}
|
||||
|
||||
const cookieJar = new CookieJar();
|
||||
const session = http.session({ cookieJar });
|
||||
|
||||
const sessionUrl = site.parameters?.siteId && !(site.parameters?.native || site.parameters?.childSession || site.parent?.parameters?.childSession || site.parameters?.parentSession === false)
|
||||
? site.parent.url
|
||||
: (url || site.url);
|
||||
|
||||
/*
|
||||
await unprint.browserRequest(sessionUrl, {
|
||||
browser: {
|
||||
headless: false,
|
||||
},
|
||||
async control() {
|
||||
await new Promise((resolve) => { setTimeout(() => resolve(), 10000); });
|
||||
},
|
||||
});
|
||||
*/
|
||||
|
||||
const res = await http.get(sessionUrl, {
|
||||
session,
|
||||
headers: {
|
||||
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
|
||||
Connection: 'keep-alive',
|
||||
},
|
||||
interval: parameters?.interval,
|
||||
concurrency: parameters?.concurrency,
|
||||
parse: false,
|
||||
});
|
||||
|
||||
if (res.status === 200) {
|
||||
const cookieString = await cookieJar.getCookieStringAsync(sessionUrl);
|
||||
const { instance_token: instanceToken } = cookie.parse(cookieString);
|
||||
|
||||
if (instanceToken) {
|
||||
return { session, instanceToken };
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`Failed to acquire Aylo session (${res.statusCode})`);
|
||||
}
|
||||
|
||||
async function fetchLatest(site, page = 1, options) {
|
||||
const url = getUrl(site);
|
||||
const { searchParams, pathname } = new URL(url);
|
||||
const siteId = searchParams.get('site') || Number(pathname.match(/\/site\/(\d+)\//)?.[1]);
|
||||
|
||||
if (!siteId && !site.parameters?.native && !site.parameters?.extract) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const { instanceToken } = options.beforeNetwork?.instanceToken && !(options.parameters?.native || options.parameters?.childSession || options.parameters?.parentSession === false)
|
||||
? options.beforeNetwork
|
||||
: await getSession(site, options.parameters, url);
|
||||
|
||||
const beforeDate = moment().add('1', 'day').format('YYYY-MM-DD');
|
||||
const limit = 24;
|
||||
const apiUrl = site.parameters?.native || site.parameters?.extract
|
||||
? `https://site-api.project1service.com/v2/releases?dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`
|
||||
: `https://site-api.project1service.com/v2/releases?collectionId=${siteId}&dateReleased=<${beforeDate}&limit=${limit}&offset=${limit * (page - 1)}&orderBy=-dateReleased&type=scene`;
|
||||
|
||||
const res = await http.get(apiUrl, {
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
Origin: site.url,
|
||||
Referer: url,
|
||||
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
|
||||
},
|
||||
});
|
||||
|
||||
if (res.status === 200 && res.body.result) {
|
||||
return scrapeLatest(res.body.result, site, false, options);
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
}
|
||||
|
||||
async function fetchUpcoming(site, page, options) {
|
||||
const url = getUrl(site);
|
||||
const { session, instanceToken } = await getSession(site, options.parameters);
|
||||
|
||||
const apiUrl = 'https://site-api.project1service.com/v2/upcoming-releases';
|
||||
|
||||
const res = await http.get(apiUrl, {
|
||||
session,
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
Instance: instanceToken,
|
||||
Origin: site.url,
|
||||
Referer: url,
|
||||
'Accept-Language': 'en-US,en;', // somehow seems essential for some MG sites
|
||||
},
|
||||
});
|
||||
|
||||
if (res.statusCode === 200 && res.body.result) {
|
||||
return scrapeLatest(res.body.result, site, true, options);
|
||||
}
|
||||
|
||||
return res.statusCode;
|
||||
}
|
||||
|
||||
async function fetchRelease(url, site, baseScene, options) {
|
||||
if (baseScene?.entryId && !baseScene.shallow && !options.parameters.forceDeep) {
|
||||
// overview and deep data is the same, don't hit server unnecessarily
|
||||
@@ -332,10 +307,9 @@ async function fetchRelease(url, site, baseScene, options) {
|
||||
}
|
||||
|
||||
const entryId = new URL(url).pathname.match(/\/(\d+)/)?.[1];
|
||||
const { session, instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
|
||||
const { instanceToken } = options.beforeFetchScenes || await getSession(site, options.parameters);
|
||||
|
||||
const res = await http.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||
session,
|
||||
const res = await unprint.get(`https://site-api.project1service.com/v2/releases/${entryId}`, {
|
||||
interval: options.parameters.interval,
|
||||
concurrency: options.parameters.concurrency,
|
||||
headers: {
|
||||
@@ -344,16 +318,16 @@ async function fetchRelease(url, site, baseScene, options) {
|
||||
},
|
||||
});
|
||||
|
||||
if (res.status === 200 && res.body.result) {
|
||||
if (res.status === 200 && res.data.result) {
|
||||
return {
|
||||
scene: scrapeRelease(res.body.result, url, site, null, options),
|
||||
scene: scrapeRelease(res.data.result, url, site, null, options),
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function scrapeProfile(data, networkName, _releases = []) {
|
||||
function scrapeProfile(data, _networkName, _releases = []) {
|
||||
const profile = {
|
||||
description: data.bio,
|
||||
aliases: data.aliases.filter(Boolean),
|
||||
@@ -367,7 +341,7 @@ function scrapeProfile(data, networkName, _releases = []) {
|
||||
profile.measurements = data.measurements;
|
||||
}
|
||||
|
||||
profile.dateOfBirth = qu.parseDate(data.birthday);
|
||||
profile.dateOfBirth = unprint.extractDate(data.birthday);
|
||||
profile.birthPlace = data.birthPlace;
|
||||
profile.height = inchesToCm(data.height);
|
||||
profile.weight = lbsToKg(data.weight);
|
||||
@@ -406,10 +380,9 @@ function scrapeProfile(data, networkName, _releases = []) {
|
||||
|
||||
async function fetchProfile({ name: actorName }, { entity, parameters }, include) {
|
||||
// const url = `https://www.${networkOrNetworkSlug.slug || networkOrNetworkSlug}.com`;
|
||||
const { session, instanceToken } = await getSession(entity, parameters);
|
||||
const { instanceToken } = await getSession(entity, parameters);
|
||||
|
||||
const res = await http.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
||||
session,
|
||||
const res = await unprint.get(`https://site-api.project1service.com/v1/actors/?search=${encodeURI(actorName)}`, {
|
||||
interval: parameters.interval,
|
||||
concurrency: parameters.concurrency,
|
||||
headers: {
|
||||
@@ -418,14 +391,13 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
|
||||
},
|
||||
});
|
||||
|
||||
if (res.statusCode === 200) {
|
||||
const actorData = res.body.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||
if (res.status === 200) {
|
||||
const actorData = res.data.result.find((actor) => actor.name.toLowerCase() === actorName.toLowerCase());
|
||||
|
||||
if (actorData) {
|
||||
const actorReleasesUrl = `https://site-api.project1service.com/v2/releases?actorId=${actorData.id}&limit=100&offset=0&orderBy=-dateReleased&type=scene`;
|
||||
|
||||
const actorReleasesRes = include.includeActorScenes && await http.get(actorReleasesUrl, {
|
||||
session,
|
||||
const actorReleasesRes = include.includeActorScenes && await unprint.get(actorReleasesUrl, {
|
||||
interval: parameters.interval,
|
||||
concurrency: parameters.concurrency,
|
||||
headers: {
|
||||
@@ -433,8 +405,8 @@ async function fetchProfile({ name: actorName }, { entity, parameters }, include
|
||||
},
|
||||
});
|
||||
|
||||
if (actorReleasesRes.statusCode === 200 && actorReleasesRes.body.result) {
|
||||
return scrapeProfile(actorData, entity.slug, actorReleasesRes.body.result);
|
||||
if (actorReleasesRes.status === 200 && actorReleasesRes.data.result) {
|
||||
return scrapeProfile(actorData, entity.slug, actorReleasesRes.data.result);
|
||||
}
|
||||
|
||||
return scrapeProfile(actorData, entity.slug, []);
|
||||
|
||||
@@ -1,44 +1,38 @@
|
||||
'use strict';
|
||||
|
||||
const util = require('util');
|
||||
const Promise = require('bluebird');
|
||||
const unprint = require('unprint');
|
||||
|
||||
const argv = require('../argv');
|
||||
const { heightToCm } = require('../utils/convert');
|
||||
const slugify = require('../utils/slugify');
|
||||
const tryUrls = require('../utils/try-urls');
|
||||
|
||||
function getEntryId(html) {
|
||||
const entryId = html.match(/showtagform\((\d+)\)/);
|
||||
|
||||
if (entryId) {
|
||||
return entryId[1];
|
||||
function getEntryId(url) {
|
||||
if (!url) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const setIdIndex = html.indexOf('setid:"');
|
||||
const entryId = new URL(url).pathname.split('/').at(-1).match('(.*?)_vids.html');
|
||||
|
||||
if (setIdIndex) {
|
||||
return html.slice(setIdIndex, html.indexOf(',', setIdIndex)).match(/\d+/)?.[0];
|
||||
if (entryId) {
|
||||
return slugify(entryId[1]);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function getEntryIdFromTitle(release) {
|
||||
// return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page
|
||||
// return slugify(release.title);
|
||||
return slugify([release.title, ...(release.actors?.map((actor) => actor.name || actor).toSorted() || [])]);
|
||||
}
|
||||
|
||||
function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
return scenes.map(({ element, query }) => {
|
||||
function scrapeAll(scenes, site) {
|
||||
return scenes.map(({ query }) => {
|
||||
const release = {};
|
||||
const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
|
||||
const title = query.content('.jj-card-title, .content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
|
||||
|| query.content('a[title*=" "]');
|
||||
|
||||
release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
|
||||
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
|
||||
release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
||||
release.url = query.url('.jj-card-thumb, .content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
|
||||
release.date = query.date('.jj-card-date, .update_date', ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);
|
||||
|
||||
release.entryId = getEntryId(release.url);
|
||||
|
||||
release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
@@ -70,17 +64,16 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
||||
return null;
|
||||
}).filter(Boolean);
|
||||
|
||||
const teaserScript = query.html('script');
|
||||
release.teaser = query.video('.jj-card-video', { attribute: 'data-src' });
|
||||
|
||||
if (teaserScript) {
|
||||
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||
if (!release.teaser) {
|
||||
const teaserScript = query.html('script');
|
||||
|
||||
if (teaserScript) {
|
||||
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
|
||||
}
|
||||
}
|
||||
|
||||
release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release))
|
||||
|| element.dataset.setid
|
||||
|| query.element('.rating_box')?.dataset.id
|
||||
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
|
||||
|
||||
return release;
|
||||
});
|
||||
}
|
||||
@@ -91,7 +84,7 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
|
||||
: `${site.url}/trial/categories/movies_${page}_d.html`;
|
||||
|
||||
// const res = await http.get(url);
|
||||
const res = await unprint.get(url, { selectAll: '.update_details, .grid-item' });
|
||||
const res = await unprint.get(url, { selectAll: '.scenes-listing-grid .jj-content-card' });
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeAll(res.context, site, typeof site.parameters?.entryIdFromTitle === 'boolean' ? site.parameters.entryIdFromTitle : entryIdFromTitle);
|
||||
@@ -100,7 +93,8 @@ async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle =
|
||||
return res.status;
|
||||
}
|
||||
|
||||
function scrapeUpcoming(scenes, channel) {
|
||||
/* disable until we have entryId solution
|
||||
function scrapeUpcoming(scenes, _channel) {
|
||||
return scenes.map(({ query, html }) => {
|
||||
const release = {};
|
||||
|
||||
@@ -135,6 +129,7 @@ async function fetchUpcoming(site) {
|
||||
|
||||
return res.status;
|
||||
}
|
||||
*/
|
||||
|
||||
function extractLegacyTrailer(html, context) {
|
||||
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
|
||||
@@ -194,17 +189,19 @@ function getPhotos(query, release, context) {
|
||||
async function scrapeScene({ html, query }, context) {
|
||||
const release = {};
|
||||
|
||||
release.title = query.content('.title_bar_hilite, .movie_title');
|
||||
release.description = query.content('.update_description') || query.text('//div[./span[contains(text(), "Description")]]');
|
||||
release.entryId = getEntryId(context.url);
|
||||
|
||||
release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
||||
release.title = query.content('.scene-title, .title_bar_hilite, .movie_title');
|
||||
release.description = query.content('.scene-desc, .update_description') || query.text('//div[./span[contains(text(), "Description")]]');
|
||||
|
||||
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
|
||||
release.date = query.date(['.meta-item:nth-child(2) .val, .update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD', 'MMMM D, YYYY']);
|
||||
|
||||
release.actors = query.all('.meta-item .update_models a, .backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
|
||||
name: unprint.query.content(actorEl),
|
||||
url: unprint.query.url(actorEl, null),
|
||||
}));
|
||||
|
||||
release.tags = query.contents('.update_tags a, .player-scene-description a[href*="/categories"]');
|
||||
release.tags = query.contents('.scene-cats a, .update_tags a, .player-scene-description a[href*="/categories"]');
|
||||
release.director = release.tags?.find((tag) => ['mike john', 'van styles'].includes(tag?.trim().toLowerCase()));
|
||||
|
||||
const posterPath = query.poster('#video-player', { forceGetAttribute: true }) // without getAttribute, missing poster is returned as page URL
|
||||
@@ -245,7 +242,7 @@ async function scrapeScene({ html, query }, context) {
|
||||
// #images img selects a list of images that is present on every page; the JJ website removes the ones that failed to load with JS (lol)
|
||||
release.photos = [
|
||||
...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [],
|
||||
...query.imgs('#images img'),
|
||||
...query.imgs('.tp-photos-strip img, #images img'),
|
||||
...query.imgs('img.update_thumb', { attribute: 'src0_1x' }),
|
||||
].filter(Boolean).map((source) => Array.from(new Set([
|
||||
source.replace(/.jpg$/, '-full.jpg'),
|
||||
@@ -270,9 +267,7 @@ async function scrapeScene({ html, query }, context) {
|
||||
|
||||
release.stars = query.number('.avg_rating');
|
||||
|
||||
release.entryId = context.entity.parameters?.entryIdFromTitle
|
||||
? getEntryIdFromTitle(release)
|
||||
: getEntryId(html);
|
||||
release.qualities = query.contents('.res-item .res-lbl').map((resolution) => Number(resolution.match(/\d+$/)?.[0])).filter(Boolean);
|
||||
|
||||
return release;
|
||||
}
|
||||
@@ -296,7 +291,7 @@ function scrapeMovie({ query }, { url }) {
|
||||
scene.date = unprint.query.date(sceneEl, '//span[contains(@class, "dvd-scene-description") and span[contains(text(), "Date")]]', 'MM/DD/YYYY');
|
||||
scene.actors = unprint.query.contents(sceneEl, '.update_models a');
|
||||
|
||||
scene.entryId = getEntryIdFromTitle(scene);
|
||||
scene.entryId = getEntryId(scene.url);
|
||||
|
||||
return scene;
|
||||
});
|
||||
@@ -339,48 +334,34 @@ function scrapeProfile({ query }, url, entity) {
|
||||
verifyType: 'image',
|
||||
}));
|
||||
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true);
|
||||
profile.scenes = scrapeAll(unprint.initAll(query.all('.mbp-scenes-grid .jj-content-card, .grid-item')), entity, true);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
async function fetchProfile({ name: actorName, url }, entity) {
|
||||
async function fetchProfile({ name: actorName, url: actorUrl }, entity) {
|
||||
const actorSlugA = slugify(actorName, '');
|
||||
const actorSlugB = slugify(actorName, '-');
|
||||
|
||||
const urls = [
|
||||
url,
|
||||
actorUrl,
|
||||
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugA}.html`,
|
||||
`${entity.parameters?.profile || `${entity.url}/trial/models`}/${actorSlugB}.html`,
|
||||
];
|
||||
|
||||
return urls.reduce(async (chain, profileUrl) => {
|
||||
const profile = await chain;
|
||||
const { res, url } = await tryUrls(urls);
|
||||
|
||||
if (profile) {
|
||||
return profile;
|
||||
}
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, url, entity);
|
||||
}
|
||||
|
||||
if (!profileUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const res = await unprint.get(profileUrl, {
|
||||
followRedirects: false,
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
return scrapeProfile(res.context, profileUrl, entity);
|
||||
}
|
||||
|
||||
return null;
|
||||
}, Promise.resolve());
|
||||
return res.status;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
fetchLatest,
|
||||
fetchProfile,
|
||||
fetchUpcoming,
|
||||
// fetchUpcoming,
|
||||
scrapeScene,
|
||||
scrapeMovie,
|
||||
};
|
||||
|
||||
@@ -213,7 +213,7 @@ const actors = [
|
||||
{ entity: 'naughtyamerica', name: 'Nicole Aniston', fields: ['avatar', 'description'] },
|
||||
{ entity: 'tonightsgirlfriend', name: 'Abella Danger', fields: ['avatar'] },
|
||||
// jules jordan scraper
|
||||
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'description', 'avatar'] },
|
||||
{ entity: 'julesjordan', name: 'Vanna Bardot', fields: ['height', 'dateOfBirth', 'measurements', 'avatar'] },
|
||||
{ entity: 'amateurallure', name: 'Ava Amira', fields: ['avatar', 'description'] },
|
||||
{ entity: 'swallowsalon', name: 'Abella Danger', fields: ['avatar'] },
|
||||
// exploitedx
|
||||
|
||||
Reference in New Issue
Block a user