|
|
|
|
@@ -5,7 +5,6 @@ const Promise = require('bluebird');
|
|
|
|
|
const unprint = require('unprint');
|
|
|
|
|
|
|
|
|
|
const argv = require('../argv');
|
|
|
|
|
const qu = require('../utils/qu');
|
|
|
|
|
const { heightToCm } = require('../utils/convert');
|
|
|
|
|
const slugify = require('../utils/slugify');
|
|
|
|
|
|
|
|
|
|
@@ -34,10 +33,11 @@ function getEntryIdFromTitle(release) {
|
|
|
|
|
function scrapeAll(scenes, site, entryIdFromTitle) {
|
|
|
|
|
return scenes.map(({ element, query }) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
const title = query.content('.content_img div, .dvd_info > a, a.update_title, a[title] + a[title], .overlay-text') || query.content('a[title*=" "]');
|
|
|
|
|
const title = query.content('.content_img div, .dvd_info > a, a.update_title, .update_title a, a[title] + a[title], .overlay-text')
|
|
|
|
|
|| query.content('a[title*=" "]');
|
|
|
|
|
|
|
|
|
|
release.title = title?.slice(0, title.match(/starring:/i)?.index || Infinity).trim();
|
|
|
|
|
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]');
|
|
|
|
|
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, .update_title a, a[title]');
|
|
|
|
|
release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
|
|
|
|
|
|
|
|
|
|
release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
|
|
|
|
|
@@ -50,9 +50,9 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
|
|
|
|
|
|
|
|
|
[release.poster, ...release.photos] = dvdPhotos.length
|
|
|
|
|
? dvdPhotos
|
|
|
|
|
: Array.from({ length: photoCount }).map((value, index) => {
|
|
|
|
|
: Array.from({ length: photoCount }).map((_value, index) => {
|
|
|
|
|
const src = query.img('a img.thumbs', { attribute: `src${index}_1x` }) || query.img('a img.thumbs', { attribute: `src${index}` }) || query.img('a img.thumbs');
|
|
|
|
|
const prefixedSrc = qu.prefixUrl(src, site.url);
|
|
|
|
|
const prefixedSrc = unprint.prefixUrl(src, site.url);
|
|
|
|
|
|
|
|
|
|
if (src) {
|
|
|
|
|
return Array.from(new Set([
|
|
|
|
|
@@ -81,12 +81,25 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
|
|
|
|
|
|| query.element('.rating_box')?.dataset.id
|
|
|
|
|
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
|
|
|
|
|
|
|
|
|
|
console.log(release.entryId);
|
|
|
|
|
|
|
|
|
|
return release;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch one page of a site's latest-updates listing and scrape the scenes on it.
 *
 * The listing URL comes from `site.parameters.latest` (a `util.format` template
 * that receives the page number) when set, otherwise from the default
 * `/trial/categories/movies_<page>_d.html` path under `site.url`.
 *
 * @param {object} site - Site entity; `site.url` and optional `site.parameters` are read.
 * @param {number} [page=1] - Listing page number substituted into the URL.
 * @param {*} include - Not used by this function.
 * @param {*} preData - Not used by this function.
 * @param {boolean} [entryIdFromTitle=false] - Flag forwarded to `scrapeAll` unless
 *   `site.parameters.entryIdFromTitle` is itself a boolean, in which case that wins.
 * @returns {Promise<*>} The result of `scrapeAll` when the response is ok,
 *   otherwise `res.status`.
 */
async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) {
	const latestTemplate = site.parameters?.latest;
	let listingUrl;

	if (latestTemplate) {
		listingUrl = util.format(latestTemplate, page);
	} else {
		listingUrl = `${site.url}/trial/categories/movies_${page}_d.html`;
	}

	const res = await unprint.get(listingUrl, { selectAll: '.update_details, .grid-item' });

	if (!res.ok) {
		return res.status;
	}

	// Only an explicit boolean in the site parameters overrides the caller's flag.
	const siteFlag = site.parameters?.entryIdFromTitle;
	const useTitleForEntryId = typeof siteFlag === 'boolean' ? siteFlag : entryIdFromTitle;

	return scrapeAll(res.context, site, useTitleForEntryId);
}
|
|
|
|
|
|
|
|
|
|
function scrapeUpcoming(scenes, channel) {
|
|
|
|
|
return scenes.map(({ query, html }) => {
|
|
|
|
|
const release = {};
|
|
|
|
|
@@ -110,6 +123,19 @@ function scrapeUpcoming(scenes, channel) {
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a site's upcoming-scenes page and scrape the "Coming Soon" entries.
 *
 * @param {object} site - Site entity; `site.url` and optional `site.parameters` are read.
 *   `site.parameters.upcoming === false` disables the fetch; a truthy value is
 *   passed through `util.format` and used as the URL instead of the default
 *   `/trial/index.php` under `site.url`.
 * @returns {Promise<*>} `null` when upcoming is disabled, the result of
 *   `scrapeUpcoming` when the response is ok, otherwise `res.status`.
 */
async function fetchUpcoming(site) {
	const upcomingSetting = site.parameters?.upcoming;

	if (upcomingSetting === false) {
		return null;
	}

	let upcomingUrl;

	if (upcomingSetting) {
		upcomingUrl = util.format(upcomingSetting);
	} else {
		upcomingUrl = `${site.url}/trial/index.php`;
	}

	// XPath selector: ancestor divs of images whose alt text contains "Coming Soon".
	const res = await unprint.get(upcomingUrl, {
		selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeUpcoming(res.context, site);
}
|
|
|
|
|
|
|
|
|
|
function extractLegacyTrailer(html, context) {
|
|
|
|
|
const trailerLines = html.split('\n').filter((line) => /movie\["trailer\w*"\]\[/i.test(line));
|
|
|
|
|
|
|
|
|
|
@@ -206,6 +232,10 @@ async function scrapeScene({ html, query }, context) {
|
|
|
|
|
release.trailer = extractLegacyTrailer(html, context);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (release.trailer?.includes('_sfw')) {
|
|
|
|
|
release.trailer = null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site
|
|
|
|
|
if (argv.jjFullPhotos) {
|
|
|
|
|
release.photos = getPhotos(query, release, context);
|
|
|
|
|
@@ -216,7 +246,8 @@ async function scrapeScene({ html, query }, context) {
|
|
|
|
|
release.photos = [
|
|
|
|
|
...context.baseRelease?.photos?.map((sources) => sources.at(-1).src) || [],
|
|
|
|
|
...query.imgs('#images img'),
|
|
|
|
|
].map((source) => Array.from(new Set([
|
|
|
|
|
...query.imgs('img.update_thumb', { attribute: 'src0_1x' }),
|
|
|
|
|
].filter(Boolean).map((source) => Array.from(new Set([
|
|
|
|
|
source.replace(/.jpg$/, '-full.jpg'),
|
|
|
|
|
source.replace(/-1x.jpg$/, '-4x.jpg'),
|
|
|
|
|
source.replace(/-1x.jpg$/, '-2x.jpg'),
|
|
|
|
|
@@ -278,10 +309,11 @@ function scrapeMovie({ query }, { url }) {
|
|
|
|
|
return movie;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function scrapeProfile({ query }, url, name, entity) {
|
|
|
|
|
function scrapeProfile({ query }, url, entity) {
|
|
|
|
|
const profile = { url };
|
|
|
|
|
|
|
|
|
|
profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span'); // the spaces are important to avoid selecting a similar comment
|
|
|
|
|
profile.description = query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::span') // the spaces are important to avoid selecting a similar comment
|
|
|
|
|
|| query.content('//comment()[contains(., " Bio Extra Field ")]/following-sibling::text()');
|
|
|
|
|
|
|
|
|
|
profile.height = heightToCm(query.content('//span[contains(text(), "Height")]/following-sibling::span'));
|
|
|
|
|
profile.measurements = query.content('//span[contains(text(), "Measurements")]/following-sibling::span');
|
|
|
|
|
@@ -300,41 +332,18 @@ function scrapeProfile({ query }, url, name, entity) {
|
|
|
|
|
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0_1x' }),
|
|
|
|
|
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src0' }),
|
|
|
|
|
query.img('.model_bio_pic img, .model_bio_thumb', { attribute: 'src' }),
|
|
|
|
|
].filter(Boolean);
|
|
|
|
|
// ...query.sourceSet('.model_bio_pic img, .model_bio_thumb', { origin: entity.url }),
|
|
|
|
|
].filter(Boolean).map((src) => ({
|
|
|
|
|
src,
|
|
|
|
|
referer: entity.url,
|
|
|
|
|
verifyType: 'image',
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
profile.scenes = scrapeAll(unprint.initAll(query.all('.grid-item')), entity, true);
|
|
|
|
|
|
|
|
|
|
return profile;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch one page of a site's latest-updates listing and scrape the scenes on it.
 *
 * The listing URL comes from `site.parameters.latest` (a `util.format` template
 * that receives the page number) when set, otherwise from the default
 * `/trial/categories/movies_<page>_d.html` path under `site.url`.
 *
 * @param {object} site - Site entity; `site.url` and optional `site.parameters` are read.
 * @param {number} [page=1] - Listing page number substituted into the URL.
 * @param {*} include - Not used by this function.
 * @param {*} preData - Not used by this function.
 * @param {boolean} [entryIdFromTitle=false] - Flag forwarded to `scrapeAll` unless
 *   `site.parameters.entryIdFromTitle` is itself a boolean, in which case that wins.
 * @returns {Promise<*>} The result of `scrapeAll` when the response is ok,
 *   otherwise `res.status`.
 */
async function fetchLatest(site, page = 1, include, preData, entryIdFromTitle = false) {
	const latestTemplate = site.parameters?.latest;
	let listingUrl;

	if (latestTemplate) {
		listingUrl = util.format(latestTemplate, page);
	} else {
		listingUrl = `${site.url}/trial/categories/movies_${page}_d.html`;
	}

	const res = await unprint.get(listingUrl, { selectAll: '.update_details, .grid-item' });

	if (!res.ok) {
		return res.status;
	}

	// Only an explicit boolean in the site parameters overrides the caller's flag.
	const siteFlag = site.parameters?.entryIdFromTitle;
	const useTitleForEntryId = typeof siteFlag === 'boolean' ? siteFlag : entryIdFromTitle;

	return scrapeAll(res.context, site, useTitleForEntryId);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a site's upcoming-scenes page and scrape the "Coming Soon" entries.
 *
 * @param {object} site - Site entity; `site.url` and optional `site.parameters` are read.
 *   `site.parameters.upcoming === false` disables the fetch; a truthy value is
 *   passed through `util.format` and used as the URL instead of the default
 *   `/trial/index.php` under `site.url`.
 * @returns {Promise<*>} `null` when upcoming is disabled, the result of
 *   `scrapeUpcoming` when the response is ok, otherwise `res.status`.
 */
async function fetchUpcoming(site) {
	const upcomingSetting = site.parameters?.upcoming;

	if (upcomingSetting === false) {
		return null;
	}

	let upcomingUrl;

	if (upcomingSetting) {
		upcomingUrl = util.format(upcomingSetting);
	} else {
		upcomingUrl = `${site.url}/trial/index.php`;
	}

	// XPath selector: ancestor divs of images whose alt text contains "Coming Soon".
	const res = await unprint.get(upcomingUrl, {
		selectAll: '//img[contains(@alt, "Coming Soon")]/ancestor::div',
	});

	if (!res.ok) {
		return res.status;
	}

	return scrapeUpcoming(res.context, site);
}
|
|
|
|
|
|
|
|
|
|
async function fetchProfile({ name: actorName, url }, entity) {
|
|
|
|
|
const actorSlugA = slugify(actorName, '');
|
|
|
|
|
const actorSlugB = slugify(actorName, '-');
|
|
|
|
|
@@ -356,10 +365,12 @@ async function fetchProfile({ name: actorName, url }, entity) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const res = await unprint.get(profileUrl);
|
|
|
|
|
const res = await unprint.get(profileUrl, {
|
|
|
|
|
followRedirects: false,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (res.ok) {
|
|
|
|
|
return scrapeProfile(res.context, profileUrl, actorName, entity);
|
|
|
|
|
return scrapeProfile(res.context, profileUrl, entity);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
|