Compare commits

..

No commits in common. "071b09709b5a8e1dcd0325f8a546d64adf448cc9" and "6bfc5e437886c35e931d9d363dbb810c68191c39" have entirely different histories.

4 changed files with 50 additions and 102 deletions

2
package-lock.json generated
View File

@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.110.0",
"version": "1.109.1",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.110.0",
"version": "1.109.1",
"description": "All the latest porn releases in one place",
"main": "src/app.js",
"scripts": {

View File

@ -42,50 +42,11 @@ async function createThumbnail(buffer) {
return null;
}
function groupFallbacksByPriority(chunks) {
	/*
		Re-orders the sources inside every chunk by fallback priority.
		An item that is an array of fallbacks contributes its n-th entry to the n-th priority tier;
		an item that is not an array only has a first-priority source and lands in tier 0.
		The tiers of a chunk are then concatenated in order, so every item's first source precedes
		any item's second source, and so on. One flattened array is returned per input chunk, example:

		IN (one chunk): [[1, 2, 3], 10, [1, 2, 3, 4, 5], [1, 2, 3]]
		OUT (that chunk): [1, 10, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5]
	*/
	return chunks.map((group) => {
		const tiers = [];

		// append a source to the tier matching its priority, creating the tier on first use
		const addToTier = (priority, candidate) => {
			if (!tiers[priority]) {
				tiers[priority] = [];
			}

			tiers[priority].push(candidate);
		};

		group.forEach((entry) => {
			if (Array.isArray(entry)) {
				// provided fallbacks keep their own index as priority
				entry.forEach((fallback, priority) => addToTier(priority, fallback));
				return;
			}

			// no fallbacks provided, first priority
			addToTier(0, entry);
		});

		// only one level is flattened, so the tiers are merged but the sources themselves stay intact
		return tiers.flat();
	});
}
function pluckItems(items, specifiedLimit, asFallbacks = true) {
function pluckItems(items, specifiedLimit) {
const limit = specifiedLimit || config.media.limit;
if (!items || items.length <= limit) return items;
if (asFallbacks) {
const chunks = chunk(items, Math.ceil(items.length / limit));
const fallbacks = groupFallbacksByPriority(chunks);
return fallbacks;
}
const plucked = [1]
.concat(
Array.from({ length: limit - 1 }, (value, index) => Math.round((index + 1) * (items.length / (limit - 1)))),
@ -132,8 +93,8 @@ async function extractItem(source) {
return null;
}
async function fetchSource(source, domain, role) {
logger.silly(`Fetching ${domain} ${role} from ${source.src || source}`);
async function fetchSource(source, domain, role, originalSource) {
logger.verbose(`Fetching ${domain} ${role} from ${source.src || source}`);
// const res = await bhttp.get(source.src || source);
const res = await get(source.src || source, {
@ -150,7 +111,7 @@ async function fetchSource(source, domain, role) {
const hash = getHash(res.body);
const { entropy, size, width, height } = /image/.test(mimetype) ? await getMeta(res.body) : {};
logger.silly(`Fetched media item from ${source.src || source}`);
logger.verbose(`Fetched media item from ${source.src || source}`);
return {
file: res.body,
@ -162,7 +123,7 @@ async function fetchSource(source, domain, role) {
width: width || null,
height: height || null,
quality: source.quality || null,
source: source.src || source,
source: originalSource?.src || originalSource || source.src || source,
scraper: source.scraper,
copyright: source.copyright,
};
@ -172,11 +133,9 @@ async function fetchSource(source, domain, role) {
}
async function fetchItem(source, index, existingItemsBySource, domain, role, attempt = 1, originalSource = null, sourceIndex = 0) {
try {
if (!source) {
throw new Error(`Empty ${domain} ${role} source in ${originalSource}`);
}
if (!source) return null;
try {
if (Array.isArray(source)) {
if (source.every(sourceX => sourceX.quality)) {
// various video qualities provided
@ -201,18 +160,19 @@ async function fetchItem(source, index, existingItemsBySource, domain, role, att
return null;
}
return await fetchSource(source, domain, role, originalSource);
return fetchSource(source, domain, role, originalSource);
} catch (error) {
logger.warn(`Failed attempt ${attempt}/3 to fetch ${domain} ${role} ${index + 1} (${source.src || source}): ${error}`);
if (source && attempt < 3) {
// only retry if source is provided at all
/*
if (attempt < 3) {
await Promise.delay(5000);
return fetchItem(source, index, existingItemsBySource, domain, role, attempt + 1, originalSource, sourceIndex);
}
*/
if (originalSource && sourceIndex < originalSource.length - 1) {
throw error; // gets caught to try next source
if (originalSource && sourceIndex < originalSource.length) {
throw error;
}
return null;
@ -325,8 +285,6 @@ async function storeMedia(sources, domain, role, { entropyFilter = 2.5 } = {}) {
return {};
}
console.log(presentSources, presentSources.length);
// split up source list to prevent excessive RAM usage
const itemChunksBySource = await Promise.all(chunk(presentSources, 50).map(async (sourceChunk, index) => {
try {
@ -396,12 +354,12 @@ function associateTargetMedia(targetId, sources, mediaBySource, domain, role, pr
.map((source) => {
if (!source) return null;
if (Array.isArray(source)) {
const availableSource = source.find(fallbackSource => mediaBySource[fallbackSource.src || fallbackSource]);
return mediaBySource[availableSource];
}
const mediaItem = Array.isArray(source)
? mediaBySource[source.map(sourceX => sourceX.src || sourceX).toString()]
: mediaBySource[source.src || source];
return mediaBySource[source.src || source];
// return mediaItem && { [`${domain}_id`]: targetId, media_id: mediaItem.id };
return mediaItem;
})
.filter(Boolean)
// .sort((mediaItemA, mediaItemB) => mediaItemB.height - mediaItemA.height) // prefer high res images for primary item

View File

@ -76,63 +76,53 @@ async function scrapeScene(html, url, site, useGallery) {
const playerObject = $('script:contains("new VideoPlayer")').html();
const data = JSON.parse(playerObject.slice(playerObject.indexOf('{"swf":'), playerObject.indexOf('} );') + 1));
const release = { url };
const originalTitle = $('h1.watchpage-title').text().trim();
const { shootId, title } = extractTitle(originalTitle);
const entryId = new URL(url).pathname.split('/')[2];
release.shootId = shootId;
release.entryId = new URL(url).pathname.split('/')[2];
release.title = title;
release.date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
const date = moment.utc($('span[title="Release date"] a').text(), 'YYYY-MM-DD').toDate();
const [actorsElement, tagsElement, descriptionElement] = $('.scene-description__row').toArray();
release.description = $('meta[name="description"]')?.attr('content')?.trim()
|| (descriptionElement && $(descriptionElement).find('dd').text().trim());
release.actors = $(actorsElement)
const actors = $(actorsElement)
.find('a[href*="com/model"]')
.map((actorIndex, actorElement) => $(actorElement).text()).toArray();
release.duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
release.tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
const description = $('meta[name="description"]')?.attr('content')?.trim() || (descriptionElement && $(descriptionElement).find('dd').text().trim());
const duration = moment.duration($('span[title="Runtime"]').text().trim()).asSeconds();
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
const photos = useGallery
? $('.gallery a img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray()
: $('.screenshots img').map((photoIndex, photoElement) => $(photoElement).attr('src')).toArray();
release.photos = photos.map((source) => {
// source without parameters sometimes serves larger preview photo
const { origin, pathname } = new URL(source);
return `${origin}${pathname}`;
/* disable thumbnail as fallback, usually enough high res photos available
return [
`${origin}${pathname}`,
source,
];
*/
});
const posterStyle = $('#player').attr('style');
const poster = posterStyle.slice(posterStyle.indexOf('(') + 1, -1);
release.poster = poster || release.photos.slice(Math.floor(release.photos.length / 3) * -1); // poster unavailable, try last 1/3rd of high res photos as fallback
const trailer = data.clip.qualities.find(clip => clip.quality === 'vga' || clip.quality === 'hd');
release.trailer = {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
};
const studioName = $('.watchpage-studioname').first().text().trim();
release.studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
const studio = studioName.replace(/[\s.']+/g, '').toLowerCase();
const tags = $(tagsElement).find('a').map((tagIndex, tagElement) => $(tagElement).text()).toArray();
return release;
return {
url,
shootId,
entryId,
title,
description,
date,
actors,
duration,
poster,
photos,
trailer: {
src: trailer.src,
type: trailer.type,
quality: trailer.quality === 'vga' ? 480 : 720,
},
tags,
site,
studio,
};
}
async function scrapeProfile(html, _url, actorName) {