Separated Filthy Kings into its channels, upgraded Gamma scraper to accommodate.

This commit is contained in:
DebaucheryLibrarian 2023-07-09 04:35:30 +02:00
parent c51577098a
commit 88a56794aa
58 changed files with 152 additions and 120 deletions

14
package-lock.json generated
View File

@ -80,7 +80,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.10.0",
"unprint": "^0.10.1",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",
@ -17538,9 +17538,9 @@
}
},
"node_modules/unprint": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.0.tgz",
"integrity": "sha512-HQ73xcI7wzWeK3j3EQ0wWDe1dpMmeKY4LpFg9bNPD5G8ZyqYm5hYx1KGu6J9NTV/T5tOW4Ec0zyHoCRT2Msimg==",
"version": "0.10.1",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.1.tgz",
"integrity": "sha512-2KtzIQKlOzXyDDyrCQQQXWuljC6kHjAhYZT1NRiDT2Lr1GgnwR+R9iVqbq6iz1Z1Oflt7ngpYW1MGHy3xDnduw==",
"dependencies": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",
@ -32378,9 +32378,9 @@
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
},
"unprint": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.0.tgz",
"integrity": "sha512-HQ73xcI7wzWeK3j3EQ0wWDe1dpMmeKY4LpFg9bNPD5G8ZyqYm5hYx1KGu6J9NTV/T5tOW4Ec0zyHoCRT2Msimg==",
"version": "0.10.1",
"resolved": "https://registry.npmjs.org/unprint/-/unprint-0.10.1.tgz",
"integrity": "sha512-2KtzIQKlOzXyDDyrCQQQXWuljC6kHjAhYZT1NRiDT2Lr1GgnwR+R9iVqbq6iz1Z1Oflt7ngpYW1MGHy3xDnduw==",
"requires": {
"axios": "^0.27.2",
"bottleneck": "^2.19.5",

View File

@ -139,7 +139,7 @@
"tunnel": "0.0.6",
"ua-parser-js": "^1.0.32",
"undici": "^4.13.0",
"unprint": "^0.10.0",
"unprint": "^0.10.1",
"url-pattern": "^1.0.3",
"v-tooltip": "^2.0.3",
"video.js": "^7.11.4",

Binary file not shown.

After

Width:  |  Height:  |  Size: 218 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 833 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
public/img/logos/filthykings/lazy/filthykings.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

BIN
public/img/logos/filthykings/lazy/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 615 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 421 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 356 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
public/img/logos/filthykings/thumbs/filthykings.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

BIN
public/img/logos/filthykings/thumbs/network.png Executable file → Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 24 KiB

View File

@ -301,6 +301,19 @@ const networks = [
},
parent: 'mindgeek',
},
{
slug: 'filthykings',
name: 'Filthy Kings',
url: 'https://www.filthykings.com',
parent: 'gamma',
parameters: {
layout: 'api',
queryChannel: true,
scene: 'https://www.filthykings.com/en/video/filthykings',
referer: 'https://www.filthykings.com',
album: 'https://www.filthykings.com',
},
},
{
slug: 'firstanalquest',
name: 'First Anal Quest',

View File

@ -3728,11 +3728,80 @@ const sites = [
slug: 'filthykings',
name: 'Filthy Kings',
url: 'https://www.filthykings.com',
parent: 'gamma',
independent: true,
parameters: {
layout: 'api',
queryChannel: false,
filterNetwork: true,
},
parent: 'filthykings',
},
{
slug: 'itsanal',
name: 'Its Anal',
url: 'https://www.itsanal.com',
tags: ['anal'],
parent: 'filthykings',
},
{
slug: 'filthypov',
name: 'Filthy POV',
url: 'https://www.filthypov.com',
tags: ['pov'],
parameters: {
queryChannel: 'filthy-pov',
},
parent: 'filthykings',
},
{
slug: 'fkbts',
name: 'FK BTS',
url: 'https://www.filthykings.com',
tags: ['bts'],
parent: 'filthykings',
},
{
slug: 'fillupmymom',
name: 'Fill Up My Mom',
url: 'https://www.fillupmymom.com',
tags: ['milf'],
parameters: {
queryChannel: 'fill-up-my-mom',
},
parent: 'filthykings',
},
{
slug: 'filthymassage',
name: 'Filthy Massage',
url: 'https://www.filthyMassage.com',
tags: ['massage'],
parent: 'filthykings',
},
{
slug: 'filthytaboo',
name: 'Filthy Taboo',
url: 'https://www.filthytaboo.com',
parent: 'filthykings',
},
{
slug: 'filthynewbies',
name: 'Filthy Newbies',
url: 'https://www.filthykings.com',
parent: 'filthykings',
},
{
slug: 'filthyblowjobs',
name: 'Filthy Blowjobs',
url: 'https://www.filthykings.com',
tags: ['blowjob'],
parameters: {
queryChannel: 'filthyblowjob',
},
parent: 'filthykings',
},
{
slug: 'hotgirlsraw',
name: 'Hot Girls Raw',
url: 'https://www.hotgirlsraw.com',
parent: 'filthykings',
},
// FIRST ANAL QUEST
{

View File

@ -234,8 +234,6 @@ async function scrapeReleases(baseReleases, entitiesByHostname, type) {
const parameters = getRecursiveParameters(entity);
const preData = await entity.scraper.beforeFetchScenes(entity, parameters);
console.log('pre data', preData);
return [slug, { ...entity, preData }];
}

View File

@ -143,8 +143,8 @@ async function getPhotos(albumPath, site, includeThumbnails = true) {
}
}
async function getFullPhotos(entryId, site) {
const res = await http.get(`${site.url}/media/signPhotoset/${entryId}`, {
async function getFullPhotos(entryId, site, parameters) {
const res = await http.get(`${parameters.album || site.url}/media/signPhotoset/${entryId}`, {
headers: {
'X-Requested-With': 'XMLHttpRequest',
},
@ -193,8 +193,8 @@ async function getThumbs(entryId, site, parameters) {
async function getPhotosApi(entryId, site, parameters) {
const [photos, thumbs] = await Promise.all([
getFullPhotos(entryId, site, parameters),
getThumbs(entryId, site, parameters),
getFullPhotos(entryId, site, parameters).catch(() => { logger.error(`Gamma scraper failed to fetch photos for ${entryId}`); return []; }),
getThumbs(entryId, site, parameters).catch(() => { logger.error(`Gamma scraper failed to fetch photos for ${entryId}`); return []; }),
]);
return photos.concat(thumbs.slice(photos.length));
@ -217,14 +217,14 @@ function curateTitle(title, channel) {
return title.replace(new RegExp(`^\\s*${channel.name}\\s*[:|-]\\s`, 'i'), '');
}
async function scrapeApiReleases(json, site) {
return json.map((scene) => {
if (site.parameters?.extract && scene.sitename !== site.parameters.extract) {
return null;
async function scrapeApiReleases(json, site, options) {
return json.reduce((acc, scene) => {
if (options.parameters?.extract && scene.sitename !== options.parameters.extract) {
return acc;
}
if (site.parameters?.filterExclusive && scene.availableOnSite.length > 1) {
return null;
if (options.parameters?.filterExclusive && scene.availableOnSite.length > 1) {
return acc;
}
const release = {
@ -238,8 +238,11 @@ async function scrapeApiReleases(json, site) {
release.title = curateTitle(scene.title, site);
release.path = `/${scene.url_title}/${release.entryId}`;
if (site.parameters?.scene) release.url = `${site.parameters.scene}${release.path}`;
else if (site.url && site.parameters?.scene !== false) release.url = `${site.url}/en/video${release.path}`;
if (options.parameters?.scene) {
release.url = `${options.parameters.scene}${release.path}`;
} else if (site.url && options.parameters?.scene !== false) {
release.url = `${site.url}/en/video${release.path}`;
}
release.date = moment.utc(scene.release_date, 'YYYY-MM-DD').toDate();
release.director = scene.directors[0]?.name || null;
@ -276,10 +279,24 @@ async function scrapeApiReleases(json, site) {
];
}
if (options.parameters.filterNetwork && scene.mainChannel) {
return {
...acc,
unextracted: acc.unextracted.concat(release),
};
}
release.channel = slugify(scene.mainChannel?.id || scene.sitename, ''); // remove -
// release.movie = `${site.url}/en/movie/${scene.url_movie_title}/${scene.movie_id}`;
return release;
}).filter(Boolean);
return {
...acc,
scenes: acc.scenes.concat(release),
};
}, {
scenes: [],
unextracted: [],
});
}
function scrapeAll(scenes, site, networkUrl, hasTeaser = true) {
@ -476,7 +493,7 @@ async function scrapeReleaseApi(data, site, options, movieScenes) {
release.scenes = await Promise.all(movieScenes.map((movieScene) => scrapeReleaseApi(movieScene, site, options)));
}
release.channel = data.sitename;
release.channel = slugify(data.mainChannel?.id || data.sitename, ''); // remove -
release.qualities = data.download_sizes;
return release;
@ -638,11 +655,15 @@ async function fetchLatestApi(site, page = 1, options, preData, upcoming = false
const referer = options.parameters?.referer || `${options.parameters?.networkReferer ? site.parent.url : site.url}/en/videos`;
const { apiUrl } = await fetchApiCredentials(referer, site);
const params = `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]${options.parameters.queryChannel
? `&filters=channels.id:${options.parameters.queryChannel === true ? site.slug : options.parameters.queryChannel}`
: `&filters=sitename:${site.slug}`}`;
const res = await http.post(apiUrl, {
requests: [
{
indexName: 'all_scenes',
params: `query=&hitsPerPage=36&maxValuesPerFacet=100&page=${page - 1}&facetFilters=[["lesbian:"],["bisex:"],["shemale:"],["upcoming:${upcoming ? 1 : 0}"]]&filters=sitename:${site.slug}`, // OR channels.id:${site.slug}`,
params,
},
],
}, {
@ -654,7 +675,7 @@ async function fetchLatestApi(site, page = 1, options, preData, upcoming = false
});
if (res.status === 200 && res.body.results?.[0]?.hits) {
return scrapeApiReleases(res.body.results[0].hits, site);
return scrapeApiReleases(res.body.results[0].hits, site, options);
}
return res.status;

View File

@ -2,12 +2,10 @@
const util = require('util');
const Promise = require('bluebird');
const cheerio = require('cheerio');
const moment = require('moment');
const unprint = require('unprint');
const argv = require('../argv');
const qu = require('../utils/qu');
const http = require('../utils/http');
const { heightToCm } = require('../utils/convert');
const slugify = require('../utils/slugify');
@ -90,44 +88,23 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
});
}
function scrapeUpcoming(html, site) {
const $ = cheerio.load(html, { normalizeWhitespace: true });
const scenesElements = $('#coming_soon_carousel').find('.table').toArray();
return scenesElements.map((element) => {
function scrapeUpcoming(scenes, channel) {
return scenes.map(({ query, html }) => {
const release = {};
release.entryId = $(element).find('.upcoming_updates_thumb').attr('id').match(/\d+/)[0];
release.title = query.text('.overlay-text', { join: false })?.[0];
release.date = query.date('.overlay-text', 'MM/DD/YYYY');
const details = $(element).find('.update_details_comingsoon')
.eq(1)
.children()
.remove();
release.actors = query.all('.update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null),
}));
release.title = details
.end()
.text()
.trim();
release.poster = query.img('img') || query.img('img', { attribute: 'src0_1x' });
release.actors = details
.text()
.trim()
.split(', ');
release.entryId = channel.parameters?.entryIdFromTitle ? slugify(release.title) : getEntryId(html);
release.date = moment
.utc($(element).find('.update_date_comingsoon').text().slice(7), 'MM/DD/YYYY')
.toDate();
const photoElement = $(element).find('a img.thumbs');
const posterPath = photoElement.attr('src');
release.poster = posterPath.match(/^http/) ? posterPath : `${site.url}${posterPath}`;
const videoClass = $(element).find('.update_thumbnail div').attr('class');
const videoScript = $(element).find(`script:contains(${videoClass})`).html();
if (videoScript) {
release.teaser = videoScript.slice(videoScript.indexOf('https://'), videoScript.indexOf('.mp4') + 4);
}
// TODO: teaser
return release;
});
@ -230,8 +207,11 @@ async function scrapeScene({ html, query }, context) {
}
// release.photos = async () => await getPhotos(release.entryId, context.entity); // probably no longer works on any site
// release.photos = query.imgs('#images img');
release.photos = getPhotos(query, release, context);
if (argv.jjFullPhotos) {
release.photos = getPhotos(query, release, context);
} else {
release.photos = query.imgs('#images img');
}
if (query.exists('.update_dvds a')) {
release.movie = {
@ -271,55 +251,6 @@ function scrapeMovie({ el, query }, url, site) {
};
}
/*
function scrapeProfile(html, url, actorName, entity) {
const bio = document.querySelector('.model_bio').textContent;
const avatarEl = document.querySelector('.model_bio_pic img, .model_bio_thumb');
const profile = {
name: actorName,
};
const heightString = bio.match(/\d+ feet \d+ inches/);
const ageString = bio.match(/Age:\s*(\d{2})/);
const birthDateString = bio.match(/Age:\s*(\w+ \d{1,2}, \d{4})/);
const measurementsString = bio.match(/\w+-\d+-\d+/);
if (birthDateString) profile.birthdate = qu.parseDate(birthDateString[1], 'MMMM D, YYYY');
if (ageString) profile.age = Number(ageString[1]);
if (heightString) profile.height = heightToCm(heightString[0]);
if (measurementsString) {
const [bust, waist, hip] = measurementsString[0].split('-');
if (bust) profile.bust = bust;
if (waist) profile.waist = Number(waist);
if (hip) profile.hip = Number(hip);
}
if (avatarEl) {
const avatarSources = [
avatarEl.getAttribute('src0_3x'),
avatarEl.getAttribute('src0_2x'),
avatarEl.getAttribute('src0_1x'),
avatarEl.getAttribute('src0'),
avatarEl.getAttribute('src'),
]
.filter((avatar) => avatar && !/p\d+.jpe?g/.test(avatar)) // remove non-existing attributes and placeholder images
.map((avatar) => qu.prefixUrl(avatar, entity.url));
if (avatarSources.length) profile.avatar = avatarSources;
}
profile.releases = Array.from(document.querySelectorAll('.category_listing_block .update_details > a:first-child'), (el) => el.href);
console.log(profile);
return profile;
}
*/
function scrapeProfile({ query }, url, name, entity) {
const profile = { url };
@ -368,13 +299,13 @@ async function fetchUpcoming(site) {
if (site.parameters?.upcoming === false) return null;
const url = site.parameters?.upcoming ? util.format(site.parameters.upcoming) : `${site.url}/trial/index.php`;
const res = await http.get(url);
const res = await unprint.get(url, { selectAll: '//img[contains(@alt, "Coming Soon")]/parent::div' });
if (res.statusCode === 200) {
return scrapeUpcoming(res.body.toString(), site);
if (res.ok) {
return scrapeUpcoming(res.context, site);
}
return res.statusCode;
return res.status;
}
async function fetchMovie(url, site) {