forked from DebaucheryLibrarian/traxxx
Updated Insex scraper.
This commit is contained in:
parent
829a285a2d
commit
39813d4461
|
@ -89,6 +89,10 @@ module.exports = {
|
||||||
'uksinners',
|
'uksinners',
|
||||||
// mindgeek
|
// mindgeek
|
||||||
'pornhub',
|
'pornhub',
|
||||||
|
// insex
|
||||||
|
'paintoy',
|
||||||
|
'aganmedon',
|
||||||
|
'sensualpain',
|
||||||
],
|
],
|
||||||
networks: [
|
networks: [
|
||||||
// dummy network for testing
|
// dummy network for testing
|
||||||
|
|
|
@ -4219,7 +4219,6 @@ const sites = [
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
parameters: {
|
parameters: {
|
||||||
scraper: 'alt',
|
|
||||||
latest: 'https://www.sexuallybroken.com/sb',
|
latest: 'https://www.sexuallybroken.com/sb',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -4230,13 +4229,20 @@ const sites = [
|
||||||
url: 'https://www.infernalrestraints.com',
|
url: 'https://www.infernalrestraints.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.infernalrestraints.com/ir',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'hardtied',
|
slug: 'hardtied',
|
||||||
name: 'Hardtied',
|
name: 'Hardtied',
|
||||||
|
alias: ['ht'],
|
||||||
url: 'https://www.hardtied.com',
|
url: 'https://www.hardtied.com',
|
||||||
tags: ['bdsm'],
|
tags: ['bdsm'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.hardtied.com/ht',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'realtimebondage',
|
slug: 'realtimebondage',
|
||||||
|
@ -4245,6 +4251,9 @@ const sites = [
|
||||||
url: 'https://www.realtimebondage.com',
|
url: 'https://www.realtimebondage.com',
|
||||||
tags: ['bdsm', 'live'],
|
tags: ['bdsm', 'live'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
|
parameters: {
|
||||||
|
latest: 'https://www.realtimebondage.com/rtb',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
slug: 'topgrl',
|
slug: 'topgrl',
|
||||||
|
@ -4254,7 +4263,6 @@ const sites = [
|
||||||
tags: ['bdsm', 'femdom'],
|
tags: ['bdsm', 'femdom'],
|
||||||
parent: 'insex',
|
parent: 'insex',
|
||||||
parameters: {
|
parameters: {
|
||||||
scraper: 'alt',
|
|
||||||
latest: 'https://www.topgrl.com/tg',
|
latest: 'https://www.topgrl.com/tg',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -5,6 +5,27 @@ const http = require('../utils/http');
|
||||||
const slugify = require('../utils/slugify');
|
const slugify = require('../utils/slugify');
|
||||||
|
|
||||||
function scrapeLatest(scenes, site) {
|
function scrapeLatest(scenes, site) {
|
||||||
|
return scenes.map(({ query }) => {
|
||||||
|
const release = {};
|
||||||
|
|
||||||
|
release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
|
||||||
|
|
||||||
|
release.title = query.cnt('.has-text-weight-bold, .is-size-6');
|
||||||
|
release.date = query.date('span.tag', 'YYYY-MM-DD');
|
||||||
|
release.actors = query.cnts('a.tag');
|
||||||
|
|
||||||
|
const cover = query.img('.image img');
|
||||||
|
|
||||||
|
release.poster = cover.replace('poster_noplay', 'trailer_noplay');
|
||||||
|
release.covers = [cover];
|
||||||
|
|
||||||
|
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`;
|
||||||
|
|
||||||
|
return release;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeLatestLegacy(scenes, site) {
|
||||||
return scenes.map(({ query }) => {
|
return scenes.map(({ query }) => {
|
||||||
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
|
||||||
const release = {};
|
const release = {};
|
||||||
|
@ -43,35 +64,39 @@ function scrapeLatest(scenes, site) {
|
||||||
cover,
|
cover,
|
||||||
]];
|
]];
|
||||||
|
|
||||||
console.log(release);
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeLatestAlt(scenes, site) {
|
async function scrapeScene({ query }, url, channel, session) {
|
||||||
return scenes.map(({ query }) => {
|
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
|
release.title = query.cnt('.columns div.is-size-5.has-text-weight-bold');
|
||||||
|
release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
|
||||||
|
release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
|
||||||
|
|
||||||
release.title = query.cnt('.has-text-weight-bold, .is-size-6');
|
release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
|
||||||
release.date = query.date('span.tag', 'YYYY-MM-DD');
|
release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
|
||||||
release.actors = query.cnts('a.tag');
|
|
||||||
|
|
||||||
const cover = query.img('.image img');
|
release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
|
||||||
|
release.photos = Array.from(query.html('body > div:nth-child(6)').matchAll(/src="(http.*jpg)"/g), (match) => match[1]);
|
||||||
|
|
||||||
release.poster = cover.replace('poster_noplay', 'trailer_noplay');
|
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
||||||
release.covers = [cover];
|
|
||||||
|
|
||||||
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`;
|
release.trailer = query.video();
|
||||||
console.log('alt', release);
|
|
||||||
|
|
||||||
return release;
|
if (!release.trailer) {
|
||||||
});
|
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
||||||
|
|
||||||
|
if (trailerRes.ok) {
|
||||||
|
release.trailer = trailerRes.body;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrapeScene({ query }, site) {
|
return release;
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrapeSceneLegacy({ query }, site) {
|
||||||
const release = {};
|
const release = {};
|
||||||
|
|
||||||
const titleEl = query.q('.articleTitleText');
|
const titleEl = query.q('.articleTitleText');
|
||||||
|
@ -97,60 +122,26 @@ function scrapeScene({ query }, site) {
|
||||||
const trailer = query.trailer();
|
const trailer = query.trailer();
|
||||||
if (trailer) release.trailer = { src: trailer };
|
if (trailer) release.trailer = { src: trailer };
|
||||||
|
|
||||||
console.log(release);
|
|
||||||
|
|
||||||
return release;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scrapeSceneAlt({ query }, url, channel, session) {
|
|
||||||
const release = {};
|
|
||||||
|
|
||||||
release.title = query.cnt('.columns div.is-size-5');
|
|
||||||
release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
|
|
||||||
release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
|
|
||||||
|
|
||||||
release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
|
|
||||||
release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
|
|
||||||
|
|
||||||
release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
|
|
||||||
release.photos = query.imgs('body > div:nth-child(6) img');
|
|
||||||
|
|
||||||
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
|
|
||||||
|
|
||||||
release.trailer = query.video();
|
|
||||||
|
|
||||||
if (!release.trailer) {
|
|
||||||
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
|
|
||||||
|
|
||||||
if (trailerRes.ok) {
|
|
||||||
release.trailer = trailerRes.body;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return release;
|
return release;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchLatest(site, page = 1) {
|
async function fetchLatest(site, page = 1) {
|
||||||
const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`)
|
const url = `${site.parameters.latest}/home.php?o=latest&p=${page}`;
|
||||||
// || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page
|
const res = await qu.getAll(url, 'body > .columns .column', { cookie: 'consent=yes' });
|
||||||
|| `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
|
||||||
|
|
||||||
const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column'))
|
|
||||||
// || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]'))
|
|
||||||
|| qu.get(url)); // JSON containing html as a property
|
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (site.parameters?.scraper === 'alt') {
|
|
||||||
return scrapeLatestAlt(res.items, site);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
if (site.slug === 'paintoy') {
|
|
||||||
return scrapeLatest(res.items, site);
|
return scrapeLatest(res.items, site);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
|
return res.status;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchLatestLegacy(site, page = 1) {
|
||||||
|
const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
|
||||||
|
const res = await qu.get(url); // JSON containing html as a property
|
||||||
|
|
||||||
|
if (res.ok) {
|
||||||
|
return scrapeLatestLegacy(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -158,14 +149,10 @@ async function fetchLatest(site, page = 1) {
|
||||||
|
|
||||||
async function fetchScene(url, site) {
|
async function fetchScene(url, site) {
|
||||||
const session = http.session();
|
const session = http.session();
|
||||||
const res = await qu.get(url, null, null, { session });
|
const res = await qu.get(url, null, { cookie: 'consent=yes' }, { session });
|
||||||
|
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
if (site.parameters?.scraper === 'alt') {
|
return scrapeScene(res.item, url, site, session);
|
||||||
return scrapeSceneAlt(res.item, url, site, session);
|
|
||||||
}
|
|
||||||
|
|
||||||
return scrapeScene(res.item, site);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status;
|
return res.status;
|
||||||
|
@ -174,4 +161,8 @@ async function fetchScene(url, site) {
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchLatest,
|
fetchLatest,
|
||||||
fetchScene,
|
fetchScene,
|
||||||
|
legacy: {
|
||||||
|
fetchLatest: fetchLatestLegacy,
|
||||||
|
scrapeScene: scrapeSceneLegacy,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue