From 39813d4461955e4fcc549c1167fd3d45280c66f3 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Thu, 31 Mar 2022 22:46:54 +0200 Subject: [PATCH] Updated Insex scraper. --- config/default.js | 4 ++ seeds/02_sites.js | 12 +++- src/scrapers/insex.js | 129 ++++++++++++++++++++---------------------- 3 files changed, 74 insertions(+), 71 deletions(-) diff --git a/config/default.js b/config/default.js index bbb3e191..26d8b113 100644 --- a/config/default.js +++ b/config/default.js @@ -89,6 +89,10 @@ module.exports = { 'uksinners', // mindgeek 'pornhub', + // insex + 'paintoy', + 'aganmedon', + 'sensualpain', ], networks: [ // dummy network for testing diff --git a/seeds/02_sites.js b/seeds/02_sites.js index 2d50e9b1..a166e0b8 100644 --- a/seeds/02_sites.js +++ b/seeds/02_sites.js @@ -4219,7 +4219,6 @@ const sites = [ tags: ['bdsm'], parent: 'insex', parameters: { - scraper: 'alt', latest: 'https://www.sexuallybroken.com/sb', }, }, @@ -4230,13 +4229,20 @@ const sites = [ url: 'https://www.infernalrestraints.com', tags: ['bdsm'], parent: 'insex', + parameters: { + latest: 'https://www.infernalrestraints.com/ir', + }, }, { slug: 'hardtied', name: 'Hardtied', + alias: ['ht'], url: 'https://www.hardtied.com', tags: ['bdsm'], parent: 'insex', + parameters: { + latest: 'https://www.hardtied.com/ht', + }, }, { slug: 'realtimebondage', @@ -4245,6 +4251,9 @@ const sites = [ url: 'https://www.realtimebondage.com', tags: ['bdsm', 'live'], parent: 'insex', + parameters: { + latest: 'https://www.realtimebondage.com/rtb', + }, }, { slug: 'topgrl', @@ -4254,7 +4263,6 @@ const sites = [ tags: ['bdsm', 'femdom'], parent: 'insex', parameters: { - scraper: 'alt', latest: 'https://www.topgrl.com/tg', }, }, diff --git a/src/scrapers/insex.js b/src/scrapers/insex.js index 0b0affab..3d3d8098 100644 --- a/src/scrapers/insex.js +++ b/src/scrapers/insex.js @@ -5,6 +5,27 @@ const http = require('../utils/http'); const slugify = require('../utils/slugify'); function scrapeLatest(scenes, site) { + return scenes.map(({ query }) => { + const release = {}; + + release.url = query.url('figure a', 'href', { origin: site.parameters.latest }); + + release.title = query.cnt('.has-text-weight-bold, .is-size-6'); + release.date = query.date('span.tag', 'YYYY-MM-DD'); + release.actors = query.cnts('a.tag'); + + const cover = query.img('.image img'); + + release.poster = cover.replace('poster_noplay', 'trailer_noplay'); + release.covers = [cover]; + + release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`; + + return release; + }); +} + +function scrapeLatestLegacy(scenes, site) { return scenes.map(({ query }) => { // if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site); const release = {}; @@ -43,35 +64,39 @@ function scrapeLatest(scenes, site) { cover, ]]; - console.log(release); - return release; }); } -function scrapeLatestAlt(scenes, site) { - return scenes.map(({ query }) => { - const release = {}; +async function scrapeScene({ query }, url, channel, session) { + const release = {}; - release.url = query.url('figure a', 'href', { origin: site.parameters.latest }); + release.title = query.cnt('.columns div.is-size-5.has-text-weight-bold'); + release.description = query.cnt('.has-background-black-ter > div:nth-child(4)'); + release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD'); - release.title = query.cnt('.has-text-weight-bold, .is-size-6'); - release.date = query.date('span.tag', 'YYYY-MM-DD'); - release.actors = query.cnts('a.tag'); + release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]'); + release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span'); - const cover = query.img('.image img'); + release.poster = query.img('#videoPlayer, #iodvideo', 'poster'); + release.photos = Array.from(query.html('body > div:nth-child(6)').matchAll(/src="(http.*jpg)"/g), (match) => match[1]); - release.poster = cover.replace('poster_noplay', 'trailer_noplay'); - release.covers = [cover]; + release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; - release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title.split(/\s+/).slice(0, 5).join(' '))}`; - console.log('alt', release); + release.trailer = query.video(); - return release; - }); + if (!release.trailer) { + const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session }); + + if (trailerRes.ok) { + release.trailer = trailerRes.body; + } + } + + return release; } -function scrapeScene({ query }, site) { +function scrapeSceneLegacy({ query }, site) { const release = {}; const titleEl = query.q('.articleTitleText'); @@ -97,60 +122,26 @@ function scrapeScene({ query }, site) { const trailer = query.trailer(); if (trailer) release.trailer = { src: trailer }; - console.log(release); - - return release; -} - -async function scrapeSceneAlt({ query }, url, channel, session) { - const release = {}; - - release.title = query.cnt('.columns div.is-size-5'); - release.description = query.cnt('.has-background-black-ter > div:nth-child(4)'); - release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD'); - - release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]'); - release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span'); - - release.poster = query.img('#videoPlayer, #iodvideo', 'poster'); - release.photos = query.imgs('body > div:nth-child(6) img'); - - release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`; - - release.trailer = query.video(); - - if (!release.trailer) { - const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session }); - - if (trailerRes.ok) { - release.trailer = trailerRes.body; - } - } - return release; } async function fetchLatest(site, page = 1) { - const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`) - // || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page - || `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`; - - const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column')) - // || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]')) - || qu.get(url)); // JSON containing html as a property + const url = `${site.parameters.latest}/home.php?o=latest&p=${page}`; + const res = await qu.getAll(url, 'body > .columns .column', { cookie: 'consent=yes' }); if (res.ok) { - if (site.parameters?.scraper === 'alt') { - return scrapeLatestAlt(res.items, site); - } + return scrapeLatest(res.items, site); + } - /* - if (site.slug === 'paintoy') { - return scrapeLatest(res.items, site); - } - */ + return res.status; +} - return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site); +async function fetchLatestLegacy(site, page = 1) { + const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`; + const res = await qu.get(url); // JSON containing html as a property + + if (res.ok) { + return scrapeLatestLegacy(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site); } return res.status; @@ -158,14 +149,10 @@ async function fetchLatest(site, page = 1) { async function fetchScene(url, site) { const session = http.session(); - const res = await qu.get(url, null, null, { session }); + const res = await qu.get(url, null, { cookie: 'consent=yes' }, { session }); if (res.ok) { - if (site.parameters?.scraper === 'alt') { - return scrapeSceneAlt(res.item, url, site, session); - } - - return scrapeScene(res.item, site); + return scrapeScene(res.item, url, site, session); } return res.status; @@ -174,4 +161,8 @@ async function fetchScene(url, site) { module.exports = { fetchLatest, fetchScene, + legacy: { + fetchLatest: fetchLatestLegacy, + scrapeScene: scrapeSceneLegacy, + }, };