6 Commits

Author SHA1 Message Date
DebaucheryLibrarian
e9ba02d65d 1.213.2 2022-03-31 22:46:56 +02:00
DebaucheryLibrarian
39813d4461 Updated Insex scraper. 2022-03-31 22:46:54 +02:00
DebaucheryLibrarian
829a285a2d 1.213.1 2022-03-31 14:34:12 +02:00
DebaucheryLibrarian
a19a77e165 Optionalized qualities. 2022-03-31 14:34:10 +02:00
DebaucheryLibrarian
122dd3eaee 1.213.0 2022-03-31 14:11:23 +02:00
DebaucheryLibrarian
18b219850e Storing scene qualities. Updated Perv City scraper. 2022-03-31 14:11:13 +02:00
11 changed files with 133 additions and 93 deletions

View File

@@ -203,6 +203,19 @@
</div>
</div>
<!-- hide the row when the list is missing or empty, so the label never renders on its own -->
<div
	v-if="release.qualities && release.qualities.length > 0"
	class="row"
>
	<span class="row-label">Available qualities</span>
	<span
		v-for="quality in release.qualities"
		:key="quality"
		class="quality"
	>{{ quality }}</span>
</div>
<div
v-if="release.comment"
class="row"
@@ -470,6 +483,16 @@ export default {
text-overflow: ellipsis;
}
// Each quality is rendered as a bare value; append the 'p' suffix and a comma
// separator after every entry, and the suffix alone after the last one.
.quality {
	&::after {
		content: 'p, ';
	}

	&:last-child::after {
		content: 'p';
	}
}
.releases {
margin: 0 0 .5rem 0;
}

View File

@@ -367,6 +367,7 @@ const releaseFields = `
date
datePrecision
slug
qualities
shootId
productionDate
comment
@@ -475,6 +476,7 @@ const releaseFragment = `
duration
createdAt
shootId
qualities
productionDate
createdBatchId
productionLocation

View File

@@ -89,6 +89,10 @@ module.exports = {
'uksinners',
// mindgeek
'pornhub',
// insex
'paintoy',
'aganmedon',
'sensualpain',
],
networks: [
// dummy network for testing

View File

@@ -0,0 +1,7 @@
exports.up = async (knex) => knex.schema.alterTable('releases', (table) => {
table.specificType('qualities', 'text[]');
});
exports.down = async (knex) => knex.schema.alterTable('releases', (table) => {
table.dropColumn('qualities');
});

View File

@@ -0,0 +1,12 @@
exports.up = async (knex) => knex.raw(`
CREATE MATERIALIZED VIEW entities_stats
AS
SELECT entities.id AS entity_id, count(releases.id) AS releases_count
FROM entities
LEFT JOIN releases ON releases.entity_id = entities.id
GROUP BY entities.id;
`);
exports.down = async (knex) => knex.raw(`
DROP MATERIALIZED VIEW entities_stats;
`);

23
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "traxxx",
"version": "1.212.9",
"version": "1.213.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "traxxx",
"version": "1.212.9",
"version": "1.213.2",
"license": "ISC",
"dependencies": {
"@casl/ability": "^5.2.2",
@@ -11650,25 +11650,6 @@
"webidl-conversions": "^3.0.0"
}
},
"node_modules/node-fetch/node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o="
},
"node_modules/node-fetch/node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
},
"node_modules/node-fetch/node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha1-lmRU6HZUYuN2RNNib2dCzotwll0=",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/node-gyp": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-7.1.2.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "traxxx",
"version": "1.212.9",
"version": "1.213.2",
"description": "All the latest porn releases in one place",
"main": "src/app.js",
"scripts": {

View File

@@ -4219,7 +4219,6 @@ const sites = [
tags: ['bdsm'],
parent: 'insex',
parameters: {
scraper: 'alt',
latest: 'https://www.sexuallybroken.com/sb',
},
},
@@ -4230,13 +4229,20 @@ const sites = [
url: 'https://www.infernalrestraints.com',
tags: ['bdsm'],
parent: 'insex',
parameters: {
latest: 'https://www.infernalrestraints.com/ir',
},
},
{
slug: 'hardtied',
name: 'Hardtied',
alias: ['ht'],
url: 'https://www.hardtied.com',
tags: ['bdsm'],
parent: 'insex',
parameters: {
latest: 'https://www.hardtied.com/ht',
},
},
{
slug: 'realtimebondage',
@@ -4245,6 +4251,9 @@ const sites = [
url: 'https://www.realtimebondage.com',
tags: ['bdsm', 'live'],
parent: 'insex',
parameters: {
latest: 'https://www.realtimebondage.com/rtb',
},
},
{
slug: 'topgrl',
@@ -4254,7 +4263,6 @@ const sites = [
tags: ['bdsm', 'femdom'],
parent: 'insex',
parameters: {
scraper: 'alt',
latest: 'https://www.topgrl.com/tg',
},
},

View File

@@ -5,6 +5,27 @@ const http = require('../utils/http');
const slugify = require('../utils/slugify');
/**
 * Turns the scene tiles of a "latest" listing page into release objects.
 *
 * @param {Array<{query: object}>} scenes - one query context per scene tile
 * @param {object} site - channel being scraped; `site.parameters.latest` is passed as origin for scene URLs
 * @returns {object[]} releases with url, title, date, actors, poster, covers and entryId
 */
function scrapeLatest(scenes, site) {
	return scenes.map(({ query }) => {
		const url = query.url('figure a', 'href', { origin: site.parameters.latest });
		const title = query.cnt('.has-text-weight-bold, .is-size-6');
		const date = query.date('span.tag', 'YYYY-MM-DD');
		const actors = query.cnts('a.tag');

		// the tile image serves as cover; the poster is the same path with the 'trailer_noplay' variant swapped in
		const cover = query.img('.image img');
		const poster = cover.replace('poster_noplay', 'trailer_noplay');

		// no ID is read from the tile, so one is composed from the date and the first five words of the title
		// NOTE(review): the scene scraper slugs the full title for its entryId — confirm the two stay compatible
		const datePart = qu.formatDate(date, 'YYYY-MM-DD');
		const titlePart = slugify(title.split(/\s+/).slice(0, 5).join(' '));

		return {
			url,
			title,
			date,
			actors,
			poster,
			covers: [cover],
			entryId: `${datePart}-${titlePart}`,
		};
	});
}
function scrapeLatestLegacy(scenes, site) {
return scenes.map(({ query }) => {
// if (q('.articleTitleText')) return scrapeFirstLatest(ctx(el), site);
const release = {};
@@ -47,28 +68,35 @@ function scrapeLatest(scenes, site) {
});
}
function scrapeLatestAlt(scenes, site) {
return scenes.map(({ query }) => {
const release = {};
async function scrapeScene({ query }, url, channel, session) {
const release = {};
release.url = query.url('figure a', 'href', { origin: site.parameters.latest });
release.title = query.cnt('.columns div.is-size-5.has-text-weight-bold');
release.description = query.cnt('.has-background-black-ter > div:nth-child(4)');
release.date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
release.title = query.cnt('.has-text-weight-bold');
release.date = query.date('span.tag', 'YYYY-MM-DD');
release.actors = query.cnts('a.tag');
release.actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
release.tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
const cover = query.img('.image img');
release.poster = query.img('#videoPlayer, #iodvideo', 'poster');
release.photos = Array.from(query.html('body > div:nth-child(6)').matchAll(/src="(http.*jpg)"/g), (match) => match[1]);
release.poster = cover.replace('poster_noplay', 'trailer_noplay');
release.covers = [cover];
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
release.entryId = `${qu.formatDate(release.date, 'YYYY-MM-DD')}-${slugify(release.title)}`;
release.trailer = query.video();
return release;
});
if (!release.trailer) {
const trailerRes = await http.get(`${channel.url}/api/play-api.php`, { session });
if (trailerRes.ok) {
release.trailer = trailerRes.body;
}
}
return release;
}
function scrapeScene({ query }, site) {
function scrapeSceneLegacy({ query }, site) {
const release = {};
const titleEl = query.q('.articleTitleText');
@@ -97,55 +125,23 @@ function scrapeScene({ query }, site) {
return release;
}
/**
 * Builds a release from a scene page.
 *
 * When the page itself yields no video, `${channel.url}/api/play-api.php` is
 * requested with the given session and, if that response is ok, its body is
 * used as the trailer.
 *
 * @param {{query: object}} context - query context of the scene page
 * @param {string} url - scene URL (not used by this function)
 * @param {object} channel - channel being scraped; only `channel.url` is read
 * @param {object} session - HTTP session forwarded to the trailer request
 * @returns {Promise<object>} the scraped release
 */
async function scrapeSceneAlt({ query }, url, channel, session) {
	const title = query.cnt('.columns div.is-size-5');
	const description = query.cnt('.has-background-black-ter > div:nth-child(4)');
	const date = query.date('.has-text-white-ter span.tag', 'YYYY-MM-DD');
	const actors = query.cnts('.has-text-white-ter a.tag[href*="home.php"]');
	const tags = query.cnts('.has-background-black-ter > div:nth-child(6) > span');
	const poster = query.img('#videoPlayer, #iodvideo', 'poster');
	const photos = query.imgs('body > div:nth-child(6) img');

	// no ID is read from the page, so one is composed from the date and the slugified title
	const entryId = `${qu.formatDate(date, 'YYYY-MM-DD')}-${slugify(title)}`;

	const scene = {
		title,
		description,
		date,
		actors,
		tags,
		poster,
		photos,
		entryId,
		trailer: query.video(),
	};

	if (scene.trailer) {
		return scene;
	}

	// fall back to the play API when the page has no video of its own
	const playRes = await http.get(`${channel.url}/api/play-api.php`, { session });

	if (playRes.ok) {
		scene.trailer = playRes.body;
	}

	return scene;
}
async function fetchLatest(site, page = 1) {
const url = (site.parameters?.scraper === 'alt' && `${site.parameters.latest}/home.php?o=latest&p=${page}`)
// || (site.slug === 'paintoy' && `${site.url}/corporal/punishment/gallery.php?type=brief&page=${page}`) // paintoy's site is (was?) partially broken, use front page
|| `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
const res = await ((site.parameters?.scraper === 'alt' && qu.getAll(url, 'body > .columns .column'))
// || (site.slug === 'paintoy' && qu.getAll(url, '#articleTable table[cellspacing="2"]'))
|| qu.get(url)); // JSON containing html as a property
const url = `${site.parameters.latest}/home.php?o=latest&p=${page}`;
const res = await qu.getAll(url, 'body > .columns .column', { cookie: 'consent=yes' });
if (res.ok) {
if (site.parameters?.scraper === 'alt') {
return scrapeLatestAlt(res.items, site);
}
return scrapeLatest(res.items, site);
}
/*
if (site.slug === 'paintoy') {
return scrapeLatest(res.items, site);
}
*/
return res.status;
}
return scrapeLatest(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
async function fetchLatestLegacy(site, page = 1) {
const url = `${site.url}/scripts/switch_tour.php?type=brief&page=${page}`;
const res = await qu.get(url); // JSON containing html as a property
if (res.ok) {
return scrapeLatestLegacy(qu.extractAll(res.body.html, '#articleTable > tbody > tr:nth-child(2) > td > table'), site);
}
return res.status;
@@ -153,14 +149,10 @@ async function fetchLatest(site, page = 1) {
async function fetchScene(url, site) {
const session = http.session();
const res = await qu.get(url, null, null, { session });
const res = await qu.get(url, null, { cookie: 'consent=yes' }, { session });
if (res.ok) {
if (site.parameters?.scraper === 'alt') {
return scrapeSceneAlt(res.item, url, site, session);
}
return scrapeScene(res.item, site);
return scrapeScene(res.item, url, site, session);
}
return res.status;
@@ -169,4 +161,8 @@ async function fetchScene(url, site) {
module.exports = {
fetchLatest,
fetchScene,
legacy: {
fetchLatest: fetchLatestLegacy,
scrapeScene: scrapeSceneLegacy,
},
};

View File

@@ -12,6 +12,13 @@ const channelCodes = {
uha: 'upherasshole',
};
// Lookup from a format icon's file name (without the .png extension) to the
// quality it stands for; scrapeScene resolves the icons in '.avaiFormate'
// through this table and drops any icon that is not listed here.
// NOTE(review): values look like vertical resolutions in pixels (2160 = 4K) — confirm.
const qualities = {
v4k: 2160,
vFullHD: 1080,
vHD: 720,
vSD: 480,
};
const channelRegExp = new RegExp(Object.keys(channelCodes).join('|'), 'i');
function scrapeAll(scenes, entity) {
@@ -42,9 +49,12 @@ function scrapeScene({ query }) {
release.entryId = query.q('.trailerLeft img', 'id').match(/set-target-(\d+)/)[1];
release.title = query.cnt('.infoHeader h1');
release.description = query.cnt('.infoBox p');
release.description = query.cnt('.description');
release.duration = query.duration('.tRuntime');
release.actors = query.cnts('.infoBox .tour_update_models a');
release.tags = query.cnts('.tagcats a');
release.qualities = query.imgs('.avaiFormate img').map((src) => qualities[src.match(/\/(\w+)\.png/)[1]]).filter(Boolean);
release.poster = query.img('.posterimg');
release.photos = query.imgs('.trailerSnaps img').slice(1); // first photo is poster in lower quality

View File

@@ -38,11 +38,8 @@ async function curateReleaseEntry(release, batchId, existingRelease, type = 'sce
date_precision: release.datePrecision,
slug,
description: release.description,
qualities: release.qualities?.map(Number).filter(Boolean),
comment: release.comment,
// director: release.director,
// likes: release.rating && release.rating.likes,
// dislikes: release.rating && release.rating.dislikes,
// rating: release.rating && release.rating.stars && Math.floor(release.rating.stars),
deep: typeof release.deep === 'boolean' ? release.deep : false,
deep_url: release.deepUrl,
updated_batch_id: batchId,