From 644226531e2c15fd46273460142faf3c05a63c8a Mon Sep 17 00:00:00 2001
From: DebaucheryLibrarian
Date: Wed, 5 Jun 2024 03:05:30 +0200
Subject: [PATCH] Using URL or title slug for Whale Member entry IDs for backwards compatibility.

---
 src/scrapers/whalemember.js  | 16 +++++++++++-----
 src/tools/whalemember-fix.js | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 src/tools/whalemember-fix.js

diff --git a/src/scrapers/whalemember.js b/src/scrapers/whalemember.js
index b7e0e5ae..b105ea51 100755
--- a/src/scrapers/whalemember.js
+++ b/src/scrapers/whalemember.js
@@ -3,17 +3,22 @@
 const unprint = require('unprint');
 
 const { stripQuery } = require('../utils/url');
+const slugify = require('../utils/slugify');
 
 function scrapeLatest(scenes, channel) {
-	return scenes.map(({ query, element }) => {
+	return scenes.map(({ query }) => {
 		const release = {};
 
 		release.url = query.url('[href*="/video"]');
-		release.entryId = unprint.query.attribute(element, null, 'data-vid');
+		// release.entryId = unprint.query.attribute(element, null, 'data-vid'); // does not match old videos
 
 		release.title = query.content('.video-thumbnail-footer a[href*="/video"]');
 		release.date = query.date('.actor-list + span', 'MM/DD/YYYY');
 
+		release.entryId = release.url
+			? new URL(release.url).pathname.split('/').at(-1)
+			: slugify(release.title);
+
 		release.actors = query.all('.actor-list a').map((actorEl) => ({
 			name: unprint.query.content(actorEl),
 			url: unprint.query.url(actorEl, null, { origin: channel.url }),
@@ -39,10 +44,11 @@ function scrapeLatest(scenes, channel) {
 	});
 }
 
-function scrapeScene({ query }, channel) {
+function scrapeScene({ query }, { url, entity }) {
 	const release = {};
 
-	release.entryId = query.attribute('div[data-id]', 'data-id');
+	// release.entryId = query.attribute('div[data-id]', 'data-id');
+	release.entryId = new URL(url).pathname.split('/').at(-1);
 
 	release.title = query.content('.scene-info h1');
 	release.description = query.content('//div[contains(@class, \'scene-info\')]//i[contains(@class, \'fa-quote\')]/following-sibling::span');
@@ -51,7 +57,7 @@ function scrapeScene({ query }, channel) {
 
 	release.actors = query.all('.scene-info a[href*="/models"]').map((actorEl) => ({
 		name: unprint.query.content(actorEl),
-		url: unprint.query.url(actorEl, null, { origin: channel.url }),
+		url: unprint.query.url(actorEl, null, { origin: entity.url }),
 	}));
 
 	release.poster = query.poster('#player-wrapper video');
diff --git a/src/tools/whalemember-fix.js b/src/tools/whalemember-fix.js
new file mode 100644
index 00000000..0cc38bd8
--- /dev/null
+++ b/src/tools/whalemember-fix.js
@@ -0,0 +1,44 @@
+'use strict';
+
+// One-off maintenance tool: rewrites the entry ID of every release under the
+// 'whalemember' and 'pornpros' networks to the last '/'-separated part of its URL.
+const initKnex = require('knex');
+
+async function init() {
+	// Connection details are read from the PG* environment variables so that
+	// no database host or password is ever committed to the repository.
+	const knex = initKnex({
+		client: 'pg',
+		connection: {
+			host: process.env.PGHOST,
+			user: process.env.PGUSER || 'traxxx',
+			password: process.env.PGPASSWORD,
+			database: process.env.PGDATABASE || 'traxxx',
+		},
+		asyncStackTraces: true,
+	});
+
+	try {
+		const results = await knex('releases')
+			.select('releases.id')
+			.leftJoin('entities', 'entities.id', 'releases.entity_id')
+			.leftJoin('entities as networks', 'networks.id', 'entities.parent_id')
+			.whereIn('networks.slug', ['whalemember', 'pornpros'])
+			.whereNotNull('releases.url'); // split_part() on a NULL URL would overwrite the entry ID with NULL
+
+		console.log(results.length);
+
+		// NOTE: a negative split_part() index requires PostgreSQL 14 or newer
+		await knex('releases')
+			.whereIn('id', results.map((result) => result.id))
+			.update('entry_id', knex.raw('split_part(releases.url, \'/\', -1)'));
+	} finally {
+		// release the connection pool even when a query fails
+		await knex.destroy();
+	}
+}
+
+init().catch((error) => {
+	console.error(error);
+	process.exitCode = 1;
+});