From 6590e457b5cb038b1727d5bc0e1c752e3e38936c Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 27 Aug 2024 02:43:46 +0200 Subject: [PATCH] Added actor names to Jules Jordan title entry ID. --- src/scrapers/julesjordan.js | 22 ++++++++++++---------- src/tools/julesjordan-fix.js | 14 +++++++++----- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/scrapers/julesjordan.js b/src/scrapers/julesjordan.js index 654251c9..29bd3b78 100755 --- a/src/scrapers/julesjordan.js +++ b/src/scrapers/julesjordan.js @@ -26,7 +26,9 @@ function getEntryId(html) { } function getEntryIdFromTitle(release) { - return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); + // return slugify([release.title, release.date && unprint.formatDate(release.date, 'YYYY-MM-DD')]); // date not shown on updates page + // return slugify(release.title); + return slugify([release.title, ...(release.actors?.map((actor) => actor.name).toSorted() || [])]); } function scrapeAll(scenes, site, entryIdFromTitle) { @@ -38,11 +40,6 @@ function scrapeAll(scenes, site, entryIdFromTitle) { release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]'); release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']); - release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release)) - || element.dataset.setid - || query.element('.rating_box')?.dataset.id - || query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1]; - release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), @@ -79,6 +76,11 @@ function scrapeAll(scenes, site, entryIdFromTitle) { release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); } + release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release)) + || element.dataset.setid + || query.element('.rating_box')?.dataset.id + || query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1]; + return release; }); } @@ -169,10 +171,6 @@ async function scrapeScene({ html, query }, context) { release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']); - release.entryId = context.entity.parameters?.entryIdFromTitle - ? getEntryIdFromTitle(release) - : getEntryId(html); - release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({ name: unprint.query.content(actorEl), url: unprint.query.url(actorEl, null), @@ -239,6 +237,10 @@ async function scrapeScene({ html, query }, context) { release.stars = query.number('.avg_rating'); + release.entryId = context.entity.parameters?.entryIdFromTitle + ? getEntryIdFromTitle(release) + : getEntryId(html); + return release; } diff --git a/src/tools/julesjordan-fix.js b/src/tools/julesjordan-fix.js index 071826b7..0fcbddc7 100644 --- a/src/tools/julesjordan-fix.js +++ b/src/tools/julesjordan-fix.js @@ -2,7 +2,7 @@ // const config = require('config'); const initKnex = require('knex'); -const unprint = require('unprint'); +// const unprint = require('unprint'); // const args = require('yargs').argv; // const stashes = require('./julesjordan_stashes.json'); const slugify = require('../utils/slugify'); @@ -20,15 +20,19 @@ async function init() { }); const results = await knex('releases') - .select('releases.*') + .select('releases.*', knex.raw('json_agg(actors.name) as actor_names')) .leftJoin('entities', 'entities.id', 'releases.entity_id') - .whereIn('entities.slug', ['julesjordan']); + .leftJoin('releases_actors', 'releases_actors.release_id', 'releases.id') + .leftJoin('actors', 'actors.id', 'releases_actors.actor_id') + .whereIn('entities.slug', ['julesjordan']) + .groupBy('releases.id'); await knex.transaction(async (trx) => { return results.reduce(async (chain, scene) => { await chain; - const newEntryId = slugify([scene.title, scene.date && unprint.formatDate(scene.date, 'YYYY-MM-DD')]); + // const newEntryId = scene.entry_id.replace(/-\d{4}-\d{2}-\d{2}$/, ''); // remove date + const newEntryId = slugify([scene.title, ...(scene.actor_names?.toSorted() || [])]); console.log(newEntryId); @@ -36,7 +40,7 @@ async function init() { .where('id', scene.id) .update({ entry_id: newEntryId, - comment: `old entry: ${scene.entryId}`, + comment: `old entry id: ${scene.entry_id}`, }); }, Promise.resolve()); });