Added actor names to Jules Jordan title entry ID.

This commit is contained in:
DebaucheryLibrarian 2024-08-27 02:43:46 +02:00
parent 386d464225
commit 6590e457b5
2 changed files with 21 additions and 15 deletions

View File

@ -26,7 +26,9 @@ function getEntryId(html) {
} }
/**
 * Build an entry ID for a release from its title and the names of its actors.
 *
 * The release date is deliberately not part of the ID: it is not shown on the
 * updates page, so an ID that included it could not be reproduced from there.
 * Actor names are sorted so the ID does not depend on the order in which the
 * actors were scraped.
 *
 * @param {object} release - Release being scraped.
 * @param {string} release.title - Release title.
 * @param {Array<{name: string}>} [release.actors] - Actors scraped so far; may be absent.
 * @returns {*} Whatever `slugify` returns for `[title, ...sortedActorNames]`.
 */
function getEntryIdFromTitle(release) {
	// map() yields a fresh array, so the in-place sort never reorders release.actors.
	const actorNames = (release.actors ?? []).map((actor) => actor.name).sort();

	return slugify([release.title, ...actorNames]);
}
function scrapeAll(scenes, site, entryIdFromTitle) { function scrapeAll(scenes, site, entryIdFromTitle) {
@ -38,11 +40,6 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]'); release.url = query.url('.content_img a, .dvd_info > a, a.update_title, a[title]');
release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']); release.date = query.date('.update_date', ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release))
|| element.dataset.setid
|| query.element('.rating_box')?.dataset.id
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({ release.actors = query.all('.content_img .update_models a, .update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl), name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null), url: unprint.query.url(actorEl, null),
@ -79,6 +76,11 @@ function scrapeAll(scenes, site, entryIdFromTitle) {
release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4); release.teaser = teaserScript.slice(teaserScript.indexOf('http'), teaserScript.indexOf('.mp4') + 4);
} }
release.entryId = (entryIdFromTitle && getEntryIdFromTitle(release))
|| element.dataset.setid
|| query.element('.rating_box')?.dataset.id
|| query.attribute('a img', 'id')?.match(/set-target-(\d+)/)?.[1];
return release; return release;
}); });
} }
@ -169,10 +171,6 @@ async function scrapeScene({ html, query }, context) {
release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']); release.date = query.date(['.update_date', '//div[./span[contains(text(), "Date")]]'], ['MM/DD/YYYY', 'YYYY-MM-DD']);
release.entryId = context.entity.parameters?.entryIdFromTitle
? getEntryIdFromTitle(release)
: getEntryId(html);
release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({ release.actors = query.all('.backgroundcolor_info > .update_models a, .item .update_models a, .player-scene-description .update_models a').map((actorEl) => ({
name: unprint.query.content(actorEl), name: unprint.query.content(actorEl),
url: unprint.query.url(actorEl, null), url: unprint.query.url(actorEl, null),
@ -239,6 +237,10 @@ async function scrapeScene({ html, query }, context) {
release.stars = query.number('.avg_rating'); release.stars = query.number('.avg_rating');
release.entryId = context.entity.parameters?.entryIdFromTitle
? getEntryIdFromTitle(release)
: getEntryId(html);
return release; return release;
} }

View File

@ -2,7 +2,7 @@
// const config = require('config'); // const config = require('config');
const initKnex = require('knex'); const initKnex = require('knex');
const unprint = require('unprint'); // const unprint = require('unprint');
// const args = require('yargs').argv; // const args = require('yargs').argv;
// const stashes = require('./julesjordan_stashes.json'); // const stashes = require('./julesjordan_stashes.json');
const slugify = require('../utils/slugify'); const slugify = require('../utils/slugify');
@ -20,15 +20,19 @@ async function init() {
}); });
const results = await knex('releases') const results = await knex('releases')
.select('releases.*') .select('releases.*', knex.raw('json_agg(actors.name) as actor_names'))
.leftJoin('entities', 'entities.id', 'releases.entity_id') .leftJoin('entities', 'entities.id', 'releases.entity_id')
.whereIn('entities.slug', ['julesjordan']); .leftJoin('releases_actors', 'releases_actors.release_id', 'releases.id')
.leftJoin('actors', 'actors.id', 'releases_actors.actor_id')
.whereIn('entities.slug', ['julesjordan'])
.groupBy('releases.id');
await knex.transaction(async (trx) => { await knex.transaction(async (trx) => {
return results.reduce(async (chain, scene) => { return results.reduce(async (chain, scene) => {
await chain; await chain;
const newEntryId = slugify([scene.title, scene.date && unprint.formatDate(scene.date, 'YYYY-MM-DD')]); // const newEntryId = scene.entry_id.replace(/-\d{4}-\d{2}-\d{2}$/, ''); // remove date
const newEntryId = slugify([scene.title, ...(scene.actor_names?.toSorted() || [])]);
console.log(newEntryId); console.log(newEntryId);
@ -36,7 +40,7 @@ async function init() {
.where('id', scene.id) .where('id', scene.id)
.update({ .update({
entry_id: newEntryId, entry_id: newEntryId,
comment: `old entry: ${scene.entryId}`, comment: `old entry id: ${scene.entry_id}`,
}); });
}, Promise.resolve()); }, Promise.resolve());
}); });