// Patch summary. The text below is a unified diff covering three files.
//
// 1. migrations/20260208044729_random_sfw_improvements.js (new file)
//    up:   creates the materialized view `media_sfw`, defined as the `id`
//          column of every `media` row where `is_sfw` is true; adds the
//          unique index `media_sfw_id` on `media_sfw(id)`; then redefines the
//          SQL function `get_random_sfw_media_id()` (returns varchar) so it
//          picks one id from `media_sfw` via ORDER BY random() LIMIT 1.
//    down: redefines `get_random_sfw_media_id()` so it selects from `media`
//          with `is_sfw = true` instead of from the view, and only afterwards
//          drops the materialized view `media_sfw`.
//    NOTE(review): both function definitions are declared STABLE although
//    their bodies call random() - confirm this volatility label is intended.
//
// 2. seeds/04_media.js
//    Adds `REFRESH MATERIALIZED VIEW media_sfw` as the last awaited statement
//    of the visible callback, after the preceding `]);`.
//    NOTE(review): this is the only refresh of `media_sfw` visible in this
//    patch - verify the view is refreshed elsewhere when `media` rows change
//    outside of seeding.
//
// 3. src/scrapers/modelmedia.js
//    Adds getBioXPath(field), which returns an array of two XPath strings
//    anchored on a <span> whose text equals `field`: its following-sibling
//    span, and its following-sibling text node.
//    scrapeProfile now passes that array to query.number (height, weight) and
//    query.content (measurements, birthPlace) in place of the single
//    `following-sibling::node()[self::span or self::text()]` expression, which
//    the added in-code comment states is not supported by unprint/JSDOM.
//    Each of the four assignments also gains a `|| null` fallback.
//    NOTE(review): `field` is interpolated into the XPath unescaped; the
//    callers visible here pass only fixed string literals.
//
diff --git a/migrations/20260208044729_random_sfw_improvements.js b/migrations/20260208044729_random_sfw_improvements.js new file mode 100644 index 00000000..e8328cde --- /dev/null +++ b/migrations/20260208044729_random_sfw_improvements.js @@ -0,0 +1,28 @@ +exports.up = async function(knex) { + await knex.schema.createMaterializedView('media_sfw', (view) => { + view.as(knex('media').select('id').where('is_sfw', true)); + }); + + await knex.raw('CREATE UNIQUE INDEX media_sfw_id ON media_sfw(id)'); + + await knex.raw(` + CREATE OR REPLACE FUNCTION get_random_sfw_media_id() RETURNS varchar AS $$ + SELECT id FROM media_sfw + ORDER BY random() + LIMIT 1; + $$ LANGUAGE sql STABLE; + `); +}; + +exports.down = async function(knex) { + await knex.raw(` + CREATE OR REPLACE FUNCTION get_random_sfw_media_id() RETURNS varchar AS $$ + SELECT id FROM media + WHERE is_sfw = true + ORDER BY random() + LIMIT 1; + $$ LANGUAGE sql STABLE; + `); + + await knex.schema.dropMaterializedView('media_sfw'); +}; diff --git a/seeds/04_media.js b/seeds/04_media.js index dd82b0f8..f1da058d 100755 --- a/seeds/04_media.js +++ b/seeds/04_media.js @@ -1179,4 +1179,6 @@ exports.seed = (knex) => Promise.resolve() .whereNotIn('media_id', tagPhotos.map((photo) => photo.id)) .delete(), ]); + + await knex.raw('REFRESH MATERIALIZED VIEW media_sfw'); }); diff --git a/src/scrapers/modelmedia.js b/src/scrapers/modelmedia.js index c30d28fb..7eef0f23 100644 --- a/src/scrapers/modelmedia.js +++ b/src/scrapers/modelmedia.js @@ -204,6 +204,13 @@ async function fetchProfileApi(actor, { entity, parameters }) { return null; } +function getBioXPath(field) { + return [ + `//span[text()="${field}"]/following-sibling::span`, + `//span[text()="${field}"]/following-sibling::text()`, + ]; +} + function scrapeProfile({ query }, url) { const profile = { url }; const avatar = query.img('div[class*="prof-pic"] > img'); @@ -216,11 +223,13 @@ function scrapeProfile({ query }, url) { } profile.description = query.content('h2') || 
null; - profile.height = query.number('//span[text()="Height"]/following-sibling::node()[self::span or self::text()]', { match: /(\d+) cm/, matchIndex: 1 }); - profile.weight = query.number('//span[text()="Weight"]/following-sibling::node()[self::span or self::text()]', { match: /(\d+) kg/, matchIndex: 1 }); - profile.measurements = query.content('//span[text()="Measurements"]/following-sibling::node()[self::span or self::text()]'); - profile.birthPlace = query.content('//span[text()="Birth Place"]/following-sibling::node()[self::span or self::text()]'); + // ::node()[self::span or self::text()] not supported by unprint/JSDOM + profile.height = query.number(getBioXPath('Height'), { match: /(\d+) cm/, matchIndex: 1 }) || null; + profile.weight = query.number(getBioXPath('Weight'), { match: /(\d+) kg/, matchIndex: 1 }) || null; + + profile.measurements = query.content(getBioXPath('Measurements')) || null; + profile.birthPlace = query.content(getBioXPath('Birth Place')) || null; profile.banner = query.img('div[class*="banner"] > img'); profile.photos = query.imgs('#MusModelSwiper img');