From 8a7baa02c171be39476745b4a6aac8886409e966 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 14 Feb 2021 14:31:33 +0100 Subject: [PATCH 1/6] Improved date query for upcoming scenes in ElevatedX scraper. --- public/img/logos/nebraskacoeds/favicon.png | Bin 0 -> 2046 bytes src/scrapers/elevatedx.js | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 public/img/logos/nebraskacoeds/favicon.png diff --git a/public/img/logos/nebraskacoeds/favicon.png b/public/img/logos/nebraskacoeds/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..ff3f0bbb8617a4c0125de5d5621d82af1c1e41a0 GIT binary patch literal 2046 zcmVEX>4Tx04R}tkv&MmKp2MKrfNl694sQ@kfAzR5EXIMDionYs1;guFuC+cXws0R zxHt-~1qXi?s}3&Cx;nTDg5VE`o12rOi%ULhcYewtk}vy53uO2K!0-6O#FyExDCulsXE)V#%jfJi*U4AUlFC!X50 z4bJ<-QC5;w;&b9rlP*a7$aTfzH_kL;k>@Q#C2LjNMZqtkRU=q6&onSMx1t?6borOPx$x;UB5&wgD001riSe}qIG+yDRo24YJ`L;zv{VgO?DUVvx-000SaNLh0L z04^f{04^f|c%?sf00007bV*G`2jv0{78n`)EJSwz000?uMObu0Z*6U5Zgc=ca%Ew3 zWn>_CX>@2HM@dakSAh-}000HmNklOyd#h@cE84!9EChzKI041#VHbf;l5Q3KP{G3o9k zUH{&@pNo5|x~jT5G8c+h_1<~+oO{l9zVnp;Za#nGhV2)DKC4TBS-(`Te&VflYb@!< zx8AsYyD8TG9Pknl=S-iplX+X_3oBy_c)044NdvI!1eh@9lVsIp0WgjDc`EUk>Jbf_}fYbo8>%;0Co}kf(;-=*>q|%&Z0$p z+KJbij;;c9n&IIz(FvrCoj_(wW;aT0ZIBjFU46znWbY^Qj+EI-CA#d4fX#qGzy}n( zh;ZGf5%g+CrVd)0Hk*f?xYvqFz8D{|3Lq_f0@GJl zKHsPb;L4@T9|t~M8Wc_)AymFsj+h$W+ckVLm)1M!#N$qM zh`!7q%pjQ!f#i!pC^!KfE*5A%%+;o2pi;wGATxuFFjta!dZXp@63<#(ZZek}*bcKH zA(a~Uv!2t0oG7km&h9n3w zQtc;u5ziywv%sI{0o=NC`_X%@KYzavaaj+tc(W5Bu^F(ZOcN!niN$FShS}u0^M?pp znYUcZloSMJGhiI68SwpEciwokeikg$W<$n^Nro{&#-ut01VlX?m_E>RDUyNCrK28q z!I3k>`f=5)&i&cY*%Az*RJC9{a^f{|SD;ROc8LgH{~2aKB3&XE8KPv%w}1vSw50jQq?CH?QB=pJ%}ECEzPFNoJQ6 zswT}UHSVWO5A#pVUsB6Qa$-~`q+Piqmzmm^*$LR3-8@@;;M<0M;On&n9spa5yg#&f zTnQ#ndHaxhhSq#Ty9mL=b6xmZN$*xQHGIW z!dWZZunoK}^D9hGW-rWBjd?nm%c)~mnSPiQV3iCFOvMUMRzy37@8`rd!ETzXGQ7}U zU+WUTiOegJ`JlzkS&J$&P*p@2lbKk&Ka1~liEpXpbuFVk&iB7~7O_hacP3ZYrN)~Q z@0PfP7y>^e^Hx`eREL(T9Rz*B)nwimVz4Fyab7Pk%ab~O&Xk-pNkyoD`CXVlV2-=s zU>Y2CVq{fb@Y375SP?C^=W&K%E;?{gyQ%j5vP3bjHZ5v8w!(}p{@aPewv6Lbi`F8~ z?R7B*z__CSs7{5X@~2@VL_-;^QE7NKo9_a@j?5QTH2lz4kfm1EQ5Okb(^#nVs>F-HrAsm=w}>l}km`Xi|2 zs~6BZOK7|Dd!B*sAzrQK0ooI6A~#im_b~(D3ecmFVOs-Qg-EXZj_cb%707*qoM6N<$f|$#|)Bpeg literal 0 HcmV?d00001 diff --git a/src/scrapers/elevatedx.js b/src/scrapers/elevatedx.js index 39cdc709..c7b39939 100644 --- a/src/scrapers/elevatedx.js +++ b/src/scrapers/elevatedx.js @@ -66,7 +66,7 @@ function scrapeAllClassic(scenes, channel) { release.title = query.cnt('.updateInfo h5 a'); release.actors = query.cnts('.tour_update_models a'); - release.date = query.date('.availdate, .updateInfo p span:nth-child(2)', 'MM/DD/YYYY'); + release.date = query.date('.availdate, .updateInfo p span:last-child', 'MM/DD/YYYY'); release.poster = query.img('.updateThumb img'); From b26a029f66e34ddcf3a150548fb2c4da1773f295 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Sun, 14 Feb 2021 14:31:38 +0100 Subject: [PATCH 2/6] 1.171.1 --- package-lock.json | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 1b7bf874..84d3a2ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "traxxx", - "version": "1.171.0", + "version": "1.171.1", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 1c9a707c..658bb0c0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "traxxx", - "version": "1.171.0", + "version": "1.171.1", "description": "All the latest porn releases in one place", "main": "src/app.js", "scripts": { From 67055bf9201cac2aae0be5206e1272f5ae6e79d4 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 16 Feb 2021 03:37:52 +0100 Subject: [PATCH 3/6] Improved actor entity and entry ID storage. --- assets/components/actors/tile.vue | 8 ++-- assets/js/fragments.js | 2 +- src/actors.js | 61 ++++++++++++++++++++++--------- src/scrapers/resolve.js | 2 +- src/scrapers/traxxx.js | 3 +- 5 files changed, 52 insertions(+), 24 deletions(-) diff --git a/assets/components/actors/tile.vue b/assets/components/actors/tile.vue index 0c7f3af5..5c0a45d7 100644 --- a/assets/components/actors/tile.vue +++ b/assets/components/actors/tile.vue @@ -16,13 +16,13 @@ >{{ actor.name }} diff --git a/assets/js/fragments.js b/assets/js/fragments.js index 197a555d..bc95747a 100644 --- a/assets/js/fragments.js +++ b/assets/js/fragments.js @@ -61,7 +61,7 @@ const actorFields = ` lazy } } - network: entity { + entity { id name slug diff --git a/src/actors.js b/src/actors.js index 474e1e56..c38394e1 100644 --- a/src/actors.js +++ b/src/actors.js @@ -180,6 +180,11 @@ function toBaseActors(actorsOrNames, release) { return baseActors; } +function getCollisionLikely(actor) { + // actor with single name + return actor.name.match(/\w+/g).length === 1; +} + function curateActor(actor, withDetails = false, isProfile = false) { if (!actor) { return null; @@ -260,11 +265,13 @@ function curateActor(actor, withDetails = false, isProfile = false) { } function curateActorEntry(baseActor, batchId) { + const collisionLikely = getCollisionLikely(baseActor); + return { name: baseActor.name, slug: baseActor.slug, - entity_id: null, - entry_id: baseActor.entryId, + entity_id: collisionLikely ? baseActor.entity.id : null, + entry_id: collisionLikely ? baseActor.entryId : null, batch_id: batchId, }; } @@ -641,6 +648,11 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy const scraper = scrapers[scraperSlug]; const layoutScraper = resolveLayoutScraper(entity, scraper); + if (!layoutScraper?.fetchProfile) { + logger.warn(`No profile profile scraper available for ${scraperSlug}`); + throw new Error(`No profile profile scraper available for ${scraperSlug}`); + } + const context = { ...entity, // legacy @@ -653,11 +665,6 @@ async function scrapeProfiles(actor, sources, entitiesBySlug, existingProfilesBy const label = context.entity?.name; - if (!layoutScraper?.fetchProfile) { - logger.warn(`No profile profile scraper available for ${scraperSlug}`); - throw new Error(`No profile profile scraper available for ${scraperSlug}`); - } - if (!context.entity) { logger.warn(`No entity found for ${scraperSlug}`); throw new Error(`No entity found for ${scraperSlug}`); @@ -813,33 +820,53 @@ async function scrapeActors(argNames) { async function getOrCreateActors(baseActors, batchId) { // WHERE IN causes stack depth error and performance issues with a large amount of values, no knex VALUES helper available - const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId)', { slug: actor.slug, entityId: actor.entity.id })).join(', '); + const actorValues = baseActors.map(actor => knex.raw('(:slug, :entityId, :entryId, :collisionLikely)', { + slug: actor.slug, + entityId: actor.entity.id, + entryId: actor.entryId, + collisionLikely: getCollisionLikely(actor), + })).join(', '); const existingActors = await knex .select('actors.*') - .from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id)`)) - .whereRaw('actors.slug = base_actors.slug AND actors.entity_id IS NULL') - .orWhereRaw('actors.slug = base_actors.slug AND actors.entity_id = base_actors.entity_id'); + .from(knex.raw(`actors, (VALUES ${actorValues}) AS base_actors (slug, entity_id, entry_id, collision_likely)`)) + .whereRaw(` + actors.slug = base_actors.slug + AND actors.entity_id IS NULL + AND NOT base_actors.collision_likely + `) + .orWhereRaw(` + actors.slug = base_actors.slug + AND actors.entity_id = base_actors.entity_id + AND ((actors.entry_id IS NULL AND base_actors.entry_id IS NULL) + OR actors.entry_id = base_actors.entry_id) + `); // const existingActorSlugs = new Set(existingActors.map(actor => actor.slug)); const existingActorSlugs = existingActors.reduce((acc, actor) => ({ ...acc, [actor.entity_id]: { ...acc[actor.entity_id], - [actor.slug]: true, + [actor.entry_id]: { + ...acc[actor.entity_id]?.[actor.entry_id], + [actor.slug]: true, + }, }, }), {}); - const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.slug] && !existingActorSlugs.null?.[baseActor.slug]); - + const uniqueBaseActors = baseActors.filter(baseActor => !existingActorSlugs[baseActor.entity.id]?.[baseActor.entryId]?.[baseActor.slug] && !existingActorSlugs.null?.null?.[baseActor.slug]); const curatedActorEntries = curateActorEntries(uniqueBaseActors, batchId); + const newActors = await bulkInsert('actors', curatedActorEntries); - const newActorIdsByEntityIdAndSlug = newActors.reduce((acc, actor) => ({ + const newActorIdsByEntityIdEntryIdAndSlug = newActors.reduce((acc, actor) => ({ ...acc, [actor.entity_id]: { ...acc[actor.entity_id], - [actor.slug]: actor.id, + [actor.entry_id]: { + ...acc[actor.entity_id]?.[actor.entry_id], + [actor.slug]: actor.id, + }, }, }), {}); @@ -847,7 +874,7 @@ async function getOrCreateActors(baseActors, batchId) { .filter(actor => actor.hasProfile) .map(actor => ({ ...actor, - id: newActorIdsByEntityIdAndSlug[actor.entity?.id]?.[actor.slug] || newActorIdsByEntityIdAndSlug.null?.[actor.slug], + id: newActorIdsByEntityIdEntryIdAndSlug[actor.entity?.id]?.[actor.entryId]?.[actor.slug] || newActorIdsByEntityIdEntryIdAndSlug.null?.null?.[actor.slug], })) .filter(actor => !!actor.id) .map(actor => curateProfile(actor))); diff --git a/src/scrapers/resolve.js b/src/scrapers/resolve.js index 89726b56..db72f51a 100644 --- a/src/scrapers/resolve.js +++ b/src/scrapers/resolve.js @@ -19,7 +19,7 @@ function resolveLayoutScraper(entity, scraper) { return scraper[entity.parameters.layout]; } - if (entity.parent) { + if (entity?.parent) { return resolveLayoutScraper(entity.parent, scraper); } diff --git a/src/scrapers/traxxx.js b/src/scrapers/traxxx.js index 76d51060..60270ed6 100644 --- a/src/scrapers/traxxx.js +++ b/src/scrapers/traxxx.js @@ -258,7 +258,8 @@ async function fetchLatest(entity, page, options) { .limit(faker.random.number({ min: 2, max: 15 })) .pluck('name'); - release.actors = [...actors(release), null]; // include empty actor to ensure proper handling + // release.actors = [...actors(release), null]; // include empty actor to ensure proper handling + release.actors = ['Amber']; release.title = title(release); return release; From 3469da674a8842795826c9b53642ab4033edd078 Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Tue, 16 Feb 2021 19:53:32 +0100 Subject: [PATCH 4/6] Fixed PornCZ video query. --- src/scrapers/porncz.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scrapers/porncz.js b/src/scrapers/porncz.js index 1b522b12..6513b479 100644 --- a/src/scrapers/porncz.js +++ b/src/scrapers/porncz.js @@ -47,7 +47,7 @@ function scrapeScene({ query }, url, channel) { release.poster = query.img('#video-poster', 'data-poster', { origin: channel.url }); release.photos = query.imgs('#gallery .photo-item img', 'data-src', { origin: channel.url }); - release.trailer = query.video('.trailer source'); + release.trailer = query.video(); release.channel = slugify(query.q('.video-detail-logo img', 'alt'), ''); From c51cd080fa60d605901e54082be3816d57a4443a Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 17 Feb 2021 00:40:20 +0100 Subject: [PATCH 5/6] Improved actor mapping in release associations. Storing alias ID in actor release association. --- migrations/20190325001339_releases.js | 5 +++++ src/actors.js | 18 +++++++++++++++++- src/scrapers/traxxx.js | 9 ++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/migrations/20190325001339_releases.js b/migrations/20190325001339_releases.js index 405c2887..ab5d53a9 100644 --- a/migrations/20190325001339_releases.js +++ b/migrations/20190325001339_releases.js @@ -682,6 +682,11 @@ exports.up = knex => Promise.resolve() .inTable('actors') .onDelete('cascade'); + table.integer('alias_id', 12) + .references('id') + .inTable('actors') + .onDelete('cascade'); + table.unique(['release_id', 'actor_id']); table.datetime('created_at') diff --git a/src/actors.js b/src/actors.js index c38394e1..3f5146cc 100644 --- a/src/actors.js +++ b/src/actors.js @@ -912,16 +912,32 @@ async function associateActors(releases, batchId) { const actors = await getOrCreateActors(uniqueBaseActors, batchId); + /* const actorIdsBySlug = actors.reduce((acc, actor) => ({ ...acc, [actor.slug]: actor.alias_for || actor.id, }), {}); + */ + + const actorIdsByEntityIdEntryIdAndSlug = actors.reduce((acc, actor) => ({ + ...acc, + [actor.entity_id]: { + ...acc[actor.entity_id], + [actor.entry_id]: { + ...acc[actor.entity_id]?.[actor.entry_id], + [actor.slug]: { + actor_id: actor.alias_for || actor.id, + alias_id: actor.alias_for ? actor.id : null, + }, + }, + }, + }), {}); const releaseActorAssociations = Object.entries(baseActorsByReleaseId) .map(([releaseId, releaseActors]) => releaseActors .map(releaseActor => ({ release_id: releaseId, - actor_id: actorIdsBySlug[releaseActor.slug], + ...(actorIdsByEntityIdEntryIdAndSlug[releaseActor.entity?.id]?.[releaseActor.entryId]?.[releaseActor.slug] || actorIdsByEntityIdEntryIdAndSlug.null.null[releaseActor.slug]), }))) .flat(); diff --git a/src/scrapers/traxxx.js b/src/scrapers/traxxx.js index 60270ed6..3016f1ac 100644 --- a/src/scrapers/traxxx.js +++ b/src/scrapers/traxxx.js @@ -222,7 +222,11 @@ function actors(release) { : Math.floor(Math.random() * 3) + 2; return Array.from({ length }, () => ({ - name: faker.name.findName(), + name: faker.name + .findName() + .split(' ') + .slice(0, Math.random() < 0.2 ? 1 : 2) // sometimes only use the first name + .join(' '), gender: gender(), })); } @@ -258,8 +262,7 @@ async function fetchLatest(entity, page, options) { .limit(faker.random.number({ min: 2, max: 15 })) .pluck('name'); - // release.actors = [...actors(release), null]; // include empty actor to ensure proper handling - release.actors = ['Amber']; + release.actors = [...actors(release), null]; // include empty actor to ensure proper handling release.title = title(release); return release; From 58c01bdfcf80c960d5982a5d02fb670b4beeacbb Mon Sep 17 00:00:00 2001 From: DebaucheryLibrarian Date: Wed, 17 Feb 2021 00:40:27 +0100 Subject: [PATCH 6/6] 1.172.0 --- package-lock.json | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 84d3a2ee..8581ea5d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "traxxx", - "version": "1.171.1", + "version": "1.172.0", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 658bb0c0..968250a8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "traxxx", - "version": "1.171.1", + "version": "1.172.0", "description": "All the latest porn releases in one place", "main": "src/app.js", "scripts": {