diff --git a/debian/patches/07_gallium-fix-build-failure-on-powerpcspe.diff b/debian/patches/07_gallium-fix-build-failure-on-powerpcspe.diff
deleted file mode 100644
index 029bd22..0000000
--- a/debian/patches/07_gallium-fix-build-failure-on-powerpcspe.diff
+++ /dev/null
@@ -1,35 +0,0 @@
-From a4f14e7239780b02af8d74669c5458d4b0957d4d Mon Sep 17 00:00:00 2001
-From: Roland Stigge <stigge@antcom.de>
-Date: Sun, 2 Mar 2014 19:52:56 +0100
-Subject: [PATCH] gallium: fix build failure on powerpcspe
-
-In the case of powerpc, mesa activates some altivec instructions
-that are unknown on the powerpcspe architecture (see
-https://wiki.debian.org/PowerPCSPEPort), causing a build failure as the
-'vand' opcode is not recognized by the assembler.
-
-This patch fixes this by preventing the PPC-specialcasing in case of
-powerpcspe (__NO_FPRS__ is only defined there).
-
-https://bugs.debian.org/695746
----
- src/gallium/include/pipe/p_config.h | 2 ++
- 1 file changed, 2 insertions(+)
-
---- a/src/util/detect_arch.h
-+++ b/src/util/detect_arch.h
-@@ -70,12 +70,14 @@
- #endif
- #endif
- 
-+#ifndef __NO_FPRS__
- #if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__) || defined(__PPC64__)
- #define DETECT_ARCH_PPC 1
- #if defined(__ppc64__) || defined(__PPC64__)
- #define DETECT_ARCH_PPC_64 1
- #endif
- #endif
-+#endif
- 
- #if defined(__s390x__)
- #define DETECT_ARCH_S390 1
diff --git a/debian/patches/21929.patch b/debian/patches/21929.patch
deleted file mode 100644
index 85317d0..0000000
--- a/debian/patches/21929.patch
+++ /dev/null
@@ -1,679 +0,0 @@
-From ed9fb6be100cff6c2066beb0cdf8b3a17cab292c Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Sat, 24 Jun 2023 15:49:13 +0200
-Subject: [PATCH 1/4] radv: Add rt.monolithic to radv_pipeline_key
-
----
- src/amd/vulkan/radv_shader.h | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
-index 0c53695edae7c..6eb95fdd0a097 100644
---- a/src/amd/vulkan/radv_shader.h
-+++ b/src/amd/vulkan/radv_shader.h
-@@ -135,6 +135,10 @@ struct radv_pipeline_key {
- 
-       bool line_smooth_enabled;
-    } ps;
-+
-+   struct {
-+      bool monolithic;
-+   } rt;
- };
- 
- struct radv_nir_compiler_options {
--- 
-
-
-From 8f45cc08361f55c1e613a11198b1ae97c519406e Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Sat, 24 Jun 2023 15:46:51 +0200
-Subject: [PATCH 2/4] radv/rt: Store NIR shaders separately
-
-In order to compile monolithic shaders with pipeline libraries, we need
-to keep the NIR around for inlining recursive stages.
----
- src/amd/vulkan/radv_pipeline_cache.c |  9 +--
- src/amd/vulkan/radv_pipeline_rt.c    | 93 +++++++++++++++++++++-------
- src/amd/vulkan/radv_private.h        |  1 +
- src/amd/vulkan/radv_rt_shader.c      |  7 +--
- 4 files changed, 79 insertions(+), 31 deletions(-)
-
-diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
-index 5bbbc755ae11f..7e4c6f8898130 100644
---- a/src/amd/vulkan/radv_pipeline_cache.c
-+++ b/src/amd/vulkan/radv_pipeline_cache.c
-@@ -481,11 +481,12 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip
-       pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]);
- 
-    for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
--      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
-+      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i]))
-          pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base;
--      } else if (is_library) {
--         pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
--         complete &= pipeline->stages[i].shader != NULL;
-+
-+      if (is_library) {
-+         pipeline->stages[i].nir = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
-+         complete &= pipeline->stages[i].nir != NULL;
-       }
-    }
- 
-diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
-index c86ea3a508468..85afc8cb28e1b 100644
---- a/src/amd/vulkan/radv_pipeline_rt.c
-+++ b/src/amd/vulkan/radv_pipeline_rt.c
-@@ -263,7 +263,10 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, st
-          RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
-          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
-          for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
--            stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
-+            stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir);
-+            if (library_pipeline->stages[j].shader)
-+               stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
-+
-             stages[idx].stage = library_pipeline->stages[j].stage;
-             stages[idx].stack_size = library_pipeline->stages[j].stack_size;
-             memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH);
-@@ -462,45 +465,83 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
-       return VK_PIPELINE_COMPILE_REQUIRED;
-    VkResult result = VK_SUCCESS;
- 
--   struct radv_ray_tracing_stage *stages = pipeline->stages;
-+   struct radv_ray_tracing_stage *rt_stages = pipeline->stages;
-+
-+   struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage));
-+   if (!stages)
-+      return VK_ERROR_OUT_OF_HOST_MEMORY;
-+
-+   bool has_callable = false;
-+   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
-+      if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) {
-+         has_callable = true;
-+         break;
-+      }
-+   }
- 
-    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
-+      if (rt_stages[idx].shader || rt_stages[idx].nir)
-+         continue;
-+
-       int64_t stage_start = os_time_get_nano();
--      struct radv_shader_stage stage;
--      radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage);
- 
--      if (stages[idx].shader)
--         goto feedback;
-+      struct radv_shader_stage *stage = &stages[idx];
-+      radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, stage);
- 
-       /* precompile the shader */
--      stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
-+      stage->nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
-+
-+      /* Cases in which we need to keep around the NIR:
-+       *    - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader.
-+       *                        If there is a callable shader, we can be sure that the final pipeline won't be
-+       *                        monolithic.
-+       *    - non-recursive:    Non-recursive shaders are inlined into the traversal shader.
-+       *    - monolithic:       Callable shaders (chit/miss) are inlined into the raygen shader.
-+       */
-+      bool compiled = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]);
-+      bool library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
-+      bool nir_needed =
-+         (library && !has_callable) || !compiled || (key->rt.monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN);
-+      nir_needed &= !rt_stages[idx].nir;
-+      if (nir_needed) {
-+         rt_stages[idx].stack_size = stage->nir->scratch_size;
-+         rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1,
-+                                                                !key->optimisations_disabled);
-+      }
- 
--      if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
--         uint32_t stack_size = 0;
-+      stage->feedback.duration = os_time_get_nano() - stage_start;
-+   }
- 
-+   for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
-+      int64_t stage_start = os_time_get_nano();
-+      struct radv_shader_stage *stage = &stages[idx];
-+
-+      /* Cases in which we need to compile the shader (raygen/callable/chit/miss):
-+       *    TODO: - monolithic: Extend the loop to cover imported stages and force compilation of imported raygen
-+       *                        shaders since pipeline library shaders use separate compilation.
-+       *    - separate:   Compile any recursive stage if wasn't compiled yet.
-+       * TODO: Skip chit and miss shaders in the monolithic case.
-+       */
-+      bool shader_needed = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]) && !rt_stages[idx].shader;
-+      if (shader_needed) {
-+         uint32_t stack_size = 0;
-          struct radv_serialized_shader_arena_block *replay_block =
-             capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL;
- 
-          struct radv_shader *shader;
-          result =
--            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &stage, &stack_size, replay_block, &shader);
--         stages[idx].stack_size = stack_size;
--         stages[idx].shader = shader ? &shader->base : NULL;
--      } else {
--         stages[idx].stack_size = stage.nir->scratch_size;
--         stages[idx].shader =
--            radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, stages[idx].sha1, !key->optimisations_disabled);
--      }
--      ralloc_free(stage.nir);
-+            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, stage, &stack_size, replay_block, &shader);
-+         if (result != VK_SUCCESS)
-+            goto cleanup;
- 
--      if (result != VK_SUCCESS)
--         return result;
-+         rt_stages[idx].stack_size = stack_size;
-+         rt_stages[idx].shader = shader ? &shader->base : NULL;
-+      }
- 
--   feedback:
-       if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) {
-          assert(idx < creation_feedback->pipelineStageCreationFeedbackCount);
--         stage.feedback.duration = os_time_get_nano() - stage_start;
--         creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage.feedback;
-+         stage->feedback.duration += os_time_get_nano() - stage_start;
-+         creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback;
-       }
-    }
- 
-@@ -527,6 +568,10 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
-    result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &traversal_stage, NULL, NULL,
-                                &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
- 
-+cleanup:
-+   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
-+      ralloc_free(stages[i].nir);
-+   free(stages);
-    return result;
- }
- 
-@@ -732,6 +777,8 @@ void
- radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
- {
-    for (unsigned i = 0; i < pipeline->stage_count; i++) {
-+      if (pipeline->stages[i].nir)
-+         vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir);
-       if (pipeline->stages[i].shader)
-          vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].shader);
-    }
-diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
-index 47e315488e9f9..7ab46738b46f0 100644
---- a/src/amd/vulkan/radv_private.h
-+++ b/src/amd/vulkan/radv_private.h
-@@ -2364,6 +2364,7 @@ struct radv_ray_tracing_group {
- };
- 
- struct radv_ray_tracing_stage {
-+   struct vk_pipeline_cache_object *nir;
-    struct vk_pipeline_cache_object *shader;
-    gl_shader_stage stage;
-    uint32_t stack_size;
-diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
-index cc92beebc3503..3def324bcccf3 100644
---- a/src/amd/vulkan/radv_rt_shader.c
-+++ b/src/amd/vulkan/radv_rt_shader.c
-@@ -1132,7 +1132,7 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers
-       if (is_dup)
-          continue;
- 
--      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader);
-+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].nir);
-       assert(nir_stage);
- 
-       insert_rt_case(b, nir_stage, vars, sbt_idx, data->pipeline->groups[i].handle.any_hit_index);
-@@ -1262,13 +1262,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
-       if (is_dup)
-          continue;
- 
--      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader);
-+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].nir);
-       assert(nir_stage);
- 
-       nir_shader *any_hit_stage = NULL;
-       if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
--         any_hit_stage =
--            radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader);
-+         any_hit_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].nir);
-          assert(any_hit_stage);
- 
-          /* reserve stack size for any_hit before it is inlined */
--- 
-GitLab
-
-
-From bba42cbc235e75a5c7ed05e55e48f71640c68ad4 Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Mon, 21 Aug 2023 13:32:53 +0200
-Subject: [PATCH 3/4] radv/rt: Add monolithic raygen lowering
-
-Ray traversal is inlined to allow for constant folding and avoid
-spilling.
----
- src/amd/vulkan/radv_pipeline_rt.c |  11 +-
- src/amd/vulkan/radv_rt_shader.c   | 276 ++++++++++++++++++++++++++----
- src/amd/vulkan/radv_shader.h      |   3 +-
- 3 files changed, 248 insertions(+), 42 deletions(-)
-
-diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
-index 85afc8cb28e1b..12562c6cf89ba 100644
---- a/src/amd/vulkan/radv_pipeline_rt.c
-+++ b/src/amd/vulkan/radv_pipeline_rt.c
-@@ -356,9 +356,8 @@ move_rt_instructions(nir_shader *shader)
- static VkResult
- radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
-                    const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key,
--                   const struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage,
--                   uint32_t *stack_size, struct radv_serialized_shader_arena_block *replay_block,
--                   struct radv_shader **out_shader)
-+                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, uint32_t *stack_size,
-+                   struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
- {
-    struct radv_shader_binary *binary;
-    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
-@@ -384,7 +383,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
-    uint32_t num_resume_shaders = 0;
-    nir_shader **resume_shaders = NULL;
- 
--   if (stage->stage != MESA_SHADER_INTERSECTION) {
-+   bool monolithic_raygen = pipeline_key->rt.monolithic && stage->stage == MESA_SHADER_RAYGEN;
-+   if (stage->stage != MESA_SHADER_INTERSECTION && !monolithic_raygen) {
-       nir_builder b = nir_builder_at(nir_after_cf_list(&nir_shader_get_entrypoint(stage->nir)->body));
-       nir_rt_return_amd(&b);
- 
-@@ -411,7 +411,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
-    for (uint32_t i = 0; i < num_shaders; i++) {
-       struct radv_shader_stage temp_stage = *stage;
-       temp_stage.nir = shaders[i];
--      radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0);
-+      radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
-+                            pipeline, pipeline_key);
-       radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled);
-       radv_postprocess_nir(device, pipeline_key, &temp_stage);
- 
-diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
-index 3def324bcccf3..362d918597008 100644
---- a/src/amd/vulkan/radv_rt_shader.c
-+++ b/src/amd/vulkan/radv_rt_shader.c
-@@ -1306,6 +1306,87 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
-    nir_pop_if(b, NULL);
- }
- 
-+static void
-+visit_closest_hit_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
-+                          struct rt_variables *vars)
-+{
-+   nir_def *sbt_idx = nir_load_var(b, vars->idx);
-+
-+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
-+      nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
-+
-+   for (unsigned i = 0; i < pipeline->group_count; ++i) {
-+      struct radv_ray_tracing_group *group = &pipeline->groups[i];
-+
-+      unsigned shader_id = VK_SHADER_UNUSED_KHR;
-+      if (group->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
-+         shader_id = group->recursive_shader;
-+
-+      if (shader_id == VK_SHADER_UNUSED_KHR)
-+         continue;
-+
-+      /* Avoid emitting stages with the same shaders/handles multiple times. */
-+      bool is_dup = false;
-+      for (unsigned j = 0; j < i; ++j)
-+         if (pipeline->groups[j].handle.closest_hit_index == pipeline->groups[i].handle.closest_hit_index)
-+            is_dup = true;
-+
-+      if (is_dup)
-+         continue;
-+
-+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
-+      assert(nir_stage);
-+
-+      insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.closest_hit_index);
-+      ralloc_free(nir_stage);
-+   }
-+
-+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
-+      nir_pop_if(b, NULL);
-+}
-+
-+static void
-+visit_miss_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
-+                   struct rt_variables *vars)
-+{
-+   nir_def *sbt_idx = nir_load_var(b, vars->idx);
-+
-+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
-+      nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
-+
-+   for (unsigned i = 0; i < pipeline->group_count; ++i) {
-+      struct radv_ray_tracing_group *group = &pipeline->groups[i];
-+
-+      unsigned shader_id = VK_SHADER_UNUSED_KHR;
-+      if (group->type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
-+         shader_id = group->recursive_shader;
-+
-+      if (shader_id == VK_SHADER_UNUSED_KHR)
-+         continue;
-+
-+      if (pipeline->stages[shader_id].stage != MESA_SHADER_MISS)
-+         continue;
-+
-+      /* Avoid emitting stages with the same shaders/handles multiple times. */
-+      bool is_dup = false;
-+      for (unsigned j = 0; j < i; ++j)
-+         if (pipeline->groups[j].handle.general_index == pipeline->groups[i].handle.general_index)
-+            is_dup = true;
-+
-+      if (is_dup)
-+         continue;
-+
-+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
-+      assert(nir_stage);
-+
-+      insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.general_index);
-+      ralloc_free(nir_stage);
-+   }
-+
-+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
-+      nir_pop_if(b, NULL);
-+}
-+
- static void
- store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
- {
-@@ -1414,25 +1495,47 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
-    /* Register storage for hit attributes */
-    nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
- 
--   for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
--      hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
-+   if (!key->rt.monolithic || b->shader->info.stage != MESA_SHADER_RAYGEN) {
-+      for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
-+         hit_attribs[i] =
-+            nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
- 
--   lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
-+      lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
-+   }
- 
-    /* Initialize follow-up shader. */
-    nir_push_if(b, nir_load_var(b, trav_vars.hit));
-    {
--      for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
--         nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
--      nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
--                                  nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
--                                  nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
-+      if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
-+         load_sbt_entry(b, vars, nir_load_var(b, vars->idx), SBT_HIT, SBT_CLOSEST_HIT_IDX);
-+
-+         nir_def *should_return =
-+            nir_test_mask(b, nir_load_var(b, vars->cull_mask_and_flags), SpvRayFlagsSkipClosestHitShaderKHRMask);
-+
-+         /* should_return is set if we had a hit but we won't be calling the closest hit
-+          * shader and hence need to return immediately to the calling shader. */
-+         nir_push_if(b, nir_inot(b, should_return));
-+         visit_closest_hit_shaders(device, b, pipeline, vars);
-+         nir_pop_if(b, NULL);
-+      } else {
-+         for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
-+            nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
-+         nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
-+                                     nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
-+                                     nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
-+      }
-    }
-    nir_push_else(b, NULL);
-    {
--      /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
--       * for miss shaders if none of the rays miss. */
--      nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
-+      if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
-+         load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, SBT_GENERAL_IDX);
-+
-+         visit_miss_shaders(device, b, pipeline, vars);
-+      } else {
-+         /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
-+          * for miss shaders if none of the rays miss. */
-+         nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
-+      }
-    }
-    nir_pop_if(b, NULL);
- }
-@@ -1477,6 +1580,98 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
-    return b.shader;
- }
- 
-+struct lower_rt_instruction_monolithic_state {
-+   struct radv_device *device;
-+   struct radv_ray_tracing_pipeline *pipeline;
-+   const struct radv_pipeline_key *key;
-+   const VkRayTracingPipelineCreateInfoKHR *pCreateInfo;
-+
-+   struct rt_variables *vars;
-+};
-+
-+static bool
-+lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
-+{
-+   if (instr->type != nir_instr_type_intrinsic)
-+      return false;
-+
-+   b->cursor = nir_after_instr(instr);
-+
-+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-+
-+   struct lower_rt_instruction_monolithic_state *state = data;
-+   struct rt_variables *vars = state->vars;
-+
-+   switch (intr->intrinsic) {
-+   case nir_intrinsic_execute_callable:
-+      unreachable("nir_intrinsic_execute_callable");
-+   case nir_intrinsic_trace_ray: {
-+      nir_store_var(b, vars->arg, nir_iadd_imm(b, intr->src[10].ssa, -b->shader->scratch_size), 1);
-+
-+      /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
-+      nir_store_var(b, vars->accel_struct, intr->src[0].ssa, 0x1);
-+      nir_store_var(b, vars->cull_mask_and_flags, nir_ior(b, nir_ishl_imm(b, intr->src[2].ssa, 24), intr->src[1].ssa),
-+                    0x1);
-+      nir_store_var(b, vars->sbt_offset, nir_iand_imm(b, intr->src[3].ssa, 0xf), 0x1);
-+      nir_store_var(b, vars->sbt_stride, nir_iand_imm(b, intr->src[4].ssa, 0xf), 0x1);
-+      nir_store_var(b, vars->miss_index, nir_iand_imm(b, intr->src[5].ssa, 0xffff), 0x1);
-+      nir_store_var(b, vars->origin, intr->src[6].ssa, 0x7);
-+      nir_store_var(b, vars->tmin, intr->src[7].ssa, 0x1);
-+      nir_store_var(b, vars->direction, intr->src[8].ssa, 0x7);
-+      nir_store_var(b, vars->tmax, intr->src[9].ssa, 0x1);
-+
-+      nir_def *stack_ptr = nir_load_var(b, vars->stack_ptr);
-+      nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);
-+
-+      radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, state->key, b, vars);
-+      b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size *
-+                                                                         MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
-+
-+      nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);
-+
-+      nir_instr_remove(instr);
-+      return true;
-+   }
-+   case nir_intrinsic_rt_resume:
-+      unreachable("nir_intrinsic_rt_resume");
-+   case nir_intrinsic_rt_return_amd:
-+      unreachable("nir_intrinsic_rt_return_amd");
-+   case nir_intrinsic_execute_closest_hit_amd:
-+      unreachable("nir_intrinsic_execute_closest_hit_amd");
-+   case nir_intrinsic_execute_miss_amd:
-+      unreachable("nir_intrinsic_execute_miss_amd");
-+   default:
-+      return false;
-+   }
-+}
-+
-+static void
-+lower_rt_instructions_monolithic(nir_shader *shader, struct radv_device *device,
-+                                 struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key,
-+                                 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct rt_variables *vars)
-+{
-+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
-+
-+   struct lower_rt_instruction_monolithic_state state = {
-+      .device = device,
-+      .pipeline = pipeline,
-+      .key = key,
-+      .pCreateInfo = pCreateInfo,
-+      .vars = vars,
-+   };
-+
-+   nir_shader_instructions_pass(shader, lower_rt_instruction_monolithic, nir_metadata_none, &state);
-+   nir_index_ssa_defs(impl);
-+
-+   /* Register storage for hit attributes */
-+   nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
-+
-+   for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
-+      hit_attribs[i] = nir_local_variable_create(impl, glsl_uint_type(), "ahit_attrib");
-+
-+   lower_hit_attribs(shader, hit_attribs, 0);
-+}
-+
- /** Select the next shader based on priorities:
-  *
-  * Detect the priority of the shader stage by the lowest bits in the address (low to high):
-@@ -1517,13 +1712,18 @@ select_next_shader(nir_builder *b, nir_def *shader_addr, unsigned wave_size)
- void
- radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                       const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size,
--                      bool resume_shader)
-+                      bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
-+                      const struct radv_pipeline_key *key)
- {
-    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
- 
-    const VkPipelineCreateFlagBits2KHR create_flags = radv_get_pipeline_create_flags(pCreateInfo);
- 
-    struct rt_variables vars = create_rt_variables(shader, create_flags);
-+
-+   if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN)
-+      lower_rt_instructions_monolithic(shader, device, pipeline, key, pCreateInfo, &vars);
-+
-    lower_rt_instructions(shader, &vars, true);
- 
-    if (stack_size) {
-@@ -1585,32 +1785,36 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
-    if (shader_guard)
-       nir_pop_if(&b, shader_guard);
- 
--   /* select next shader */
-    b.cursor = nir_after_cf_list(&impl->body);
- 
--   shader_addr = nir_load_var(&b, vars.shader_addr);
--   nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
--
--   /* store back all variables to registers */
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
--
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
--   ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
-+   if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) {
-+      nir_terminate(&b);
-+   } else {
-+      /* select next shader */
-+      shader_addr = nir_load_var(&b, vars.shader_addr);
-+      nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
-+
-+      /* store back all variables to registers */
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
-+
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
-+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
-+   }
- 
-    nir_metadata_preserve(impl, nir_metadata_none);
- 
-diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
-index 6eb95fdd0a097..969f9a56ab7f5 100644
---- a/src/amd/vulkan/radv_shader.h
-+++ b/src/amd/vulkan/radv_shader.h
-@@ -635,7 +635,8 @@ nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShad
- 
- void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
-                            const struct radv_shader_args *args, const struct radv_shader_info *info,
--                           uint32_t *stack_size, bool resume_shader);
-+                           uint32_t *stack_size, bool resume_shader, struct radv_device *device,
-+                           struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key);
- 
- struct radv_shader_stage;
- 
--- 
-
-
-From 5c9dd4efece8f352d00d1310b556928cccb239c8 Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Sat, 24 Jun 2023 16:11:16 +0200
-Subject: [PATCH 4/4] radv/rt: Use monolithic pipelines
-
-Only available for non-recursive pipelines that do not have callables.
----
- src/amd/vulkan/radv_pipeline_rt.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
-index 12562c6cf89ba..97449b9cbafac 100644
---- a/src/amd/vulkan/radv_pipeline_rt.c
-+++ b/src/amd/vulkan/radv_pipeline_rt.c
-@@ -103,6 +103,17 @@ radv_generate_rt_pipeline_key(const struct radv_device *device, const struct rad
-       }
-    }
- 
-+   if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
-+      key.rt.monolithic = pCreateInfo->maxPipelineRayRecursionDepth <= 1;
-+
-+      for (uint32_t i = 0; i < pipeline->stage_count; i++) {
-+         if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) {
-+            key.rt.monolithic = false;
-+            break;
-+         }
-+      }
-+   }
-+
-    return key;
- }
- 
--- 
diff --git a/debian/patches/24720.patch b/debian/patches/24720.patch
deleted file mode 100644
index c27c928..0000000
--- a/debian/patches/24720.patch
+++ /dev/null
@@ -1,687 +0,0 @@
-From 42be7a3c53698a165e9612619f6a34a65bbf91ff Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Wed, 16 Aug 2023 10:37:56 +0200
-Subject: [PATCH 1/3] radv: Remove dead radix_sort_vk_get_memory_requirements
- call
-
----
- src/amd/vulkan/radv_acceleration_structure.c | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
-index ece47b1230c88..5c5eb16e61f9d 100644
---- a/src/amd/vulkan/radv_acceleration_structure.c
-+++ b/src/amd/vulkan/radv_acceleration_structure.c
-@@ -745,10 +745,6 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
- {
-    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-    for (uint32_t i = 0; i < infoCount; ++i) {
--      struct radix_sort_vk_memory_requirements requirements;
--      radix_sort_vk_get_memory_requirements(cmd_buffer->device->meta_state.accel_struct_build.radix_sort,
--                                            bvh_states[i].node_count, &requirements);
--
-       struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
-       info.count = bvh_states[i].node_count;
- 
--- 
-GitLab
-
-
-From faa17e5322ea66cd74e37aab48316059a05738d6 Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Wed, 16 Aug 2023 11:09:25 +0200
-Subject: [PATCH 2/3] radv/radix_sort: Vendor the radix sort dispatch code
-
-This needs to be done so we can optimize it for occpuancy when building
-multiple acceleration structures in parallel. Changes to the original
-code:
-
-- Change // to /* */
-- clang-format
-- Replace vkCmd calls with calls to the driver entrypoints
-- Add a light weight info struct
-- Use radv_fill_buffer directly
----
- src/amd/vulkan/radv_acceleration_structure.c | 218 ++++++++++++++++---
- src/amd/vulkan/radv_private.h                |   1 -
- 2 files changed, 187 insertions(+), 32 deletions(-)
-
-diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
-index 5c5eb16e61f9d..9866de2e594a8 100644
---- a/src/amd/vulkan/radv_acceleration_structure.c
-+++ b/src/amd/vulkan/radv_acceleration_structure.c
-@@ -27,7 +27,9 @@
- #include "nir_builder.h"
- #include "radv_cs.h"
- 
-+#include "radix_sort/common/vk/barrier.h"
- #include "radix_sort/radv_radix_sort.h"
-+#include "radix_sort/shaders/push.h"
- 
- #include "bvh/build_interface.h"
- #include "bvh/bvh.h"
-@@ -76,6 +78,7 @@ static const uint32_t header_spv[] = {
- };
- 
- #define KEY_ID_PAIR_SIZE 8
-+#define MORTON_BIT_SIZE  24
- 
- enum internal_build_type {
-    INTERNAL_BUILD_TYPE_LBVH,
-@@ -382,17 +385,6 @@ cleanup:
-    return result;
- }
- 
--static void
--radix_sort_fill_buffer(VkCommandBuffer commandBuffer, radix_sort_vk_buffer_info_t const *buffer_info,
--                       VkDeviceSize offset, VkDeviceSize size, uint32_t data)
--{
--   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
--
--   assert(size != VK_WHOLE_SIZE);
--
--   radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset, size, data);
--}
--
- VkResult
- radv_device_init_null_accel_struct(struct radv_device *device)
- {
-@@ -576,12 +568,6 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
- 
-    device->meta_state.accel_struct_build.radix_sort =
-       radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
--
--   struct radix_sort_vk_sort_devaddr_info *radix_sort_info = &device->meta_state.accel_struct_build.radix_sort_info;
--   radix_sort_info->ext = NULL;
--   radix_sort_info->key_bits = 24;
--   radix_sort_info->fill_buffer = radix_sort_fill_buffer;
--
- exit:
-    mtx_unlock(&device->meta_state.mtx);
-    return result;
-@@ -743,28 +729,198 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
-             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
-             enum radv_cmd_flush_bits flush_bits)
- {
-+   /* Copyright 2019 The Fuchsia Authors. */
-    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-+
-+   radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;
-+
-    for (uint32_t i = 0; i < infoCount; ++i) {
--      struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
--      info.count = bvh_states[i].node_count;
-+      uint32_t count = bvh_states[i].node_count;
-+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-+      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
-+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
-+
-+      /* Anything to do? */
-+      if (!count) {
-+         bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
-+         continue;
-+      }
-+
-+      /*
-+       * OVERVIEW
-+       *
-+       *   1. Pad the keyvals in `scatter_even`.
-+       *   2. Zero the `histograms` and `partitions`.
-+       *      --- BARRIER ---
-+       *   3. HISTOGRAM is dispatched before PREFIX.
-+       *      --- BARRIER ---
-+       *   4. PREFIX is dispatched before the first SCATTER.
-+       *      --- BARRIER ---
-+       *   5. One or more SCATTER dispatches.
-+       *
-+       * Note that the `partitions` buffer can be zeroed anytime before the first
-+       * scatter.
-+       */
-+
-+      /* How many passes? */
-+      uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
-+      uint32_t keyval_bits = keyval_bytes * 8;
-+      uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
-+      uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
-+
-+      bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
-+
-+      /*
-+       * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
-+       *
-+       * Pad fractional blocks with max-valued keyvals.
-+       *
-+       * Zero the histograms and partitions buffer.
-+       *
-+       * This assumes the partitions follow the histograms.
-+       */
-+
-+      /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
-+
-+      /* How many scatter blocks? */
-+      uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
-+      uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
-+      uint32_t scatter_blocks = (count + scatter_block_kvs - 1) / scatter_block_kvs;
-+      uint32_t count_ru_scatter = scatter_blocks * scatter_block_kvs;
-+
-+      /*
-+       * How many histogram blocks?
-+       *
-+       * Note that it's OK to have more max-valued digits counted by the histogram
-+       * than sorted by the scatters because the sort is stable.
-+       */
-+      uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
-+      uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
-+      uint32_t histo_blocks = (count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
-+      uint32_t count_ru_histo = histo_blocks * histo_block_kvs;
-+
-+      /* Fill with max values */
-+      if (count_ru_histo > count) {
-+         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + count * keyval_bytes,
-+                          (count_ru_histo - count) * keyval_bytes, 0xFFFFFFFF);
-+      }
-+
-+      /*
-+       * Zero histograms and invalidate partitions.
-+       *
-+       * Note that the partition invalidation only needs to be performed once
-+       * because the even/odd scatter dispatches rely on the the previous pass to
-+       * leave the partitions in an invalid state.
-+       *
-+       * Note that the last workgroup doesn't read/write a partition so it doesn't
-+       * need to be initialized.
-+       */
-+      uint32_t histo_partition_count = passes + scatter_blocks - 1;
-+      uint32_t pass_idx = (keyval_bytes - passes);
-+
-+      uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
-+
-+      radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
-+                       histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
-+
-+      /*
-+       * Pipeline: HISTOGRAM
-+       *
-+       * TODO(allanmac): All subgroups should try to process approximately the same
-+       * number of blocks in order to minimize tail effects.  This was implemented
-+       * and reverted but should be reimplemented and benchmarked later.
-+       */
-+      vk_barrier_transfer_w_to_compute_r(commandBuffer);
-+
-+      uint64_t devaddr_histograms = internal_addr + rs->internal.histograms.offset;
-+
-+      /* Dispatch histogram */
-+      struct rs_push_histogram push_histogram = {
-+         .devaddr_histograms = devaddr_histograms,
-+         .devaddr_keyvals = keyvals_even_addr,
-+         .passes = passes,
-+      };
-+
-+      radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-+                            sizeof(push_histogram), &push_histogram);
-+
-+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
-+
-+      vk_common_CmdDispatch(commandBuffer, histo_blocks, 1, 1);
- 
--      info.keyvals_even.buffer = VK_NULL_HANDLE;
--      info.keyvals_even.offset = 0;
--      info.keyvals_even.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-+      /*
-+       * Pipeline: PREFIX
-+       *
-+       * Launch one workgroup per pass.
-+       */
-+      vk_barrier_compute_w_to_compute_r(commandBuffer);
-+
-+      struct rs_push_prefix push_prefix = {
-+         .devaddr_histograms = devaddr_histograms,
-+      };
-+
-+      radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-+                            sizeof(push_prefix), &push_prefix);
- 
--      info.keyvals_odd = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
-+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
- 
--      info.internal.buffer = VK_NULL_HANDLE;
--      info.internal.offset = 0;
--      info.internal.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
-+      vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
- 
--      VkDeviceAddress result_addr;
--      radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, &info,
--                                 radv_device_to_handle(cmd_buffer->device), commandBuffer, &result_addr);
-+      /* Pipeline: SCATTER */
-+      vk_barrier_compute_w_to_compute_r(commandBuffer);
-+
-+      uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
-+      uint64_t devaddr_partitions = internal_addr + rs->internal.partitions.offset;
-+
-+      struct rs_push_scatter push_scatter = {
-+         .devaddr_keyvals_even = keyvals_even_addr,
-+         .devaddr_keyvals_odd = keyvals_odd_addr,
-+         .devaddr_partitions = devaddr_partitions,
-+         .devaddr_histograms = devaddr_histograms + histogram_offset,
-+         .pass_offset = (pass_idx & 3) * RS_RADIX_LOG2,
-+      };
- 
--      assert(result_addr == info.keyvals_even.devaddr || result_addr == info.keyvals_odd);
-+      {
-+         uint32_t pass_dword = pass_idx / 4;
- 
--      bvh_states[i].scratch_offset = (uint32_t)(result_addr - pInfos[i].scratchData.deviceAddress);
-+         radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.scatter[pass_dword].even,
-+                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_scatter), &push_scatter);
-+
-+         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
-+                              rs->pipelines.named.scatter[pass_dword].even);
-+      }
-+
-+      bool is_even = true;
-+
-+      while (true) {
-+         vk_common_CmdDispatch(commandBuffer, scatter_blocks, 1, 1);
-+
-+         /* Continue? */
-+         if (++pass_idx >= keyval_bytes)
-+            break;
-+
-+         vk_barrier_compute_w_to_compute_r(commandBuffer);
-+
-+         is_even ^= true;
-+         push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
-+         push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
-+
-+         uint32_t pass_dword = pass_idx / 4;
-+
-+         /* Update push constants that changed */
-+         VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
-+                                       : rs->pipeline_layouts.named.scatter[pass_dword].odd;
-+         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT,
-+                               offsetof(struct rs_push_scatter, devaddr_histograms),
-+                               sizeof(push_scatter.devaddr_histograms) + sizeof(push_scatter.pass_offset),
-+                               &push_scatter.devaddr_histograms);
-+
-+         /* Bind new pipeline */
-+         VkPipeline p =
-+            is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
-+
-+         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
-+      }
-    }
- 
-    cmd_buffer->state.flush_bits |= flush_bits;
-diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
-index 1ea606c2ca111..2b0b9189f33f6 100644
---- a/src/amd/vulkan/radv_private.h
-+++ b/src/amd/vulkan/radv_private.h
-@@ -731,7 +731,6 @@ struct radv_meta_state {
-       VkPipeline copy_pipeline;
- 
-       struct radix_sort_vk *radix_sort;
--      struct radix_sort_vk_sort_devaddr_info radix_sort_info;
- 
-       struct {
-          VkBuffer buffer;
--- 
-GitLab
-
-
-From 04c77145628fe9956ae44a25ba7b1dfe401a9de8 Mon Sep 17 00:00:00 2001
-From: Konstantin Seurer <konstantin.seurer@gmail.com>
-Date: Wed, 16 Aug 2023 11:50:18 +0200
-Subject: [PATCH 3/3] radv: Perform multiple sorts in parallel
-
-This was the last part that didn't scale with multiple infos. Reducing
-the amount of barriers in this case improves DOOM Eternal performance by
-50%. (Running with low resolution)
----
- src/amd/vulkan/radv_acceleration_structure.c | 264 ++++++++++---------
- 1 file changed, 143 insertions(+), 121 deletions(-)
-
-diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
-index 9866de2e594a8..85852453fcb29 100644
---- a/src/amd/vulkan/radv_acceleration_structure.c
-+++ b/src/amd/vulkan/radv_acceleration_structure.c
-@@ -598,6 +598,13 @@ struct bvh_state {
-    struct acceleration_structure_layout accel_struct;
-    struct scratch_layout scratch;
-    struct build_config config;
-+
-+   /* Radix sort state */
-+   uint32_t scatter_blocks;
-+   uint32_t count_ru_scatter;
-+   uint32_t histo_blocks;
-+   uint32_t count_ru_histo;
-+   struct rs_push_scatter push_scatter;
- };
- 
- static uint32_t
-@@ -734,75 +741,79 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
- 
-    radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;
- 
--   for (uint32_t i = 0; i < infoCount; ++i) {
--      uint32_t count = bvh_states[i].node_count;
--      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
--      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
--      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
-+   /*
-+    * OVERVIEW
-+    *
-+    *   1. Pad the keyvals in `scatter_even`.
-+    *   2. Zero the `histograms` and `partitions`.
-+    *      --- BARRIER ---
-+    *   3. HISTOGRAM is dispatched before PREFIX.
-+    *      --- BARRIER ---
-+    *   4. PREFIX is dispatched before the first SCATTER.
-+    *      --- BARRIER ---
-+    *   5. One or more SCATTER dispatches.
-+    *
-+    * Note that the `partitions` buffer can be zeroed anytime before the first
-+    * scatter.
-+    */
-+
-+   /* How many passes? */
-+   uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
-+   uint32_t keyval_bits = keyval_bytes * 8;
-+   uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
-+   uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
- 
--      /* Anything to do? */
--      if (!count) {
-+   for (uint32_t i = 0; i < infoCount; ++i) {
-+      if (bvh_states[i].node_count)
-+         bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
-+      else
-          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
--         continue;
--      }
--
--      /*
--       * OVERVIEW
--       *
--       *   1. Pad the keyvals in `scatter_even`.
--       *   2. Zero the `histograms` and `partitions`.
--       *      --- BARRIER ---
--       *   3. HISTOGRAM is dispatched before PREFIX.
--       *      --- BARRIER ---
--       *   4. PREFIX is dispatched before the first SCATTER.
--       *      --- BARRIER ---
--       *   5. One or more SCATTER dispatches.
--       *
--       * Note that the `partitions` buffer can be zeroed anytime before the first
--       * scatter.
--       */
--
--      /* How many passes? */
--      uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
--      uint32_t keyval_bits = keyval_bytes * 8;
--      uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
--      uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
-+   }
- 
--      bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
-+   /*
-+    * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
-+    *
-+    * Pad fractional blocks with max-valued keyvals.
-+    *
-+    * Zero the histograms and partitions buffer.
-+    *
-+    * This assumes the partitions follow the histograms.
-+    */
-+
-+   /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
-+
-+   /* How many scatter blocks? */
-+   uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
-+   uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
-+
-+   /*
-+    * How many histogram blocks?
-+    *
-+    * Note that it's OK to have more max-valued digits counted by the histogram
-+    * than sorted by the scatters because the sort is stable.
-+    */
-+   uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
-+   uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
-+
-+   uint32_t pass_idx = (keyval_bytes - passes);
- 
--      /*
--       * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
--       *
--       * Pad fractional blocks with max-valued keyvals.
--       *
--       * Zero the histograms and partitions buffer.
--       *
--       * This assumes the partitions follow the histograms.
--       */
-+   for (uint32_t i = 0; i < infoCount; ++i) {
-+      if (!bvh_states[i].node_count)
-+         continue;
- 
--      /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
-+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
- 
--      /* How many scatter blocks? */
--      uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
--      uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
--      uint32_t scatter_blocks = (count + scatter_block_kvs - 1) / scatter_block_kvs;
--      uint32_t count_ru_scatter = scatter_blocks * scatter_block_kvs;
-+      bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
-+      bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
- 
--      /*
--       * How many histogram blocks?
--       *
--       * Note that it's OK to have more max-valued digits counted by the histogram
--       * than sorted by the scatters because the sort is stable.
--       */
--      uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
--      uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
--      uint32_t histo_blocks = (count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
--      uint32_t count_ru_histo = histo_blocks * histo_block_kvs;
-+      bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
-+      bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
- 
-       /* Fill with max values */
--      if (count_ru_histo > count) {
--         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + count * keyval_bytes,
--                          (count_ru_histo - count) * keyval_bytes, 0xFFFFFFFF);
-+      if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
-+         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
-+                          (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
-       }
- 
-       /*
-@@ -815,28 +826,35 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
-        * Note that the last workgroup doesn't read/write a partition so it doesn't
-        * need to be initialized.
-        */
--      uint32_t histo_partition_count = passes + scatter_blocks - 1;
--      uint32_t pass_idx = (keyval_bytes - passes);
-+      uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
- 
-       uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
- 
-       radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
-                        histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
-+   }
- 
--      /*
--       * Pipeline: HISTOGRAM
--       *
--       * TODO(allanmac): All subgroups should try to process approximately the same
--       * number of blocks in order to minimize tail effects.  This was implemented
--       * and reverted but should be reimplemented and benchmarked later.
--       */
--      vk_barrier_transfer_w_to_compute_r(commandBuffer);
-+   /*
-+    * Pipeline: HISTOGRAM
-+    *
-+    * TODO(allanmac): All subgroups should try to process approximately the same
-+    * number of blocks in order to minimize tail effects.  This was implemented
-+    * and reverted but should be reimplemented and benchmarked later.
-+    */
-+   vk_barrier_transfer_w_to_compute_r(commandBuffer);
-+
-+   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
-+
-+   for (uint32_t i = 0; i < infoCount; ++i) {
-+      if (!bvh_states[i].node_count)
-+         continue;
- 
--      uint64_t devaddr_histograms = internal_addr + rs->internal.histograms.offset;
-+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
- 
-       /* Dispatch histogram */
-       struct rs_push_histogram push_histogram = {
--         .devaddr_histograms = devaddr_histograms,
-+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
-          .devaddr_keyvals = keyvals_even_addr,
-          .passes = passes,
-       };
-@@ -844,83 +862,87 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
-       radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-                             sizeof(push_histogram), &push_histogram);
- 
--      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
-+      vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
-+   }
- 
--      vk_common_CmdDispatch(commandBuffer, histo_blocks, 1, 1);
-+   /*
-+    * Pipeline: PREFIX
-+    *
-+    * Launch one workgroup per pass.
-+    */
-+   vk_barrier_compute_w_to_compute_r(commandBuffer);
- 
--      /*
--       * Pipeline: PREFIX
--       *
--       * Launch one workgroup per pass.
--       */
--      vk_barrier_compute_w_to_compute_r(commandBuffer);
-+   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
-+
-+   for (uint32_t i = 0; i < infoCount; ++i) {
-+      if (!bvh_states[i].node_count)
-+         continue;
-+
-+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
- 
-       struct rs_push_prefix push_prefix = {
--         .devaddr_histograms = devaddr_histograms,
-+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
-       };
- 
-       radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
-                             sizeof(push_prefix), &push_prefix);
- 
--      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
--
-       vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
-+   }
- 
--      /* Pipeline: SCATTER */
--      vk_barrier_compute_w_to_compute_r(commandBuffer);
-+   /* Pipeline: SCATTER */
-+   vk_barrier_compute_w_to_compute_r(commandBuffer);
- 
--      uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
--      uint64_t devaddr_partitions = internal_addr + rs->internal.partitions.offset;
-+   uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
- 
--      struct rs_push_scatter push_scatter = {
-+   for (uint32_t i = 0; i < infoCount; i++) {
-+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-+      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
-+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
-+
-+      bvh_states[i].push_scatter = (struct rs_push_scatter){
-          .devaddr_keyvals_even = keyvals_even_addr,
-          .devaddr_keyvals_odd = keyvals_odd_addr,
--         .devaddr_partitions = devaddr_partitions,
--         .devaddr_histograms = devaddr_histograms + histogram_offset,
--         .pass_offset = (pass_idx & 3) * RS_RADIX_LOG2,
-+         .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
-+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
-       };
-+   }
- 
--      {
--         uint32_t pass_dword = pass_idx / 4;
-+   bool is_even = true;
- 
--         radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.scatter[pass_dword].even,
--                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_scatter), &push_scatter);
-+   while (true) {
-+      uint32_t pass_dword = pass_idx / 4;
- 
--         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
--                              rs->pipelines.named.scatter[pass_dword].even);
--      }
-+      /* Bind new pipeline */
-+      VkPipeline p =
-+         is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
-+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
- 
--      bool is_even = true;
-+      /* Update push constants that changed */
-+      VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even //
-+                                    : rs->pipeline_layouts.named.scatter[pass_dword].odd;
- 
--      while (true) {
--         vk_common_CmdDispatch(commandBuffer, scatter_blocks, 1, 1);
-+      for (uint32_t i = 0; i < infoCount; i++) {
-+         if (!bvh_states[i].node_count)
-+            continue;
- 
--         /* Continue? */
--         if (++pass_idx >= keyval_bytes)
--            break;
-+         bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
- 
--         vk_barrier_compute_w_to_compute_r(commandBuffer);
-+         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
-+                               &bvh_states[i].push_scatter);
- 
--         is_even ^= true;
--         push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
--         push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
-+         vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
- 
--         uint32_t pass_dword = pass_idx / 4;
-+         bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
-+      }
- 
--         /* Update push constants that changed */
--         VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
--                                       : rs->pipeline_layouts.named.scatter[pass_dword].odd;
--         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT,
--                               offsetof(struct rs_push_scatter, devaddr_histograms),
--                               sizeof(push_scatter.devaddr_histograms) + sizeof(push_scatter.pass_offset),
--                               &push_scatter.devaddr_histograms);
-+      /* Continue? */
-+      if (++pass_idx >= keyval_bytes)
-+         break;
- 
--         /* Bind new pipeline */
--         VkPipeline p =
--            is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
-+      vk_barrier_compute_w_to_compute_r(commandBuffer);
- 
--         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
--      }
-+      is_even ^= true;
-    }
- 
-    cmd_buffer->state.flush_bits |= flush_bits;
--- 
-GitLab
diff --git a/debian/patches/24789.patch b/debian/patches/24789.patch
deleted file mode 100644
index 8b01f93..0000000
--- a/debian/patches/24789.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 87f95fa7f24415f51391f128adf7f048358be226 Mon Sep 17 00:00:00 2001
-From: Friedrich Vock <friedrich.vock@gmx.de>
-Date: Sat, 19 Aug 2023 11:00:45 +0200
-Subject: [PATCH] nir/load_store_vectorize: Handle intrinsics with constant
- base
-
-This includes nir_load_stack and nir_store_stack, which are vectorized
-in nir_lower_shader_calls. If not adjusted, we end up loading from
-the wrong base.
-
-Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9596
-Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9587
-Cc: mesa-stable
-Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24789>
----
- src/compiler/nir/nir_opt_load_store_vectorize.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
-index 73e6ff6d8878..4bea8fbea6ff 100644
---- a/src/compiler/nir/nir_opt_load_store_vectorize.c
-+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
-@@ -756,6 +756,8 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
- 
-       nir_intrinsic_set_range_base(first->intrin, low_base);
-       nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base);
-+   } else if (nir_intrinsic_has_base(first->intrin) && info->base_src == -1 && info->deref_src == -1) {
-+      nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
-    }
- 
-    first->key = low->key;
--- 
-GitLab
-
diff --git a/debian/patches/path_max.diff b/debian/patches/path_max.diff
deleted file mode 100644
index 9832c73..0000000
--- a/debian/patches/path_max.diff
+++ /dev/null
@@ -1,39 +0,0 @@
---- a/src/util/tests/cache_test.cpp
-+++ b/src/util/tests/cache_test.cpp
-@@ -82,8 +82,8 @@ check_directories_created(void *mem_ctx,
- {
-    bool sub_dirs_created = false;
- 
--   char buf[PATH_MAX];
--   if (getcwd(buf, PATH_MAX)) {
-+   char *buf = getcwd(NULL, 0);
-+   if (buf) {
-       char *full_path = ralloc_asprintf(mem_ctx, "%s%s", buf, ++cache_dir);
-       struct stat sb;
-       if (stat(full_path, &sb) != -1 && S_ISDIR(sb.st_mode))
---- a/src/util/tests/process_test.c
-+++ b/src/util/tests/process_test.c
-@@ -36,6 +36,10 @@
- #define PATH_MAX MAX_PATH
- #endif
- 
-+#if !defined(PATH_MAX) && defined(__GNU__)
-+#define PATH_MAX (4096)
-+#endif
-+
- static bool error = false;
- 
- static void
---- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
-+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
-@@ -42,6 +42,10 @@
- #define PATH_MAX _MAX_PATH
- #endif
- 
-+#if !defined(PATH_MAX) && defined(__GNU__)
-+#define PATH_MAX (4096)
-+#endif
-+
- #define MODULE_PREFIX "pipe_"
- 
- static int (*backends[])(struct pipe_loader_device **, int) = {
diff --git a/debian/patches/revert-af1ee8e01044.diff b/debian/patches/revert-af1ee8e01044.diff
deleted file mode 100644
index 6fd03d7..0000000
--- a/debian/patches/revert-af1ee8e01044.diff
+++ /dev/null
@@ -1,153 +0,0 @@
-From 0c3587a2f8e1b6cfadf9a4bbb6ae4b2c3e14a651 Mon Sep 17 00:00:00 2001
-From: Leandro Ribeiro <leandro.ribeiro@collabora.com>
-Date: Sun, 10 Apr 2022 22:54:36 -0300
-Subject: [PATCH] Revert "egl/wayland: deprecate drm_handle_format() and
- drm_handle_capabilities()"
-
-Commit af1ee8e010441f8f2ed8c77065b159652a4ac9fe dropped support to
-wl_drm, as we thought that most compositors from active projects were
-already supporting zwp_linux_dmabuf_v1.
-
-But that's not true, so revert this commit in order to give these
-projects a longer transition period.
-
-Note that we didn't add back the support to GEM name API, and that was
-on purpose.
-
-Signed-off-by: Leandro Ribeiro <leandro.ribeiro@collabora.com>
----
- src/egl/drivers/dri2/egl_dri2.h         |  1 +
- src/egl/drivers/dri2/platform_wayland.c | 59 +++++++++++++++++++------
- 2 files changed, 47 insertions(+), 13 deletions(-)
-
-diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
-index 89158993efdd..1c840a966b3c 100644
---- a/src/egl/drivers/dri2/egl_dri2.h
-+++ b/src/egl/drivers/dri2/egl_dri2.h
-@@ -284,6 +284,7 @@ struct dri2_egl_display
-    struct zwp_linux_dmabuf_feedback_v1 *wl_dmabuf_feedback;
-    struct dmabuf_feedback_format_table format_table;
-    bool authenticated;
-+   uint32_t capabilities;
-    char *device_name;
- #endif
- 
-diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
-index e9ecf6d1e716..19fad8bfa08e 100644
---- a/src/egl/drivers/dri2/platform_wayland.c
-+++ b/src/egl/drivers/dri2/platform_wayland.c
-@@ -1344,7 +1344,7 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
-                  struct dri2_egl_surface *dri2_surf,
-                  __DRIimage *image)
- {
--   struct wl_buffer *ret;
-+   struct wl_buffer *ret = NULL;
-    EGLBoolean query;
-    int width, height, fourcc, num_planes;
-    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
-@@ -1448,11 +1448,28 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
-       ret = zwp_linux_buffer_params_v1_create_immed(params, width, height,
-                                                     fourcc, 0);
-       zwp_linux_buffer_params_v1_destroy(params);
-+   } else {
-+      struct wl_drm *wl_drm =
-+         dri2_surf ? dri2_surf->wl_drm_wrapper : dri2_dpy->wl_drm;
-+      int fd, stride;
-+
-+      if (num_planes > 1)
-+         return NULL;
-+
-+      query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FD, &fd);
-+      query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);
-+      if (!query) {
-+         if (fd >= 0)
-+            close(fd);
-+         return NULL;
-+      }
- 
--      return ret;
-+      ret = wl_drm_create_prime_buffer(wl_drm, fd, width, height, fourcc, 0,
-+                                       stride, 0, 0, 0, 0);
-+      close(fd);
-    }
- 
--   return NULL;
-+   return ret;
- }
- 
- static EGLBoolean
-@@ -1699,16 +1716,21 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device)
- static void
- drm_handle_format(void *data, struct wl_drm *drm, uint32_t format)
- {
--   /* deprecated, as compositors already support the dma-buf protocol extension
--    * and so we can rely on dmabuf_handle_modifier() to receive formats and
--    * modifiers */
-+   struct dri2_egl_display *dri2_dpy = data;
-+   int visual_idx = dri2_wl_visual_idx_from_fourcc(format);
-+
-+   if (visual_idx == -1)
-+      return;
-+
-+   BITSET_SET(dri2_dpy->formats.formats_bitmap, visual_idx);
- }
- 
- static void
- drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t value)
- {
--   /* deprecated, as compositors already support the dma-buf protocol extension
--    * and so we can rely on it to create wl_buffer's */
-+   struct dri2_egl_display *dri2_dpy = data;
-+
-+   dri2_dpy->capabilities = value;
- }
- 
- static void
-@@ -2077,13 +2099,12 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
-    wl_registry_add_listener(dri2_dpy->wl_registry,
-                             &registry_listener_drm, dri2_dpy);
- 
--   /* The compositor must expose the dma-buf interface. */
--   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_dmabuf == NULL)
-+   if (roundtrip(dri2_dpy) < 0)
-       goto cleanup;
- 
-    /* Get default dma-buf feedback */
--   if (zwp_linux_dmabuf_v1_get_version(dri2_dpy->wl_dmabuf) >=
--       ZWP_LINUX_DMABUF_V1_GET_DEFAULT_FEEDBACK_SINCE_VERSION) {
-+   if (dri2_dpy->wl_dmabuf && zwp_linux_dmabuf_v1_get_version(dri2_dpy->wl_dmabuf) >=
-+                              ZWP_LINUX_DMABUF_V1_GET_DEFAULT_FEEDBACK_SINCE_VERSION) {
-       dmabuf_feedback_format_table_init(&dri2_dpy->format_table);
-       dri2_dpy->wl_dmabuf_feedback =
-          zwp_linux_dmabuf_v1_get_default_feedback(dri2_dpy->wl_dmabuf);
-@@ -2091,7 +2112,6 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
-                                                 &dmabuf_feedback_listener, dri2_dpy);
-    }
- 
--   /* Receive events from the interfaces */
-    if (roundtrip(dri2_dpy) < 0)
-       goto cleanup;
- 
-@@ -2178,6 +2198,19 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
- 
-    dri2_wl_setup_swap_interval(disp);
- 
-+   if (dri2_dpy->wl_drm) {
-+      /* To use Prime, we must have _DRI_IMAGE v7 at least. createImageFromFds
-+       * support indicates that Prime export/import is supported by the driver.
-+       * We deprecated the support to GEM names API, so we bail out if the
-+       * driver does not suport Prime. */
-+      if (!(dri2_dpy->capabilities & WL_DRM_CAPABILITY_PRIME) ||
-+          (dri2_dpy->image->base.version < 7) ||
-+          (dri2_dpy->image->createImageFromFds == NULL)) {
-+         _eglLog(_EGL_WARNING, "wayland-egl: display does not support prime");
-+         goto cleanup;
-+      }
-+   }
-+
-    if (dri2_dpy->is_different_gpu &&
-        (dri2_dpy->image->base.version < 9 ||
-         dri2_dpy->image->blitImage == NULL)) {
--- 
-GitLab
-
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index d2b1d0c..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1,6 +0,0 @@
-#07_gallium-fix-build-failure-on-powerpcspe.diff
-#path_max.diff
-#src_glx_dri_common.h.diff
-#21929.patch
-#24720.patch
-#24789.patch
diff --git a/debian/patches/src_glx_dri_common.h.diff b/debian/patches/src_glx_dri_common.h.diff
deleted file mode 100644
index 7524922..0000000
--- a/debian/patches/src_glx_dri_common.h.diff
+++ /dev/null
@@ -1,13 +0,0 @@
---- a/src/glx/dri_common.h
-+++ b/src/glx/dri_common.h
-@@ -57,6 +57,10 @@ extern struct glx_config *driConvertConf
- 
- extern void driDestroyConfigs(const __DRIconfig **configs);
- 
-+#ifndef __GLXDRIdrawable
-+typedef struct __GLXDRIdrawableRec __GLXDRIdrawable;
-+#endif
-+
- extern __GLXDRIdrawable *
- driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable);
-