24.0
This commit is contained in: parent dcfdd44242, commit d7cb625189

debian/patches/21929.patch (vendored, 679 lines)
@@ -1,679 +0,0 @@
From ed9fb6be100cff6c2066beb0cdf8b3a17cab292c Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Sat, 24 Jun 2023 15:49:13 +0200
Subject: [PATCH 1/4] radv: Add rt.monolithic to radv_pipeline_key

---
 src/amd/vulkan/radv_shader.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0c53695edae7c..6eb95fdd0a097 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -135,6 +135,10 @@ struct radv_pipeline_key {

       bool line_smooth_enabled;
    } ps;
+
+   struct {
+      bool monolithic;
+   } rt;
 };

 struct radv_nir_compiler_options {
--
GitLab
From 8f45cc08361f55c1e613a11198b1ae97c519406e Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Sat, 24 Jun 2023 15:46:51 +0200
Subject: [PATCH 2/4] radv/rt: Store NIR shaders separately

In order to compile monolithic shaders with pipeline libraries, we need
to keep the NIR around for inlining recursive stages.
---
 src/amd/vulkan/radv_pipeline_cache.c |  9 +--
 src/amd/vulkan/radv_pipeline_rt.c    | 93 +++++++++++++++++++++-------
 src/amd/vulkan/radv_private.h        |  1 +
 src/amd/vulkan/radv_rt_shader.c      |  7 +--
 4 files changed, 79 insertions(+), 31 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 5bbbc755ae11f..7e4c6f8898130 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -481,11 +481,12 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip
       pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]);

    for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
-      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
+      if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i]))
          pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base;
-      } else if (is_library) {
-         pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
-         complete &= pipeline->stages[i].shader != NULL;
+
+      if (is_library) {
+         pipeline->stages[i].nir = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
+         complete &= pipeline->stages[i].nir != NULL;
       }
    }

diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index c86ea3a508468..85afc8cb28e1b 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -263,7 +263,10 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, st
          RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
          for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
-            stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
+            stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir);
+            if (library_pipeline->stages[j].shader)
+               stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
+
             stages[idx].stage = library_pipeline->stages[j].stage;
             stages[idx].stack_size = library_pipeline->stages[j].stack_size;
             memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH);
@@ -462,45 +465,83 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
       return VK_PIPELINE_COMPILE_REQUIRED;
    VkResult result = VK_SUCCESS;

-   struct radv_ray_tracing_stage *stages = pipeline->stages;
+   struct radv_ray_tracing_stage *rt_stages = pipeline->stages;
+
+   struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage));
+   if (!stages)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   bool has_callable = false;
+   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
+      if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) {
+         has_callable = true;
+         break;
+      }
+   }

    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
+      if (rt_stages[idx].shader || rt_stages[idx].nir)
+         continue;
+
       int64_t stage_start = os_time_get_nano();
-      struct radv_shader_stage stage;
-      radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage);

-      if (stages[idx].shader)
-         goto feedback;
+      struct radv_shader_stage *stage = &stages[idx];
+      radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, stage);

       /* precompile the shader */
-      stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
+      stage->nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
+
+      /* Cases in which we need to keep around the NIR:
+       *    - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader.
+       *                        If there is a callable shader, we can be sure that the final pipeline won't be
+       *                        monolithic.
+       *    - non-recursive:    Non-recursive shaders are inlined into the traversal shader.
+       *    - monolithic:       Callable shaders (chit/miss) are inlined into the raygen shader.
+       */
+      bool compiled = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]);
+      bool library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
+      bool nir_needed =
+         (library && !has_callable) || !compiled || (key->rt.monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN);
+      nir_needed &= !rt_stages[idx].nir;
+      if (nir_needed) {
+         rt_stages[idx].stack_size = stage->nir->scratch_size;
+         rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1,
+                                                                !key->optimisations_disabled);
+      }

-      if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
-         uint32_t stack_size = 0;
+      stage->feedback.duration = os_time_get_nano() - stage_start;
+   }

+   for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
+      int64_t stage_start = os_time_get_nano();
+      struct radv_shader_stage *stage = &stages[idx];
+
+      /* Cases in which we need to compile the shader (raygen/callable/chit/miss):
+       *    TODO: - monolithic: Extend the loop to cover imported stages and force compilation of imported raygen
+       *                        shaders since pipeline library shaders use separate compilation.
+       *    - separate:         Compile any recursive stage if it wasn't compiled yet.
+       *    TODO: Skip chit and miss shaders in the monolithic case.
+       */
+      bool shader_needed = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]) && !rt_stages[idx].shader;
+      if (shader_needed) {
+         uint32_t stack_size = 0;
          struct radv_serialized_shader_arena_block *replay_block =
            capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL;

          struct radv_shader *shader;
          result =
-            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &stage, &stack_size, replay_block, &shader);
-         stages[idx].stack_size = stack_size;
-         stages[idx].shader = shader ? &shader->base : NULL;
-      } else {
-         stages[idx].stack_size = stage.nir->scratch_size;
-         stages[idx].shader =
-            radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, stages[idx].sha1, !key->optimisations_disabled);
-      }
-      ralloc_free(stage.nir);
+            radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, stage, &stack_size, replay_block, &shader);
+         if (result != VK_SUCCESS)
+            goto cleanup;

-      if (result != VK_SUCCESS)
-         return result;
+         rt_stages[idx].stack_size = stack_size;
+         rt_stages[idx].shader = shader ? &shader->base : NULL;
+      }

-   feedback:
       if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) {
          assert(idx < creation_feedback->pipelineStageCreationFeedbackCount);
-         stage.feedback.duration = os_time_get_nano() - stage_start;
-         creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage.feedback;
+         stage->feedback.duration += os_time_get_nano() - stage_start;
+         creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback;
       }
    }

@@ -527,6 +568,10 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
    result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &traversal_stage, NULL, NULL,
                                &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);

+cleanup:
+   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
+      ralloc_free(stages[i].nir);
+   free(stages);
    return result;
 }

@@ -732,6 +777,8 @@ void
 radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
 {
    for (unsigned i = 0; i < pipeline->stage_count; i++) {
+      if (pipeline->stages[i].nir)
+         vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir);
       if (pipeline->stages[i].shader)
          vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].shader);
    }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 47e315488e9f9..7ab46738b46f0 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2364,6 +2364,7 @@ struct radv_ray_tracing_group {
 };

 struct radv_ray_tracing_stage {
+   struct vk_pipeline_cache_object *nir;
    struct vk_pipeline_cache_object *shader;
    gl_shader_stage stage;
    uint32_t stack_size;
diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index cc92beebc3503..3def324bcccf3 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -1132,7 +1132,7 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers
       if (is_dup)
          continue;

-      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader);
+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].nir);
       assert(nir_stage);

       insert_rt_case(b, nir_stage, vars, sbt_idx, data->pipeline->groups[i].handle.any_hit_index);
@@ -1262,13 +1262,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
       if (is_dup)
          continue;

-      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader);
+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].nir);
       assert(nir_stage);

       nir_shader *any_hit_stage = NULL;
       if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
-         any_hit_stage =
-            radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader);
+         any_hit_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].nir);
          assert(any_hit_stage);

          /* reserve stack size for any_hit before it is inlined */
--
GitLab
From bba42cbc235e75a5c7ed05e55e48f71640c68ad4 Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Mon, 21 Aug 2023 13:32:53 +0200
Subject: [PATCH 3/4] radv/rt: Add monolithic raygen lowering

Ray traversal is inlined to allow for constant folding and avoid
spilling.
---
 src/amd/vulkan/radv_pipeline_rt.c |  11 +-
 src/amd/vulkan/radv_rt_shader.c   | 276 ++++++++++++++++++++++++++----
 src/amd/vulkan/radv_shader.h      |   3 +-
 3 files changed, 248 insertions(+), 42 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 85afc8cb28e1b..12562c6cf89ba 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -356,9 +356,8 @@ move_rt_instructions(nir_shader *shader)
 static VkResult
 radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
                    const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key,
-                   const struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage,
-                   uint32_t *stack_size, struct radv_serialized_shader_arena_block *replay_block,
-                   struct radv_shader **out_shader)
+                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, uint32_t *stack_size,
+                   struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
 {
    struct radv_shader_binary *binary;
    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
@@ -384,7 +383,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
    uint32_t num_resume_shaders = 0;
    nir_shader **resume_shaders = NULL;

-   if (stage->stage != MESA_SHADER_INTERSECTION) {
+   bool monolithic_raygen = pipeline_key->rt.monolithic && stage->stage == MESA_SHADER_RAYGEN;
+   if (stage->stage != MESA_SHADER_INTERSECTION && !monolithic_raygen) {
       nir_builder b = nir_builder_at(nir_after_cf_list(&nir_shader_get_entrypoint(stage->nir)->body));
       nir_rt_return_amd(&b);

@@ -411,7 +411,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
    for (uint32_t i = 0; i < num_shaders; i++) {
       struct radv_shader_stage temp_stage = *stage;
       temp_stage.nir = shaders[i];
-      radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0);
+      radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
+                            pipeline, pipeline_key);
       radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled);
       radv_postprocess_nir(device, pipeline_key, &temp_stage);

diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
index 3def324bcccf3..362d918597008 100644
--- a/src/amd/vulkan/radv_rt_shader.c
+++ b/src/amd/vulkan/radv_rt_shader.c
@@ -1306,6 +1306,87 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
    nir_pop_if(b, NULL);
 }

+static void
+visit_closest_hit_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
+                          struct rt_variables *vars)
+{
+   nir_def *sbt_idx = nir_load_var(b, vars->idx);
+
+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
+      nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
+
+   for (unsigned i = 0; i < pipeline->group_count; ++i) {
+      struct radv_ray_tracing_group *group = &pipeline->groups[i];
+
+      unsigned shader_id = VK_SHADER_UNUSED_KHR;
+      if (group->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
+         shader_id = group->recursive_shader;
+
+      if (shader_id == VK_SHADER_UNUSED_KHR)
+         continue;
+
+      /* Avoid emitting stages with the same shaders/handles multiple times. */
+      bool is_dup = false;
+      for (unsigned j = 0; j < i; ++j)
+         if (pipeline->groups[j].handle.closest_hit_index == pipeline->groups[i].handle.closest_hit_index)
+            is_dup = true;
+
+      if (is_dup)
+         continue;
+
+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
+      assert(nir_stage);
+
+      insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.closest_hit_index);
+      ralloc_free(nir_stage);
+   }
+
+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
+      nir_pop_if(b, NULL);
+}
+
+static void
+visit_miss_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
+                   struct rt_variables *vars)
+{
+   nir_def *sbt_idx = nir_load_var(b, vars->idx);
+
+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
+      nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
+
+   for (unsigned i = 0; i < pipeline->group_count; ++i) {
+      struct radv_ray_tracing_group *group = &pipeline->groups[i];
+
+      unsigned shader_id = VK_SHADER_UNUSED_KHR;
+      if (group->type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
+         shader_id = group->recursive_shader;
+
+      if (shader_id == VK_SHADER_UNUSED_KHR)
+         continue;
+
+      if (pipeline->stages[shader_id].stage != MESA_SHADER_MISS)
+         continue;
+
+      /* Avoid emitting stages with the same shaders/handles multiple times. */
+      bool is_dup = false;
+      for (unsigned j = 0; j < i; ++j)
+         if (pipeline->groups[j].handle.general_index == pipeline->groups[i].handle.general_index)
+            is_dup = true;
+
+      if (is_dup)
+         continue;
+
+      nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
+      assert(nir_stage);
+
+      insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.general_index);
+      ralloc_free(nir_stage);
+   }
+
+   if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
+      nir_pop_if(b, NULL);
+}
+
 static void
 store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
 {
@@ -1414,25 +1495,47 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
    /* Register storage for hit attributes */
    nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];

-   for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
-      hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
+   if (!key->rt.monolithic || b->shader->info.stage != MESA_SHADER_RAYGEN) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
+         hit_attribs[i] =
+            nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");

-   lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
+      lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
+   }

    /* Initialize follow-up shader. */
    nir_push_if(b, nir_load_var(b, trav_vars.hit));
    {
-      for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
-         nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
-      nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
-                                  nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
-                                  nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
+      if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
+         load_sbt_entry(b, vars, nir_load_var(b, vars->idx), SBT_HIT, SBT_CLOSEST_HIT_IDX);
+
+         nir_def *should_return =
+            nir_test_mask(b, nir_load_var(b, vars->cull_mask_and_flags), SpvRayFlagsSkipClosestHitShaderKHRMask);
+
+         /* should_return is set if we had a hit but we won't be calling the closest hit
+          * shader and hence need to return immediately to the calling shader. */
+         nir_push_if(b, nir_inot(b, should_return));
+         visit_closest_hit_shaders(device, b, pipeline, vars);
+         nir_pop_if(b, NULL);
+      } else {
+         for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
+            nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
+         nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
+                                     nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
+                                     nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
+      }
    }
    nir_push_else(b, NULL);
    {
-      /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
-       * for miss shaders if none of the rays miss. */
-      nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
+      if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
+         load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, SBT_GENERAL_IDX);
+
+         visit_miss_shaders(device, b, pipeline, vars);
+      } else {
+         /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
+          * for miss shaders if none of the rays miss. */
+         nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
+      }
    }
    nir_pop_if(b, NULL);
 }
@@ -1477,6 +1580,98 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
    return b.shader;
 }

+struct lower_rt_instruction_monolithic_state {
+   struct radv_device *device;
+   struct radv_ray_tracing_pipeline *pipeline;
+   const struct radv_pipeline_key *key;
+   const VkRayTracingPipelineCreateInfoKHR *pCreateInfo;
+
+   struct rt_variables *vars;
+};
+
+static bool
+lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   b->cursor = nir_after_instr(instr);
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   struct lower_rt_instruction_monolithic_state *state = data;
+   struct rt_variables *vars = state->vars;
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_execute_callable:
+      unreachable("nir_intrinsic_execute_callable");
+   case nir_intrinsic_trace_ray: {
+      nir_store_var(b, vars->arg, nir_iadd_imm(b, intr->src[10].ssa, -b->shader->scratch_size), 1);
+
+      /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
+      nir_store_var(b, vars->accel_struct, intr->src[0].ssa, 0x1);
+      nir_store_var(b, vars->cull_mask_and_flags, nir_ior(b, nir_ishl_imm(b, intr->src[2].ssa, 24), intr->src[1].ssa),
+                    0x1);
+      nir_store_var(b, vars->sbt_offset, nir_iand_imm(b, intr->src[3].ssa, 0xf), 0x1);
+      nir_store_var(b, vars->sbt_stride, nir_iand_imm(b, intr->src[4].ssa, 0xf), 0x1);
+      nir_store_var(b, vars->miss_index, nir_iand_imm(b, intr->src[5].ssa, 0xffff), 0x1);
+      nir_store_var(b, vars->origin, intr->src[6].ssa, 0x7);
+      nir_store_var(b, vars->tmin, intr->src[7].ssa, 0x1);
+      nir_store_var(b, vars->direction, intr->src[8].ssa, 0x7);
+      nir_store_var(b, vars->tmax, intr->src[9].ssa, 0x1);
+
+      nir_def *stack_ptr = nir_load_var(b, vars->stack_ptr);
+      nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);
+
+      radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, state->key, b, vars);
+      b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size *
+                                                                         MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
+
+      nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);
+
+      nir_instr_remove(instr);
+      return true;
+   }
+   case nir_intrinsic_rt_resume:
+      unreachable("nir_intrinsic_rt_resume");
+   case nir_intrinsic_rt_return_amd:
+      unreachable("nir_intrinsic_rt_return_amd");
+   case nir_intrinsic_execute_closest_hit_amd:
+      unreachable("nir_intrinsic_execute_closest_hit_amd");
+   case nir_intrinsic_execute_miss_amd:
+      unreachable("nir_intrinsic_execute_miss_amd");
+   default:
+      return false;
+   }
+}
+
+static void
+lower_rt_instructions_monolithic(nir_shader *shader, struct radv_device *device,
+                                 struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key,
+                                 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct rt_variables *vars)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   struct lower_rt_instruction_monolithic_state state = {
+      .device = device,
+      .pipeline = pipeline,
+      .key = key,
+      .pCreateInfo = pCreateInfo,
+      .vars = vars,
+   };
+
+   nir_shader_instructions_pass(shader, lower_rt_instruction_monolithic, nir_metadata_none, &state);
+   nir_index_ssa_defs(impl);
+
+   /* Register storage for hit attributes */
+   nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
+      hit_attribs[i] = nir_local_variable_create(impl, glsl_uint_type(), "ahit_attrib");
+
+   lower_hit_attribs(shader, hit_attribs, 0);
+}
+
 /** Select the next shader based on priorities:
  *
  * Detect the priority of the shader stage by the lowest bits in the address (low to high):
@@ -1517,13 +1712,18 @@ select_next_shader(nir_builder *b, nir_def *shader_addr, unsigned wave_size)
 void
 radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                       const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size,
-                      bool resume_shader)
+                      bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
+                      const struct radv_pipeline_key *key)
 {
    nir_function_impl *impl = nir_shader_get_entrypoint(shader);

    const VkPipelineCreateFlagBits2KHR create_flags = radv_get_pipeline_create_flags(pCreateInfo);

    struct rt_variables vars = create_rt_variables(shader, create_flags);
+
+   if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN)
+      lower_rt_instructions_monolithic(shader, device, pipeline, key, pCreateInfo, &vars);
+
    lower_rt_instructions(shader, &vars, true);

    if (stack_size) {
@@ -1585,32 +1785,36 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
    if (shader_guard)
       nir_pop_if(&b, shader_guard);

-   /* select next shader */
    b.cursor = nir_after_cf_list(&impl->body);

-   shader_addr = nir_load_var(&b, vars.shader_addr);
-   nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
-
-   /* store back all variables to registers */
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
-
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
-   ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
+   if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) {
+      nir_terminate(&b);
+   } else {
+      /* select next shader */
+      shader_addr = nir_load_var(&b, vars.shader_addr);
+      nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
+
+      /* store back all variables to registers */
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
+
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
+      ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
+   }

    nir_metadata_preserve(impl, nir_metadata_none);

diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 6eb95fdd0a097..969f9a56ab7f5 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -635,7 +635,8 @@ nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShad

 void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                            const struct radv_shader_args *args, const struct radv_shader_info *info,
-                           uint32_t *stack_size, bool resume_shader);
+                           uint32_t *stack_size, bool resume_shader, struct radv_device *device,
+                           struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key);

 struct radv_shader_stage;

--
GitLab
From 5c9dd4efece8f352d00d1310b556928cccb239c8 Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Sat, 24 Jun 2023 16:11:16 +0200
Subject: [PATCH 4/4] radv/rt: Use monolithic pipelines

Only available for non-recursive pipelines that do not have callables.
---
 src/amd/vulkan/radv_pipeline_rt.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 12562c6cf89ba..97449b9cbafac 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -103,6 +103,17 @@ radv_generate_rt_pipeline_key(const struct radv_device *device, const struct rad
       }
    }

+   if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+      key.rt.monolithic = pCreateInfo->maxPipelineRayRecursionDepth <= 1;
+
+      for (uint32_t i = 0; i < pipeline->stage_count; i++) {
+         if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) {
+            key.rt.monolithic = false;
+            break;
+         }
+      }
+   }
+
    return key;
 }

--
GitLab
debian/patches/24720.patch (vendored, 687 lines)
@@ -1,687 +0,0 @@
From 42be7a3c53698a165e9612619f6a34a65bbf91ff Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Wed, 16 Aug 2023 10:37:56 +0200
Subject: [PATCH 1/3] radv: Remove dead radix_sort_vk_get_memory_requirements
 call

---
 src/amd/vulkan/radv_acceleration_structure.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index ece47b1230c88..5c5eb16e61f9d 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -745,10 +745,6 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    for (uint32_t i = 0; i < infoCount; ++i) {
-      struct radix_sort_vk_memory_requirements requirements;
-      radix_sort_vk_get_memory_requirements(cmd_buffer->device->meta_state.accel_struct_build.radix_sort,
-                                            bvh_states[i].node_count, &requirements);
-
       struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
       info.count = bvh_states[i].node_count;

--
GitLab
From faa17e5322ea66cd74e37aab48316059a05738d6 Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Wed, 16 Aug 2023 11:09:25 +0200
Subject: [PATCH 2/3] radv/radix_sort: Vendor the radix sort dispatch code

This needs to be done so we can optimize it for occupancy when building
multiple acceleration structures in parallel. Changes to the original
code:

- Change // to /* */
- clang-format
- Replace vkCmd calls with calls to the driver entrypoints
- Add a lightweight info struct
- Use radv_fill_buffer directly
---
 src/amd/vulkan/radv_acceleration_structure.c | 218 ++++++++++++++++---
 src/amd/vulkan/radv_private.h                |   1 -
 2 files changed, 187 insertions(+), 32 deletions(-)

diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 5c5eb16e61f9d..9866de2e594a8 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -27,7 +27,9 @@
 #include "nir_builder.h"
 #include "radv_cs.h"

+#include "radix_sort/common/vk/barrier.h"
 #include "radix_sort/radv_radix_sort.h"
+#include "radix_sort/shaders/push.h"

 #include "bvh/build_interface.h"
 #include "bvh/bvh.h"
@@ -76,6 +78,7 @@ static const uint32_t header_spv[] = {
 };

 #define KEY_ID_PAIR_SIZE 8
+#define MORTON_BIT_SIZE  24

 enum internal_build_type {
    INTERNAL_BUILD_TYPE_LBVH,
@@ -382,17 +385,6 @@ cleanup:
    return result;
 }

-static void
-radix_sort_fill_buffer(VkCommandBuffer commandBuffer, radix_sort_vk_buffer_info_t const *buffer_info,
-                       VkDeviceSize offset, VkDeviceSize size, uint32_t data)
-{
-   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
-   assert(size != VK_WHOLE_SIZE);
-
-   radv_fill_buffer(cmd_buffer, NULL, NULL, buffer_info->devaddr + buffer_info->offset + offset, size, data);
-}
-
 VkResult
 radv_device_init_null_accel_struct(struct radv_device *device)
 {
@@ -576,12 +568,6 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)

    device->meta_state.accel_struct_build.radix_sort =
       radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
-
-   struct radix_sort_vk_sort_devaddr_info *radix_sort_info = &device->meta_state.accel_struct_build.radix_sort_info;
-   radix_sort_info->ext = NULL;
-   radix_sort_info->key_bits = 24;
-   radix_sort_info->fill_buffer = radix_sort_fill_buffer;
-
 exit:
    mtx_unlock(&device->meta_state.mtx);
    return result;
@@ -743,28 +729,198 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
             enum radv_cmd_flush_bits flush_bits)
 {
+   /* Copyright 2019 The Fuchsia Authors. */
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;
+
    for (uint32_t i = 0; i < infoCount; ++i) {
-      struct radix_sort_vk_sort_devaddr_info info = cmd_buffer->device->meta_state.accel_struct_build.radix_sort_info;
-      info.count = bvh_states[i].node_count;
+      uint32_t count = bvh_states[i].node_count;
+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
+
+      /* Anything to do? */
+      if (!count) {
+         bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
+         continue;
+      }
+
+      /*
+       * OVERVIEW
+       *
+       *   1. Pad the keyvals in `scatter_even`.
+       *   2. Zero the `histograms` and `partitions`.
+       *      --- BARRIER ---
+       *   3. HISTOGRAM is dispatched before PREFIX.
+       *      --- BARRIER ---
+       *   4. PREFIX is dispatched before the first SCATTER.
+       *      --- BARRIER ---
+       *   5. One or more SCATTER dispatches.
+       *
+       * Note that the `partitions` buffer can be zeroed anytime before the first
+       * scatter.
+       */
+
+      /* How many passes? */
+      uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
+      uint32_t keyval_bits = keyval_bytes * 8;
+      uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
+      uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
+
+      bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
+
+      /*
+       * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
+       *
+       * Pad fractional blocks with max-valued keyvals.
+       *
+       * Zero the histograms and partitions buffer.
+       *
+       * This assumes the partitions follow the histograms.
+       */
+
+      /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
+
+      /* How many scatter blocks? */
+      uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
+      uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
+      uint32_t scatter_blocks = (count + scatter_block_kvs - 1) / scatter_block_kvs;
+      uint32_t count_ru_scatter = scatter_blocks * scatter_block_kvs;
+
+      /*
+       * How many histogram blocks?
+       *
+       * Note that it's OK to have more max-valued digits counted by the histogram
+       * than sorted by the scatters because the sort is stable.
+       */
+      uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
+      uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
+      uint32_t histo_blocks = (count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
+      uint32_t count_ru_histo = histo_blocks * histo_block_kvs;
+
+      /* Fill with max values */
+      if (count_ru_histo > count) {
+         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + count * keyval_bytes,
+                          (count_ru_histo - count) * keyval_bytes, 0xFFFFFFFF);
+      }
+
+      /*
+       * Zero histograms and invalidate partitions.
+       *
+       * Note that the partition invalidation only needs to be performed once
+       * because the even/odd scatter dispatches rely on the previous pass to
+       * leave the partitions in an invalid state.
+       *
+       * Note that the last workgroup doesn't read/write a partition so it doesn't
+       * need to be initialized.
+       */
+      uint32_t histo_partition_count = passes + scatter_blocks - 1;
+      uint32_t pass_idx = (keyval_bytes - passes);
+
+      uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
+
+      radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
+                       histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
+
+      /*
+       * Pipeline: HISTOGRAM
+       *
+       * TODO(allanmac): All subgroups should try to process approximately the same
+       * number of blocks in order to minimize tail effects. This was implemented
+       * and reverted but should be reimplemented and benchmarked later.
+       */
+      vk_barrier_transfer_w_to_compute_r(commandBuffer);
+
+      uint64_t devaddr_histograms = internal_addr + rs->internal.histograms.offset;
+
+      /* Dispatch histogram */
+      struct rs_push_histogram push_histogram = {
+         .devaddr_histograms = devaddr_histograms,
+         .devaddr_keyvals = keyvals_even_addr,
+         .passes = passes,
+      };
+
+      radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                            sizeof(push_histogram), &push_histogram);
+
+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
+
+      vk_common_CmdDispatch(commandBuffer, histo_blocks, 1, 1);

-      info.keyvals_even.buffer = VK_NULL_HANDLE;
-      info.keyvals_even.offset = 0;
-      info.keyvals_even.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+      /*
+       * Pipeline: PREFIX
+       *
+       * Launch one workgroup per pass.
+       */
+      vk_barrier_compute_w_to_compute_r(commandBuffer);
+
+      struct rs_push_prefix push_prefix = {
+         .devaddr_histograms = devaddr_histograms,
+      };
+
+      radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                            sizeof(push_prefix), &push_prefix);

-      info.keyvals_odd = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);

-      info.internal.buffer = VK_NULL_HANDLE;
-      info.internal.offset = 0;
-      info.internal.devaddr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
+      vk_common_CmdDispatch(commandBuffer, passes, 1, 1);

-      VkDeviceAddress result_addr;
-      radix_sort_vk_sort_devaddr(cmd_buffer->device->meta_state.accel_struct_build.radix_sort, &info,
-                                 radv_device_to_handle(cmd_buffer->device), commandBuffer, &result_addr);
+      /* Pipeline: SCATTER */
+      vk_barrier_compute_w_to_compute_r(commandBuffer);
+
+      uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
+      uint64_t devaddr_partitions = internal_addr + rs->internal.partitions.offset;
+
+      struct rs_push_scatter push_scatter = {
+         .devaddr_keyvals_even = keyvals_even_addr,
+         .devaddr_keyvals_odd = keyvals_odd_addr,
+         .devaddr_partitions = devaddr_partitions,
+         .devaddr_histograms = devaddr_histograms + histogram_offset,
+         .pass_offset = (pass_idx & 3) * RS_RADIX_LOG2,
+      };

-      assert(result_addr == info.keyvals_even.devaddr || result_addr == info.keyvals_odd);
+      {
+         uint32_t pass_dword = pass_idx / 4;

-      bvh_states[i].scratch_offset = (uint32_t)(result_addr - pInfos[i].scratchData.deviceAddress);
+         radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.scatter[pass_dword].even,
+                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_scatter), &push_scatter);
+
+         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+                              rs->pipelines.named.scatter[pass_dword].even);
+      }
+
+      bool is_even = true;
+
+      while (true) {
+         vk_common_CmdDispatch(commandBuffer, scatter_blocks, 1, 1);
+
+         /* Continue? */
+         if (++pass_idx >= keyval_bytes)
+            break;
+
+         vk_barrier_compute_w_to_compute_r(commandBuffer);
+
+         is_even ^= true;
+         push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
+         push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
+
+         uint32_t pass_dword = pass_idx / 4;
+
+         /* Update push constants that changed */
+         VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
+                                       : rs->pipeline_layouts.named.scatter[pass_dword].odd;
+         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               offsetof(struct rs_push_scatter, devaddr_histograms),
+                               sizeof(push_scatter.devaddr_histograms) + sizeof(push_scatter.pass_offset),
+                               &push_scatter.devaddr_histograms);
+
+         /* Bind new pipeline */
+         VkPipeline p =
+            is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
+
+         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
+      }
    }

    cmd_buffer->state.flush_bits |= flush_bits;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1ea606c2ca111..2b0b9189f33f6 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -731,7 +731,6 @@ struct radv_meta_state {
       VkPipeline copy_pipeline;

       struct radix_sort_vk *radix_sort;
-      struct radix_sort_vk_sort_devaddr_info radix_sort_info;

       struct {
          VkBuffer buffer;
--
GitLab
From 04c77145628fe9956ae44a25ba7b1dfe401a9de8 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konstantin Seurer <konstantin.seurer@gmail.com>
|
|
||||||
Date: Wed, 16 Aug 2023 11:50:18 +0200
|
|
||||||
Subject: [PATCH 3/3] radv: Perform multiple sorts in parallel
|
|
||||||
|
|
||||||
This was the last part that didn't scale with multiple infos. Reducing
|
|
||||||
the amount of barriers in this case improves DOOM Eternal performance by
|
|
||||||
50%. (Running with low resolution)
|
|
||||||
---
 src/amd/vulkan/radv_acceleration_structure.c | 264 ++++++++++---------
 1 file changed, 143 insertions(+), 121 deletions(-)

diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 9866de2e594a8..85852453fcb29 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -598,6 +598,13 @@ struct bvh_state {
    struct acceleration_structure_layout accel_struct;
    struct scratch_layout scratch;
    struct build_config config;
+
+   /* Radix sort state */
+   uint32_t scatter_blocks;
+   uint32_t count_ru_scatter;
+   uint32_t histo_blocks;
+   uint32_t count_ru_histo;
+   struct rs_push_scatter push_scatter;
 };

 static uint32_t
@@ -734,75 +741,79 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,

    radix_sort_vk_t *rs = cmd_buffer->device->meta_state.accel_struct_build.radix_sort;

-   for (uint32_t i = 0; i < infoCount; ++i) {
-      uint32_t count = bvh_states[i].node_count;
-      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
-      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
-      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
+   /*
+    * OVERVIEW
+    *
+    * 1. Pad the keyvals in `scatter_even`.
+    * 2. Zero the `histograms` and `partitions`.
+    *    --- BARRIER ---
+    * 3. HISTOGRAM is dispatched before PREFIX.
+    *    --- BARRIER ---
+    * 4. PREFIX is dispatched before the first SCATTER.
+    *    --- BARRIER ---
+    * 5. One or more SCATTER dispatches.
+    *
+    * Note that the `partitions` buffer can be zeroed anytime before the first
+    * scatter.
+    */
+
+   /* How many passes? */
+   uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
+   uint32_t keyval_bits = keyval_bytes * 8;
+   uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
+   uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;

-      /* Anything to do? */
-      if (!count) {
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].node_count)
+         bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
+      else
          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
-         continue;
-      }
-
-      /*
-       * OVERVIEW
-       *
-       * 1. Pad the keyvals in `scatter_even`.
-       * 2. Zero the `histograms` and `partitions`.
-       *    --- BARRIER ---
-       * 3. HISTOGRAM is dispatched before PREFIX.
-       *    --- BARRIER ---
-       * 4. PREFIX is dispatched before the first SCATTER.
-       *    --- BARRIER ---
-       * 5. One or more SCATTER dispatches.
-       *
-       * Note that the `partitions` buffer can be zeroed anytime before the first
-       * scatter.
-       */
-
-      /* How many passes? */
-      uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
-      uint32_t keyval_bits = keyval_bytes * 8;
-      uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
-      uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
+   }

-      bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
+   /*
+    * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
+    *
+    * Pad fractional blocks with max-valued keyvals.
+    *
+    * Zero the histograms and partitions buffer.
+    *
+    * This assumes the partitions follow the histograms.
+    */
+
+   /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
+
+   /* How many scatter blocks? */
+   uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
+   uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
+
+   /*
+    * How many histogram blocks?
+    *
+    * Note that it's OK to have more max-valued digits counted by the histogram
+    * than sorted by the scatters because the sort is stable.
+    */
+   uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
+   uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
+
+   uint32_t pass_idx = (keyval_bytes - passes);

-      /*
-       * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
-       *
-       * Pad fractional blocks with max-valued keyvals.
-       *
-       * Zero the histograms and partitions buffer.
-       *
-       * This assumes the partitions follow the histograms.
-       */
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      if (!bvh_states[i].node_count)
+         continue;

-      /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;

-      /* How many scatter blocks? */
-      uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
-      uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
-      uint32_t scatter_blocks = (count + scatter_block_kvs - 1) / scatter_block_kvs;
-      uint32_t count_ru_scatter = scatter_blocks * scatter_block_kvs;
+      bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
+      bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;

-      /*
-       * How many histogram blocks?
-       *
-       * Note that it's OK to have more max-valued digits counted by the histogram
-       * than sorted by the scatters because the sort is stable.
-       */
-      uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
-      uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
-      uint32_t histo_blocks = (count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
-      uint32_t count_ru_histo = histo_blocks * histo_block_kvs;
+      bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
+      bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;

       /* Fill with max values */
-      if (count_ru_histo > count) {
-         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + count * keyval_bytes,
-                          (count_ru_histo - count) * keyval_bytes, 0xFFFFFFFF);
+      if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
+         radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
+                          (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
       }

       /*
@@ -815,28 +826,35 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
        * Note that the last workgroup doesn't read/write a partition so it doesn't
        * need to be initialized.
        */
-      uint32_t histo_partition_count = passes + scatter_blocks - 1;
-      uint32_t pass_idx = (keyval_bytes - passes);
+      uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;

       uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));

       radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
                        histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
+   }

-      /*
-       * Pipeline: HISTOGRAM
-       *
-       * TODO(allanmac): All subgroups should try to process approximately the same
-       * number of blocks in order to minimize tail effects. This was implemented
-       * and reverted but should be reimplemented and benchmarked later.
-       */
-      vk_barrier_transfer_w_to_compute_r(commandBuffer);
+   /*
+    * Pipeline: HISTOGRAM
+    *
+    * TODO(allanmac): All subgroups should try to process approximately the same
+    * number of blocks in order to minimize tail effects. This was implemented
+    * and reverted but should be reimplemented and benchmarked later.
+    */
+   vk_barrier_transfer_w_to_compute_r(commandBuffer);
+
+   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
+
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      if (!bvh_states[i].node_count)
+         continue;

-      uint64_t devaddr_histograms = internal_addr + rs->internal.histograms.offset;
+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;

       /* Dispatch histogram */
       struct rs_push_histogram push_histogram = {
-         .devaddr_histograms = devaddr_histograms,
+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
          .devaddr_keyvals = keyvals_even_addr,
          .passes = passes,
       };
@@ -844,83 +862,87 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
       radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                             sizeof(push_histogram), &push_histogram);

-      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.histogram);
+      vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
+   }

-      vk_common_CmdDispatch(commandBuffer, histo_blocks, 1, 1);
+   /*
+    * Pipeline: PREFIX
+    *
+    * Launch one workgroup per pass.
+    */
+   vk_barrier_compute_w_to_compute_r(commandBuffer);

-      /*
-       * Pipeline: PREFIX
-       *
-       * Launch one workgroup per pass.
-       */
-      vk_barrier_compute_w_to_compute_r(commandBuffer);
+   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
+
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      if (!bvh_states[i].node_count)
+         continue;
+
+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;

       struct rs_push_prefix push_prefix = {
-         .devaddr_histograms = devaddr_histograms,
+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
       };

       radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                             sizeof(push_prefix), &push_prefix);

-      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
-
       vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
+   }

-      /* Pipeline: SCATTER */
-      vk_barrier_compute_w_to_compute_r(commandBuffer);
+   /* Pipeline: SCATTER */
+   vk_barrier_compute_w_to_compute_r(commandBuffer);

-      uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
-      uint64_t devaddr_partitions = internal_addr + rs->internal.partitions.offset;
+   uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));

-      struct rs_push_scatter push_scatter = {
+   for (uint32_t i = 0; i < infoCount; i++) {
+      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
+      uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
+      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
+
+      bvh_states[i].push_scatter = (struct rs_push_scatter){
          .devaddr_keyvals_even = keyvals_even_addr,
          .devaddr_keyvals_odd = keyvals_odd_addr,
-         .devaddr_partitions = devaddr_partitions,
-         .devaddr_histograms = devaddr_histograms + histogram_offset,
-         .pass_offset = (pass_idx & 3) * RS_RADIX_LOG2,
+         .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
+         .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
       };
+   }

-      {
-         uint32_t pass_dword = pass_idx / 4;
+   bool is_even = true;

-         radv_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.scatter[pass_dword].even,
-                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_scatter), &push_scatter);
+   while (true) {
+      uint32_t pass_dword = pass_idx / 4;

-         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
-                              rs->pipelines.named.scatter[pass_dword].even);
-      }
+      /* Bind new pipeline */
+      VkPipeline p =
+         is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
+      radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);

-      bool is_even = true;
+      /* Update push constants that changed */
+      VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even //
+                                    : rs->pipeline_layouts.named.scatter[pass_dword].odd;

-      while (true) {
-         vk_common_CmdDispatch(commandBuffer, scatter_blocks, 1, 1);
+      for (uint32_t i = 0; i < infoCount; i++) {
+         if (!bvh_states[i].node_count)
+            continue;

-         /* Continue? */
-         if (++pass_idx >= keyval_bytes)
-            break;
+         bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;

-         vk_barrier_compute_w_to_compute_r(commandBuffer);
+         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
+                               &bvh_states[i].push_scatter);

-         is_even ^= true;
-         push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
-         push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
+         vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);

-         uint32_t pass_dword = pass_idx / 4;
+         bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
+      }

-         /* Update push constants that changed */
-         VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
-                                       : rs->pipeline_layouts.named.scatter[pass_dword].odd;
-         radv_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT,
-                               offsetof(struct rs_push_scatter, devaddr_histograms),
-                               sizeof(push_scatter.devaddr_histograms) + sizeof(push_scatter.pass_offset),
-                               &push_scatter.devaddr_histograms);
+      /* Continue? */
+      if (++pass_idx >= keyval_bytes)
+         break;

-         /* Bind new pipeline */
-         VkPipeline p =
-            is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
+      vk_barrier_compute_w_to_compute_r(commandBuffer);

-         radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
-      }
+      is_even ^= true;
    }

    cmd_buffer->state.flush_bits |= flush_bits;
--
GitLab
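
The change above amounts to phase-batched command recording: instead of recording pad/zero, histogram, prefix, and scatter back-to-back for each acceleration structure info (one compute barrier per info per phase transition), each phase is recorded for all infos and a single barrier separates the phases. A minimal standalone sketch of that pattern in plain C; the names record_phase, barrier, and phase_fn are hypothetical stand-ins for the RADV dispatch code and vk_barrier_compute_w_to_compute_r(), not actual driver API:

#include <stdint.h>

struct info_state {
   uint32_t node_count; /* 0 means there is nothing to sort for this info */
};

typedef void (*phase_fn)(uint32_t info_index);

/* Stand-in for the compute-write to compute-read barrier. */
static void barrier(void) {}

/* Record one phase for every info before any barrier is issued. */
static void
record_phase(phase_fn phase, const struct info_state *states, uint32_t info_count)
{
   for (uint32_t i = 0; i < info_count; i++) {
      if (!states[i].node_count)
         continue; /* skip empty builds, as the patch does */
      phase(i);
   }
}

/* Before: roughly info_count * (phases - 1) barriers per sort.
 * After: (phases - 1) barriers, independent of how many infos are built. */
static void
record_sort(phase_fn histogram, phase_fn prefix, phase_fn scatter,
            const struct info_state *states, uint32_t info_count)
{
   record_phase(histogram, states, info_count);
   barrier();
   record_phase(prefix, states, info_count);
   barrier();
   record_phase(scatter, states, info_count);
}

Because the per-info values no longer live in loop locals, the patch moves scatter_blocks, count_ru_scatter, histo_blocks, count_ru_histo, and the rs_push_scatter push constants into struct bvh_state so they survive across the batched loops.
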
34 debian/patches/24789.patch vendored
@@ -1,34 +0,0 @@
From 87f95fa7f24415f51391f128adf7f048358be226 Mon Sep 17 00:00:00 2001
From: Friedrich Vock <friedrich.vock@gmx.de>
Date: Sat, 19 Aug 2023 11:00:45 +0200
Subject: [PATCH] nir/load_store_vectorize: Handle intrinsics with constant
 base

This includes nir_load_stack and nir_store_stack, which are vectorized
in nir_lower_shader_calls. If not adjusted, we end up loading from
the wrong base.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9596
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9587
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24789>
---
 src/compiler/nir/nir_opt_load_store_vectorize.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index 73e6ff6d8878..4bea8fbea6ff 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -756,6 +756,8 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,

       nir_intrinsic_set_range_base(first->intrin, low_base);
       nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base);
+   } else if (nir_intrinsic_has_base(first->intrin) && info->base_src == -1 && info->deref_src == -1) {
+      nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
    }

    first->key = low->key;
--
GitLab
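
The two inserted lines handle merged intrinsics whose offset is carried in a constant `base` index rather than in a source operand (base_src == -1) or a deref (deref_src == -1). A toy model in plain C of why the surviving intrinsic must inherit the base of the lower one; the struct below is hypothetical and is not the NIR API:

#include <stdint.h>

struct load {
   uint32_t base;      /* constant byte offset, like nir_intrinsic_base() */
   uint32_t num_bytes;
};

/* Merge two adjacent constant-base loads into one wider load. `first` is
 * whichever instruction the vectorizer keeps; if it happens to be the
 * higher of the two and its base is left untouched, the merged load reads
 * from the wrong offset - the wrong-base symptom in issues 9596/9587. */
static struct load
merge_loads(struct load first, struct load other)
{
   struct load low = first.base <= other.base ? first : other;
   struct load high = first.base <= other.base ? other : first;

   struct load merged = first;
   merged.base = low.base; /* mirrors the added nir_intrinsic_set_base() call */
   merged.num_bytes = (high.base + high.num_bytes) - low.base;
   return merged;
}
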
3 debian/patches/series vendored
@@ -1,6 +1,3 @@
 #07_gallium-fix-build-failure-on-powerpcspe.diff
 #path_max.diff
 #src_glx_dri_common.h.diff
-#21929.patch
-#24720.patch
-#24789.patch
32 debian/rules vendored
@@ -13,12 +13,10 @@ DEB_HOST_ARCH_OS ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_OS)
 DEB_HOST_ARCH_CPU ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU)

 # for finding the correct llvm-config when meson doesn't know about it yet
-LLVM_VERSION = 15
+LLVM_VERSION = 17
 export PATH:=/usr/lib/llvm-$(LLVM_VERSION)/bin/:$(PATH)

 export DEB_BUILD_MAINT_OPTIONS=optimize=-lto
-# enable LTO everywhere:
-#confflags += -Db_lto=true

 ifeq (,$(filter $(DEB_HOST_ARCH), armhf ppc64el sh3 sh4))
 buildflags = \
@@ -52,11 +50,9 @@ confflags_SSE2 = -Dsse2=true

 LLVM_ARCHS = amd64 arm64 armel armhf i386 mips64el mipsel powerpc ppc64 ppc64el riscv64 s390x sparc64 x32
 RUSTICL_ARCHS = amd64 arm64 armel armhf mips64el mipsel ppc64el s390x
-#ifeq ($(DEB_DISTRIBUTION), jammy)
-# RUSTICL_ARCHS = arm64
-#else
 VALGRIND_ARCHS = amd64 arm64 armhf i386 mips64el mipsel powerpc ppc64 ppc64el s390x
 WINE_ARCHS = amd64 arm64 armel armhf i386 powerpc
+WSL_ARCHS = amd64 arm64

 # hurd doesn't do direct rendering
 ifeq ($(DEB_HOST_ARCH_OS), hurd)
@@ -74,7 +70,12 @@ else
 # radv/lavapipe needs LLVM and the Vulkan loader, so only build on the subset of
 # arches where we have LLVM enabled and where the Vulkan loader is built.
 ifneq (,$(filter $(DEB_HOST_ARCH), amd64 arm64 armel armhf i386 mips64el mipsel powerpc ppc64 ppc64el s390x sparc64))
-VULKAN_DRIVERS += amd swrast virtio nouveau-experimental
+VULKAN_DRIVERS += amd swrast virtio
+# ifeq ($(DEB_DISTRIBUTION), noble)
+# ifeq (,$(filter $(DEB_HOST_ARCH), i386))
+# VULKAN_DRIVERS += nouveau-experimental
+# endif
+# endif
 endif

 # Only enable amd on riscv64, swrast needs CPU JIT support which doesn't work properly yet
@@ -90,7 +91,8 @@ else

 # Freedreno requires arm in addition
 ifneq (,$(filter arm arm64,$(DEB_HOST_ARCH_CPU)))
-GALLIUM_DRIVERS += freedreno asahi
+# GALLIUM_DRIVERS += freedreno asahi #9697
+GALLIUM_DRIVERS += freedreno
 endif

 # etnaviv, tegra, vc4 and v3d kernel support are only available on armhf and arm64
@@ -123,9 +125,9 @@ else
 endif

 # WSL supports only amd64 and arm64
-ifneq (,$(filter amd64 arm64,$(DEB_HOST_ARCH)))
+ifneq (,$(filter $(DEB_HOST_ARCH), $(WSL_ARCHS)))
 GALLIUM_DRIVERS += d3d12
-# VULKAN_DRIVERS += microsoft-experimental
+VULKAN_DRIVERS += microsoft-experimental
 endif
 endif

@@ -153,8 +155,12 @@ else

 # Build rusticl for archs where rustc is available
 ifneq (,$(filter $(DEB_HOST_ARCH), $(RUSTICL_ARCHS)))
+ifeq ($(DEB_DISTRIBUTION), jammy)
+confflags_GALLIUM += -Dgallium-rusticl=false
+else
 confflags_GALLIUM += -Dgallium-rusticl=true
 endif
+endif

 # nine makes sense only on archs that build wine
 ifneq (,$(filter $(DEB_HOST_ARCH), $(WINE_ARCHS)))
@@ -166,14 +172,10 @@ else

 ifeq (,$(filter pkg.mesa.nolibva,$(DEB_BUILD_PROFILES)))
 confflags_GALLIUM += -Dgallium-va=enabled
-confflags_GALLIUM += -Dvideo-codecs="vc1dec, h264dec, h264enc, h265dec, h265enc"
+confflags_GALLIUM += -Dvideo-codecs="vc1dec, h264dec, h264enc, h265dec, h265enc, vp9dec, av1dec, av1enc"
 endif
 endif

-ifeq ($(DEB_HOST_ARCH), i386)
-confflags_SSE2 = -Dsse2=false
-endif
-
 empty:=
 space := $(empty) $(empty)
 comma := ,
2 main.sh
@@ -8,7 +8,7 @@ cd ./mesa-git
 git submodule update --init
 sed -i ' 1 s/.*/& - PikaOS YellowBirb Mesa Git /' ./VERSION
 touch debian/changelog
-echo -e "mesa-git (23.3-99pika"$(date '+%Y%m%d')".git.1."$(git rev-parse --short HEAD)") lunar; urgency=medium\n\n * New GIT Release\n\n -- Ward Nakchbandi <hotrod.master@hotmail.com> Sat, 01 Oct 2022 14:50:00 +0200" > debian/changelog
+echo -e "mesa-git (24.0-100pika"$(date '+%Y%m%d')".git.1."$(git rev-parse --short HEAD)") lunar; urgency=medium\n\n * New GIT Release\n\n -- Ward Nakchbandi <hotrod.master@hotmail.com> Sat, 01 Oct 2022 14:50:00 +0200" > debian/changelog

 # Get build deps
 apt-get build-dep ./ -y
@@ -8,7 +8,7 @@ cd ./mesa-git
 git submodule update --init
 sed -i ' 1 s/.*/& - PikaOS YellowBirb Mesa Git /' ./VERSION
 touch debian/changelog
-echo -e "mesa-git (23.3-99pika"$(date '+%Y%m%d')".git.1."$(git rev-parse --short HEAD)") lunar; urgency=medium\n\n * New GIT Release\n\n -- Ward Nakchbandi <hotrod.master@hotmail.com> Sat, 01 Oct 2022 14:50:00 +0200" > debian/changelog
+echo -e "mesa-git (24.0-99pika"$(date '+%Y%m%d')".git.1."$(git rev-parse --short HEAD)") lunar; urgency=medium\n\n * New GIT Release\n\n -- Ward Nakchbandi <hotrod.master@hotmail.com> Sat, 01 Oct 2022 14:50:00 +0200" > debian/changelog

 # Get build deps
 apt-get build-dep ./ -y