Create 21929.patch
This commit is contained in:
parent
f846ef8ffd
commit
081aaab17e
669
debian/patches/21929.patch
vendored
Normal file
669
debian/patches/21929.patch
vendored
Normal file
@@ -0,0 +1,669 @@
|
||||
From b52779b8fa24d3e6f23b4db4cfa4116f1149d6eb Mon Sep 17 00:00:00 2001
|
||||
From: Konstantin Seurer <konstantin.seurer@gmail.com>
|
||||
Date: Sat, 24 Jun 2023 15:49:13 +0200
|
||||
Subject: [PATCH 1/4] radv: Add rt.monolithic to radv_pipeline_key
|
||||
|
||||
---
|
||||
src/amd/vulkan/radv_shader.h | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
|
||||
index 9156f28f34dc..a8ea3aab13d4 100644
|
||||
--- a/src/amd/vulkan/radv_shader.h
|
||||
+++ b/src/amd/vulkan/radv_shader.h
|
||||
@@ -135,6 +135,10 @@ struct radv_pipeline_key {
|
||||
|
||||
bool line_smooth_enabled;
|
||||
} ps;
|
||||
+
|
||||
+ struct {
|
||||
+ bool monolithic;
|
||||
+ } rt;
|
||||
};
|
||||
|
||||
struct radv_nir_compiler_options {
|
||||
--
|
||||
GitLab
|
||||
|
||||
|
||||
From 69761a71f9dbfefe9a8bf13a310ae7004962e786 Mon Sep 17 00:00:00 2001
|
||||
From: Konstantin Seurer <konstantin.seurer@gmail.com>
|
||||
Date: Sat, 24 Jun 2023 15:46:51 +0200
|
||||
Subject: [PATCH 2/4] radv/rt: Store NIR shaders separately
|
||||
|
||||
In order to compile monolithic shaders with pipeline libraries, we need
|
||||
to keep the NIR around for inlining recursive stages.
|
||||
---
|
||||
src/amd/vulkan/radv_pipeline_cache.c | 9 ++--
|
||||
src/amd/vulkan/radv_pipeline_rt.c | 81 ++++++++++++++++++++--------
|
||||
src/amd/vulkan/radv_private.h | 1 +
|
||||
src/amd/vulkan/radv_rt_shader.c | 7 ++-
|
||||
4 files changed, 67 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
|
||||
index 5bbbc755ae11..7e4c6f889813 100644
|
||||
--- a/src/amd/vulkan/radv_pipeline_cache.c
|
||||
+++ b/src/amd/vulkan/radv_pipeline_cache.c
|
||||
@@ -481,11 +481,12 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip
|
||||
pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]);
|
||||
|
||||
for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
|
||||
- if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
|
||||
+ if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i]))
|
||||
pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base;
|
||||
- } else if (is_library) {
|
||||
- pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
|
||||
- complete &= pipeline->stages[i].shader != NULL;
|
||||
+
|
||||
+ if (is_library) {
|
||||
+ pipeline->stages[i].nir = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1);
|
||||
+ complete &= pipeline->stages[i].nir != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
index c86ea3a50846..21be900bb3dd 100644
|
||||
--- a/src/amd/vulkan/radv_pipeline_rt.c
|
||||
+++ b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
@@ -263,7 +263,10 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, st
|
||||
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
|
||||
struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
|
||||
for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
|
||||
- stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
|
||||
+ stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir);
|
||||
+ if (library_pipeline->stages[j].shader)
|
||||
+ stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader);
|
||||
+
|
||||
stages[idx].stage = library_pipeline->stages[j].stage;
|
||||
stages[idx].stack_size = library_pipeline->stages[j].stack_size;
|
||||
memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH);
|
||||
@@ -462,45 +465,71 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
|
||||
return VK_PIPELINE_COMPILE_REQUIRED;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
- struct radv_ray_tracing_stage *stages = pipeline->stages;
|
||||
+ struct radv_ray_tracing_stage *rt_stages = pipeline->stages;
|
||||
+
|
||||
+ struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage));
|
||||
+ if (!stages)
|
||||
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
|
||||
+ if (rt_stages[idx].shader || rt_stages[idx].nir)
|
||||
+ continue;
|
||||
+
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
- struct radv_shader_stage stage;
|
||||
- radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage);
|
||||
|
||||
- if (stages[idx].shader)
|
||||
- goto feedback;
|
||||
+ struct radv_shader_stage *stage = &stages[idx];
|
||||
+ radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, stage);
|
||||
|
||||
/* precompile the shader */
|
||||
- stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
|
||||
+ stage->nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout);
|
||||
+
|
||||
+ /* Cases in which we need to keep around the NIR:
|
||||
+ * - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader.
|
||||
+ * - non-recursive: Non-recursive shaders are inlined into the traversal shader.
|
||||
+ * - monolithic: Callable shaders (chit/miss) are inlined into the raygen shader.
|
||||
+ */
|
||||
+ bool compiled = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]);
|
||||
+ bool library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
|
||||
+ bool nir_needed = library || !compiled || (key->rt.monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN);
|
||||
+ nir_needed &= !rt_stages[idx].nir;
|
||||
+ if (nir_needed) {
|
||||
+ rt_stages[idx].stack_size = stage->nir->scratch_size;
|
||||
+ rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1,
|
||||
+ !key->optimisations_disabled);
|
||||
+ }
|
||||
|
||||
- if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
|
||||
- uint32_t stack_size = 0;
|
||||
+ stage->feedback.duration = os_time_get_nano() - stage_start;
|
||||
+ }
|
||||
|
||||
+ for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
|
||||
+ int64_t stage_start = os_time_get_nano();
|
||||
+ struct radv_shader_stage *stage = &stages[idx];
|
||||
+
|
||||
+ /* Cases in which we need to compile the shader (raygen/callable/chit/miss):
|
||||
+ * TODO: - monolithic: Force compilation if there already is a compiled shader
|
||||
+ * since pipeline library shaders use separate compilation.
|
||||
+ * - separate: Compile any recursive stage if it wasn't compiled yet.
|
||||
+ */
|
||||
+ bool shader_needed = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]) && !rt_stages[idx].shader;
|
||||
+ if (shader_needed) {
|
||||
+ uint32_t stack_size = 0;
|
||||
struct radv_serialized_shader_arena_block *replay_block =
|
||||
capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL;
|
||||
|
||||
struct radv_shader *shader;
|
||||
result =
|
||||
- radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &stage, &stack_size, replay_block, &shader);
|
||||
- stages[idx].stack_size = stack_size;
|
||||
- stages[idx].shader = shader ? &shader->base : NULL;
|
||||
- } else {
|
||||
- stages[idx].stack_size = stage.nir->scratch_size;
|
||||
- stages[idx].shader =
|
||||
- radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, stages[idx].sha1, !key->optimisations_disabled);
|
||||
- }
|
||||
- ralloc_free(stage.nir);
|
||||
+ radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, stage, &stack_size, replay_block, &shader);
|
||||
+ if (result != VK_SUCCESS)
|
||||
+ goto cleanup;
|
||||
|
||||
- if (result != VK_SUCCESS)
|
||||
- return result;
|
||||
+ rt_stages[idx].stack_size = stack_size;
|
||||
+ rt_stages[idx].shader = shader ? &shader->base : NULL;
|
||||
+ }
|
||||
|
||||
- feedback:
|
||||
if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) {
|
||||
assert(idx < creation_feedback->pipelineStageCreationFeedbackCount);
|
||||
- stage.feedback.duration = os_time_get_nano() - stage_start;
|
||||
- creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage.feedback;
|
||||
+ stage->feedback.duration = os_time_get_nano() - stage_start;
|
||||
+ creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -527,6 +556,10 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
|
||||
result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &traversal_stage, NULL, NULL,
|
||||
&pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
|
||||
|
||||
+cleanup:
|
||||
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
|
||||
+ ralloc_free(stages[i].nir);
|
||||
+ free(stages);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -732,6 +765,8 @@ void
|
||||
radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
|
||||
{
|
||||
for (unsigned i = 0; i < pipeline->stage_count; i++) {
|
||||
+ if (pipeline->stages[i].nir)
|
||||
+ vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir);
|
||||
if (pipeline->stages[i].shader)
|
||||
vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].shader);
|
||||
}
|
||||
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
|
||||
index d51cbd3e173f..06755bba70b9 100644
|
||||
--- a/src/amd/vulkan/radv_private.h
|
||||
+++ b/src/amd/vulkan/radv_private.h
|
||||
@@ -2364,6 +2364,7 @@ struct radv_ray_tracing_group {
|
||||
};
|
||||
|
||||
struct radv_ray_tracing_stage {
|
||||
+ struct vk_pipeline_cache_object *nir;
|
||||
struct vk_pipeline_cache_object *shader;
|
||||
gl_shader_stage stage;
|
||||
uint32_t stack_size;
|
||||
diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
|
||||
index cc92beebc350..3def324bcccf 100644
|
||||
--- a/src/amd/vulkan/radv_rt_shader.c
|
||||
+++ b/src/amd/vulkan/radv_rt_shader.c
|
||||
@@ -1132,7 +1132,7 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers
|
||||
if (is_dup)
|
||||
continue;
|
||||
|
||||
- nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader);
|
||||
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].nir);
|
||||
assert(nir_stage);
|
||||
|
||||
insert_rt_case(b, nir_stage, vars, sbt_idx, data->pipeline->groups[i].handle.any_hit_index);
|
||||
@@ -1262,13 +1262,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
|
||||
if (is_dup)
|
||||
continue;
|
||||
|
||||
- nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader);
|
||||
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].nir);
|
||||
assert(nir_stage);
|
||||
|
||||
nir_shader *any_hit_stage = NULL;
|
||||
if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) {
|
||||
- any_hit_stage =
|
||||
- radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader);
|
||||
+ any_hit_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].nir);
|
||||
assert(any_hit_stage);
|
||||
|
||||
/* reserve stack size for any_hit before it is inlined */
|
||||
--
|
||||
GitLab
|
||||
|
||||
|
||||
From f3715521ae26fca036ba0a9bb2c700b3c9a59a10 Mon Sep 17 00:00:00 2001
|
||||
From: Konstantin Seurer <konstantin.seurer@gmail.com>
|
||||
Date: Mon, 21 Aug 2023 13:32:53 +0200
|
||||
Subject: [PATCH 3/4] radv/rt: Add monolithic raygen lowering
|
||||
|
||||
Ray traversal is inlined to allow for constant folding and avoid
|
||||
spilling.
|
||||
---
|
||||
src/amd/vulkan/radv_pipeline_rt.c | 11 +-
|
||||
src/amd/vulkan/radv_rt_shader.c | 276 ++++++++++++++++++++++++++----
|
||||
src/amd/vulkan/radv_shader.h | 3 +-
|
||||
3 files changed, 248 insertions(+), 42 deletions(-)
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
index 21be900bb3dd..00bdd6244432 100644
|
||||
--- a/src/amd/vulkan/radv_pipeline_rt.c
|
||||
+++ b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
@@ -356,9 +356,8 @@ move_rt_instructions(nir_shader *shader)
|
||||
static VkResult
|
||||
radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key,
|
||||
- const struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage,
|
||||
- uint32_t *stack_size, struct radv_serialized_shader_arena_block *replay_block,
|
||||
- struct radv_shader **out_shader)
|
||||
+ struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, uint32_t *stack_size,
|
||||
+ struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
|
||||
{
|
||||
struct radv_shader_binary *binary;
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
|
||||
@@ -384,7 +383,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
uint32_t num_resume_shaders = 0;
|
||||
nir_shader **resume_shaders = NULL;
|
||||
|
||||
- if (stage->stage != MESA_SHADER_INTERSECTION) {
|
||||
+ bool monolithic_raygen = pipeline_key->rt.monolithic && stage->stage == MESA_SHADER_RAYGEN;
|
||||
+ if (!monolithic_raygen && stage->stage != MESA_SHADER_INTERSECTION) {
|
||||
nir_builder b = nir_builder_at(nir_after_cf_list(&nir_shader_get_entrypoint(stage->nir)->body));
|
||||
nir_rt_return_amd(&b);
|
||||
|
||||
@@ -411,7 +411,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
for (uint32_t i = 0; i < num_shaders; i++) {
|
||||
struct radv_shader_stage temp_stage = *stage;
|
||||
temp_stage.nir = shaders[i];
|
||||
- radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0);
|
||||
+ radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
|
||||
+ pipeline, pipeline_key);
|
||||
radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled);
|
||||
radv_postprocess_nir(device, pipeline_key, &temp_stage);
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c
|
||||
index 3def324bcccf..362d91859700 100644
|
||||
--- a/src/amd/vulkan/radv_rt_shader.c
|
||||
+++ b/src/amd/vulkan/radv_rt_shader.c
|
||||
@@ -1306,6 +1306,87 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
+static void
|
||||
+visit_closest_hit_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
|
||||
+ struct rt_variables *vars)
|
||||
+{
|
||||
+ nir_def *sbt_idx = nir_load_var(b, vars->idx);
|
||||
+
|
||||
+ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
|
||||
+ nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
|
||||
+
|
||||
+ for (unsigned i = 0; i < pipeline->group_count; ++i) {
|
||||
+ struct radv_ray_tracing_group *group = &pipeline->groups[i];
|
||||
+
|
||||
+ unsigned shader_id = VK_SHADER_UNUSED_KHR;
|
||||
+ if (group->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
|
||||
+ shader_id = group->recursive_shader;
|
||||
+
|
||||
+ if (shader_id == VK_SHADER_UNUSED_KHR)
|
||||
+ continue;
|
||||
+
|
||||
+ /* Avoid emitting stages with the same shaders/handles multiple times. */
|
||||
+ bool is_dup = false;
|
||||
+ for (unsigned j = 0; j < i; ++j)
|
||||
+ if (pipeline->groups[j].handle.closest_hit_index == pipeline->groups[i].handle.closest_hit_index)
|
||||
+ is_dup = true;
|
||||
+
|
||||
+ if (is_dup)
|
||||
+ continue;
|
||||
+
|
||||
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
|
||||
+ assert(nir_stage);
|
||||
+
|
||||
+ insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.closest_hit_index);
|
||||
+ ralloc_free(nir_stage);
|
||||
+ }
|
||||
+
|
||||
+ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR))
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+visit_miss_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline,
|
||||
+ struct rt_variables *vars)
|
||||
+{
|
||||
+ nir_def *sbt_idx = nir_load_var(b, vars->idx);
|
||||
+
|
||||
+ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
|
||||
+ nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
|
||||
+
|
||||
+ for (unsigned i = 0; i < pipeline->group_count; ++i) {
|
||||
+ struct radv_ray_tracing_group *group = &pipeline->groups[i];
|
||||
+
|
||||
+ unsigned shader_id = VK_SHADER_UNUSED_KHR;
|
||||
+ if (group->type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
|
||||
+ shader_id = group->recursive_shader;
|
||||
+
|
||||
+ if (shader_id == VK_SHADER_UNUSED_KHR)
|
||||
+ continue;
|
||||
+
|
||||
+ if (pipeline->stages[shader_id].stage != MESA_SHADER_MISS)
|
||||
+ continue;
|
||||
+
|
||||
+ /* Avoid emitting stages with the same shaders/handles multiple times. */
|
||||
+ bool is_dup = false;
|
||||
+ for (unsigned j = 0; j < i; ++j)
|
||||
+ if (pipeline->groups[j].handle.general_index == pipeline->groups[i].handle.general_index)
|
||||
+ is_dup = true;
|
||||
+
|
||||
+ if (is_dup)
|
||||
+ continue;
|
||||
+
|
||||
+ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir);
|
||||
+ assert(nir_stage);
|
||||
+
|
||||
+ insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.general_index);
|
||||
+ ralloc_free(nir_stage);
|
||||
+ }
|
||||
+
|
||||
+ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR))
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
static void
|
||||
store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
|
||||
{
|
||||
@@ -1414,25 +1495,47 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
||||
/* Register storage for hit attributes */
|
||||
nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
|
||||
|
||||
- for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
|
||||
- hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
|
||||
+ if (!key->rt.monolithic || b->shader->info.stage != MESA_SHADER_RAYGEN) {
|
||||
+ for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
|
||||
+ hit_attribs[i] =
|
||||
+ nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
|
||||
|
||||
- lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
|
||||
+ lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
|
||||
+ }
|
||||
|
||||
/* Initialize follow-up shader. */
|
||||
nir_push_if(b, nir_load_var(b, trav_vars.hit));
|
||||
{
|
||||
- for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
|
||||
- nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
|
||||
- nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
|
||||
- nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
|
||||
- nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
|
||||
+ if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
|
||||
+ load_sbt_entry(b, vars, nir_load_var(b, vars->idx), SBT_HIT, SBT_CLOSEST_HIT_IDX);
|
||||
+
|
||||
+ nir_def *should_return =
|
||||
+ nir_test_mask(b, nir_load_var(b, vars->cull_mask_and_flags), SpvRayFlagsSkipClosestHitShaderKHRMask);
|
||||
+
|
||||
+ /* should_return is set if we had a hit but we won't be calling the closest hit
|
||||
+ * shader and hence need to return immediately to the calling shader. */
|
||||
+ nir_push_if(b, nir_inot(b, should_return));
|
||||
+ visit_closest_hit_shaders(device, b, pipeline, vars);
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ } else {
|
||||
+ for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
|
||||
+ nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
|
||||
+ nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
|
||||
+ nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
|
||||
+ nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
|
||||
+ }
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
- /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
|
||||
- * for miss shaders if none of the rays miss. */
|
||||
- nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
|
||||
+ if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) {
|
||||
+ load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, SBT_GENERAL_IDX);
|
||||
+
|
||||
+ visit_miss_shaders(device, b, pipeline, vars);
|
||||
+ } else {
|
||||
+ /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer
|
||||
+ * for miss shaders if none of the rays miss. */
|
||||
+ nir_execute_miss_amd(b, nir_load_var(b, vars->tmax));
|
||||
+ }
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
@@ -1477,6 +1580,98 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
+struct lower_rt_instruction_monolithic_state {
|
||||
+ struct radv_device *device;
|
||||
+ struct radv_ray_tracing_pipeline *pipeline;
|
||||
+ const struct radv_pipeline_key *key;
|
||||
+ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo;
|
||||
+
|
||||
+ struct rt_variables *vars;
|
||||
+};
|
||||
+
|
||||
+static bool
|
||||
+lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
|
||||
+{
|
||||
+ if (instr->type != nir_instr_type_intrinsic)
|
||||
+ return false;
|
||||
+
|
||||
+ b->cursor = nir_after_instr(instr);
|
||||
+
|
||||
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
+
|
||||
+ struct lower_rt_instruction_monolithic_state *state = data;
|
||||
+ struct rt_variables *vars = state->vars;
|
||||
+
|
||||
+ switch (intr->intrinsic) {
|
||||
+ case nir_intrinsic_execute_callable:
|
||||
+ unreachable("nir_intrinsic_execute_callable");
|
||||
+ case nir_intrinsic_trace_ray: {
|
||||
+ nir_store_var(b, vars->arg, nir_iadd_imm(b, intr->src[10].ssa, -b->shader->scratch_size), 1);
|
||||
+
|
||||
+ /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
|
||||
+ nir_store_var(b, vars->accel_struct, intr->src[0].ssa, 0x1);
|
||||
+ nir_store_var(b, vars->cull_mask_and_flags, nir_ior(b, nir_ishl_imm(b, intr->src[2].ssa, 24), intr->src[1].ssa),
|
||||
+ 0x1);
|
||||
+ nir_store_var(b, vars->sbt_offset, nir_iand_imm(b, intr->src[3].ssa, 0xf), 0x1);
|
||||
+ nir_store_var(b, vars->sbt_stride, nir_iand_imm(b, intr->src[4].ssa, 0xf), 0x1);
|
||||
+ nir_store_var(b, vars->miss_index, nir_iand_imm(b, intr->src[5].ssa, 0xffff), 0x1);
|
||||
+ nir_store_var(b, vars->origin, intr->src[6].ssa, 0x7);
|
||||
+ nir_store_var(b, vars->tmin, intr->src[7].ssa, 0x1);
|
||||
+ nir_store_var(b, vars->direction, intr->src[8].ssa, 0x7);
|
||||
+ nir_store_var(b, vars->tmax, intr->src[9].ssa, 0x1);
|
||||
+
|
||||
+ nir_def *stack_ptr = nir_load_var(b, vars->stack_ptr);
|
||||
+ nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);
|
||||
+
|
||||
+ radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, state->key, b, vars);
|
||||
+ b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size *
|
||||
+ MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
|
||||
+
|
||||
+ nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);
|
||||
+
|
||||
+ nir_instr_remove(instr);
|
||||
+ return true;
|
||||
+ }
|
||||
+ case nir_intrinsic_rt_resume:
|
||||
+ unreachable("nir_intrinsic_rt_resume");
|
||||
+ case nir_intrinsic_rt_return_amd:
|
||||
+ unreachable("nir_intrinsic_rt_return_amd");
|
||||
+ case nir_intrinsic_execute_closest_hit_amd:
|
||||
+ unreachable("nir_intrinsic_execute_closest_hit_amd");
|
||||
+ case nir_intrinsic_execute_miss_amd:
|
||||
+ unreachable("nir_intrinsic_execute_miss_amd");
|
||||
+ default:
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+lower_rt_instructions_monolithic(nir_shader *shader, struct radv_device *device,
|
||||
+ struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key,
|
||||
+ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct rt_variables *vars)
|
||||
+{
|
||||
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
+
|
||||
+ struct lower_rt_instruction_monolithic_state state = {
|
||||
+ .device = device,
|
||||
+ .pipeline = pipeline,
|
||||
+ .key = key,
|
||||
+ .pCreateInfo = pCreateInfo,
|
||||
+ .vars = vars,
|
||||
+ };
|
||||
+
|
||||
+ nir_shader_instructions_pass(shader, lower_rt_instruction_monolithic, nir_metadata_none, &state);
|
||||
+ nir_index_ssa_defs(impl);
|
||||
+
|
||||
+ /* Register storage for hit attributes */
|
||||
+ nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)];
|
||||
+
|
||||
+ for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++)
|
||||
+ hit_attribs[i] = nir_local_variable_create(impl, glsl_uint_type(), "ahit_attrib");
|
||||
+
|
||||
+ lower_hit_attribs(shader, hit_attribs, 0);
|
||||
+}
|
||||
+
|
||||
/** Select the next shader based on priorities:
|
||||
*
|
||||
* Detect the priority of the shader stage by the lowest bits in the address (low to high):
|
||||
@@ -1517,13 +1712,18 @@ select_next_shader(nir_builder *b, nir_def *shader_addr, unsigned wave_size)
|
||||
void
|
||||
radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
|
||||
const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size,
|
||||
- bool resume_shader)
|
||||
+ bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
+ const struct radv_pipeline_key *key)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
const VkPipelineCreateFlagBits2KHR create_flags = radv_get_pipeline_create_flags(pCreateInfo);
|
||||
|
||||
struct rt_variables vars = create_rt_variables(shader, create_flags);
|
||||
+
|
||||
+ if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN)
|
||||
+ lower_rt_instructions_monolithic(shader, device, pipeline, key, pCreateInfo, &vars);
|
||||
+
|
||||
lower_rt_instructions(shader, &vars, true);
|
||||
|
||||
if (stack_size) {
|
||||
@@ -1585,32 +1785,36 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
|
||||
if (shader_guard)
|
||||
nir_pop_if(&b, shader_guard);
|
||||
|
||||
- /* select next shader */
|
||||
b.cursor = nir_after_cf_list(&impl->body);
|
||||
|
||||
- shader_addr = nir_load_var(&b, vars.shader_addr);
|
||||
- nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
|
||||
-
|
||||
- /* store back all variables to registers */
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
|
||||
-
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
|
||||
- ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
|
||||
+ if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) {
|
||||
+ nir_terminate(&b);
|
||||
+ } else {
|
||||
+ /* select next shader */
|
||||
+ shader_addr = nir_load_var(&b, vars.shader_addr);
|
||||
+ nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next);
|
||||
+
|
||||
+ /* store back all variables to registers */
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr);
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
|
||||
+
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags));
|
||||
+ ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind));
|
||||
+ }
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
|
||||
index a8ea3aab13d4..d2be3cc9ea01 100644
|
||||
--- a/src/amd/vulkan/radv_shader.h
|
||||
+++ b/src/amd/vulkan/radv_shader.h
|
||||
@@ -632,7 +632,8 @@ nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShad
|
||||
|
||||
void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
|
||||
const struct radv_shader_args *args, const struct radv_shader_info *info,
|
||||
- uint32_t *stack_size, bool resume_shader);
|
||||
+ uint32_t *stack_size, bool resume_shader, struct radv_device *device,
|
||||
+ struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key);
|
||||
|
||||
struct radv_shader_stage;
|
||||
|
||||
--
|
||||
GitLab
|
||||
|
||||
|
||||
From 6482cce2d2895fed40631ad9f9256aa515cb5750 Mon Sep 17 00:00:00 2001
|
||||
From: Konstantin Seurer <konstantin.seurer@gmail.com>
|
||||
Date: Sat, 24 Jun 2023 16:11:16 +0200
|
||||
Subject: [PATCH 4/4] radv/rt: Use monolithic pipelines
|
||||
|
||||
Only available for non-recursive pipelines that do not have callables.
|
||||
---
|
||||
src/amd/vulkan/radv_pipeline_rt.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
index 00bdd6244432..f1ef3d90cd1b 100644
|
||||
--- a/src/amd/vulkan/radv_pipeline_rt.c
|
||||
+++ b/src/amd/vulkan/radv_pipeline_rt.c
|
||||
@@ -103,6 +103,17 @@ radv_generate_rt_pipeline_key(const struct radv_device *device, const struct rad
|
||||
}
|
||||
}
|
||||
|
||||
+ if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
|
||||
+ key.rt.monolithic = pCreateInfo->maxPipelineRayRecursionDepth <= 1;
|
||||
+
|
||||
+ for (uint32_t i = 0; i < pipeline->stage_count; i++) {
|
||||
+ if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) {
|
||||
+ key.rt.monolithic = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
return key;
|
||||
}
|
||||
|
||||
--
|
Loading…
x
Reference in New Issue
Block a user