From 081aaab17ed1e246bc11debfce114677f0938e59 Mon Sep 17 00:00:00 2001 From: ferrreo Date: Thu, 24 Aug 2023 15:50:11 +0100 Subject: [PATCH] Create 21929.patch --- debian/patches/21929.patch | 669 +++++++++++++++++++++++++++++++++++++ 1 file changed, 669 insertions(+) create mode 100644 debian/patches/21929.patch diff --git a/debian/patches/21929.patch b/debian/patches/21929.patch new file mode 100644 index 0000000..2bed3a3 --- /dev/null +++ b/debian/patches/21929.patch @@ -0,0 +1,669 @@ +From b52779b8fa24d3e6f23b4db4cfa4116f1149d6eb Mon Sep 17 00:00:00 2001 +From: Konstantin Seurer +Date: Sat, 24 Jun 2023 15:49:13 +0200 +Subject: [PATCH 1/4] radv: Add rt.monolithic to radv_pipeline_key + +--- + src/amd/vulkan/radv_shader.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h +index 9156f28f34dc..a8ea3aab13d4 100644 +--- a/src/amd/vulkan/radv_shader.h ++++ b/src/amd/vulkan/radv_shader.h +@@ -135,6 +135,10 @@ struct radv_pipeline_key { + + bool line_smooth_enabled; + } ps; ++ ++ struct { ++ bool monolithic; ++ } rt; + }; + + struct radv_nir_compiler_options { +-- +GitLab + + +From 69761a71f9dbfefe9a8bf13a310ae7004962e786 Mon Sep 17 00:00:00 2001 +From: Konstantin Seurer +Date: Sat, 24 Jun 2023 15:46:51 +0200 +Subject: [PATCH 2/4] radv/rt: Store NIR shaders separately + +In order to compile monolithic shaders with pipeline libraries, we need +to keep the NIR around for inlining recursive stages. +--- + src/amd/vulkan/radv_pipeline_cache.c | 9 ++-- + src/amd/vulkan/radv_pipeline_rt.c | 81 ++++++++++++++++++++-------- + src/amd/vulkan/radv_private.h | 1 + + src/amd/vulkan/radv_rt_shader.c | 7 ++- + 4 files changed, 67 insertions(+), 31 deletions(-) + +diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c +index 5bbbc755ae11..7e4c6f889813 100644 +--- a/src/amd/vulkan/radv_pipeline_cache.c ++++ b/src/amd/vulkan/radv_pipeline_cache.c +@@ -481,11 +481,12 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip + pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]); + + for (unsigned i = 0; i < pCreateInfo->stageCount; i++) { +- if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) { ++ if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) + pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base; +- } else if (is_library) { +- pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1); +- complete &= pipeline->stages[i].shader != NULL; ++ ++ if (is_library) { ++ pipeline->stages[i].nir = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1); ++ complete &= pipeline->stages[i].nir != NULL; + } + } + +diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c +index c86ea3a50846..21be900bb3dd 100644 +--- a/src/amd/vulkan/radv_pipeline_rt.c ++++ b/src/amd/vulkan/radv_pipeline_rt.c +@@ -263,7 +263,10 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, st + RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); + struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline); + for (unsigned j = 0; j < library_pipeline->stage_count; ++j) { +- stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader); ++ stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir); ++ if 
(library_pipeline->stages[j].shader) ++ stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader); ++ + stages[idx].stage = library_pipeline->stages[j].stage; + stages[idx].stack_size = library_pipeline->stages[j].stack_size; + memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH); +@@ -462,45 +465,71 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca + return VK_PIPELINE_COMPILE_REQUIRED; + VkResult result = VK_SUCCESS; + +- struct radv_ray_tracing_stage *stages = pipeline->stages; ++ struct radv_ray_tracing_stage *rt_stages = pipeline->stages; ++ ++ struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage)); ++ if (!stages) ++ return VK_ERROR_OUT_OF_HOST_MEMORY; + + for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) { ++ if (rt_stages[idx].shader || rt_stages[idx].nir) ++ continue; ++ + int64_t stage_start = os_time_get_nano(); +- struct radv_shader_stage stage; +- radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage); + +- if (stages[idx].shader) +- goto feedback; ++ struct radv_shader_stage *stage = &stages[idx]; ++ radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, stage); + + /* precompile the shader */ +- stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout); ++ stage->nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout); ++ ++ /* Cases in which we need to keep around the NIR: ++ * - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader. ++ * - non-recursive: Non-recursive shaders are inlined into the traversal shader. ++ * - monolithic: Callable shaders (chit/miss) are inlined into the raygen shader. ++ */ ++ bool compiled = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]); ++ bool library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; ++ bool nir_needed = library || !compiled || (key->rt.monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN); ++ nir_needed &= !rt_stages[idx].nir; ++ if (nir_needed) { ++ rt_stages[idx].stack_size = stage->nir->scratch_size; ++ rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1, ++ !key->optimisations_disabled); ++ } + +- if (radv_ray_tracing_stage_is_compiled(&stages[idx])) { +- uint32_t stack_size = 0; ++ stage->feedback.duration = os_time_get_nano() - stage_start; ++ } + ++ for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) { ++ int64_t stage_start = os_time_get_nano(); ++ struct radv_shader_stage *stage = &stages[idx]; ++ ++ /* Cases in which we need to compile the shader (raygen/callable/chit/miss): ++ * TODO: - monolithic: Force compilation if there already is a compiled shader ++ * since pipeline library shaders use separate compilation. ++ * - separate: Compile any recursive stage if wasn't compiled yet. ++ */ ++ bool shader_needed = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]) && !rt_stages[idx].shader; ++ if (shader_needed) { ++ uint32_t stack_size = 0; + struct radv_serialized_shader_arena_block *replay_block = + capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL; + + struct radv_shader *shader; + result = +- radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &stage, &stack_size, replay_block, &shader); +- stages[idx].stack_size = stack_size; +- stages[idx].shader = shader ? 
&shader->base : NULL; +- } else { +- stages[idx].stack_size = stage.nir->scratch_size; +- stages[idx].shader = +- radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, stages[idx].sha1, !key->optimisations_disabled); +- } +- ralloc_free(stage.nir); ++ radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, stage, &stack_size, replay_block, &shader); ++ if (result != VK_SUCCESS) ++ goto cleanup; + +- if (result != VK_SUCCESS) +- return result; ++ rt_stages[idx].stack_size = stack_size; ++ rt_stages[idx].shader = shader ? &shader->base : NULL; ++ } + +- feedback: + if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) { + assert(idx < creation_feedback->pipelineStageCreationFeedbackCount); +- stage.feedback.duration = os_time_get_nano() - stage_start; +- creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage.feedback; ++ stage->feedback.duration = os_time_get_nano() - stage_start; ++ creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback; + } + } + +@@ -527,6 +556,10 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca + result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &traversal_stage, NULL, NULL, + &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); + ++cleanup: ++ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) ++ ralloc_free(stages[i].nir); ++ free(stages); + return result; + } + +@@ -732,6 +765,8 @@ void + radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) + { + for (unsigned i = 0; i < pipeline->stage_count; i++) { ++ if (pipeline->stages[i].nir) ++ vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir); + if (pipeline->stages[i].shader) + vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].shader); + } +diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h +index d51cbd3e173f..06755bba70b9 100644 +--- a/src/amd/vulkan/radv_private.h ++++ b/src/amd/vulkan/radv_private.h +@@ -2364,6 +2364,7 @@ struct radv_ray_tracing_group { + }; + + struct radv_ray_tracing_stage { ++ struct vk_pipeline_cache_object *nir; + struct vk_pipeline_cache_object *shader; + gl_shader_stage stage; + uint32_t stack_size; +diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c +index cc92beebc350..3def324bcccf 100644 +--- a/src/amd/vulkan/radv_rt_shader.c ++++ b/src/amd/vulkan/radv_rt_shader.c +@@ -1132,7 +1132,7 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers + if (is_dup) + continue; + +- nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader); ++ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].nir); + assert(nir_stage); + + insert_rt_case(b, nir_stage, vars, sbt_idx, data->pipeline->groups[i].handle.any_hit_index); +@@ -1262,13 +1262,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio + if (is_dup) + continue; + +- nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader); ++ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].nir); + assert(nir_stage); + + nir_shader *any_hit_stage = NULL; + if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { +- any_hit_stage = +- radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader); ++ any_hit_stage = 
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].nir); + assert(any_hit_stage); + + /* reserve stack size for any_hit before it is inlined */ +-- +GitLab + + +From f3715521ae26fca036ba0a9bb2c700b3c9a59a10 Mon Sep 17 00:00:00 2001 +From: Konstantin Seurer +Date: Mon, 21 Aug 2023 13:32:53 +0200 +Subject: [PATCH 3/4] radv/rt: Add monolithic raygen lowering + +Ray traversal is inlined to allow for constant folding and avoid +spilling. +--- + src/amd/vulkan/radv_pipeline_rt.c | 11 +- + src/amd/vulkan/radv_rt_shader.c | 276 ++++++++++++++++++++++++++---- + src/amd/vulkan/radv_shader.h | 3 +- + 3 files changed, 248 insertions(+), 42 deletions(-) + +diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c +index 21be900bb3dd..00bdd6244432 100644 +--- a/src/amd/vulkan/radv_pipeline_rt.c ++++ b/src/amd/vulkan/radv_pipeline_rt.c +@@ -356,9 +356,8 @@ move_rt_instructions(nir_shader *shader) + static VkResult + radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key, +- const struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, +- uint32_t *stack_size, struct radv_serialized_shader_arena_block *replay_block, +- struct radv_shader **out_shader) ++ struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, uint32_t *stack_size, ++ struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader) + { + struct radv_shader_binary *binary; + bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags); +@@ -384,7 +383,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, + uint32_t num_resume_shaders = 0; + nir_shader **resume_shaders = NULL; + +- if (stage->stage != MESA_SHADER_INTERSECTION) { ++ bool monolithic_raygen = pipeline_key->rt.monolithic && stage->stage == MESA_SHADER_RAYGEN; ++ if (!monolithic_raygen && stage->stage != MESA_SHADER_INTERSECTION) { + nir_builder b = nir_builder_at(nir_after_cf_list(&nir_shader_get_entrypoint(stage->nir)->body)); + nir_rt_return_amd(&b); + +@@ -411,7 +411,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, + for (uint32_t i = 0; i < num_shaders; i++) { + struct radv_shader_stage temp_stage = *stage; + temp_stage.nir = shaders[i]; +- radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0); ++ radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device, ++ pipeline, pipeline_key); + radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled); + radv_postprocess_nir(device, pipeline_key, &temp_stage); + +diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c +index 3def324bcccf..362d91859700 100644 +--- a/src/amd/vulkan/radv_rt_shader.c ++++ b/src/amd/vulkan/radv_rt_shader.c +@@ -1306,6 +1306,87 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio + nir_pop_if(b, NULL); + } + ++static void ++visit_closest_hit_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline, ++ struct rt_variables *vars) ++{ ++ nir_def *sbt_idx = nir_load_var(b, vars->idx); ++ ++ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) ++ nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); ++ ++ for (unsigned i = 0; i < 
pipeline->group_count; ++i) { ++ struct radv_ray_tracing_group *group = &pipeline->groups[i]; ++ ++ unsigned shader_id = VK_SHADER_UNUSED_KHR; ++ if (group->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR) ++ shader_id = group->recursive_shader; ++ ++ if (shader_id == VK_SHADER_UNUSED_KHR) ++ continue; ++ ++ /* Avoid emitting stages with the same shaders/handles multiple times. */ ++ bool is_dup = false; ++ for (unsigned j = 0; j < i; ++j) ++ if (pipeline->groups[j].handle.closest_hit_index == pipeline->groups[i].handle.closest_hit_index) ++ is_dup = true; ++ ++ if (is_dup) ++ continue; ++ ++ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir); ++ assert(nir_stage); ++ ++ insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.closest_hit_index); ++ ralloc_free(nir_stage); ++ } ++ ++ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) ++ nir_pop_if(b, NULL); ++} ++ ++static void ++visit_miss_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline, ++ struct rt_variables *vars) ++{ ++ nir_def *sbt_idx = nir_load_var(b, vars->idx); ++ ++ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) ++ nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); ++ ++ for (unsigned i = 0; i < pipeline->group_count; ++i) { ++ struct radv_ray_tracing_group *group = &pipeline->groups[i]; ++ ++ unsigned shader_id = VK_SHADER_UNUSED_KHR; ++ if (group->type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR) ++ shader_id = group->recursive_shader; ++ ++ if (shader_id == VK_SHADER_UNUSED_KHR) ++ continue; ++ ++ if (pipeline->stages[shader_id].stage != MESA_SHADER_MISS) ++ continue; ++ ++ /* Avoid emitting stages with the same shaders/handles multiple times. */ ++ bool is_dup = false; ++ for (unsigned j = 0; j < i; ++j) ++ if (pipeline->groups[j].handle.general_index == pipeline->groups[i].handle.general_index) ++ is_dup = true; ++ ++ if (is_dup) ++ continue; ++ ++ nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir); ++ assert(nir_stage); ++ ++ insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.general_index); ++ ralloc_free(nir_stage); ++ } ++ ++ if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) ++ nir_pop_if(b, NULL); ++} ++ + static void + store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args) + { +@@ -1414,25 +1495,47 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin + /* Register storage for hit attributes */ + nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)]; + +- for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) +- hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); ++ if (!key->rt.monolithic || b->shader->info.stage != MESA_SHADER_RAYGEN) { ++ for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) ++ hit_attribs[i] = ++ nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); + +- lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); ++ lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); ++ } + + /* Initialize follow-up shader. 
*/ + nir_push_if(b, nir_load_var(b, trav_vars.hit)); + { +- for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i) +- nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i); +- nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax), +- nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr), +- nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind)); ++ if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) { ++ load_sbt_entry(b, vars, nir_load_var(b, vars->idx), SBT_HIT, SBT_CLOSEST_HIT_IDX); ++ ++ nir_def *should_return = ++ nir_test_mask(b, nir_load_var(b, vars->cull_mask_and_flags), SpvRayFlagsSkipClosestHitShaderKHRMask); ++ ++ /* should_return is set if we had a hit but we won't be calling the closest hit ++ * shader and hence need to return immediately to the calling shader. */ ++ nir_push_if(b, nir_inot(b, should_return)); ++ visit_closest_hit_shaders(device, b, pipeline, vars); ++ nir_pop_if(b, NULL); ++ } else { ++ for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i) ++ nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i); ++ nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax), ++ nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr), ++ nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind)); ++ } + } + nir_push_else(b, NULL); + { +- /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer +- * for miss shaders if none of the rays miss. */ +- nir_execute_miss_amd(b, nir_load_var(b, vars->tmax)); ++ if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) { ++ load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, SBT_GENERAL_IDX); ++ ++ visit_miss_shaders(device, b, pipeline, vars); ++ } else { ++ /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer ++ * for miss shaders if none of the rays miss. */ ++ nir_execute_miss_amd(b, nir_load_var(b, vars->tmax)); ++ } + } + nir_pop_if(b, NULL); + } +@@ -1477,6 +1580,98 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ + return b.shader; + } + ++struct lower_rt_instruction_monolithic_state { ++ struct radv_device *device; ++ struct radv_ray_tracing_pipeline *pipeline; ++ const struct radv_pipeline_key *key; ++ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo; ++ ++ struct rt_variables *vars; ++}; ++ ++static bool ++lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data) ++{ ++ if (instr->type != nir_instr_type_intrinsic) ++ return false; ++ ++ b->cursor = nir_after_instr(instr); ++ ++ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); ++ ++ struct lower_rt_instruction_monolithic_state *state = data; ++ struct rt_variables *vars = state->vars; ++ ++ switch (intr->intrinsic) { ++ case nir_intrinsic_execute_callable: ++ unreachable("nir_intrinsic_execute_callable"); ++ case nir_intrinsic_trace_ray: { ++ nir_store_var(b, vars->arg, nir_iadd_imm(b, intr->src[10].ssa, -b->shader->scratch_size), 1); ++ ++ /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. 
*/ ++ nir_store_var(b, vars->accel_struct, intr->src[0].ssa, 0x1); ++ nir_store_var(b, vars->cull_mask_and_flags, nir_ior(b, nir_ishl_imm(b, intr->src[2].ssa, 24), intr->src[1].ssa), ++ 0x1); ++ nir_store_var(b, vars->sbt_offset, nir_iand_imm(b, intr->src[3].ssa, 0xf), 0x1); ++ nir_store_var(b, vars->sbt_stride, nir_iand_imm(b, intr->src[4].ssa, 0xf), 0x1); ++ nir_store_var(b, vars->miss_index, nir_iand_imm(b, intr->src[5].ssa, 0xffff), 0x1); ++ nir_store_var(b, vars->origin, intr->src[6].ssa, 0x7); ++ nir_store_var(b, vars->tmin, intr->src[7].ssa, 0x1); ++ nir_store_var(b, vars->direction, intr->src[8].ssa, 0x7); ++ nir_store_var(b, vars->tmax, intr->src[9].ssa, 0x1); ++ ++ nir_def *stack_ptr = nir_load_var(b, vars->stack_ptr); ++ nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1); ++ ++ radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, state->key, b, vars); ++ b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size * ++ MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); ++ ++ nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1); ++ ++ nir_instr_remove(instr); ++ return true; ++ } ++ case nir_intrinsic_rt_resume: ++ unreachable("nir_intrinsic_rt_resume"); ++ case nir_intrinsic_rt_return_amd: ++ unreachable("nir_intrinsic_rt_return_amd"); ++ case nir_intrinsic_execute_closest_hit_amd: ++ unreachable("nir_intrinsic_execute_closest_hit_amd"); ++ case nir_intrinsic_execute_miss_amd: ++ unreachable("nir_intrinsic_execute_miss_amd"); ++ default: ++ return false; ++ } ++} ++ ++static void ++lower_rt_instructions_monolithic(nir_shader *shader, struct radv_device *device, ++ struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key, ++ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct rt_variables *vars) ++{ ++ nir_function_impl *impl = nir_shader_get_entrypoint(shader); ++ ++ struct lower_rt_instruction_monolithic_state state = { ++ .device = device, ++ .pipeline = pipeline, ++ .key = key, ++ .pCreateInfo = pCreateInfo, ++ .vars = vars, ++ }; ++ ++ nir_shader_instructions_pass(shader, lower_rt_instruction_monolithic, nir_metadata_none, &state); ++ nir_index_ssa_defs(impl); ++ ++ /* Register storage for hit attributes */ ++ nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)]; ++ ++ for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) ++ hit_attribs[i] = nir_local_variable_create(impl, glsl_uint_type(), "ahit_attrib"); ++ ++ lower_hit_attribs(shader, hit_attribs, 0); ++} ++ + /** Select the next shader based on priorities: + * + * Detect the priority of the shader stage by the lowest bits in the address (low to high): +@@ -1517,13 +1712,18 @@ select_next_shader(nir_builder *b, nir_def *shader_addr, unsigned wave_size) + void + radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, + const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size, +- bool resume_shader) ++ bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, ++ const struct radv_pipeline_key *key) + { + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + const VkPipelineCreateFlagBits2KHR create_flags = radv_get_pipeline_create_flags(pCreateInfo); + + struct rt_variables vars = create_rt_variables(shader, create_flags); ++ ++ if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) ++ lower_rt_instructions_monolithic(shader, device, pipeline, 
key, pCreateInfo, &vars); ++ + lower_rt_instructions(shader, &vars, true); + + if (stack_size) { +@@ -1585,32 +1785,36 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH + if (shader_guard) + nir_pop_if(&b, shader_guard); + +- /* select next shader */ + b.cursor = nir_after_cf_list(&impl->body); + +- shader_addr = nir_load_var(&b, vars.shader_addr); +- nir_def *next = select_next_shader(&b, shader_addr, info->wave_size); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next); +- +- /* store back all variables to registers */ +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax)); +- +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags)); +- ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind)); ++ if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) { ++ nir_terminate(&b); ++ } else { ++ /* select next shader */ ++ shader_addr = nir_load_var(&b, vars.shader_addr); ++ nir_def *next = select_next_shader(&b, shader_addr, info->wave_size); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next); ++ ++ /* store back all variables to registers */ ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index)); ++ ac_nir_store_arg(&b, &args->ac, 
args->ac.rt.ray_origin, nir_load_var(&b, vars.origin)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax)); ++ ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags)); ++ ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind)); ++ } + + nir_metadata_preserve(impl, nir_metadata_none); + +diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h +index a8ea3aab13d4..d2be3cc9ea01 100644 +--- a/src/amd/vulkan/radv_shader.h ++++ b/src/amd/vulkan/radv_shader.h +@@ -632,7 +632,8 @@ nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShad + + void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, + const struct radv_shader_args *args, const struct radv_shader_info *info, +- uint32_t *stack_size, bool resume_shader); ++ uint32_t *stack_size, bool resume_shader, struct radv_device *device, ++ struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key); + + struct radv_shader_stage; + +-- +GitLab + + +From 6482cce2d2895fed40631ad9f9256aa515cb5750 Mon Sep 17 00:00:00 2001 +From: Konstantin Seurer +Date: Sat, 24 Jun 2023 16:11:16 +0200 +Subject: [PATCH 4/4] radv/rt: Use monolithic pipelines + +Only available for non-recursive pipelines that do not have callables. +--- + src/amd/vulkan/radv_pipeline_rt.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c +index 00bdd6244432..f1ef3d90cd1b 100644 +--- a/src/amd/vulkan/radv_pipeline_rt.c ++++ b/src/amd/vulkan/radv_pipeline_rt.c +@@ -103,6 +103,17 @@ radv_generate_rt_pipeline_key(const struct radv_device *device, const struct rad + } + } + ++ if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) { ++ key.rt.monolithic = pCreateInfo->maxPipelineRayRecursionDepth <= 1; ++ ++ for (uint32_t i = 0; i < pipeline->stage_count; i++) { ++ if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) { ++ key.rt.monolithic = false; ++ break; ++ } ++ } ++ } ++ + return key; + } + +--
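
Note (not part of the upstream patch): the last commit in this series only enables rt.monolithic for non-library pipelines with a recursion depth of at most 1 and no callable stages. The sketch below is a simplified, standalone illustration of that eligibility check; the types and names are stand-ins for the real radv_* structures, not RADV API.

/* Minimal sketch of the monolithic-eligibility rule from the final patch,
 * under the assumption of simplified stand-in types (hypothetical names). */
#include <stdbool.h>
#include <stdint.h>

enum rt_stage_kind { STAGE_RAYGEN, STAGE_MISS, STAGE_CLOSEST_HIT, STAGE_CALLABLE };

struct rt_stage_info {
   enum rt_stage_kind stage;
};

struct rt_pipeline_info {
   bool is_library;              /* VK_PIPELINE_CREATE_LIBRARY_BIT_KHR set */
   uint32_t max_recursion_depth; /* maxPipelineRayRecursionDepth */
   uint32_t stage_count;
   const struct rt_stage_info *stages;
};

/* Monolithic compilation is only used for non-library pipelines whose
 * recursion depth is at most 1 and which contain no callable shaders;
 * otherwise the driver falls back to separate compilation. */
static bool
rt_pipeline_can_be_monolithic(const struct rt_pipeline_info *info)
{
   if (info->is_library)
      return false;
   if (info->max_recursion_depth > 1)
      return false;
   for (uint32_t i = 0; i < info->stage_count; i++) {
      if (info->stages[i].stage == STAGE_CALLABLE)
         return false;
   }
   return true;
}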