From ed9fb6be100cff6c2066beb0cdf8b3a17cab292c Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 24 Jun 2023 15:49:13 +0200 Subject: [PATCH 1/4] radv: Add rt.monolithic to radv_pipeline_key --- src/amd/vulkan/radv_shader.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0c53695edae7c..6eb95fdd0a097 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -135,6 +135,10 @@ struct radv_pipeline_key { bool line_smooth_enabled; } ps; + + struct { + bool monolithic; + } rt; }; struct radv_nir_compiler_options { -- From 8f45cc08361f55c1e613a11198b1ae97c519406e Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 24 Jun 2023 15:46:51 +0200 Subject: [PATCH 2/4] radv/rt: Store NIR shaders separately In order to compile monolithic shaders with pipeline libraries, we need to keep the NIR around for inlining recursive stages. --- src/amd/vulkan/radv_pipeline_cache.c | 9 +-- src/amd/vulkan/radv_pipeline_rt.c | 93 +++++++++++++++++++++------- src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_rt_shader.c | 7 +-- 4 files changed, 79 insertions(+), 31 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 5bbbc755ae11f..7e4c6f8898130 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -481,11 +481,12 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = radv_shader_ref(pipeline_obj->shaders[idx++]); for (unsigned i = 0; i < pCreateInfo->stageCount; i++) { - if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) { + if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) pipeline->stages[i].shader = &radv_shader_ref(pipeline_obj->shaders[idx++])->base; - } else if (is_library) { - pipeline->stages[i].shader = radv_pipeline_cache_search_nir(device, cache, 
pipeline->stages[i].sha1); - complete &= pipeline->stages[i].shader != NULL; + + if (is_library) { + pipeline->stages[i].nir = radv_pipeline_cache_search_nir(device, cache, pipeline->stages[i].sha1); + complete &= pipeline->stages[i].nir != NULL; } } diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index c86ea3a508468..85afc8cb28e1b 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -263,7 +263,10 @@ radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, st RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]); struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline); for (unsigned j = 0; j < library_pipeline->stage_count; ++j) { - stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader); + stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir); + if (library_pipeline->stages[j].shader) + stages[idx].shader = vk_pipeline_cache_object_ref(library_pipeline->stages[j].shader); + stages[idx].stage = library_pipeline->stages[j].stage; stages[idx].stack_size = library_pipeline->stages[j].stack_size; memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH); @@ -462,45 +465,83 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca return VK_PIPELINE_COMPILE_REQUIRED; VkResult result = VK_SUCCESS; - struct radv_ray_tracing_stage *stages = pipeline->stages; + struct radv_ray_tracing_stage *rt_stages = pipeline->stages; + + struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage)); + if (!stages) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + bool has_callable = false; + for (uint32_t i = 0; i < pipeline->stage_count; i++) { + if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) { + has_callable = true; + break; + } + } for (uint32_t idx = 0; idx < pCreateInfo->stageCount; 
idx++) { + if (rt_stages[idx].shader || rt_stages[idx].nir) + continue; + int64_t stage_start = os_time_get_nano(); - struct radv_shader_stage stage; - radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage); - if (stages[idx].shader) - goto feedback; + struct radv_shader_stage *stage = &stages[idx]; + radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, stage); /* precompile the shader */ - stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout); + stage->nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key, pipeline_layout); + + /* Cases in which we need to keep around the NIR: + * - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader. + * If there is a callable shader, we can be sure that the final pipeline won't be + * monolithic. + * - non-recursive: Non-recursive shaders are inlined into the traversal shader. + * - monolithic: Callable shaders (chit/miss) are inlined into the raygen shader. 
+ */ + bool compiled = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]); + bool library = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + bool nir_needed = + (library && !has_callable) || !compiled || (key->rt.monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN); + nir_needed &= !rt_stages[idx].nir; + if (nir_needed) { + rt_stages[idx].stack_size = stage->nir->scratch_size; + rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1, + !key->optimisations_disabled); + } - if (radv_ray_tracing_stage_is_compiled(&stages[idx])) { - uint32_t stack_size = 0; + stage->feedback.duration = os_time_get_nano() - stage_start; + } + for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) { + int64_t stage_start = os_time_get_nano(); + struct radv_shader_stage *stage = &stages[idx]; + + /* Cases in which we need to compile the shader (raygen/callable/chit/miss): + * TODO: - monolithic: Extend the loop to cover imported stages and force compilation of imported raygen + * shaders since pipeline library shaders use separate compilation. + * - separate: Compile any recursive stage if it wasn't compiled yet. + * TODO: Skip chit and miss shaders in the monolithic case. + */ + bool shader_needed = radv_ray_tracing_stage_is_compiled(&rt_stages[idx]) && !rt_stages[idx].shader; + if (shader_needed) { + uint32_t stack_size = 0; struct radv_serialized_shader_arena_block *replay_block = capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL; struct radv_shader *shader; result = - radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &stage, &stack_size, replay_block, &shader); - stages[idx].stack_size = stack_size; - stages[idx].shader = shader ? 
&shader->base : NULL; - } else { - stages[idx].stack_size = stage.nir->scratch_size; - stages[idx].shader = - radv_pipeline_cache_nir_to_handle(device, cache, stage.nir, stages[idx].sha1, !key->optimisations_disabled); - } - ralloc_free(stage.nir); + radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, stage, &stack_size, replay_block, &shader); + if (result != VK_SUCCESS) + goto cleanup; - if (result != VK_SUCCESS) - return result; + rt_stages[idx].stack_size = stack_size; + rt_stages[idx].shader = shader ? &shader->base : NULL; + } - feedback: if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) { assert(idx < creation_feedback->pipelineStageCreationFeedbackCount); - stage.feedback.duration = os_time_get_nano() - stage_start; - creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage.feedback; + stage->feedback.duration += os_time_get_nano() - stage_start; + creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback; } } @@ -527,6 +568,10 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca result = radv_rt_nir_to_asm(device, cache, pCreateInfo, key, pipeline, &traversal_stage, NULL, NULL, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]); +cleanup: + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) + ralloc_free(stages[i].nir); + free(stages); return result; } @@ -732,6 +777,8 @@ void radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) { for (unsigned i = 0; i < pipeline->stage_count; i++) { + if (pipeline->stages[i].nir) + vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir); if (pipeline->stages[i].shader) vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].shader); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 47e315488e9f9..7ab46738b46f0 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2364,6 +2364,7 @@ 
struct radv_ray_tracing_group { }; struct radv_ray_tracing_stage { + struct vk_pipeline_cache_object *nir; struct vk_pipeline_cache_object *shader; gl_shader_stage stage; uint32_t stack_size; diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c index cc92beebc3503..3def324bcccf3 100644 --- a/src/amd/vulkan/radv_rt_shader.c +++ b/src/amd/vulkan/radv_rt_shader.c @@ -1132,7 +1132,7 @@ visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct travers if (is_dup) continue; - nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].shader); + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, data->pipeline->stages[shader_id].nir); assert(nir_stage); insert_rt_case(b, nir_stage, vars, sbt_idx, data->pipeline->groups[i].handle.any_hit_index); @@ -1262,13 +1262,12 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio if (is_dup) continue; - nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].shader); + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[shader_id].nir); assert(nir_stage); nir_shader *any_hit_stage = NULL; if (any_hit_shader_id != VK_SHADER_UNUSED_KHR) { - any_hit_stage = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].shader); + any_hit_stage = radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[any_hit_shader_id].nir); assert(any_hit_stage); /* reserve stack size for any_hit before it is inlined */ -- GitLab From bba42cbc235e75a5c7ed05e55e48f71640c68ad4 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Mon, 21 Aug 2023 13:32:53 +0200 Subject: [PATCH 3/4] radv/rt: Add monolithic raygen lowering Ray traversal is inlined to allow for constant folding and avoid spilling. 
--- src/amd/vulkan/radv_pipeline_rt.c | 11 +- src/amd/vulkan/radv_rt_shader.c | 276 ++++++++++++++++++++++++++---- src/amd/vulkan/radv_shader.h | 3 +- 3 files changed, 248 insertions(+), 42 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 85afc8cb28e1b..12562c6cf89ba 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -356,9 +356,8 @@ move_rt_instructions(nir_shader *shader) static VkResult radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_pipeline_key *pipeline_key, - const struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, - uint32_t *stack_size, struct radv_serialized_shader_arena_block *replay_block, - struct radv_shader **out_shader) + struct radv_ray_tracing_pipeline *pipeline, struct radv_shader_stage *stage, uint32_t *stack_size, + struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader) { struct radv_shader_binary *binary; bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags); @@ -384,7 +383,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, uint32_t num_resume_shaders = 0; nir_shader **resume_shaders = NULL; - if (stage->stage != MESA_SHADER_INTERSECTION) { + bool monolithic_raygen = pipeline_key->rt.monolithic && stage->stage == MESA_SHADER_RAYGEN; + if (stage->stage != MESA_SHADER_INTERSECTION && !monolithic_raygen) { nir_builder b = nir_builder_at(nir_after_cf_list(&nir_shader_get_entrypoint(stage->nir)->body)); nir_rt_return_amd(&b); @@ -411,7 +411,8 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, for (uint32_t i = 0; i < num_shaders; i++) { struct radv_shader_stage temp_stage = *stage; temp_stage.nir = shaders[i]; - radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, 
&stage->info, stack_size, i > 0); + radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device, + pipeline, pipeline_key); radv_optimize_nir(temp_stage.nir, pipeline_key->optimisations_disabled); radv_postprocess_nir(device, pipeline_key, &temp_stage); diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c index 3def324bcccf3..362d918597008 100644 --- a/src/amd/vulkan/radv_rt_shader.c +++ b/src/amd/vulkan/radv_rt_shader.c @@ -1306,6 +1306,87 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_pop_if(b, NULL); } +static void +visit_closest_hit_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline, + struct rt_variables *vars) +{ + nir_def *sbt_idx = nir_load_var(b, vars->idx); + + if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) + nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); + + for (unsigned i = 0; i < pipeline->group_count; ++i) { + struct radv_ray_tracing_group *group = &pipeline->groups[i]; + + unsigned shader_id = VK_SHADER_UNUSED_KHR; + if (group->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR) + shader_id = group->recursive_shader; + + if (shader_id == VK_SHADER_UNUSED_KHR) + continue; + + /* Avoid emitting stages with the same shaders/handles multiple times. 
*/ + bool is_dup = false; + for (unsigned j = 0; j < i; ++j) + if (pipeline->groups[j].handle.closest_hit_index == pipeline->groups[i].handle.closest_hit_index) + is_dup = true; + + if (is_dup) + continue; + + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir); + assert(nir_stage); + + insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.closest_hit_index); + ralloc_free(nir_stage); + } + + if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) + nir_pop_if(b, NULL); +} + +static void +visit_miss_shaders(struct radv_device *device, nir_builder *b, struct radv_ray_tracing_pipeline *pipeline, + struct rt_variables *vars) +{ + nir_def *sbt_idx = nir_load_var(b, vars->idx); + + if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) + nir_push_if(b, nir_ine_imm(b, sbt_idx, 0)); + + for (unsigned i = 0; i < pipeline->group_count; ++i) { + struct radv_ray_tracing_group *group = &pipeline->groups[i]; + + unsigned shader_id = VK_SHADER_UNUSED_KHR; + if (group->type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR) + shader_id = group->recursive_shader; + + if (shader_id == VK_SHADER_UNUSED_KHR) + continue; + + if (pipeline->stages[shader_id].stage != MESA_SHADER_MISS) + continue; + + /* Avoid emitting stages with the same shaders/handles multiple times. 
*/ + bool is_dup = false; + for (unsigned j = 0; j < i; ++j) + if (pipeline->groups[j].handle.general_index == pipeline->groups[i].handle.general_index) + is_dup = true; + + if (is_dup) + continue; + + nir_shader *nir_stage = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[shader_id].nir); + assert(nir_stage); + + insert_rt_case(b, nir_stage, vars, sbt_idx, pipeline->groups[i].handle.general_index); + ralloc_free(nir_stage); + } + + if (!(vars->flags & VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) + nir_pop_if(b, NULL); +} + static void store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args) { @@ -1414,25 +1495,47 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin /* Register storage for hit attributes */ nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)]; - for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) - hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); + if (!key->rt.monolithic || b->shader->info.stage != MESA_SHADER_RAYGEN) { + for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) + hit_attribs[i] = + nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); - lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); + lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); + } /* Initialize follow-up shader. 
*/ nir_push_if(b, nir_load_var(b, trav_vars.hit)); { - for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i) - nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i); - nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax), - nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr), - nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind)); + if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) { + load_sbt_entry(b, vars, nir_load_var(b, vars->idx), SBT_HIT, SBT_CLOSEST_HIT_IDX); + + nir_def *should_return = + nir_test_mask(b, nir_load_var(b, vars->cull_mask_and_flags), SpvRayFlagsSkipClosestHitShaderKHRMask); + + /* should_return is set if we had a hit but we won't be calling the closest hit + * shader and hence need to return immediately to the calling shader. */ + nir_push_if(b, nir_inot(b, should_return)); + visit_closest_hit_shaders(device, b, pipeline, vars); + nir_pop_if(b, NULL); + } else { + for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i) + nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i); + nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax), + nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr), + nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind)); + } } nir_push_else(b, NULL); { - /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer - * for miss shaders if none of the rays miss. */ - nir_execute_miss_amd(b, nir_load_var(b, vars->tmax)); + if (key->rt.monolithic && b->shader->info.stage == MESA_SHADER_RAYGEN) { + load_sbt_entry(b, vars, nir_load_var(b, vars->miss_index), SBT_MISS, SBT_GENERAL_IDX); + + visit_miss_shaders(device, b, pipeline, vars); + } else { + /* Only load the miss shader if we actually miss. It is valid to not specify an SBT pointer + * for miss shaders if none of the rays miss. 
*/ + nir_execute_miss_amd(b, nir_load_var(b, vars->tmax)); + } } nir_pop_if(b, NULL); } @@ -1477,6 +1580,98 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ return b.shader; } +struct lower_rt_instruction_monolithic_state { + struct radv_device *device; + struct radv_ray_tracing_pipeline *pipeline; + const struct radv_pipeline_key *key; + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo; + + struct rt_variables *vars; +}; + +static bool +lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + b->cursor = nir_after_instr(instr); + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + struct lower_rt_instruction_monolithic_state *state = data; + struct rt_variables *vars = state->vars; + + switch (intr->intrinsic) { + case nir_intrinsic_execute_callable: + unreachable("nir_intrinsic_execute_callable"); + case nir_intrinsic_trace_ray: { + nir_store_var(b, vars->arg, nir_iadd_imm(b, intr->src[10].ssa, -b->shader->scratch_size), 1); + + /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. 
*/ + nir_store_var(b, vars->accel_struct, intr->src[0].ssa, 0x1); + nir_store_var(b, vars->cull_mask_and_flags, nir_ior(b, nir_ishl_imm(b, intr->src[2].ssa, 24), intr->src[1].ssa), + 0x1); + nir_store_var(b, vars->sbt_offset, nir_iand_imm(b, intr->src[3].ssa, 0xf), 0x1); + nir_store_var(b, vars->sbt_stride, nir_iand_imm(b, intr->src[4].ssa, 0xf), 0x1); + nir_store_var(b, vars->miss_index, nir_iand_imm(b, intr->src[5].ssa, 0xffff), 0x1); + nir_store_var(b, vars->origin, intr->src[6].ssa, 0x7); + nir_store_var(b, vars->tmin, intr->src[7].ssa, 0x1); + nir_store_var(b, vars->direction, intr->src[8].ssa, 0x7); + nir_store_var(b, vars->tmax, intr->src[9].ssa, 0x1); + + nir_def *stack_ptr = nir_load_var(b, vars->stack_ptr); + nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1); + + radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, state->key, b, vars); + b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size * + MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); + + nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1); + + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_rt_resume: + unreachable("nir_intrinsic_rt_resume"); + case nir_intrinsic_rt_return_amd: + unreachable("nir_intrinsic_rt_return_amd"); + case nir_intrinsic_execute_closest_hit_amd: + unreachable("nir_intrinsic_execute_closest_hit_amd"); + case nir_intrinsic_execute_miss_amd: + unreachable("nir_intrinsic_execute_miss_amd"); + default: + return false; + } +} + +static void +lower_rt_instructions_monolithic(nir_shader *shader, struct radv_device *device, + struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key, + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct rt_variables *vars) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + struct lower_rt_instruction_monolithic_state state = { + .device = device, + .pipeline = pipeline, + 
.key = key, + .pCreateInfo = pCreateInfo, + .vars = vars, + }; + + nir_shader_instructions_pass(shader, lower_rt_instruction_monolithic, nir_metadata_none, &state); + nir_index_ssa_defs(impl); + + /* Register storage for hit attributes */ + nir_variable *hit_attribs[RADV_MAX_HIT_ATTRIB_SIZE / sizeof(uint32_t)]; + + for (uint32_t i = 0; i < ARRAY_SIZE(hit_attribs); i++) + hit_attribs[i] = nir_local_variable_create(impl, glsl_uint_type(), "ahit_attrib"); + + lower_hit_attribs(shader, hit_attribs, 0); +} + /** Select the next shader based on priorities: * * Detect the priority of the shader stage by the lowest bits in the address (low to high): @@ -1517,13 +1712,18 @@ select_next_shader(nir_builder *b, nir_def *shader_addr, unsigned wave_size) void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size, - bool resume_shader) + bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, + const struct radv_pipeline_key *key) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); const VkPipelineCreateFlagBits2KHR create_flags = radv_get_pipeline_create_flags(pCreateInfo); struct rt_variables vars = create_rt_variables(shader, create_flags); + + if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) + lower_rt_instructions_monolithic(shader, device, pipeline, key, pCreateInfo, &vars); + lower_rt_instructions(shader, &vars, true); if (stack_size) { @@ -1585,32 +1785,36 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH if (shader_guard) nir_pop_if(&b, shader_guard); - /* select next shader */ b.cursor = nir_after_cf_list(&impl->body); - shader_addr = nir_load_var(&b, vars.shader_addr); - nir_def *next = select_next_shader(&b, shader_addr, info->wave_size); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next); - - /* store back all 
variables to registers */ - ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax)); - - ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags)); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind)); + if (key->rt.monolithic && shader->info.stage == MESA_SHADER_RAYGEN) { + nir_terminate(&b); + } else { + /* select next shader */ + shader_addr = nir_load_var(&b, vars.shader_addr); + nir_def *next = select_next_shader(&b, shader_addr, info->wave_size); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.uniform_shader_addr, next); + + /* 
store back all variables to registers */ + ac_nir_store_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base, nir_load_var(&b, vars.stack_ptr)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_addr, shader_addr); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_record, nir_load_var(&b, vars.shader_record_ptr)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.payload_offset, nir_load_var(&b, vars.arg)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.accel_struct, nir_load_var(&b, vars.accel_struct)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags, nir_load_var(&b, vars.cull_mask_and_flags)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_offset, nir_load_var(&b, vars.sbt_offset)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_stride, nir_load_var(&b, vars.sbt_stride)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.miss_index, nir_load_var(&b, vars.miss_index)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_origin, nir_load_var(&b, vars.origin)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmin, nir_load_var(&b, vars.tmin)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax)); + + ac_nir_store_arg(&b, &args->ac, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags, nir_load_var(&b, vars.geometry_id_and_flags)); + ac_nir_store_arg(&b, &args->ac, args->ac.rt.hit_kind, nir_load_var(&b, vars.hit_kind)); + } nir_metadata_preserve(impl, nir_metadata_none); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 6eb95fdd0a097..969f9a56ab7f5 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -635,7 +635,8 @@ nir_shader *radv_parse_rt_stage(struct radv_device *device, const 
VkPipelineShad void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const struct radv_shader_args *args, const struct radv_shader_info *info, - uint32_t *stack_size, bool resume_shader); + uint32_t *stack_size, bool resume_shader, struct radv_device *device, + struct radv_ray_tracing_pipeline *pipeline, const struct radv_pipeline_key *key); struct radv_shader_stage; -- From 5c9dd4efece8f352d00d1310b556928cccb239c8 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 24 Jun 2023 16:11:16 +0200 Subject: [PATCH 4/4] radv/rt: Use monolithic pipelines Only available for non-recursive pipelines that do not have callables. --- src/amd/vulkan/radv_pipeline_rt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 12562c6cf89ba..97449b9cbafac 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -103,6 +103,17 @@ radv_generate_rt_pipeline_key(const struct radv_device *device, const struct rad } } + if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) { + key.rt.monolithic = pCreateInfo->maxPipelineRayRecursionDepth <= 1; + + for (uint32_t i = 0; i < pipeline->stage_count; i++) { + if (pipeline->stages[i].stage == MESA_SHADER_CALLABLE) { + key.rt.monolithic = false; + break; + } + } + } + return key; } --