Module: Mesa Branch: main Commit: 662f86c53384d44bc296aa6a3bc8b35c6e59d86c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=662f86c53384d44bc296aa6a3bc8b35c6e59d86c
Author: Konstantin Seurer <[email protected]> Date: Sat Dec 16 17:09:52 2023 +0100 radv/bvh: Stop emitting leaf nodes inside the encoder Avoids unnecessary copies. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26725> --- src/amd/vulkan/bvh/build_helpers.h | 22 -------- src/amd/vulkan/bvh/build_interface.h | 1 + src/amd/vulkan/bvh/bvh.h | 24 --------- src/amd/vulkan/bvh/encode.comp | 69 +------------------------ src/amd/vulkan/bvh/leaf.comp | 77 ++++++++++++++++++++-------- src/amd/vulkan/radv_acceleration_structure.c | 11 ++-- 6 files changed, 65 insertions(+), 139 deletions(-) diff --git a/src/amd/vulkan/bvh/build_helpers.h b/src/amd/vulkan/bvh/build_helpers.h index 30d312716f1..436faba7038 100644 --- a/src/amd/vulkan/bvh/build_helpers.h +++ b/src/amd/vulkan/bvh/build_helpers.h @@ -240,9 +240,6 @@ TYPE(radv_bvh_box32_node, 4); TYPE(radv_ir_header, 4); TYPE(radv_ir_node, 4); TYPE(radv_ir_box_node, 4); -TYPE(radv_ir_triangle_node, 4); -TYPE(radv_ir_aabb_node, 4); -TYPE(radv_ir_instance_node, 8); TYPE(radv_global_sync_data, 4); @@ -313,25 +310,6 @@ ir_type_to_bvh_type(uint32_t type) return RADV_BVH_INVALID_NODE; } -radv_aabb -calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix) -{ - radv_aabb aabb; - radv_accel_struct_header header = DEREF(REF(radv_accel_struct_header)(base_ptr)); - - for (uint32_t comp = 0; comp < 3; ++comp) { - aabb.min[comp] = otw_matrix[comp][3]; - aabb.max[comp] = otw_matrix[comp][3]; - for (uint32_t col = 0; col < 3; ++col) { - aabb.min[comp] += - min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); - aabb.max[comp] += - max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); - } - } - return aabb; -} - float aabb_surface_area(radv_aabb aabb) { diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h index fe988711370..2ee640d9cc5 100644 --- a/src/amd/vulkan/bvh/build_interface.h +++ b/src/amd/vulkan/bvh/build_interface.h @@ -33,6 +33,7 @@ #endif struct leaf_args { + VOID_REF ir; VOID_REF bvh; REF(radv_ir_header) header; REF(key_id_pair) ids; diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index 3687ccabe4e..52e06ec5db3 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -120,30 +120,6 @@ struct radv_ir_box_node { uint32_t bvh_offset; }; -struct radv_ir_aabb_node { - radv_ir_node base; - uint32_t primitive_id; - uint32_t geometry_id_and_flags; -}; - -struct radv_ir_triangle_node { - radv_ir_node base; - float coords[3][3]; - uint32_t triangle_id; - uint32_t id; - uint32_t geometry_id_and_flags; -}; - -struct radv_ir_instance_node { - radv_ir_node base; - /* See radv_bvh_instance_node */ - uint64_t base_ptr; - uint32_t custom_instance_and_mask; - uint32_t sbt_offset_and_flags; - mat3x4 otw_matrix; - uint32_t instance_id; -}; - struct radv_global_sync_data { uint32_t task_counts[2]; uint32_t task_started_counter; diff --git a/src/amd/vulkan/bvh/encode.comp b/src/amd/vulkan/bvh/encode.comp index fd06ada6516..12c03e179df 100644 --- a/src/amd/vulkan/bvh/encode.comp +++ b/src/amd/vulkan/bvh/encode.comp @@ -50,89 +50,26 @@ void set_parent(uint32_t child, uint32_t parent) DEREF(REF(uint32_t)(addr)) = parent; } -uint32_t -encode_sbt_offset_and_flags(uint32_t src) -{ - uint32_t flags = src >> 24; - uint32_t ret = src & 0xffffffu; - if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) - ret |= RADV_INSTANCE_FORCE_OPAQUE; - if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) - ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE; - if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0) - ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE; - if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) - ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING; - return ret; -} - -void -encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node) -{ - switch (type) { - case radv_ir_node_triangle: { - radv_ir_triangle_node src = DEREF(REF(radv_ir_triangle_node)(src_node)); - REF(radv_bvh_triangle_node) dst = REF(radv_bvh_triangle_node)(dst_node); - - DEREF(dst).coords = src.coords; - DEREF(dst).triangle_id = src.triangle_id; - DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags; - DEREF(dst).id = src.id; - break; - } - case radv_ir_node_aabb: { - radv_ir_aabb_node src = DEREF(REF(radv_ir_aabb_node)(src_node)); - REF(radv_bvh_aabb_node) dst = REF(radv_bvh_aabb_node)(dst_node); - - DEREF(dst).primitive_id = src.primitive_id; - DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags; - break; - } - case radv_ir_node_instance: { - radv_ir_instance_node src = DEREF(REF(radv_ir_instance_node)(src_node)); - REF(radv_bvh_instance_node) dst = REF(radv_bvh_instance_node)(dst_node); - uint32_t bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset; - - DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset); - DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask; - DEREF(dst).sbt_offset_and_flags = encode_sbt_offset_and_flags(src.sbt_offset_and_flags); - DEREF(dst).instance_id = src.instance_id; - DEREF(dst).bvh_offset = bvh_offset; - - mat4 transform = mat4(src.otw_matrix); - - mat4 inv_transform = transpose(inverse(transpose(transform))); - DEREF(dst).wto_matrix = mat3x4(inv_transform); - DEREF(dst).otw_matrix = mat3x4(transform); - break; - } - } -} - void main() { /* Revert the order so we start at the root */ uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x; - uint32_t intermediate_leaf_node_size; uint32_t output_leaf_node_size; switch (args.geometry_type) { case VK_GEOMETRY_TYPE_TRIANGLES_KHR: - intermediate_leaf_node_size = SIZEOF(radv_ir_triangle_node); output_leaf_node_size = SIZEOF(radv_bvh_triangle_node); break; case VK_GEOMETRY_TYPE_AABBS_KHR: - intermediate_leaf_node_size = SIZEOF(radv_ir_aabb_node); output_leaf_node_size = SIZEOF(radv_bvh_aabb_node); break; default: /* instances */ - intermediate_leaf_node_size = SIZEOF(radv_ir_instance_node); output_leaf_node_size = SIZEOF(radv_bvh_instance_node); break; } - uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * intermediate_leaf_node_size; + uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * SIZEOF(radv_ir_node); uint32_t dst_leaf_offset = id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node); uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * output_leaf_node_size; @@ -233,10 +170,8 @@ main() REF(radv_ir_box_node) child_node = REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset); DEREF(child_node).bvh_offset = dst_offset; } else { - uint32_t child_index = offset / intermediate_leaf_node_size; + uint32_t child_index = offset / SIZEOF(radv_ir_node); dst_offset = dst_leaf_offset + child_index * output_leaf_node_size; - - encode_leaf_node(type, args.intermediate_bvh + offset, args.output_bvh + dst_offset); } radv_aabb child_aabb = diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp index f79ccfaf2ce..58ab8eeceec 100644 --- a/src/amd/vulkan/bvh/leaf.comp +++ b/src/amd/vulkan/bvh/leaf.comp @@ -209,7 +209,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id) vertices.vertex[i] = transform * vertices.vertex[i]; } - REF(radv_ir_triangle_node) node = REF(radv_ir_triangle_node)(dst_ptr); + REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr); bounds.min = vec3(INFINITY); bounds.max = vec3(-INFINITY); @@ -221,9 +221,6 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id) bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]); } - DEREF(node).base.aabb = bounds; - DEREF(node).base.cost = 0.0; - DEREF(node).triangle_id = global_id; DEREF(node).geometry_id_and_flags = args.geometry_id; DEREF(node).id = 9; @@ -234,7 +231,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, uint32_t global_id) bool build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id) { - REF(radv_ir_aabb_node) node = REF(radv_ir_aabb_node)(dst_ptr); + REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr); for (uint32_t vec = 0; vec < 2; vec++) for (uint32_t comp = 0; comp < 3; comp++) { @@ -252,21 +249,51 @@ build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t if (isnan(bounds.min.x)) return false; - DEREF(node).base.aabb = bounds; - DEREF(node).base.cost = 0.0; DEREF(node).primitive_id = global_id; DEREF(node).geometry_id_and_flags = args.geometry_id; return true; } +radv_aabb +calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 otw_matrix) +{ + radv_aabb aabb; + for (uint32_t comp = 0; comp < 3; ++comp) { + aabb.min[comp] = otw_matrix[comp][3]; + aabb.max[comp] = otw_matrix[comp][3]; + for (uint32_t col = 0; col < 3; ++col) { + aabb.min[comp] += + min(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); + aabb.max[comp] += + max(otw_matrix[comp][col] * header.aabb.min[col], otw_matrix[comp][col] * header.aabb.max[col]); + } + } + return aabb; +} + +uint32_t +encode_sbt_offset_and_flags(uint32_t src) +{ + uint32_t flags = src >> 24; + uint32_t ret = src & 0xffffffu; + if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) + ret |= RADV_INSTANCE_FORCE_OPAQUE; + if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) + ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE; + if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0) + ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE; + if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) + ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING; + return ret; +} + bool build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id) { - REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr); + REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr); AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr)); - DEREF(node).base_ptr = instance.accelerationStructureReference; /* An inactive instance is one whose acceleration structure handle is VK_NULL_HANDLE. Since the active terminology is * only relevant for BVH updates, which we do not implement, we can also skip instances with mask == 0. @@ -274,20 +301,23 @@ build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint3 if (instance.accelerationStructureReference == 0 || instance.custom_instance_and_mask < (1u << 24u)) return false; - DEREF(node).otw_matrix = instance.transform; - radv_accel_struct_header instance_header = DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference)); - bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix); + DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference + instance_header.bvh_offset); + DEREF(node).bvh_offset = instance_header.bvh_offset; + + mat4 transform = mat4(instance.transform); + mat4 inv_transform = transpose(inverse(transpose(transform))); + DEREF(node).wto_matrix = mat3x4(inv_transform); + DEREF(node).otw_matrix = mat3x4(transform); + + bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform)); DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask; - DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags; + DEREF(node).sbt_offset_and_flags = encode_sbt_offset_and_flags(instance.sbt_offset_and_flags); DEREF(node).instance_id = global_id; - DEREF(node).base.aabb = bounds; - DEREF(node).base.cost = 0.0; - return true; } @@ -303,13 +333,13 @@ main(void) uint32_t dst_stride; uint32_t node_type; if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { - dst_stride = SIZEOF(radv_ir_triangle_node); + dst_stride = SIZEOF(radv_bvh_triangle_node); node_type = radv_ir_node_triangle; } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - dst_stride = SIZEOF(radv_ir_aabb_node); + dst_stride = SIZEOF(radv_bvh_aabb_node); node_type = radv_ir_node_aabb; } else { - dst_stride = SIZEOF(radv_ir_instance_node); + dst_stride = SIZEOF(radv_bvh_instance_node); node_type = radv_ir_node_instance; } @@ -333,7 +363,14 @@ main(void) is_active = build_instance(bounds, src_ptr, dst_ptr, global_id); } - DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE; + if (is_active) { + REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id); + DEREF(ir_node).aabb = bounds; + DEREF(ir_node).cost = 0.0; + } + + uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node); + DEREF(id_ptr).id = is_active ? pack_ir_node_id(ir_offset, node_type) : RADV_BVH_INVALID_NODE; uvec4 ballot = subgroupBallot(is_active); if (subgroupElect()) diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index d37c552fe89..10f461956f6 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -157,18 +157,14 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, } uint32_t bvh_leaf_size; - uint32_t ir_leaf_size; switch (geometry_type) { case VK_GEOMETRY_TYPE_TRIANGLES_KHR: - ir_leaf_size = sizeof(struct radv_ir_triangle_node); bvh_leaf_size = sizeof(struct radv_bvh_triangle_node); break; case VK_GEOMETRY_TYPE_AABBS_KHR: - ir_leaf_size = sizeof(struct radv_ir_aabb_node); bvh_leaf_size = sizeof(struct radv_bvh_aabb_node); break; case VK_GEOMETRY_TYPE_INSTANCES_KHR: - ir_leaf_size = sizeof(struct radv_ir_instance_node); bvh_leaf_size = sizeof(struct radv_bvh_instance_node); break; default: @@ -242,7 +238,7 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count, offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space); scratch->ir_offset = offset; - offset += ir_leaf_size * leaf_count; + offset += sizeof(struct radv_ir_node) * leaf_count; scratch->internal_node_offset = offset; offset += sizeof(struct radv_ir_box_node) * internal_count; @@ -639,8 +635,11 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount, radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline); for (uint32_t i = 0; i < infoCount; ++i) { + RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure); + struct leaf_args leaf_consts = { - .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, + .ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, + .bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset, .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0], };
