Module: Mesa
Branch: main
Commit: 662f86c53384d44bc296aa6a3bc8b35c6e59d86c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=662f86c53384d44bc296aa6a3bc8b35c6e59d86c

Author: Konstantin Seurer <[email protected]>
Date:   Sat Dec 16 17:09:52 2023 +0100

radv/bvh: Stop emitting leaf nodes inside the encoder

Avoids unnecessary copies.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26725>

---

 src/amd/vulkan/bvh/build_helpers.h           | 22 --------
 src/amd/vulkan/bvh/build_interface.h         |  1 +
 src/amd/vulkan/bvh/bvh.h                     | 24 ---------
 src/amd/vulkan/bvh/encode.comp               | 69 +------------------------
 src/amd/vulkan/bvh/leaf.comp                 | 77 ++++++++++++++++++++--------
 src/amd/vulkan/radv_acceleration_structure.c | 11 ++--
 6 files changed, 65 insertions(+), 139 deletions(-)

diff --git a/src/amd/vulkan/bvh/build_helpers.h 
b/src/amd/vulkan/bvh/build_helpers.h
index 30d312716f1..436faba7038 100644
--- a/src/amd/vulkan/bvh/build_helpers.h
+++ b/src/amd/vulkan/bvh/build_helpers.h
@@ -240,9 +240,6 @@ TYPE(radv_bvh_box32_node, 4);
 TYPE(radv_ir_header, 4);
 TYPE(radv_ir_node, 4);
 TYPE(radv_ir_box_node, 4);
-TYPE(radv_ir_triangle_node, 4);
-TYPE(radv_ir_aabb_node, 4);
-TYPE(radv_ir_instance_node, 8);
 
 TYPE(radv_global_sync_data, 4);
 
@@ -313,25 +310,6 @@ ir_type_to_bvh_type(uint32_t type)
    return RADV_BVH_INVALID_NODE;
 }
 
-radv_aabb
-calculate_instance_node_bounds(uint64_t base_ptr, mat3x4 otw_matrix)
-{
-   radv_aabb aabb;
-   radv_accel_struct_header header = 
DEREF(REF(radv_accel_struct_header)(base_ptr));
-
-   for (uint32_t comp = 0; comp < 3; ++comp) {
-      aabb.min[comp] = otw_matrix[comp][3];
-      aabb.max[comp] = otw_matrix[comp][3];
-      for (uint32_t col = 0; col < 3; ++col) {
-         aabb.min[comp] +=
-            min(otw_matrix[comp][col] * header.aabb.min[col], 
otw_matrix[comp][col] * header.aabb.max[col]);
-         aabb.max[comp] +=
-            max(otw_matrix[comp][col] * header.aabb.min[col], 
otw_matrix[comp][col] * header.aabb.max[col]);
-      }
-   }
-   return aabb;
-}
-
 float
 aabb_surface_area(radv_aabb aabb)
 {
diff --git a/src/amd/vulkan/bvh/build_interface.h 
b/src/amd/vulkan/bvh/build_interface.h
index fe988711370..2ee640d9cc5 100644
--- a/src/amd/vulkan/bvh/build_interface.h
+++ b/src/amd/vulkan/bvh/build_interface.h
@@ -33,6 +33,7 @@
 #endif
 
 struct leaf_args {
+   VOID_REF ir;
    VOID_REF bvh;
    REF(radv_ir_header) header;
    REF(key_id_pair) ids;
diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h
index 3687ccabe4e..52e06ec5db3 100644
--- a/src/amd/vulkan/bvh/bvh.h
+++ b/src/amd/vulkan/bvh/bvh.h
@@ -120,30 +120,6 @@ struct radv_ir_box_node {
    uint32_t bvh_offset;
 };
 
-struct radv_ir_aabb_node {
-   radv_ir_node base;
-   uint32_t primitive_id;
-   uint32_t geometry_id_and_flags;
-};
-
-struct radv_ir_triangle_node {
-   radv_ir_node base;
-   float coords[3][3];
-   uint32_t triangle_id;
-   uint32_t id;
-   uint32_t geometry_id_and_flags;
-};
-
-struct radv_ir_instance_node {
-   radv_ir_node base;
-   /* See radv_bvh_instance_node */
-   uint64_t base_ptr;
-   uint32_t custom_instance_and_mask;
-   uint32_t sbt_offset_and_flags;
-   mat3x4 otw_matrix;
-   uint32_t instance_id;
-};
-
 struct radv_global_sync_data {
    uint32_t task_counts[2];
    uint32_t task_started_counter;
diff --git a/src/amd/vulkan/bvh/encode.comp b/src/amd/vulkan/bvh/encode.comp
index fd06ada6516..12c03e179df 100644
--- a/src/amd/vulkan/bvh/encode.comp
+++ b/src/amd/vulkan/bvh/encode.comp
@@ -50,89 +50,26 @@ void set_parent(uint32_t child, uint32_t parent)
    DEREF(REF(uint32_t)(addr)) = parent;
 }
 
-uint32_t
-encode_sbt_offset_and_flags(uint32_t src)
-{
-   uint32_t flags = src >> 24;
-   uint32_t ret = src & 0xffffffu;
-   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
-      ret |= RADV_INSTANCE_FORCE_OPAQUE;
-   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
-      ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
-   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 
0)
-      ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
-   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
-      ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
-   return ret;
-}
-
-void
-encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node)
-{
-   switch (type) {
-   case radv_ir_node_triangle: {
-      radv_ir_triangle_node src = DEREF(REF(radv_ir_triangle_node)(src_node));
-      REF(radv_bvh_triangle_node) dst = REF(radv_bvh_triangle_node)(dst_node);
-
-      DEREF(dst).coords = src.coords;
-      DEREF(dst).triangle_id = src.triangle_id;
-      DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags;
-      DEREF(dst).id = src.id;
-      break;
-   }
-   case radv_ir_node_aabb: {
-      radv_ir_aabb_node src = DEREF(REF(radv_ir_aabb_node)(src_node));
-      REF(radv_bvh_aabb_node) dst = REF(radv_bvh_aabb_node)(dst_node);
-
-      DEREF(dst).primitive_id = src.primitive_id;
-      DEREF(dst).geometry_id_and_flags = src.geometry_id_and_flags;
-      break;
-   }
-   case radv_ir_node_instance: {
-      radv_ir_instance_node src = DEREF(REF(radv_ir_instance_node)(src_node));
-      REF(radv_bvh_instance_node) dst = REF(radv_bvh_instance_node)(dst_node);
-      uint32_t bvh_offset = 
DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
-
-      DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
-      DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
-      DEREF(dst).sbt_offset_and_flags = 
encode_sbt_offset_and_flags(src.sbt_offset_and_flags);
-      DEREF(dst).instance_id = src.instance_id;
-      DEREF(dst).bvh_offset = bvh_offset;
-
-      mat4 transform = mat4(src.otw_matrix);
-
-      mat4 inv_transform = transpose(inverse(transpose(transform)));
-      DEREF(dst).wto_matrix = mat3x4(inv_transform);
-      DEREF(dst).otw_matrix = mat3x4(transform);
-      break;
-   }
-   }
-}
-
 void
 main()
 {
    /* Revert the order so we start at the root */
    uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - 
gl_GlobalInvocationID.x;
 
-   uint32_t intermediate_leaf_node_size;
    uint32_t output_leaf_node_size;
    switch (args.geometry_type) {
    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
-      intermediate_leaf_node_size = SIZEOF(radv_ir_triangle_node);
       output_leaf_node_size = SIZEOF(radv_bvh_triangle_node);
       break;
    case VK_GEOMETRY_TYPE_AABBS_KHR:
-      intermediate_leaf_node_size = SIZEOF(radv_ir_aabb_node);
       output_leaf_node_size = SIZEOF(radv_bvh_aabb_node);
       break;
    default: /* instances */
-      intermediate_leaf_node_size = SIZEOF(radv_ir_instance_node);
       output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
       break;
    }
 
-   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * 
intermediate_leaf_node_size;
+   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * 
SIZEOF(radv_ir_node);
    uint32_t dst_leaf_offset =
       id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
    uint32_t dst_internal_offset = dst_leaf_offset + args.leaf_node_count * 
output_leaf_node_size;
@@ -233,10 +170,8 @@ main()
             REF(radv_ir_box_node) child_node = 
REF(radv_ir_box_node)OFFSET(args.intermediate_bvh, offset);
             DEREF(child_node).bvh_offset = dst_offset;
          } else {
-            uint32_t child_index = offset / intermediate_leaf_node_size;
+            uint32_t child_index = offset / SIZEOF(radv_ir_node);
             dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
-
-            encode_leaf_node(type, args.intermediate_bvh + offset, 
args.output_bvh + dst_offset);
          }
 
          radv_aabb child_aabb =
diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp
index f79ccfaf2ce..58ab8eeceec 100644
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@@ -209,7 +209,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, 
uint32_t global_id)
          vertices.vertex[i] = transform * vertices.vertex[i];
    }
 
-   REF(radv_ir_triangle_node) node = REF(radv_ir_triangle_node)(dst_ptr);
+   REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr);
 
    bounds.min = vec3(INFINITY);
    bounds.max = vec3(-INFINITY);
@@ -221,9 +221,6 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, 
uint32_t global_id)
          bounds.max[comp] = max(bounds.max[comp], 
vertices.vertex[coord][comp]);
       }
 
-   DEREF(node).base.aabb = bounds;
-   DEREF(node).base.cost = 0.0;
-
    DEREF(node).triangle_id = global_id;
    DEREF(node).geometry_id_and_flags = args.geometry_id;
    DEREF(node).id = 9;
@@ -234,7 +231,7 @@ build_triangle(inout radv_aabb bounds, VOID_REF dst_ptr, 
uint32_t global_id)
 bool
 build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, 
uint32_t global_id)
 {
-   REF(radv_ir_aabb_node) node = REF(radv_ir_aabb_node)(dst_ptr);
+   REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr);
 
    for (uint32_t vec = 0; vec < 2; vec++)
       for (uint32_t comp = 0; comp < 3; comp++) {
@@ -252,21 +249,51 @@ build_aabb(inout radv_aabb bounds, VOID_REF src_ptr, 
VOID_REF dst_ptr, uint32_t
    if (isnan(bounds.min.x))
       return false;
 
-   DEREF(node).base.aabb = bounds;
-   DEREF(node).base.cost = 0.0;
    DEREF(node).primitive_id = global_id;
    DEREF(node).geometry_id_and_flags = args.geometry_id;
 
    return true;
 }
 
+radv_aabb
+calculate_instance_node_bounds(radv_accel_struct_header header, mat3x4 
otw_matrix)
+{
+   radv_aabb aabb;
+   for (uint32_t comp = 0; comp < 3; ++comp) {
+      aabb.min[comp] = otw_matrix[comp][3];
+      aabb.max[comp] = otw_matrix[comp][3];
+      for (uint32_t col = 0; col < 3; ++col) {
+         aabb.min[comp] +=
+            min(otw_matrix[comp][col] * header.aabb.min[col], 
otw_matrix[comp][col] * header.aabb.max[col]);
+         aabb.max[comp] +=
+            max(otw_matrix[comp][col] * header.aabb.min[col], 
otw_matrix[comp][col] * header.aabb.max[col]);
+      }
+   }
+   return aabb;
+}
+
+uint32_t
+encode_sbt_offset_and_flags(uint32_t src)
+{
+   uint32_t flags = src >> 24;
+   uint32_t ret = src & 0xffffffu;
+   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
+      ret |= RADV_INSTANCE_FORCE_OPAQUE;
+   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
+      ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
+   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 
0)
+      ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
+   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
+      ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
+   return ret;
+}
+
 bool
 build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, 
uint32_t global_id)
 {
-   REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
+   REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr);
 
    AccelerationStructureInstance instance = 
DEREF(REF(AccelerationStructureInstance)(src_ptr));
-   DEREF(node).base_ptr = instance.accelerationStructureReference;
 
    /* An inactive instance is one whose acceleration structure handle is 
VK_NULL_HANDLE. Since the active terminology is
     * only relevant for BVH updates, which we do not implement, we can also 
skip instances with mask == 0.
@@ -274,20 +301,23 @@ build_instance(inout radv_aabb bounds, VOID_REF src_ptr, 
VOID_REF dst_ptr, uint3
    if (instance.accelerationStructureReference == 0 || 
instance.custom_instance_and_mask < (1u << 24u))
       return false;
 
-   DEREF(node).otw_matrix = instance.transform;
-
    radv_accel_struct_header instance_header =
       
DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
 
-   bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, 
DEREF(node).otw_matrix);
+   DEREF(node).bvh_ptr = addr_to_node(instance.accelerationStructureReference 
+ instance_header.bvh_offset);
+   DEREF(node).bvh_offset = instance_header.bvh_offset;
+
+   mat4 transform = mat4(instance.transform);
+   mat4 inv_transform = transpose(inverse(transpose(transform)));
+   DEREF(node).wto_matrix = mat3x4(inv_transform);
+   DEREF(node).otw_matrix = mat3x4(transform);
+
+   bounds = calculate_instance_node_bounds(instance_header, mat3x4(transform));
 
    DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask;
-   DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
+   DEREF(node).sbt_offset_and_flags = 
encode_sbt_offset_and_flags(instance.sbt_offset_and_flags);
    DEREF(node).instance_id = global_id;
 
-   DEREF(node).base.aabb = bounds;
-   DEREF(node).base.cost = 0.0;
-
    return true;
 }
 
@@ -303,13 +333,13 @@ main(void)
    uint32_t dst_stride;
    uint32_t node_type;
    if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
-      dst_stride = SIZEOF(radv_ir_triangle_node);
+      dst_stride = SIZEOF(radv_bvh_triangle_node);
       node_type = radv_ir_node_triangle;
    } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
-      dst_stride = SIZEOF(radv_ir_aabb_node);
+      dst_stride = SIZEOF(radv_bvh_aabb_node);
       node_type = radv_ir_node_aabb;
    } else {
-      dst_stride = SIZEOF(radv_ir_instance_node);
+      dst_stride = SIZEOF(radv_bvh_instance_node);
       node_type = radv_ir_node_instance;
    }
 
@@ -333,7 +363,14 @@ main(void)
       is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
    }
 
-   DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : 
RADV_BVH_INVALID_NODE;
+   if (is_active) {
+      REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
+      DEREF(ir_node).aabb = bounds;
+      DEREF(ir_node).cost = 0.0;
+   }
+
+   uint32_t ir_offset = primitive_id * SIZEOF(radv_ir_node);
+   DEREF(id_ptr).id = is_active ? pack_ir_node_id(ir_offset, node_type) : 
RADV_BVH_INVALID_NODE;
 
    uvec4 ballot = subgroupBallot(is_active);
    if (subgroupElect())
diff --git a/src/amd/vulkan/radv_acceleration_structure.c 
b/src/amd/vulkan/radv_acceleration_structure.c
index d37c552fe89..10f461956f6 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -157,18 +157,14 @@ get_build_layout(struct radv_device *device, uint32_t 
leaf_count,
    }
 
    uint32_t bvh_leaf_size;
-   uint32_t ir_leaf_size;
    switch (geometry_type) {
    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
-      ir_leaf_size = sizeof(struct radv_ir_triangle_node);
       bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
       break;
    case VK_GEOMETRY_TYPE_AABBS_KHR:
-      ir_leaf_size = sizeof(struct radv_ir_aabb_node);
       bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
       break;
    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
-      ir_leaf_size = sizeof(struct radv_ir_instance_node);
       bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
       break;
    default:
@@ -242,7 +238,7 @@ get_build_layout(struct radv_device *device, uint32_t 
leaf_count,
       offset += MAX3(requirements.internal_size, ploc_scratch_space, 
lbvh_node_space);
 
       scratch->ir_offset = offset;
-      offset += ir_leaf_size * leaf_count;
+      offset += sizeof(struct radv_ir_node) * leaf_count;
 
       scratch->internal_node_offset = offset;
       offset += sizeof(struct radv_ir_box_node) * internal_count;
@@ -639,8 +635,11 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t 
infoCount,
    radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                         
cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
    for (uint32_t i = 0; i < infoCount; ++i) {
+      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, 
pInfos[i].dstAccelerationStructure);
+
       struct leaf_args leaf_consts = {
-         .bvh = pInfos[i].scratchData.deviceAddress + 
bvh_states[i].scratch.ir_offset,
+         .ir = pInfos[i].scratchData.deviceAddress + 
bvh_states[i].scratch.ir_offset,
+         .bvh = vk_acceleration_structure_get_va(accel_struct) + 
bvh_states[i].accel_struct.leaf_nodes_offset,
          .header = pInfos[i].scratchData.deviceAddress + 
bvh_states[i].scratch.header_offset,
          .ids = pInfos[i].scratchData.deviceAddress + 
bvh_states[i].scratch.sort_buffer_offset[0],
       };

Reply via email to