Module: Mesa
Branch: main
Commit: 0b55a3cf644dea132dbeb3294ead904effb2bb0f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0b55a3cf644dea132dbeb3294ead904effb2bb0f

Author: Friedrich Vock <[email protected]>
Date:   Sun Dec 17 22:45:03 2023 +0100

radv/rt: Acceleration structure updates
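
Implement acceleration structure updates (VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR)
for bottom-level acceleration structures: a new update.comp shader rebuilds the
leaf nodes and propagates the refitted bounds bottom-up through the existing
tree, using atomic ready counts in scratch memory so that each internal node is
refitted exactly once, after all of its children are done.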

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26729>

---

 src/amd/vulkan/bvh/build_interface.h         |  10 ++
 src/amd/vulkan/bvh/meson.build               |   5 +
 src/amd/vulkan/bvh/update.comp               | 163 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_acceleration_structure.c | 154 ++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h                |   2 +
 5 files changed, 317 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h
index 09c49230537..c89686ffd4e 100644
--- a/src/amd/vulkan/bvh/build_interface.h
+++ b/src/amd/vulkan/bvh/build_interface.h
@@ -118,4 +118,14 @@ struct header_args {
    uint32_t instance_count;
 };
 
+struct update_args {
+   REF(radv_accel_struct_header) src;
+   REF(radv_accel_struct_header) dst;
+   REF(radv_aabb) leaf_bounds;
+   REF(uint32_t) internal_ready_count;
+   uint32_t leaf_node_count;
+
+   radv_bvh_geometry_data geom_data;
+};
+
 #endif
diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build
index 17af9fba496..017a0bfc28e 100644
--- a/src/amd/vulkan/bvh/meson.build
+++ b/src/amd/vulkan/bvh/meson.build
@@ -65,6 +65,11 @@ bvh_shaders = [
     'ploc_internal',
     [],
   ],
+  [
+    'update.comp',
+    'update',
+    [],
+  ],
 ]
 
 bvh_include_dir = dir_source_root + '/src/amd/vulkan/bvh'
diff --git a/src/amd/vulkan/bvh/update.comp b/src/amd/vulkan/bvh/update.comp
new file mode 100644
index 00000000000..905f807ebe6
--- /dev/null
+++ b/src/amd/vulkan/bvh/update.comp
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2023 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_KHR_memory_scope_semantics : require
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+#include "build_interface.h"
+
+layout(push_constant) uniform CONSTS {
+    update_args args;
+};
+
+uint32_t fetch_parent_node(VOID_REF bvh, uint32_t node)
+{
+    uint64_t addr = bvh - node / 8 * 4 - 4;
+    return DEREF(REF(uint32_t)(addr));
+}
+
+void main() {
+    uint32_t bvh_offset = DEREF(args.src).bvh_offset;
+
+    VOID_REF src_bvh = OFFSET(args.src, bvh_offset);
+    VOID_REF dst_bvh = OFFSET(args.dst, bvh_offset);
+
+    uint32_t leaf_node_size;
+    if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
+        leaf_node_size = SIZEOF(radv_bvh_triangle_node);
+    else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR)
+        leaf_node_size = SIZEOF(radv_bvh_aabb_node);
+    else
+        leaf_node_size = SIZEOF(radv_bvh_instance_node);
+
+    uint32_t leaf_node_id = args.geom_data.first_id + gl_GlobalInvocationID.x;
+    uint32_t first_leaf_offset = id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
+
+    uint32_t dst_offset = leaf_node_id * leaf_node_size + first_leaf_offset;
+    VOID_REF dst_ptr = OFFSET(dst_bvh, dst_offset);
+    uint32_t src_offset = gl_GlobalInvocationID.x * args.geom_data.stride;
+
+    radv_aabb bounds;
+    bool is_active;
+    if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
+        is_active = build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x);
+    } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
+        VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
+        is_active = build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x);
+    } else {
+        VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
+        /* arrayOfPointers */
+        if (args.geom_data.stride == 8) {
+            src_ptr = DEREF(REF(VOID_REF)(src_ptr));
+        }
+
+        is_active = build_instance(bounds, src_ptr, dst_ptr, gl_GlobalInvocationID.x);
+    }
+
+    if (!is_active)
+        return;
+
+    DEREF(INDEX(radv_aabb, args.leaf_bounds, leaf_node_id)) = bounds;
+    memoryBarrier(gl_ScopeDevice,
+        gl_StorageSemanticsBuffer,
+        gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+
+    uint32_t node_id = pack_node_id(dst_offset, 0);
+    uint32_t parent_id = fetch_parent_node(src_bvh, node_id);
+    uint32_t internal_nodes_offset = first_leaf_offset + args.leaf_node_count * leaf_node_size;
+    while (parent_id != RADV_BVH_INVALID_NODE) {
+        uint32_t offset = id_to_offset(parent_id);
+
+        uint32_t parent_index = (offset - internal_nodes_offset) / SIZEOF(radv_bvh_box32_node) + 1;
+        if (parent_id == RADV_BVH_ROOT_NODE)
+            parent_index = 0;
+
+        /* Make accesses to internal nodes in dst_bvh available and visible */
+        memoryBarrier(gl_ScopeDevice,
+                      gl_StorageSemanticsBuffer,
+                      gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+
+        radv_bvh_box32_node node = DEREF(REF(radv_bvh_box32_node)OFFSET(src_bvh, offset));
+        uint32_t valid_child_count = 0;
+        for (uint32_t i = 0; i < 4; ++valid_child_count, ++i)
+            if (node.children[i] == RADV_BVH_INVALID_NODE)
+                break;
+
+        /* Check if all children have been processed. As this is an atomic, the last path coming from
+         * a child will pass here, while earlier paths break.
+         */
+        uint32_t ready_child_count = atomicAdd(
+            DEREF(INDEX(uint32_t, args.internal_ready_count, parent_index)), 1, gl_ScopeDevice,
+            gl_StorageSemanticsBuffer,
+            gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+
+        if (ready_child_count != valid_child_count - 1)
+            break;
+
+        for (uint32_t i = 0; i < valid_child_count; ++i) {
+            uint32_t child_offset = id_to_offset(node.children[i]);
+            if (child_offset == dst_offset)
+                node.coords[i] = bounds;
+            else if (child_offset >= internal_nodes_offset) {
+                radv_aabb child_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY));
+                radv_bvh_box32_node child_node = DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, child_offset));
+                for (uint32_t j = 0; j < 4; ++j) {
+                    if (child_node.children[j] == RADV_BVH_INVALID_NODE)
+                        break;
+                    child_bounds.min = min(child_bounds.min, child_node.coords[j].min);
+                    child_bounds.max = max(child_bounds.max, child_node.coords[j].max);
+                }
+                node.coords[i] = child_bounds;
+            } else {
+                uint32_t child_index = (child_offset - first_leaf_offset) / leaf_node_size;
+                node.coords[i] = DEREF(INDEX(radv_aabb, args.leaf_bounds, child_index));
+            }
+        }
+
+        DEREF(REF(radv_bvh_box32_node)OFFSET(dst_bvh, offset)) = node;
+
+        if (parent_id == RADV_BVH_ROOT_NODE) {
+            radv_aabb root_bounds = radv_aabb(vec3(INFINITY), vec3(-INFINITY));
+            for (uint32_t i = 0; i < valid_child_count; ++i) {
+                root_bounds.min = min(root_bounds.min, node.coords[i].min);
+                root_bounds.max = max(root_bounds.max, node.coords[i].max);
+            }
+            DEREF(args.dst).aabb = root_bounds;
+        }
+
+        parent_id = fetch_parent_node(src_bvh, parent_id);
+    }
+}
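
The key mechanism in update.comp above is the bottom-up refit: every invocation
writes its leaf's new bounds, then climbs toward the root, and an atomicAdd on a
per-node ready count guarantees that only the last child to arrive refits the
parent. A standalone C sketch of the same pattern (hypothetical names, C11
atomics, not RADV code) may make it easier to follow:

#include <stdatomic.h>
#include <stdint.h>

struct node {
   int32_t parent;       /* -1 for the root */
   uint32_t child_count; /* number of valid children */
   /* child links and bounds omitted */
};

/* Called once per leaf after its new bounds have been written. */
static void refit_from_leaf(struct node *nodes, atomic_uint *ready_count, uint32_t leaf)
{
   int32_t parent = nodes[leaf].parent;
   while (parent != -1) {
      /* Counters start at zero; fetch_add returns the previous value. */
      uint32_t prev = atomic_fetch_add(&ready_count[parent], 1);

      /* Every path except the last stops here, matching the
       * "ready_child_count != valid_child_count - 1" break above. */
      if (prev != nodes[parent].child_count - 1)
         return;

      /* recompute_bounds(nodes, parent);  -- merge the child AABBs here */
      parent = nodes[parent].parent;
   }
}

Because fetch_add returns the previous value, exactly one worker per internal
node observes child_count - 1, so each node is written once and only after all
of its children's bounds are final; this is why the update needs no separate
per-level passes.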
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 1b1dfcd5d83..0b2b82a660b 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -73,12 +73,17 @@ static const uint32_t header_spv[] = {
 #include "bvh/header.spv.h"
 };
 
+static const uint32_t update_spv[] = {
+#include "bvh/update.spv.h"
+};
+
 #define KEY_ID_PAIR_SIZE 8
 #define MORTON_BIT_SIZE  24
 
 enum internal_build_type {
    INTERNAL_BUILD_TYPE_LBVH,
    INTERNAL_BUILD_TYPE_PLOC,
+   INTERNAL_BUILD_TYPE_UPDATE,
 };
 
 struct build_config {
@@ -96,9 +101,16 @@ struct acceleration_structure_layout {
 
 struct scratch_layout {
    uint32_t size;
+   uint32_t update_size;
 
    uint32_t header_offset;
 
+   /* Used for UPDATE only. */
+
+   uint32_t internal_ready_count_offset;
+
+   /* Used for BUILD only. */
+
    uint32_t sort_buffer_offset[2];
    uint32_t sort_internal_offset;
 
@@ -124,6 +136,10 @@ build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfo
    else
       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
 
+   if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
+       build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
+      config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
+
   if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
       config.compact = true;
 
@@ -234,6 +250,18 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,
       offset += sizeof(struct radv_ir_box_node) * internal_count;
 
       scratch->size = offset;
+
+      if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
+         uint32_t update_offset = 0;
+
+         update_offset += sizeof(radv_aabb) * leaf_count;
+         scratch->internal_ready_count_offset = update_offset;
+
+         update_offset += sizeof(uint32_t) * internal_count;
+         scratch->update_size = update_offset;
+      } else {
+         scratch->update_size = offset;
+      }
    }
 }
 
@@ -260,7 +288,7 @@ radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStruc
    get_build_layout(device, leaf_count, pBuildInfo, &accel_struct, &scratch);
 
    pSizeInfo->accelerationStructureSize = accel_struct.size;
-   pSizeInfo->updateScratchSize = scratch.size;
+   pSizeInfo->updateScratchSize = scratch.update_size;
    pSizeInfo->buildScratchSize = scratch.size;
 }
 
@@ -305,6 +333,7 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
                         &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.header_pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline, &state->alloc);
+   radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.update_pipeline, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.copy_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.ploc_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.lbvh_generate_ir_p_layout,
@@ -315,6 +344,7 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.encode_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.header_p_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.morton_p_layout, &state->alloc);
+   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.update_p_layout, &state->alloc);
 
    if (state->accel_struct_build.radix_sort)
      radix_sort_vk_destroy(state->accel_struct_build.radix_sort, radv_device_to_handle(device), &state->alloc);
@@ -558,6 +588,12 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto exit;
 
+   result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
+                                      &device->meta_state.accel_struct_build.update_pipeline,
+                                      &device->meta_state.accel_struct_build.update_p_layout);
+   if (result != VK_SUCCESS)
+      goto exit;
+
    device->meta_state.accel_struct_build.radix_sort =
      radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
 exit:
@@ -673,6 +709,9 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);
    for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
+
      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
 
       struct leaf_args leaf_consts = {
@@ -712,6 +751,8 @@ morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
                        cmd_buffer->device->meta_state.accel_struct_build.morton_pipeline);
 
    for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
       const struct morton_args consts = {
         .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
         .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
@@ -795,6 +836,8 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (!bvh_states[i].node_count)
          continue;
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
 
      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
@@ -843,6 +886,8 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (!bvh_states[i].node_count)
          continue;
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
 
      uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
@@ -872,6 +917,8 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (!bvh_states[i].node_count)
          continue;
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
 
      uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
 
@@ -920,6 +967,8 @@ morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
       for (uint32_t i = 0; i < infoCount; i++) {
          if (!bvh_states[i].node_count)
             continue;
+         if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+            continue;
 
         bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
 
@@ -1041,6 +1090,8 @@ encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
    for (uint32_t i = 0; i < infoCount; ++i) {
       if (compact != bvh_states[i].config.compact)
          continue;
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
 
      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
 
@@ -1093,6 +1144,8 @@ init_header(VkCommandBuffer commandBuffer, uint32_t infoCount,
                        cmd_buffer->device->meta_state.accel_struct_build.header_pipeline);
 
    for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
       size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
 
@@ -1146,6 +1199,8 @@ init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
                    const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
 {
    for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
      RADV_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
 
      uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
@@ -1170,6 +1225,51 @@ init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
    }
 }
 
+static void
+update(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
+       const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+                        cmd_buffer->device->meta_state.accel_struct_build.update_pipeline);
+   for (uint32_t i = 0; i < infoCount; ++i) {
+      if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE)
+         continue;
+
+      uint32_t leaf_node_count = 0;
+      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
+         leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
+      }
+
+      VK_FROM_HANDLE(vk_acceleration_structure, src_bvh, pInfos[i].srcAccelerationStructure);
+      VK_FROM_HANDLE(vk_acceleration_structure, dst_bvh, pInfos[i].dstAccelerationStructure);
+      struct update_args update_consts = {
+         .src = vk_acceleration_structure_get_va(src_bvh),
+         .dst = vk_acceleration_structure_get_va(dst_bvh),
+         .leaf_bounds = pInfos[i].scratchData.deviceAddress,
+         .internal_ready_count =
+            pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
+         .leaf_node_count = leaf_node_count,
+      };
+
+      for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
+         const VkAccelerationStructureGeometryKHR *geom =
+            pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
+
+         const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
+
+         update_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
+
+         vk_common_CmdPushConstants(commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.update_p_layout,
+                                    VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
+         radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
+
+         bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
+         bvh_states[i].node_count += build_range_info->primitiveCount;
+      }
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                        const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
@@ -1203,23 +1303,41 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
                        &bvh_states[i].scratch);
       bvh_states[i].config = build_config(leaf_node_count, pInfos + i);
 
-      /* The internal node count is updated in lbvh_build_internal for LBVH
-       * and from the PLOC shader for PLOC. */
-      struct radv_ir_header header = {
-         .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
-         .max_bounds = {0x80000000, 0x80000000, 0x80000000},
-         .dispatch_size_y = 1,
-         .dispatch_size_z = 1,
-         .sync_data =
-            {
-               .current_phase_end_counter = TASK_INDEX_INVALID,
-               /* Will be updated by the first PLOC shader invocation */
-               .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
-            },
-      };
+      if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
+         /* The internal node count is updated in lbvh_build_internal for LBVH
+          * and from the PLOC shader for PLOC. */
+         struct radv_ir_header header = {
+            .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
+            .max_bounds = {0x80000000, 0x80000000, 0x80000000},
+            .dispatch_size_y = 1,
+            .dispatch_size_z = 1,
+            .sync_data =
+               {
+                  .current_phase_end_counter = TASK_INDEX_INVALID,
+                  /* Will be updated by the first PLOC shader invocation */
+                  .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
+               },
+         };
 
-      radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
-                            &header, sizeof(header));
+         radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
+                               &header, sizeof(header));
+      } else {
+         /* Prepare ready counts for internal nodes */
+         radv_fill_buffer(cmd_buffer, NULL, NULL,
+                          pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
+                          bvh_states[i].scratch.update_size - bvh_states[i].scratch.internal_ready_count_offset, 0x0);
+         if (pInfos[i].srcAccelerationStructure != pInfos[i].dstAccelerationStructure) {
+            VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
+            VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);
+
+            RADV_FROM_HANDLE(radv_buffer, src_as_buffer, src_as->buffer);
+            RADV_FROM_HANDLE(radv_buffer, dst_as_buffer, dst_as->buffer);
+
+            /* Copy header/metadata */
+            radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src_as->offset,
+                             dst_as_buffer->offset + dst_as->offset, bvh_states[i].accel_struct.bvh_offset);
+         }
+      }
    }
 
   build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, flush_bits);
@@ -1246,6 +1364,8 @@ radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t i
    if (cmd_buffer->device->rra_trace.accel_structs)
      init_geometry_infos(commandBuffer, infoCount, pInfos, bvh_states, ppBuildRangeInfos);
 
+   update(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states);
+
    free(bvh_states);
    radv_meta_restore(&saved_state, cmd_buffer);
 }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 367eba65632..a73d278f643 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -662,6 +662,8 @@ struct radv_meta_state {
       VkPipeline encode_compact_pipeline;
       VkPipelineLayout header_p_layout;
       VkPipeline header_pipeline;
+      VkPipelineLayout update_p_layout;
+      VkPipeline update_pipeline;
       VkPipelineLayout copy_p_layout;
       VkPipeline copy_pipeline;
 
