This patch adds recommended SDMA engines with limited XGMI SDMA engines. It will help improve overall performance for device to device copies with this optimization.
Signed-off-by: Shane Xiao <[email protected]> Suggested-by: Jonathan Kim <[email protected]> --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 42 ++++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9bbee484d57c..3835b5d96355 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1267,33 +1267,43 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, { struct kfd_node *gpu = outbound_link->gpu; struct amdgpu_device *adev = gpu->adev; - int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); + unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); + unsigned int sdma_eng_id_mask = (1 << num_sdma_engines) - 1; + unsigned int xgmi_sdma_eng_id_mask = + ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && - kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && - (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && + num_xgmi_nodes == 8); if (support_rec_eng) { int src_socket_id = adev->gmc.xgmi.physical_node_id; int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0; outbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift); inbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; - } else { - int num_sdma_eng = kfd_get_num_sdma_engines(gpu); - int i, eng_offset = 0; + 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift); - if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { - eng_offset = num_sdma_eng; - num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + /* If recommended engine is out of range, need to reset the mask */ + if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) { + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; } - - for (i = 0; i < num_sdma_eng; i++) { - outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) { + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + } + } else { + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + num_xgmi_sdma_engines && to_dev->gpu) { + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + } else { + outbound_link->rec_sdma_eng_id_mask = sdma_eng_id_mask; + inbound_link->rec_sdma_eng_id_mask = sdma_eng_id_mask; } } } -- 2.25.1
