On 10/29/2022 4:17 AM, Felix Kuehling wrote:
> On 2022-10-27 04:14, Ma Jun wrote:
>> For some GPUs with more CUs, the original sibling_map[32]
>>
>> in struct crat_subtype_cache is not enough
>>
>> to save the cache information when create the VCRAT table,
>>
>> so skip filling the struct crat_subtype_cache info and instead
>>
>> fill struct kfd_cache_properties directly to fix this problem.
>>
>> v3:
>> - Drop processor id calc function
>> v2:
>> - Remove unnecessary sys interface "cache_ext"
>>
>> Signed-off-by: Ma Jun <[email protected]>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c     | 307 +++-------------------
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.h     |  12 +
>>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 238 ++++++++++++++++-
>>   drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   5 +-
>>   4 files changed, 278 insertions(+), 284 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index d25ac9cbe5b2..8b7e34b45740 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> [snip]
>> +int get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info 
>> **pcache_info)
>>   {
>> -    struct kfd_gpu_cache_info *pcache_info;
>>      struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
>>      int num_of_cache_types = 0;
>> -    int i, j, k;
>> -    int ct = 0;
>> -    int mem_available = available_size;
>> -    unsigned int cu_processor_id;
>> -    int ret;
>> -    unsigned int num_cu_shared;
>>   
>>      switch (kdev->adev->asic_type) {
> [snip]
>>
>>      default:
>>              switch (KFD_GC_VERSION(kdev)) {
> [snip]
>>              case IP_VERSION(11, 0, 0):
>>              case IP_VERSION(11, 0, 1):
>>              case IP_VERSION(11, 0, 2):
>>              case IP_VERSION(11, 0, 3):
>> -                    pcache_info = cache_info;
>> +                    *pcache_info = cache_info;
> 
> This won't work. cache_info is a local variable. It will be out of scope 
> as soon as this function returns. You'll need to allocate this in some 
> data structure that will persist after the function returns. Maybe 
> expect the caller to pass in a pointer to an array in their own stack frame.

Yes, this is my mistake. Will fix it in the next version.

> 
> 
>>                      num_of_cache_types =
>> -                            kfd_fill_gpu_cache_info_from_gfx_config(kdev, 
>> pcache_info);
>> +                            kfd_fill_gpu_cache_info_from_gfx_config(kdev, 
>> *pcache_info);
> [snip]
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> index e0680d265a66..dc231e248258 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> 
> [snip]
> 
>  > int kfd_topology_add_device(struct kfd_dev *gpu)
>>   {
>>      uint32_t gpu_id;
>> @@ -1759,6 +1970,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>                      topology_crat_proximity_domain--;
>>                      return res;
>>              }
>> +
>>              res = kfd_parse_crat_table(crat_image,
>>                                         &temp_topology_device_list,
>>                                         proximity_domain);
>> @@ -1771,23 +1983,31 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>   
>>              kfd_topology_update_device_list(&temp_topology_device_list,
>>                      &topology_device_list);
>> +            up_write(&topology_lock);
> 
> I'm not sure if dropping and re-taking the topology lock here could lead 
> to race conditions. But this could be avoided, if you moved the 
> responsibility for topology locking out of kfd_assign_gpu and into the 
> caller (kfd_topology_add_device).

Thanks for the review. Will fix in the next version.

Regards,
Ma Jun
> 
> Regards,
>    Felix
> 
> 
>> +
>> +            dev = kfd_assign_gpu(gpu);
>> +            if (WARN_ON(!dev)) {
>> +                    res = -ENODEV;
>> +                    goto err;
>> +            }
>> +
>> +            down_write(&topology_lock);
>> +
>> +            /* Fill the cache affinity information here for the GPUs
>> +             * using VCRAT
>> +             */
>> +            kfd_fill_cache_non_crat_info(dev, gpu);
>>   
>>              /* Update the SYSFS tree, since we added another topology
>>               * device
>>               */
>>              res = kfd_topology_update_sysfs();
>>              up_write(&topology_lock);
>> -
>>              if (!res)
>>                      sys_props.generation_count++;
>>              else
>>                      pr_err("Failed to update GPU (ID: 0x%x) to sysfs 
>> topology. res=%d\n",
>>                                              gpu_id, res);
>> -            dev = kfd_assign_gpu(gpu);
>> -            if (WARN_ON(!dev)) {
>> -                    res = -ENODEV;
>> -                    goto err;
>> -            }
>>      }
>>   
>>      dev->gpu_id = gpu_id;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> index dc4e239c8f8f..3e8ac87f0ac9 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>> @@ -87,6 +87,8 @@ struct kfd_mem_properties {
>>      struct attribute        attr_used;
>>   };
>>   
>> +#define CACHE_SIBLINGMAP_SIZE 64
>> +
>>   struct kfd_cache_properties {
>>      struct list_head        list;
>>      uint32_t                processor_id_low;
>> @@ -97,10 +99,11 @@ struct kfd_cache_properties {
>>      uint32_t                cache_assoc;
>>      uint32_t                cache_latency;
>>      uint32_t                cache_type;
>> -    uint8_t                 sibling_map[CRAT_SIBLINGMAP_SIZE];
>> +    uint8_t                 sibling_map[CACHE_SIBLINGMAP_SIZE];
>>      struct kfd_dev          *gpu;
>>      struct kobject          *kobj;
>>      struct attribute        attr;
>> +    uint32_t                sibling_map_size;
>>   };
>>   
>>   struct kfd_iolink_properties {

Reply via email to