On 2021-06-11 1:55 p.m., philip yang wrote:
>
>
> On 2021-06-10 7:29 p.m., Felix Kuehling wrote:
>> When some GPUs don't support SVM, don't disable it for the entire process.
>> That would be inconsistent with the information the process got from the
>> topology, which indicates SVM support per GPU.
>>
>> Instead disable SVM support only for the unsupported GPUs. This is done
>> by checking any per-device attributes against the bitmap of supported
>> GPUs. Also use the supported GPU bitmap to initialize access bitmaps for
>> new SVM address ranges.
>>
>> Don't handle recoverable page faults from unsupported GPUs. (I don't
>> think there will be unsupported GPUs that can generate recoverable page
>> faults. But better safe than sorry.)
>>
>> Signed-off-by: Felix Kuehling <[email protected]>
>
> It's a smart way to handle a mix of SVM-supporting and non-supporting
> GPUs on one system.
>
> One suggestion inline. With or without the suggested change, this is
>
> Reviewed-by: Philip Yang <[email protected]>
>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     |  3 --
>>  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |  4 --
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c     |  1 -
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c         | 55 ++++++++++++--------
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h         |  7 +++
>>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c    |  6 +--
>>  7 files changed, 44 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 5788a4656fa1..67541c30327a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1797,9 +1797,6 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>>      struct kfd_ioctl_svm_args *args = data;
>>      int r = 0;
>>  
>> -    if (p->svm_disabled)
>> -            return -EPERM;
>> -
>>      pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
>>               args->start_addr, args->size, args->op, args->nattr);
>>  
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> index 91c50739b756..a9b329f0f862 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> @@ -405,10 +405,6 @@ int kfd_init_apertures(struct kfd_process *process)
>>                      case CHIP_POLARIS12:
>>                      case CHIP_VEGAM:
>>                              kfd_init_apertures_vi(pdd, id);
>> -                            /* VI GPUs cannot support SVM with only
>> -                             * 40 bits of virtual address space.
>> -                             */
>> -                            process->svm_disabled = true;
>>                              break;
>>                      case CHIP_VEGA10:
>>                      case CHIP_VEGA12:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 329684ee5d6e..6dc22fa1e555 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -743,6 +743,7 @@ struct svm_range_list {
>>      spinlock_t                      deferred_list_lock;
>>      atomic_t                        evicted_ranges;
>>      struct delayed_work             restore_work;
>> +    DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
>>  };
>>  
>>  /* Process data */
>> @@ -826,7 +827,6 @@ struct kfd_process {
>>  
>>      /* shared virtual memory registered by this process */
>>      struct svm_range_list svms;
>> -    bool svm_disabled;
>>  
>>      bool xnack_enabled;
>>  };
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index f1f40bba5c60..09b98a83f670 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1260,7 +1260,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
>>      process->mm = thread->mm;
>>      process->lead_thread = thread->group_leader;
>>      process->n_pdds = 0;
>> -    process->svm_disabled = false;
>>      INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
>>      INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
>>      process->last_restore_timestamp = get_jiffies_64();
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 0f18bd0dc64e..420ca667be32 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -281,7 +281,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>>  
>>      p = container_of(svms, struct kfd_process, svms);
>>      if (p->xnack_enabled)
>> -            bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
>> +            bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
>> +                        MAX_GPU_INSTANCE);
>>  
>>      svm_range_set_default_attributes(&prange->preferred_loc,
>>                                       &prange->prefetch_loc,
>> @@ -580,33 +581,25 @@ svm_range_check_attr(struct kfd_process *p,
>>      int gpuidx;
>>  
>>      for (i = 0; i < nattr; i++) {
>
> While we are here, maybe use a local variable to shorten the two-line
> kfd_process_gpuidx_from_gpuid calls into one line:
>
> uint32_t val = attrs[i].value;
>
That's a good suggestion. I should also move the gpuidx declaration
inside the loop. I'll fix those two things before I submit.
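
Roughly what I have in mind (untested sketch, just to illustrate the local
variable and moving the gpuidx declaration into the loop; the remaining
attribute cases stay as in the patch):

        for (i = 0; i < nattr; i++) {
                uint32_t val = attrs[i].value;
                int gpuidx = MAX_GPU_INSTANCE;

                switch (attrs[i].type) {
                case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
                        if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
                            val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
                                gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
                        break;
                case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
                        if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
                                gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
                        break;
                case KFD_IOCTL_SVM_ATTR_ACCESS:
                case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
                case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
                        gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
                        break;
                /* other attribute types don't reference a GPU ID */
                }

                /* Reject GPUs that don't exist or don't support SVM */
                if (gpuidx < 0) {
                        pr_debug("no GPU 0x%x found\n", val);
                        return -EINVAL;
                } else if (gpuidx < MAX_GPU_INSTANCE &&
                           !test_bit(gpuidx, p->svms.bitmap_supported)) {
                        pr_debug("GPU 0x%x not supported\n", val);
                        return -EINVAL;
                }
        }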

Thanks,
  Felix


>> +            gpuidx = MAX_GPU_INSTANCE;
>> +
>>              switch (attrs[i].type) {
>>              case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
>>                      if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
>> -                        attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
>> -                        kfd_process_gpuidx_from_gpuid(p,
>> -                                                      attrs[i].value) < 0) {
>> -                            pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -                            return -EINVAL;
>> -                    }
>> +                        attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
>> +                            gpuidx = kfd_process_gpuidx_from_gpuid(p,
>> +                                                           attrs[i].value);
>>                      break;
>>              case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
>> -                    if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
>> -                        kfd_process_gpuidx_from_gpuid(p,
>> -                                                      attrs[i].value) < 0) {
>> -                            pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -                            return -EINVAL;
>> -                    }
>> +                    if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM)
>> +                            gpuidx = kfd_process_gpuidx_from_gpuid(p,
>> +                                                           attrs[i].value);
>>                      break;
>>              case KFD_IOCTL_SVM_ATTR_ACCESS:
>>              case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>              case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>>                      gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>                                                             attrs[i].value);
>> -                    if (gpuidx < 0) {
>> -                            pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -                            return -EINVAL;
>> -                    }
>>                      break;
>>              case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>>                      break;
>> @@ -618,6 +611,15 @@ svm_range_check_attr(struct kfd_process *p,
>>                      pr_debug("unknown attr type 0x%x\n", attrs[i].type);
>>                      return -EINVAL;
>>              }
>> +
>> +            if (gpuidx < 0) {
>> +                    pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> +                    return -EINVAL;
>> +            } else if (gpuidx < MAX_GPU_INSTANCE &&
>> +                       !test_bit(gpuidx, p->svms.bitmap_supported)) {
>> +                    pr_debug("GPU 0x%x not supported\n", attrs[i].value);
>> +                    return -EINVAL;
>> +            }
>>      }
>>  
>>      return 0;
>> @@ -1855,7 +1857,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
>>  
>>      p = container_of(svms, struct kfd_process, svms);
>>  
>> -    for (i = 0; i < p->n_pdds; i++) {
>> +    for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
>>              pdd = p->pdds[i];
>>              if (!pdd)
>>                      continue;
>> @@ -2325,6 +2327,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>      bool write_locked = false;
>>      int r = 0;
>>  
>> +    if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
>> +            pr_debug("device does not support SVM\n");
>> +            return -EFAULT;
>> +    }
>> +
>>      p = kfd_lookup_process_by_pasid(pasid);
>>      if (!p) {
>>              pr_debug("kfd process not founded pasid 0x%x\n", pasid);
>> @@ -2472,6 +2479,7 @@ void svm_range_list_fini(struct kfd_process *p)
>>  int svm_range_list_init(struct kfd_process *p)
>>  {
>>      struct svm_range_list *svms = &p->svms;
>> +    int i;
>>  
>>      svms->objects = RB_ROOT_CACHED;
>>      mutex_init(&svms->lock);
>> @@ -2482,6 +2490,10 @@ int svm_range_list_init(struct kfd_process *p)
>>      INIT_LIST_HEAD(&svms->deferred_range_list);
>>      spin_lock_init(&svms->deferred_list_lock);
>>  
>> +    for (i = 0; i < p->n_pdds; i++)
>> +            if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
>> +                    bitmap_set(svms->bitmap_supported, i, 1);
>> +
>>      return 0;
>>  }
>>  
>> @@ -2978,14 +2990,15 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
>>              svm_range_set_default_attributes(&location, &prefetch_loc,
>>                                               &granularity, &flags);
>>              if (p->xnack_enabled)
>> -                    bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
>> +                    bitmap_copy(bitmap_access, svms->bitmap_supported,
>> +                                MAX_GPU_INSTANCE);
>>              else
>>                      bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
>>              bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
>>              goto fill_values;
>>      }
>> -    bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
>> -    bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
>> +    bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
>> +    bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
>>  
>>      while (node) {
>>              struct interval_tree_node *next;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index 573f984b81fe..0c0fc399395e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -175,6 +175,11 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
>>  void svm_range_free_dma_mappings(struct svm_range *prange);
>>  void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
>>  
>> +/* SVM API and HMM page migration work together, device memory type
>> + * is initialized to not 0 when page migration register device memory.
>> + */
>> +#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
>> +
>>  #else
>>  
>>  struct kfd_process;
>> @@ -201,6 +206,8 @@ static inline int svm_range_schedule_evict_svm_bo(
>>      return -EINVAL;
>>  }
>>  
>> +#define KFD_IS_SVM_API_SUPPORTED(dev) false
>> +
>>  #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
>>  
>>  #endif /* KFD_SVM_H_ */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> index f668b8cc2b57..ff4e296c1c58 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> @@ -36,6 +36,7 @@
>>  #include "kfd_topology.h"
>>  #include "kfd_device_queue_manager.h"
>>  #include "kfd_iommu.h"
>> +#include "kfd_svm.h"
>>  #include "amdgpu_amdkfd.h"
>>  #include "amdgpu_ras.h"
>>  
>> @@ -1441,10 +1442,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>              dev->node_props.capability |= (adev->ras_enabled != 0) ?
>>                      HSA_CAP_RASEVENTNOTIFY : 0;
>>  
>> -    /* SVM API and HMM page migration work together, device memory type
>> -     * is initialized to not 0 when page migration register device memory.
>> -     */
>> -    if (adev->kfd.dev->pgmap.type != 0)
>> +    if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
>>              dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
>>  
>>      kfd_debug_print_topology();