[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Tao Zhou <[email protected]>
> -----Original Message----- > From: Chai, Thomas <[email protected]> > Sent: Friday, July 18, 2025 11:26 AM > To: [email protected] > Cc: Chai, Thomas <[email protected]>; Zhang, Hawking > <[email protected]>; Zhou1, Tao <[email protected]>; Chai, Thomas > <[email protected]> > Subject: [PATCH V2] drm/amdgpu: add command to check address validity > > Add command to check address validity and remove unused command codes. > > v2: > The command interface adds new parameters to support multiple check address > strategies. > > Signed-off-by: YiPeng Chai <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 63 +++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++ > 2 files changed, 66 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 15bde4904996..185b9e538f98 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -128,6 +128,9 @@ const char *get_ras_block_str(struct ras_common_if > *ras_block) > > #define MAX_FLUSH_RETIRE_DWORK_TIMES 100 > > +#define BYPASS_ALLOCATED_ADDRESS 0x0 > +#define BYPASS_INITIALIZATION_ADDRESS 0x1 > + > enum amdgpu_ras_retire_page_reservation { > AMDGPU_RAS_RETIRE_PAGE_RESERVED, > AMDGPU_RAS_RETIRE_PAGE_PENDING, > @@ -207,6 +210,49 @@ static int amdgpu_reserve_page_direct(struct > amdgpu_device *adev, uint64_t addre > return 0; > } > > +static int amdgpu_check_address_validity(struct amdgpu_device *adev, > + uint64_t address, uint64_t flags) > +{ > + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > + struct amdgpu_vram_block_info blk_info; > + uint64_t page_pfns[32] = {0}; > + int i, ret, count; > + > + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) > + return 0; > + > + if ((address >= adev->gmc.mc_vram_size) || > + (address >= RAS_UMC_INJECT_ADDR_LIMIT)) > + return -EFAULT; > + > + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, > + address, page_pfns, 
ARRAY_SIZE(page_pfns)); > + if (count <= 0) > + return -EPERM; > + > + for (i = 0; i < count; i++) { > + memset(&blk_info, 0, sizeof(blk_info)); > + ret = amdgpu_vram_mgr_query_address_block_info(&adev- > >mman.vram_mgr, > + page_pfns[i] << > AMDGPU_GPU_PAGE_SHIFT, &blk_info); > + if (!ret) { > + /* The input address that needs to be checked is > allocated by > + * current calling process, so it is necessary to > exclude > + * the calling process. > + */ > + if ((flags == BYPASS_ALLOCATED_ADDRESS) && > + ((blk_info.task.pid != task_pid_nr(current)) || > + strncmp(blk_info.task.comm, current->comm, > TASK_COMM_LEN))) > + return -EACCES; > + else if ((flags == BYPASS_INITIALIZATION_ADDRESS) && > + (blk_info.task.pid == con->init_task_pid) && > + !strncmp(blk_info.task.comm, > con->init_task_comm, > TASK_COMM_LEN)) > + return -EACCES; > + } > + } > + > + return 0; > +} > + > static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, > size_t size, loff_t *pos) > { > @@ -297,6 +343,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file > *f, > op = 2; > else if (strstr(str, "retire_page") != NULL) > op = 3; > + else if (strstr(str, "check_address") != NULL) > + op = 4; > else if (str[0] && str[1] && str[2] && str[3]) > /* ascii string, but commands are not matched. 
*/ > return -EINVAL; > @@ -310,6 +358,15 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file > *f, > data->op = op; > data->inject.address = address; > > + return 0; > + } else if (op == 4) { > + if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) > != 2 && > + sscanf(str, "%*s %llu %llu", &address, &value) != 2) > + return -EINVAL; > + > + data->op = op; > + data->inject.address = address; > + data->inject.value = value; > return 0; > } > > @@ -500,6 +557,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file > *f, > return size; > else > return ret; > + } else if (data.op == 4) { > + ret = amdgpu_check_address_validity(adev, data.inject.address, > data.inject.value); > + return ret ? ret : size; > } > > if (!amdgpu_ras_is_supported(adev, data.head.block)) @@ -4103,6 +4163,9 > @@ int amdgpu_ras_init(struct amdgpu_device *adev) > goto release_con; > } > > + con->init_task_pid = task_pid_nr(current); > + get_task_comm(con->init_task_comm, current); > + > dev_info(adev->dev, "RAS INFO: ras initialized successfully, " > "hardware ability[%x] ras_mask[%x]\n", > adev->ras_hw_enabled, adev->ras_enabled); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 927d6bff734a..7f10a7402160 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -570,6 +570,9 @@ struct amdgpu_ras { > struct ras_event_manager *event_mgr; > > uint64_t reserved_pages_in_bytes; > + > + pid_t init_task_pid; > + char init_task_comm[TASK_COMM_LEN]; > }; > > struct ras_fs_data { > -- > 2.34.1
