1 hole with u8, 0 holes with u32. Is good?
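For anyone skimming the thread, a minimal sketch of the layout difference pahole is reporting here, assuming fields like those in the iocdata struct quoted further down; the struct and field names are made up for illustration and are not part of the patch:

    /* Hypothetical illustration only -- not the patch itself. */
    struct iocdata_u8_flags {
            __u8  use_srbm;        /* offset 0 */
            __u8  use_grbm;        /* offset 1 */
            __u8  pg_lock;         /* offset 2 */
            /* 1-byte hole: the next member is a __u32 and needs 4-byte alignment */
            __u32 grbm_se;         /* offset 4 */
            /* ... */
    };

    struct iocdata_u32_flags {
            __u32 use_srbm;        /* offset 0  */
            __u32 use_grbm;        /* offset 4  */
            __u32 pg_lock;         /* offset 8  */
            __u32 grbm_se;         /* offset 12 -- no holes */
            /* ... */
    };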
On Wed, Aug 25, 2021 at 7:16 AM Das, Nirmoy <[email protected]> wrote:

> On 8/25/2021 1:09 PM, Christian König wrote:
>
> I suggest to give this command here at least a try (remembered the name
> after a moment):
>
>     pahole drivers/gpu/drm/amd/amdgpu/amdgpu.o -C amdgpu_debugfs_regs2_iocdata
>
> It has a rather nifty output with padding holes, byte addresses, cache
> lines etc for your structure.
>
> Christian.
>
> On 25.08.21 at 13:04, Tom St Denis wrote:
>
> I tested it by forcing bit patterns into the ioctl data and printing it
> out in the kernel log. I'm not siloed into it one way or the other. I'll
> just change it to u32.
>
> On Wed, Aug 25, 2021 at 7:03 AM Christian König <[email protected]> wrote:
>
>> Using u8 is ok as well, just make sure that you don't have any hidden
>> padding.
>>
>> Nirmoy had a tool to double check for paddings which I once more forgot
>> the name of.
>>
>
> Yes, pahole. pkg name: dwarves
>
> Nirmoy
>
>
>> Christian.
>>
>> On 25.08.21 at 12:40, Tom St Denis wrote:
>>
>> The struct works as is but I'll change them to u32. The offset is an
>> artefact of the fact this was an IOCTL originally. I'm working both ends
>> in parallel trying to make the changes at the same time because I'm only
>> submitting the kernel patch if I've tested it in userspace.
>>
>> I'll send a v4 in a bit this morning....
>>
>> Tom
>>
>> On Wed, Aug 25, 2021 at 2:35 AM Christian König <[email protected]> wrote:
>>
>>>
>>> On 24.08.21 at 15:36, Tom St Denis wrote:
>>> > This new debugfs interface uses an IOCTL interface in order to pass
>>> > along state information like SRBM and GRBM bank switching.  This
>>> > new interface also allows a full 32-bit MMIO address range which
>>> > the previous didn't.  With this new design we have room to grow
>>> > the flexibility of the file as need be.
>>> >
>>> > (v2): Move read/write to .read/.write, fix style, add comment
>>> >       for IOCTL data structure
>>> >
>>> > Signed-off-by: Tom St Denis <[email protected]>
>>> > ---
>>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 162 ++++++++++++++++++++
>>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  32 ++++
>>> >   2 files changed, 194 insertions(+)
>>> >
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> > index 277128846dd1..8e8f5743c8f5 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> > @@ -279,6 +279,156 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
>>> >       return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
>>> >   }
>>> >
>>> > +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
>>> > +{
>>> > +     struct amdgpu_debugfs_regs2_data *rd;
>>> > +
>>> > +     rd = kzalloc(sizeof *rd, GFP_KERNEL);
>>> > +     if (!rd)
>>> > +             return -ENOMEM;
>>> > +     rd->adev = file_inode(file)->i_private;
>>> > +     file->private_data = rd;
>>> > +
>>> > +     return 0;
>>> > +}
>>> > +
>>> > +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
>>> > +{
>>> > +     kfree(file->private_data);
>>> > +     return 0;
>>> > +}
>>> > +
>>> > +static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, size_t size, int write_en)
>>> > +{
>>> > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>> > +     struct amdgpu_device *adev = rd->adev;
>>> > +     ssize_t result = 0;
>>> > +     int r;
>>> > +     uint32_t value;
>>> > +
>>> > +     if (size & 0x3 || rd->state.offset & 0x3)
>>> > +             return -EINVAL;
>>> > +
>>> > +     if (rd->state.id.use_grbm) {
>>> > +             if (rd->state.id.grbm.se == 0x3FF)
>>> > +                     rd->state.id.grbm.se = 0xFFFFFFFF;
>>> > +             if (rd->state.id.grbm.sh == 0x3FF)
>>> > +                     rd->state.id.grbm.sh = 0xFFFFFFFF;
>>> > +             if (rd->state.id.grbm.instance == 0x3FF)
>>> > +                     rd->state.id.grbm.instance = 0xFFFFFFFF;
>>> > +     }
>>> > +
>>> > +     r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> > +     if (r < 0) {
>>> > +             pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> > +             return r;
>>> > +     }
>>> > +
>>> > +     r = amdgpu_virt_enable_access_debugfs(adev);
>>> > +     if (r < 0) {
>>> > +             pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> > +             return r;
>>> > +     }
>>> > +
>>> > +     if (rd->state.id.use_grbm) {
>>> > +             if ((rd->state.id.grbm.sh != 0xFFFFFFFF && rd->state.id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
>>> > +                 (rd->state.id.grbm.se != 0xFFFFFFFF && rd->state.id.grbm.se >= adev->gfx.config.max_shader_engines)) {
>>> > +                     pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> > +                     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> > +                     amdgpu_virt_disable_access_debugfs(adev);
>>> > +                     return -EINVAL;
>>> > +             }
>>> > +             mutex_lock(&adev->grbm_idx_mutex);
>>> > +             amdgpu_gfx_select_se_sh(adev, rd->state.id.grbm.se,
>>> > +                                     rd->state.id.grbm.sh,
>>> > +                                     rd->state.id.grbm.instance);
>>> > +     }
>>> > +
>>> > +     if (rd->state.id.use_srbm) {
>>> > +             mutex_lock(&adev->srbm_mutex);
>>> > +             amdgpu_gfx_select_me_pipe_q(adev, rd->state.id.srbm.me, rd->state.id.srbm.pipe,
>>> > +                                         rd->state.id.srbm.queue, rd->state.id.srbm.vmid);
>>> > +     }
>>> > +
>>> > +     if (rd->state.id.pg_lock)
>>> > +             mutex_lock(&adev->pm.mutex);
>>> > +
>>> > +     while (size) {
>>> > +             if (!write_en) {
>>> > +                     value = RREG32(rd->state.offset >> 2);
>>> > +                     r = put_user(value, (uint32_t *)buf);
>>> > +             } else {
>>> > +                     r = get_user(value, (uint32_t *)buf);
>>> > +                     if (!r)
>>> > +                             amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value);
>>> > +             }
>>> > +             if (r) {
>>> > +                     result = r;
>>> > +                     goto end;
>>> > +             }
>>> > +             rd->state.offset += 4;
>>> > +             size -= 4;
>>> > +             result += 4;
>>> > +             buf += 4;
>>> > +     }
>>> > +end:
>>> > +     if (rd->state.id.use_grbm) {
>>> > +             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
>>> > +             mutex_unlock(&adev->grbm_idx_mutex);
>>> > +     }
>>> > +
>>> > +     if (rd->state.id.use_srbm) {
>>> > +             amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
>>> > +             mutex_unlock(&adev->srbm_mutex);
>>> > +     }
>>> > +
>>> > +     if (rd->state.id.pg_lock)
>>> > +             mutex_unlock(&adev->pm.mutex);
>>> > +
>>> > +     // in umr (the likely user of this) flags are set per file operation
>>> > +     // which means they're never "unset" explicitly.  To avoid breaking
>>> > +     // this convention we unset the flags after each operation
>>> > +     // flags are for a single call (need to be set for every read/write)
>>> > +     rd->state.id.use_grbm = 0;
>>> > +     rd->state.id.use_srbm = 0;
>>> > +     rd->state.id.pg_lock = 0;
>>> > +
>>> > +     pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> > +     pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> > +
>>> > +     amdgpu_virt_disable_access_debugfs(adev);
>>> > +     return result;
>>> > +}
>>> > +
>>> > +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
>>> > +{
>>> > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>> > +
>>> > +     switch (cmd) {
>>> > +     case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
>>> > +             if (copy_from_user(&rd->state.id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->state.id))
>>> > +                     return -EINVAL;
>>> > +             break;
>>> > +     default:
>>> > +             return -EINVAL;
>>> > +     }
>>> > +     return 0;
>>> > +}
>>> > +
>>> > +static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
>>> > +{
>>> > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>> > +     rd->state.offset = *pos;
>>> > +     return amdgpu_debugfs_regs2_op(f, buf, size, 0);
>>> > +}
>>> > +
>>> > +static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf, size_t size, loff_t *pos)
>>> > +{
>>> > +     struct amdgpu_debugfs_regs2_data *rd = f->private_data;
>>> > +     rd->state.offset = *pos;
>>> > +     return amdgpu_debugfs_regs2_op(f, (char __user *)buf, size, 1);
>>> > +}
>>> > +
>>> >
>>> >   /**
>>> >    * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
>>> > @@ -1091,6 +1241,16 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
>>> >       return result;
>>> >   }
>>> >
>>> > +static const struct file_operations amdgpu_debugfs_regs2_fops = {
>>> > +     .owner = THIS_MODULE,
>>> > +     .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
>>> > +     .read = amdgpu_debugfs_regs2_read,
>>> > +     .write = amdgpu_debugfs_regs2_write,
>>> > +     .open = amdgpu_debugfs_regs2_open,
>>> > +     .release = amdgpu_debugfs_regs2_release,
>>> > +     .llseek = default_llseek
>>> > +};
>>> > +
>>> >   static const struct file_operations amdgpu_debugfs_regs_fops = {
>>> >       .owner = THIS_MODULE,
>>> >       .read = amdgpu_debugfs_regs_read,
>>> > @@ -1148,6 +1308,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
>>> >
>>> >   static const struct file_operations *debugfs_regs[] = {
>>> >       &amdgpu_debugfs_regs_fops,
>>> > +     &amdgpu_debugfs_regs2_fops,
>>> >       &amdgpu_debugfs_regs_didt_fops,
>>> >       &amdgpu_debugfs_regs_pcie_fops,
>>> >       &amdgpu_debugfs_regs_smc_fops,
>>> > @@ -1160,6 +1321,7 @@ static const struct file_operations *debugfs_regs[] = {
>>> >
>>> >   static const char *debugfs_regs_names[] = {
>>> >       "amdgpu_regs",
>>> > +     "amdgpu_regs2",
>>> >       "amdgpu_regs_didt",
>>> >       "amdgpu_regs_pcie",
>>> >       "amdgpu_regs_smc",
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>> > index 141a8474e24f..ec044df5d428 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>>> > @@ -22,6 +22,8 @@
>>> >    * OTHER DEALINGS IN THE SOFTWARE.
>>> >    *
>>> >    */
>>> > +#include <linux/ioctl.h>
>>> > +#include <uapi/drm/amdgpu_drm.h>
>>> >
>>> >   /*
>>> >    * Debugfs
>>> > @@ -38,3 +40,33 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
>>> >   void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
>>> >   void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
>>> >   int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
>>> > +
>>> > +/*
>>> > + * MMIO debugfs IOCTL structure
>>> > + */
>>> > +struct amdgpu_debugfs_regs2_iocdata {
>>> > +     __u8 use_srbm, use_grbm, pg_lock;
>>>
>>> You should consider using u32 here as well or add explicit padding.
>>>
>>> > +     struct {
>>> > +             __u32 se, sh, instance;
>>> > +     } grbm;
>>> > +     struct {
>>> > +             __u32 me, pipe, queue, vmid;
>>> > +     } srbm;
>>> > +};
>>> > +
>>> > +/*
>>> > + * MMIO debugfs state data (per file* handle)
>>> > + */
>>> > +struct amdgpu_debugfs_regs2_data {
>>> > +     struct amdgpu_device *adev;
>>> > +     struct {
>>> > +             struct amdgpu_debugfs_regs2_iocdata id;
>>> > +             __u32 offset;
>>>
>>> What is the offset good for here?
>>>
>>> Regards,
>>> Christian.
>>>
>>> > +     } state;
>>> > +};
>>> > +
>>> > +enum AMDGPU_DEBUGFS_REGS2_CMDS {
>>> > +     AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
>>> > +};
>>> > +
>>> > +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
>>>
>>>
>>
>
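For completeness, a rough sketch of how a userspace tool such as umr might drive the new file, assuming the __u32 flag layout the thread converges on for v4. The debugfs path, the example register offset, and the error handling are illustrative only, and the struct and ioctl number are simply copied into the program because amdgpu_debugfs.h is not a UAPI header:

    /* Illustrative userspace sketch, not taken from umr itself. */
    #include <fcntl.h>
    #include <linux/ioctl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Local copy of the kernel structure, assuming the __u32 flag variant (v4). */
    struct amdgpu_debugfs_regs2_iocdata {
            uint32_t use_srbm, use_grbm, pg_lock;
            struct { uint32_t se, sh, instance; } grbm;
            struct { uint32_t me, pipe, queue, vmid; } srbm;
    };

    #define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE \
            _IOWR(0x20, 0, struct amdgpu_debugfs_regs2_iocdata)

    int main(void)
    {
            struct amdgpu_debugfs_regs2_iocdata id;
            uint32_t value;
            int fd;

            fd = open("/sys/kernel/debug/dri/0/amdgpu_regs2", O_RDWR);
            if (fd < 0)
                    return 1;

            /* Select a GRBM bank for this one access; the kernel clears the
             * use_* flags after every read/write, so the state has to be
             * re-sent before each operation. */
            memset(&id, 0, sizeof id);
            id.use_grbm = 1;
            id.grbm.se = 0;
            id.grbm.sh = 0;
            id.grbm.instance = 0;
            if (ioctl(fd, AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE, &id) < 0) {
                    close(fd);
                    return 1;
            }

            /* The file offset is the byte address of the register (full 32-bit range). */
            if (pread(fd, &value, sizeof value, 0x1234 /* example offset */) == sizeof value)
                    printf("reg = 0x%08x\n", value);

            close(fd);
            return 0;
    }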
