The current code using NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES not only requires changes to support the new page table layout used on Hopper/Blackwell GPUs, but is also broken in that it always mirrors the PDEs used for virtual address 0, rather than the PDEs covering the area reserved for RM.
This works fine for the non-NVK case where the kernel has full control of the VMM layout and things end up in the right place, but NVK puts its kernel reserved area much higher in the address space. Fixing the code to work at any VA is not enough as some parts of RM want the reserved area in a specific location, and NVK would then hit other assertions in RM instead. Fortunately, it appears that RM never needs to allocate anything within its reserved area for DRM clients, and the COPY_SERVER_RESERVED_PDES control call primarily serves to allow RM to locate the root page table when initialising a channel's instance block. Flag VMMs allocated by the DRM driver as externally owned, and use NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY to inform RM of the root page table in a similar way to NVIDIA's UVM driver. The COPY_SERVER_RESERVED_PDES paths are kept for the golden context image and gr scrubber channel, where RM needs the reserved area. Signed-off-by: Ben Skeggs <[email protected]> Reviewed-by: Dave Airlie <[email protected]> Reviewed-by: Timur Tabi <[email protected]> Tested-by: Timur Tabi <[email protected]> --- .../drm/nouveau/nvkm/subdev/gsp/rm/handles.h | 1 + .../drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c | 3 +- .../nvkm/subdev/gsp/rm/r535/nvrm/vmm.h | 35 +++++++++++++++++ .../drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c | 39 ++++++++++++++----- .../drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c | 2 +- .../gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h | 2 +- 6 files changed, 69 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h index 0308fb7b9647..8d070e68aa3a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h @@ -11,6 +11,7 @@ #define NVKM_RM_DEVICE 0xde1d0000 #define NVKM_RM_SUBDEVICE 0x5d1d0000 #define NVKM_RM_DISP 0x00730000 +#define NVKM_RM_VASPACE_GOLDEN 0xa5000000 #define NVKM_RM_VASPACE(id) (0x90f10000 | 
(id)) #define NVKM_RM_CHAN(chid) (0xf1f00000 | (chid)) #define NVKM_RM_THREED 0x97000000 diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c index 0ea6279ef9e6..e9812eae97a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c @@ -276,7 +276,6 @@ r535_gr_oneinit(struct nvkm_gr *base) struct nvkm_device *device = subdev->device; struct nvkm_gsp *gsp = device->gsp; struct nvkm_rm *rm = gsp->rm; - struct nvkm_mmu *mmu = device->mmu; struct { struct nvkm_memory *inst; struct nvkm_vmm *vmm; @@ -295,7 +294,7 @@ r535_gr_oneinit(struct nvkm_gr *base) if (ret) goto done; - ret = mmu->func->promote_vmm(golden.vmm); + ret = r535_mmu_vaspace_new(golden.vmm, NVKM_RM_VASPACE_GOLDEN, false); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h index f58edf62e4ae..cdbe55eacf41 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h @@ -23,6 +23,8 @@ typedef struct #define NV_VASPACE_ALLOCATION_INDEX_GPU_NEW 0x00 //<! 
Create new VASpace, by default +#define NV_VASPACE_ALLOCATION_FLAGS_IS_EXTERNALLY_OWNED BIT(3) + #define GMMU_FMT_MAX_LEVELS 6U #define NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES (0x90f10106U) /* finn: Evaluated from "(FINN_FERMI_VASPACE_A_VASPACE_INTERFACE_ID << 8) | NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS_MESSAGE_ID" */ @@ -87,4 +89,37 @@ typedef struct NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS { NvU8 pageShift; } levels[GMMU_FMT_MAX_LEVELS]; } NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS; + +#define NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY (0x801813U) /* finn: Evaluated from "(FINN_NV01_DEVICE_0_DMA_INTERFACE_ +ID << 8) | NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS_MESSAGE_ID" */ + +typedef struct NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS { + NV_DECLARE_ALIGNED(NvU64 physAddress, 8); + NvU32 numEntries; + NvU32 flags; + NvHandle hVASpace; + NvU32 chId; + NvU32 subDeviceId; // ID+1, 0 for BC + NvU32 pasid; +} NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS; + +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE 1:0 +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_VIDMEM (0x00000000U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_SYSMEM_COH (0x00000001U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_APERTURE_SYSMEM_NONCOH (0x00000002U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_PRESERVE_PDES 2:2 +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_PRESERVE_PDES_FALSE (0x00000000U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_PRESERVE_PDES_TRUE (0x00000001U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_ALL_CHANNELS 3:3 +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_ALL_CHANNELS_FALSE (0x00000000U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_ALL_CHANNELS_TRUE (0x00000001U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_IGNORE_CHANNEL_BUSY 4:4 +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_IGNORE_CHANNEL_BUSY_FALSE (0x00000000U) +#define 
NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_IGNORE_CHANNEL_BUSY_TRUE (0x00000001U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_EXTEND_VASPACE 5:5 +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_EXTEND_VASPACE_FALSE (0x00000000U) +#define NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS_EXTEND_VASPACE_TRUE (0x00000001U) + +#define SPLIT_VAS_SERVER_RM_MANAGED_VA_START 0x100000000ULL // 4GB +#define SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE 0x20000000ULL // 512MB #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c index 81ceafd30e41..0b1b41bf6230 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c @@ -21,10 +21,11 @@ */ #include <subdev/mmu/vmm.h> +#include <nvhw/drf.h> #include "nvrm/vmm.h" int -r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle) +r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external) { struct nvkm_mmu *mmu = vmm->mmu; struct nvkm_gsp *gsp = mmu->subdev.device->gsp; @@ -37,12 +38,14 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle) return PTR_ERR(args); args->index = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW; + if (external) + args->flags = NV_VASPACE_ALLOCATION_FLAGS_IS_EXTERNALLY_OWNED; ret = nvkm_gsp_rm_alloc_wr(&vmm->rm.object, args); if (ret) return ret; - { + if (!external) { NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS *ctrl; mutex_lock(&vmm->mutex.vmm); @@ -52,6 +55,11 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle) if (ret) return ret; + /* Some parts of RM expect the server-reserved area to be in a specific location. 
*/ + if (WARN_ON(vmm->rm.rsvd->addr != SPLIT_VAS_SERVER_RM_MANAGED_VA_START || + vmm->rm.rsvd->size != SPLIT_VAS_SERVER_RM_MANAGED_VA_SIZE)) + return -EINVAL; + ctrl = nvkm_gsp_rm_ctrl_get(&vmm->rm.object, NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES, sizeof(*ctrl)); @@ -70,14 +78,27 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle) ctrl->levels[1].size = 0x1000; ctrl->levels[1].aperture = 1; ctrl->levels[1].pageShift = 0x26; - if (vmm->pd->pde[0]->pde[0]) { - ctrl->levels[2].physAddress = vmm->pd->pde[0]->pde[0]->pt[0]->addr; - ctrl->levels[2].size = 0x1000; - ctrl->levels[2].aperture = 1; - ctrl->levels[2].pageShift = 0x1d; - } + ctrl->levels[2].physAddress = vmm->pd->pde[0]->pde[0]->pt[0]->addr; + ctrl->levels[2].size = 0x1000; + ctrl->levels[2].aperture = 1; + ctrl->levels[2].pageShift = 0x1d; ret = nvkm_gsp_rm_ctrl_wr(&vmm->rm.object, ctrl); + } else { + NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS *ctrl; + + ctrl = nvkm_gsp_rm_ctrl_get(&gsp->internal.device.object, + NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY, + sizeof(*ctrl)); + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl); + + ctrl->physAddress = vmm->pd->pt[0]->addr; + ctrl->numEntries = 1 << vmm->func->page[0].desc->bits; + ctrl->flags = NVDEF(NV0080_CTRL_DMA_SET_PAGE_DIRECTORY, FLAGS, APERTURE, VIDMEM); + ctrl->hVASpace = vmm->rm.object.handle; + + ret = nvkm_gsp_rm_ctrl_wr(&gsp->internal.device.object, ctrl); } return ret; @@ -93,7 +114,7 @@ r535_mmu_promote_vmm(struct nvkm_vmm *vmm) if (id < 0) return id; - ret = r535_mmu_vaspace_new(vmm, NVKM_RM_VASPACE(id)); + ret = r535_mmu_vaspace_new(vmm, NVKM_RM_VASPACE(id), true); if (ret) { ida_free(&mmu->rm.vmm_ids, id); return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c index a899da67c15e..742ef80188fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c @@ -119,7 +119,7 @@ r570_gr_scrubber_init(struct 
r535_gr *gr) if (ret) goto done; - ret = r535_mmu_vaspace_new(gr->scrubber.vmm, KGRAPHICS_SCRUBBER_HANDLE_VAS); + ret = r535_mmu_vaspace_new(gr->scrubber.vmm, KGRAPHICS_SCRUBBER_HANDLE_VAS, false); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h index 2865683fdfca..14e5bb5ad30e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h @@ -148,7 +148,7 @@ extern const struct nvkm_rm_api_alloc r535_alloc; extern const struct nvkm_rm_api_client r535_client; void r535_gsp_client_dtor(struct nvkm_gsp_client *); extern const struct nvkm_rm_api_device r535_device; -int r535_mmu_vaspace_new(struct nvkm_vmm *, u32 handle); +int r535_mmu_vaspace_new(struct nvkm_vmm *, u32 handle, bool external); extern const struct nvkm_rm_api_fbsr r535_fbsr; void r535_fbsr_resume(struct nvkm_gsp *); int r535_fbsr_memlist(struct nvkm_gsp_device *, u32 handle, enum nvkm_memory_target, -- 2.49.0
