Update the way drm_coredump_printer is used based on its documentation and Xe's code: the main idea is to generate the final version in one go and then use memcpy to return the chunks requested by the caller of amdgpu_devcoredump_read.
This cuts the time to copy the dump from 40s to ~0s on my machine. Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]> --- .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 43 +++++++++++++++++-- .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 7 +++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 7b50741dc097..de70747a099d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, } #else +#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024) + const char *hw_ip_names[MAX_HWIP] = { [GC_HWIP] = "GC", [HDP_HWIP] = "HDP", @@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, } static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) +__amdgpu_devcoredump_read(char *buffer, size_t count, struct amdgpu_coredump_info *coredump) { struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; struct amdgpu_ip_block *ip_block; @@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, iter.data = buffer; iter.offset = 0; - iter.start = offset; iter.remain = count; p = drm_coredump_printer(&iter); @@ -321,8 +320,44 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, return count - iter.remain; } +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct amdgpu_coredump_info *coredump = data; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + if (!coredump->read.buffer) { + /* Do a one-time preparation of the coredump output because + * repeatedly calling drm_coredump_printer is very slow. 
+ */ + coredump->read.size = + __amdgpu_devcoredump_read(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump); + coredump->read.buffer = kvmalloc(coredump->read.size, GFP_USER); + if (!coredump->read.buffer) + return -ENODEV; + + __amdgpu_devcoredump_read(coredump->read.buffer, coredump->read.size, coredump); + } + + if (offset >= coredump->read.size) + return 0; + + byte_copied = count < coredump->read.size - offset ? count : + coredump->read.size - offset; + memcpy(buffer, coredump->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void amdgpu_devcoredump_free(void *data) { + struct amdgpu_coredump_info *coredump = data; + + kvfree(coredump->read.buffer); kfree(data); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h index ef9772c6bcc9..33f2f6fdfcf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -38,6 +38,13 @@ struct amdgpu_coredump_info { bool skip_vram_check; bool reset_vram_lost; struct amdgpu_ring *ring; + /* Readable form of devcoredump, generated once to speed up + * reading it (see drm_coredump_printer's documentation). + */ + struct { + ssize_t size; + char *buffer; + } read; }; #endif -- 2.43.0
