Update the way drm_coredump_printer is used, following its documentation
and Xe's code: generate the full dump once, then use memcpy to return
the chunks requested by each caller of amdgpu_devcoredump_read.

This cuts the time to copy the dump from 40s to ~0s on my machine.
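
In short, the new read path looks like this (a simplified sketch of the
diff below, with buf/size standing in for the new coredump->read fields
and error handling omitted):

    /* Pass 1: passing a NULL buffer makes drm_coredump_printer only
     * account for the output, so this returns the full dump size.
     */
    size = __amdgpu_devcoredump_read(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);

    /* Pass 2: render the whole dump once into a cached buffer. */
    buf = kvmalloc(size, GFP_USER);
    __amdgpu_devcoredump_read(buf, size, coredump);

    /* Each subsequent read is a plain memcpy from the cache instead
     * of re-printing the whole dump up to the requested offset.
     */
    memcpy(buffer, buf + offset, min_t(ssize_t, count, size - offset));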

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
---
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c  | 43 +++++++++++++++++--
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h  |  7 +++
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 7b50741dc097..de70747a099d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 }
 #else
 
+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
 const char *hw_ip_names[MAX_HWIP] = {
        [GC_HWIP]               = "GC",
        [HDP_HWIP]              = "HDP",
@@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
 }
 
 static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
-                       void *data, size_t datalen)
+__amdgpu_devcoredump_read(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
 {
        struct drm_printer p;
-       struct amdgpu_coredump_info *coredump = data;
        struct drm_print_iterator iter;
        struct amdgpu_vm_fault_info *fault_info;
        struct amdgpu_ip_block *ip_block;
@@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 
        iter.data = buffer;
        iter.offset = 0;
-       iter.start = offset;
        iter.remain = count;
 
        p = drm_coredump_printer(&iter);
@@ -321,8 +320,44 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
        return count - iter.remain;
 }
 
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+                       void *data, size_t datalen)
+{
+       struct amdgpu_coredump_info *coredump = data;
+       ssize_t byte_copied;
+
+       if (!coredump)
+               return -ENODEV;
+
+       if (!coredump->read.buffer) {
+               /* Do a one-time preparation of the coredump output because
+                * repeatedly calling drm_coredump_printer is very slow.
+                */
+               coredump->read.size =
+                       __amdgpu_devcoredump_read(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
+               coredump->read.buffer = kvmalloc(coredump->read.size, GFP_USER);
+               if (!coredump->read.buffer)
+                       return -ENOMEM;
+
+               __amdgpu_devcoredump_read(coredump->read.buffer, coredump->read.size, coredump);
+       }
+
+       if (offset >= coredump->read.size)
+               return 0;
+
+       byte_copied = count < coredump->read.size - offset ? count :
+               coredump->read.size - offset;
+       memcpy(buffer, coredump->read.buffer + offset, byte_copied);
+
+       return byte_copied;
+}
+
 static void amdgpu_devcoredump_free(void *data)
 {
+       struct amdgpu_coredump_info *coredump = data;
+
+       kvfree(coredump->read.buffer);
        kfree(data);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..33f2f6fdfcf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -38,6 +38,13 @@ struct amdgpu_coredump_info {
        bool                            skip_vram_check;
        bool                            reset_vram_lost;
        struct amdgpu_ring              *ring;
+       /* Readable form of the devcoredump, generated once to speed up
+        * reading it (see drm_coredump_printer's documentation).
+        */
+       struct {
+               ssize_t size;
+               char *buffer;
+       } read;
 };
 #endif
 
-- 
2.43.0
