On Wed, Feb 11, 2026 at 5:28 AM Pierre-Eric Pelloux-Prayer <[email protected]> wrote: > > Update the way drm_coredump_printer is used based on its documentation > and Xe's code: the main idea is to generate the final version in one go > and then use memcpy to return the chunks requested by the caller of > amdgpu_devcoredump_read. > > The generation is moved to a separate worker thread. > > This cuts the time to copy the dump from 40s to ~0s on my machine. > > Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Acked-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++ > .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 66 +++++++++++++++++-- > .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 9 +++ > 3 files changed, 74 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 057c8bd2ad89..ae81a428cfb5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1200,6 +1200,11 @@ struct amdgpu_device { > > struct amdgpu_reset_domain *reset_domain; > > +#ifdef CONFIG_DEV_COREDUMP > + /* If a coredump state capture is in progress don't start a new one. > */ > + bool coredump_in_progress; > +#endif > + > struct mutex benchmark_mutex; > > bool scpm_enabled; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > index 42a969512dcc..0808ca98ccd9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c > @@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > } > #else > > +#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024) > + > const char *hw_ip_names[MAX_HWIP] = { > [GC_HWIP] = "GC", > [HDP_HWIP] = "HDP", > @@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct > amdgpu_device *adev, > } > > static ssize_t > -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, > - void *data, size_t datalen) > +amdgpu_devcoredump_format(char *buffer, size_t count, struct > amdgpu_coredump_info *coredump) > { > struct drm_printer p; > - struct amdgpu_coredump_info *coredump = data; > struct drm_print_iterator iter; > struct amdgpu_vm_fault_info *fault_info; > struct amdgpu_ip_block *ip_block; > @@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, > size_t count, > > iter.data = buffer; > iter.offset = 0; > - iter.start = offset; > 
iter.remain = count; > > p = drm_coredump_printer(&iter); > @@ -323,11 +322,60 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, > size_t count, > return count - iter.remain; > } > > +static ssize_t > +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, > + void *data, size_t datalen) > +{ > + struct amdgpu_coredump_info *coredump = data; > + ssize_t byte_copied; > + > + if (!coredump) > + return -ENODEV; > + > + flush_work(&coredump->work); > + > + if (!coredump->formatted) > + return -ENODEV; > + > + if (offset >= coredump->formatted_size) > + return 0; > + > + byte_copied = count < coredump->formatted_size - offset ? count : > + coredump->formatted_size - offset; > + memcpy(buffer, coredump->formatted + offset, byte_copied); > + > + return byte_copied; > +} > + > static void amdgpu_devcoredump_free(void *data) > { > + struct amdgpu_coredump_info *coredump = data; > + > + cancel_work_sync(&coredump->work); > + coredump->adev->coredump_in_progress = false; > + kfree(coredump->formatted); > kfree(data); > } > > +static void amdgpu_devcoredump_deferred_work(struct work_struct *work) > +{ > + struct amdgpu_coredump_info *coredump = container_of(work, > typeof(*coredump), work); > + > + dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, > GFP_NOWAIT, > + amdgpu_devcoredump_read, amdgpu_devcoredump_free); > + > + /* Do a one-time preparation of the coredump output because > + * repeatedly calling drm_coredump_printer is very slow. 
> + */ > + coredump->formatted_size = > + amdgpu_devcoredump_format(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, > coredump); > + coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL); > + if (!coredump->formatted) > + return; > + amdgpu_devcoredump_format(coredump->formatted, > coredump->formatted_size, coredump); > + coredump->adev->coredump_in_progress = false; > +} > + > void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, > bool vram_lost, struct amdgpu_job *job) > { > @@ -335,10 +383,15 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > struct amdgpu_coredump_info *coredump; > struct drm_sched_job *s_job; > > + if (adev->coredump_in_progress) > + return; > + > coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); > if (!coredump) > return; > > + adev->coredump_in_progress = true; > + > coredump->skip_vram_check = skip_vram_check; > coredump->reset_vram_lost = vram_lost; > > @@ -361,8 +414,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool > skip_vram_check, > > ktime_get_ts64(&coredump->reset_time); > > - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, > - amdgpu_devcoredump_read, amdgpu_devcoredump_free); > + /* Kick off coredump formatting to a worker thread. 
*/ > + INIT_WORK(&coredump->work, amdgpu_devcoredump_deferred_work); > + queue_work(system_unbound_wq, &coredump->work); > > drm_info(dev, "AMDGPU device coredump file has been created\n"); > drm_info(dev, "Check your > /sys/class/drm/card%d/device/devcoredump/data\n", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > index ef9772c6bcc9..4c37a852b74a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h > @@ -35,9 +35,18 @@ struct amdgpu_coredump_info { > struct amdgpu_device *adev; > struct amdgpu_task_info reset_task_info; > struct timespec64 reset_time; > + > + struct work_struct work; > + > bool skip_vram_check; > bool reset_vram_lost; > struct amdgpu_ring *ring; > + /* Readable form of the devcoredump, generated once to speed up > + * reading it (see drm_coredump_printer's documentation). > + */ > + ssize_t formatted_size; > + char *formatted; > + > }; > #endif > > -- > 2.43.0 >
