On Tue, May 16, 2017 at 02:52:53PM -0700, Daniele Ceraolo Spurio wrote:
> We're currently deleting the GuC logs if the FW fails to load, but those
> are still useful to understand why the loading failed. Keeping the
> object around allows us to access them after driver load is completed.
> 
> v2: keep the object around instead of using kernel memory (chris)
>     don't store the logs in the gpu_error struct (Chris)
>     add a check on guc_log_level to avoid snapshotting empty logs
> 
> v3: use separate debugfs for error log (Chris)
> 
> v4: rebased
> 
> v5: clean up obj selection, move err_load inside guc_log, move err_load
>     cleanup, rename functions (Michal)
> 
> Cc: Chris Wilson <[email protected]>
> Cc: Oscar Mateo <[email protected]>
> Cc: Michal Wajdeczko <[email protected]>
> Signed-off-by: Daniele Ceraolo Spurio <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  | 38 
> +++++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_guc_log.c | 17 ++++++++++++++++
>  drivers/gpu/drm/i915/intel_uc.c      | 14 +++++++++++--
>  drivers/gpu/drm/i915/intel_uc.h      |  5 +++++
>  4 files changed, 58 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index bd9abef..740395c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2607,27 +2607,36 @@ static int i915_guc_stage_pool(struct seq_file *m, 
> void *data)
>  
>  static int i915_guc_log_dump(struct seq_file *m, void *data)
>  {
> -     struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -     struct drm_i915_gem_object *obj;
> -     int i = 0, pg;
> -
> -     if (!dev_priv->guc.log.vma)
> -             return 0;
> +     struct drm_info_node *node = m->private;
> +     struct drm_i915_private *dev_priv = node_to_i915(node);
> +     bool dump_load_err = !!node->info_ent->data;
> +     struct drm_i915_gem_object *obj = NULL;
> +     u32 *log;
> +     int i = 0;
>  
> -     obj = dev_priv->guc.log.vma->obj;
> -     for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
> -             u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
> +     if (dump_load_err)
> +             obj = dev_priv->guc.log.load_err;
> +     else if (dev_priv->guc.log.vma)
> +             obj = dev_priv->guc.log.vma->obj;
>  
> -             for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
> -                     seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> -                                *(log + i), *(log + i + 1),
> -                                *(log + i + 2), *(log + i + 3));
> +     if (!obj)
> +             return 0;
>  
> -             kunmap_atomic(log);
> +     log = i915_gem_object_pin_map(obj, I915_MAP_WC);
> +     if (IS_ERR(log)) {
> +             seq_puts(m, "Failed to pin object\n");

Hmm, quite unusual to report internal message. Maybe:

        DRM_DEBUG("Failed to pin object\n");
        seq_puts(m, "(log data unaccessible)\n");


> +             return PTR_ERR(log);
>       }
>  
> +     for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
> +             seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> +                        *(log + i), *(log + i + 1),
> +                        *(log + i + 2), *(log + i + 3));
> +
>       seq_putc(m, '\n');
>  
> +     i915_gem_object_unpin_map(obj);
> +
>       return 0;
>  }
>  
> @@ -4811,6 +4820,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, 
> struct file *file)
>       {"i915_guc_info", i915_guc_info, 0},
>       {"i915_guc_load_status", i915_guc_load_status_info, 0},
>       {"i915_guc_log_dump", i915_guc_log_dump, 0},
> +     {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},

bikeshed: quite long name, maybe "i915_guc_load_log_dump" ?

>       {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
>       {"i915_huc_load_status", i915_huc_load_status_info, 0},
>       {"i915_frequency_info", i915_frequency_info, 0},
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c 
> b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..31e7bec 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private 
> *dev_priv)
>       guc_log_runtime_destroy(&dev_priv->guc);
>       mutex_unlock(&dev_priv->drm.struct_mutex);
>  }
> +
> +void intel_guc_log_capture_load_err(struct intel_guc *guc)
> +{
> +     if (!guc->log.vma || i915.guc_log_level < 0)
> +             return;
> +
> +     if (!guc->log.load_err)
> +             guc->log.load_err = i915_gem_object_get(guc->log.vma->obj);
> +
> +     return;
> +}
> +
> +void intel_guc_log_free_load_err(struct intel_guc *guc)
> +{
> +     if (guc->log.load_err)
> +             i915_gem_object_put(guc->log.load_err);
> +}

Based on the discussion from IM, it looks that it would be better to keep
guc->load_err object directly in struct intel_guc, and at the same time
move both capture/free functions to guc_uc.c as static:

        void guc_capture_load_err_log(struct intel_guc *guc)
        void guc_free_load_err_log(struct intel_guc *guc)


> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 07c5658..dfea7d0 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>  
>       guc_disable_communication(guc);
>       gen9_reset_guc_interrupts(dev_priv);
> +     intel_guc_log_free_load_err(guc);

Hmm, this will free the log from previous failed load, but I was under
impression that you want to keep it... Maybe log reset should be done
as explicit .write operation ?

>  
>       /* We need to notify the guc whenever we change the GGTT */
>       i915_ggtt_enable_guc(dev_priv);
> @@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>  
>       /* Did we succeded or run out of retries? */
>       if (ret)
> -             goto err_submission;
> +             goto err_log_capture;
>  
>       ret = guc_enable_communication(guc);
>       if (ret)
> -             goto err_submission;
> +             goto err_log_capture;
>  
>       intel_guc_auth_huc(dev_priv);
>       if (i915.enable_guc_submission) {
> @@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>  err_interrupts:
>       guc_disable_communication(guc);
>       gen9_disable_guc_interrupts(dev_priv);
> +err_log_capture:
> +     intel_guc_log_capture_load_err(guc);
>  err_submission:
>       if (i915.enable_guc_submission)
>               i915_guc_submission_fini(dev_priv);
> @@ -407,6 +410,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>  
>  void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>  {
> +     /*
> +      * The err load log exists if guc failed to load, so we need to check
> +      * for it before checking the module params as we might have updated
> +      * those to reflect the load failure.

Hmm, but it looks that for now we only touch enable_guc_loading in sanitize
functions. On load failure we just disable enable_guc_submission param (which
btw is a bad decission IMHO)

> +      */
> +     intel_guc_log_free_load_err(&dev_priv->guc);
> +
>       if (!i915.enable_guc_loading)
>               return;
>  
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 7618b71..05fa751 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -176,6 +176,9 @@ struct intel_guc_log {
>       u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
>       u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
>       u32 flush_count[GUC_MAX_LOG_BUFFER];
> +
> +     /* Log snapshot if GuC errors during load */
> +     struct drm_i915_gem_object *load_err;

As mentioned above, please move it back to guc struct, but keep
close to the fw/log members.


Thanks,
Michal

>  };
>  
>  struct intel_guc {
> @@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
>  int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
>  void i915_guc_log_register(struct drm_i915_private *dev_priv);
>  void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
> +void intel_guc_log_capture_load_err(struct intel_guc *guc);
> +void intel_guc_log_free_load_err(struct intel_guc *guc);
>  
>  static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>  {
> -- 
> 1.9.1
> 
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to