On Mon, May 14, 2018 at 10:19:27AM +0100, Brian Starkey wrote:
> Hi Alex,
> 
> On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote:
> >Status register contains a lot of bits for reporting internal errors
> >inside Mali DP. Currently, we just silently ignore all of the erorrs,
> 
> Being picky: s/erorrs/errors/
> 
> >that doesn't help when we are investigating different bugs, especially
> >on the FPGA models which have a lot of constrains, so we could easily
> 
> s/constrains/constraints/
> 
> >end up in AXI or underrun errors.
> >
> >Add a new file called debug that contains an agregate of the
> 
> s/agregate/aggregate/
> 
> >errors reported by the Mali DP hardware.
> >
> >E.g:
> >[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
> >[DE] num_errors : 167
> >[DE] last_error_status  : 0x00000001
> >[DE] last_error_vblank : 385
> >[SE] num_errors : 3
> >[SE] last_error_status  : 0x00e23001
> >[SE] last_error_vblank : 201
> >
> >This a morphosis of the patch presented here [1], where the errors
> >where reported continuously via trace_printk.
> >
> >[1] 
> >https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html
> >
> >Signed-off-by: Alexandru Gheorghe <[email protected]>
> >---
> >drivers/gpu/drm/arm/malidp_drv.c  | 61 
> >+++++++++++++++++++++++++++++++++++++++
> >drivers/gpu/drm/arm/malidp_drv.h  | 15 ++++++++++
> >drivers/gpu/drm/arm/malidp_hw.c   | 46 ++++++++++++++++++++++++-----
> >drivers/gpu/drm/arm/malidp_hw.h   |  1 +
> >drivers/gpu/drm/arm/malidp_regs.h |  6 ++++
> >5 files changed, 122 insertions(+), 7 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/arm/malidp_drv.c 
> >b/drivers/gpu/drm/arm/malidp_drv.c
> >index 8d20faa..70ce19a 100644
> >--- a/drivers/gpu/drm/arm/malidp_drv.c
> >+++ b/drivers/gpu/drm/arm/malidp_drv.c
> >@@ -17,6 +17,7 @@
> >#include <linux/of_graph.h>
> >#include <linux/of_reserved_mem.h>
> >#include <linux/pm_runtime.h>
> >+#include <linux/debugfs.h>
> >
> >#include <drm/drmP.h>
> >#include <drm/drm_atomic.h>
> >@@ -29,6 +30,7 @@
> >#include <drm/drm_gem_framebuffer_helper.h>
> >#include <drm/drm_modeset_helper.h>
> >#include <drm/drm_of.h>
> >+#include <drm/drm_debugfs.h>
> >
> >#include "malidp_drv.h"
> >#include "malidp_regs.h"
> >@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file 
> >*file_priv,
> >     return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
> >}
> >
> >+#ifdef CONFIG_DEBUG_FS
> >+
> >+static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
> >+{
> >+    atomic_set(&error_stats->num_errors, 0);
> >+    atomic_set(&error_stats->last_error_status, 0);
> >+    atomic64_set(&error_stats->last_error_vblank, -1);
> >+}
> >+
> >+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
> >+              u64 vblank)
> >+{
> >+    atomic_set(&error_stats->last_error_status, status);
> >+    atomic64_set(&error_stats->last_error_vblank, vblank);
> >+    atomic_inc(&error_stats->num_errors);
> >+}
> >+
> 
> Do we already have a lock we could use to make sure the status printed
> is consistent? I know this is "only debug", but it could be annoying
> if we can't trust that the last_error_status does actually match the
> last_error_vblank.
>

No, we don't have any lock. Yes that would be annoying, I will add
one.
 
> >+void malidp_error_stats_dump(const char *prefix,
> >+                         struct malidp_error_stats *error_stats,
> >+                         struct seq_file *m)
> >+{
> >+    seq_printf(m, "[%s] num_errors : %d\n", prefix,
> >+               atomic_read(&error_stats->num_errors));
> >+    seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
> >+               atomic_read(&error_stats->last_error_status));
> >+    seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
> >+               atomic64_read(&error_stats->last_error_vblank));
> >+}
> >+
> >+static int malidp_show_stats(struct seq_file *m, void *arg)
> >+{
> >+    struct drm_info_node *node = (struct drm_info_node *)m->private;
> >+    struct drm_device *drm = node->minor->dev;
> >+    struct malidp_drm *malidp = drm->dev_private;
> >+
> >+    malidp_error_stats_dump("DE", &malidp->de_errors, m);
> >+    malidp_error_stats_dump("SE", &malidp->se_errors, m);
> >+    return 0;
> >+}
> >+
> >+static struct drm_info_list malidp_debugfs_list[] = {
> >+    { "debug", malidp_show_stats, 0 },
> >+};
> 
> I think it would be pretty useful to have a way to reset the counters.
> Maybe by just writing anything to the file?
> 

drm_debugfs_fops doesn't allow writing. I see no reason using our own
debugs_fops, but do you think worth the trouble?

> Thanks,
> -Brian
> 
> >+
> >+static int malidp_debugfs_init(struct drm_minor *minor)
> >+{
> >+    struct malidp_drm *malidp = minor->dev->dev_private;
> >+
> >+    malidp_error_stats_init(&malidp->de_errors);
> >+    malidp_error_stats_init(&malidp->se_errors);
> >+    return drm_debugfs_create_files(malidp_debugfs_list,
> >+            ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
> >+}
> >+
> >+#endif //CONFIG_DEBUG_FS
> >+
> >static struct drm_driver malidp_driver = {
> >     .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
> >                        DRIVER_PRIME,
> >@@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = {
> >     .gem_prime_vmap = drm_gem_cma_prime_vmap,
> >     .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
> >     .gem_prime_mmap = drm_gem_cma_prime_mmap,
> >+#ifdef CONFIG_DEBUG_FS
> >+    .debugfs_init = malidp_debugfs_init,
> >+#endif
> >     .fops = &fops,
> >     .name = "mali-dp",
> >     .desc = "ARM Mali Display Processor driver",
> >diff --git a/drivers/gpu/drm/arm/malidp_drv.h 
> >b/drivers/gpu/drm/arm/malidp_drv.h
> >index c70989b..c49056c 100644
> >--- a/drivers/gpu/drm/arm/malidp_drv.h
> >+++ b/drivers/gpu/drm/arm/malidp_drv.h
> >@@ -18,6 +18,12 @@
> >#include <drm/drmP.h>
> >#include "malidp_hw.h"
> >
> >+struct malidp_error_stats {
> >+    atomic_t num_errors;
> >+    atomic_t last_error_status;
> >+    atomic64_t last_error_vblank;
> >+};
> >+
> >struct malidp_drm {
> >     struct malidp_hw_device *dev;
> >     struct drm_crtc crtc;
> >@@ -25,6 +31,10 @@ struct malidp_drm {
> >     struct drm_pending_vblank_event *event;
> >     atomic_t config_valid;
> >     u32 core_id;
> >+#ifdef CONFIG_DEBUG_FS
> >+    struct malidp_error_stats de_errors;
> >+    struct malidp_error_stats se_errors;
> >+#endif
> >};
> >
> >#define crtc_to_malidp_device(x) container_of(x, struct malidp_drm, crtc)
> >@@ -62,6 +72,11 @@ struct malidp_crtc_state {
> >int malidp_de_planes_init(struct drm_device *drm);
> >int malidp_crtc_init(struct drm_device *drm);
> >
> >+#ifdef CONFIG_DEBUG_FS
> >+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
> >+              u64 vblank);
> >+#endif
> >+
> >/* often used combination of rotational bits */
> >#define MALIDP_ROTATED_MASK  (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270)
> >
> >diff --git a/drivers/gpu/drm/arm/malidp_hw.c 
> >b/drivers/gpu/drm/arm/malidp_hw.c
> >index d789b46..ec40a44 100644
> >--- a/drivers/gpu/drm/arm/malidp_hw.c
> >+++ b/drivers/gpu/drm/arm/malidp_hw.c
> >@@ -632,10 +632,16 @@ const struct malidp_hw 
> >malidp_device[MALIDP_MAX_DEVICES] = {
> >                                         MALIDP500_DE_IRQ_VSYNC |
> >                                         MALIDP500_DE_IRQ_GLOBAL,
> >                             .vsync_irq = MALIDP500_DE_IRQ_VSYNC,
> >+                            .err_mask = MALIDP_DE_IRQ_UNDERRUN |
> >+                                        MALIDP500_DE_IRQ_AXI_ERR |
> >+                                        MALIDP500_DE_IRQ_SATURATION,
> >                     },
> >                     .se_irq_map = {
> >                             .irq_mask = MALIDP500_SE_IRQ_CONF_MODE,
> >                             .vsync_irq = 0,
> >+                            .err_mask = MALIDP500_SE_IRQ_INIT_BUSY |
> >+                                        MALIDP500_SE_IRQ_AXI_ERROR |
> >+                                        MALIDP500_SE_IRQ_OVERRUN,
> >                     },
> >                     .dc_irq_map = {
> >                             .irq_mask = MALIDP500_DE_IRQ_CONF_VALID,
> >@@ -669,10 +675,15 @@ const struct malidp_hw 
> >malidp_device[MALIDP_MAX_DEVICES] = {
> >                             .irq_mask = MALIDP_DE_IRQ_UNDERRUN |
> >                                         MALIDP550_DE_IRQ_VSYNC,
> >                             .vsync_irq = MALIDP550_DE_IRQ_VSYNC,
> >+                            .err_mask = MALIDP_DE_IRQ_UNDERRUN |
> >+                                        MALIDP550_DE_IRQ_SATURATION |
> >+                                        MALIDP550_DE_IRQ_AXI_ERR,
> >                     },
> >                     .se_irq_map = {
> >-                            .irq_mask = MALIDP550_SE_IRQ_EOW |
> >-                                        MALIDP550_SE_IRQ_AXI_ERR,
> >+                            .irq_mask = MALIDP550_SE_IRQ_EOW,
> >+                            .err_mask  = MALIDP550_SE_IRQ_AXI_ERR |
> >+                                         MALIDP550_SE_IRQ_OVR |
> >+                                         MALIDP550_SE_IRQ_IBSY,
> >                     },
> >                     .dc_irq_map = {
> >                             .irq_mask = MALIDP550_DC_IRQ_CONF_VALID,
> >@@ -707,10 +718,20 @@ const struct malidp_hw 
> >malidp_device[MALIDP_MAX_DEVICES] = {
> >                                         MALIDP650_DE_IRQ_DRIFT |
> >                                         MALIDP550_DE_IRQ_VSYNC,
> >                             .vsync_irq = MALIDP550_DE_IRQ_VSYNC,
> >+                            .err_mask = MALIDP_DE_IRQ_UNDERRUN |
> >+                                        MALIDP650_DE_IRQ_DRIFT |
> >+                                        MALIDP550_DE_IRQ_SATURATION |
> >+                                        MALIDP550_DE_IRQ_AXI_ERR |
> >+                                        MALIDP650_DE_IRQ_ACEV1 |
> >+                                        MALIDP650_DE_IRQ_ACEV2 |
> >+                                        MALIDP650_DE_IRQ_ACEG |
> >+                                        MALIDP650_DE_IRQ_AXIEP,
> >                     },
> >                     .se_irq_map = {
> >-                            .irq_mask = MALIDP550_SE_IRQ_EOW |
> >-                                        MALIDP550_SE_IRQ_AXI_ERR,
> >+                            .irq_mask = MALIDP550_SE_IRQ_EOW,
> >+                            .err_mask = MALIDP550_SE_IRQ_AXI_ERR |
> >+                                        MALIDP550_SE_IRQ_OVR |
> >+                                        MALIDP550_SE_IRQ_IBSY,
> >                     },
> >                     .dc_irq_map = {
> >                             .irq_mask = MALIDP550_DC_IRQ_CONF_VALID,
> >@@ -799,10 +820,17 @@ static irqreturn_t malidp_de_irq(int irq, void *arg)
> >             return ret;
> >
> >     mask = malidp_hw_read(hwdev, MALIDP_REG_MASKIRQ);
> >-    status &= mask;
> >+    /* keep the status of the enabled interrupts, plus the error bits */
> >+    status &= (mask | de->err_mask);
> >     if ((status & de->vsync_irq) && malidp->crtc.enabled)
> >             drm_crtc_handle_vblank(&malidp->crtc);
> >
> >+#ifdef CONFIG_DEBUG_FS
> >+    if (status & de->err_mask) {
> >+            malidp_error(&malidp->de_errors, status,
> >+                         drm_crtc_vblank_count(&malidp->crtc));
> >+    }
> >+#endif
> >     malidp_hw_clear_irq(hwdev, MALIDP_DE_BLOCK, status);
> >
> >     return (ret == IRQ_NONE) ? IRQ_HANDLED : ret;
> >@@ -878,11 +906,15 @@ static irqreturn_t malidp_se_irq(int irq, void *arg)
> >             return IRQ_NONE;
> >
> >     status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS);
> >-    if (!(status & se->irq_mask))
> >+    if (!(status & (se->irq_mask | se->err_mask)))
> >             return IRQ_NONE;
> >
> >+#ifdef CONFIG_DEBUG_FS
> >+    if (status & se->err_mask)
> >+            malidp_error(&malidp->se_errors, status,
> >+                         drm_crtc_vblank_count(&malidp->crtc));
> >+#endif
> >     mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ);
> >-    status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS);
> >     status &= mask;
> >     /* ToDo: status decoding and firing up of VSYNC and page flip events */
> >
> >diff --git a/drivers/gpu/drm/arm/malidp_hw.h 
> >b/drivers/gpu/drm/arm/malidp_hw.h
> >index b5dd6c7..b45f99f 100644
> >--- a/drivers/gpu/drm/arm/malidp_hw.h
> >+++ b/drivers/gpu/drm/arm/malidp_hw.h
> >@@ -52,6 +52,7 @@ struct malidp_format_id {
> >struct malidp_irq_map {
> >     u32 irq_mask;           /* mask of IRQs that can be enabled in the 
> > block */
> >     u32 vsync_irq;          /* IRQ bit used for signaling during VSYNC */
> >+    u32 err_mask;           /* mask of bits that represent errors */
> >};
> >
> >struct malidp_layer {
> >diff --git a/drivers/gpu/drm/arm/malidp_regs.h 
> >b/drivers/gpu/drm/arm/malidp_regs.h
> >index 149024f..7c37390 100644
> >--- a/drivers/gpu/drm/arm/malidp_regs.h
> >+++ b/drivers/gpu/drm/arm/malidp_regs.h
> >@@ -53,6 +53,8 @@
> >#define MALIDP550_DE_IRQ_AXI_ERR             (1 << 16)
> >#define MALIDP550_SE_IRQ_EOW                 (1 << 0)
> >#define MALIDP550_SE_IRQ_AXI_ERR             (1 << 16)
> >+#define MALIDP550_SE_IRQ_OVR                        (1 << 17)
> >+#define MALIDP550_SE_IRQ_IBSY                       (1 << 18)
> >#define MALIDP550_DC_IRQ_CONF_VALID          (1 << 0)
> >#define MALIDP550_DC_IRQ_CONF_MODE           (1 << 4)
> >#define MALIDP550_DC_IRQ_CONF_ACTIVE         (1 << 16)
> >@@ -60,6 +62,10 @@
> >#define MALIDP550_DC_IRQ_SE                  (1 << 24)
> >
> >#define MALIDP650_DE_IRQ_DRIFT                       (1 << 4)
> >+#define MALIDP650_DE_IRQ_ACEV1                      (1 << 17)
> >+#define MALIDP650_DE_IRQ_ACEV2                      (1 << 18)
> >+#define MALIDP650_DE_IRQ_ACEG                       (1 << 19)
> >+#define MALIDP650_DE_IRQ_AXIEP                      (1 << 28)
> >
> >/* bit masks that are common between products */
> >#define   MALIDP_CFG_VALID           (1 << 0)
> >-- 
> >2.7.4
> >

-- 
Cheers,
Alex G
_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to