Hi Alex,

On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote:
Status register contains a lot of bits for reporting internal errors
inside Mali DP. Currently, we just silently ignore all of the erorrs,

Being picky: s/erorrs/errors/

that doesn't help when we are investigating different bugs, especially
on the FPGA models which have a lot of constrains, so we could easily

s/constrains/constraints/

end up in AXI or underrun errors.

Add a new file called debug that contains an agregate of the

s/agregate/aggregate/

errors reported by the Mali DP hardware.

E.g:
[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug
[DE] num_errors : 167
[DE] last_error_status  : 0x00000001
[DE] last_error_vblank : 385
[SE] num_errors : 3
[SE] last_error_status  : 0x00e23001
[SE] last_error_vblank : 201

This a morphosis of the patch presented here [1], where the errors
where reported continuously via trace_printk.

[1] https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html

Signed-off-by: Alexandru Gheorghe <[email protected]>
---
drivers/gpu/drm/arm/malidp_drv.c  | 61 +++++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/arm/malidp_drv.h  | 15 ++++++++++
drivers/gpu/drm/arm/malidp_hw.c   | 46 ++++++++++++++++++++++++-----
drivers/gpu/drm/arm/malidp_hw.h   |  1 +
drivers/gpu/drm/arm/malidp_regs.h |  6 ++++
5 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 8d20faa..70ce19a 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -17,6 +17,7 @@
#include <linux/of_graph.h>
#include <linux/of_reserved_mem.h>
#include <linux/pm_runtime.h>
+#include <linux/debugfs.h>

#include <drm/drmP.h>
#include <drm/drm_atomic.h>
@@ -29,6 +30,7 @@
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_modeset_helper.h>
#include <drm/drm_of.h>
+#include <drm/drm_debugfs.h>

#include "malidp_drv.h"
#include "malidp_regs.h"
@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file *file_priv,
        return drm_gem_cma_dumb_create_internal(file_priv, drm, args);
}

+#ifdef CONFIG_DEBUG_FS
+
+static void malidp_error_stats_init(struct malidp_error_stats *error_stats)
+{
+       atomic_set(&error_stats->num_errors, 0);
+       atomic_set(&error_stats->last_error_status, 0);
+       atomic64_set(&error_stats->last_error_vblank, -1);
+}
+
+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
+                 u64 vblank)
+{
+       atomic_set(&error_stats->last_error_status, status);
+       atomic64_set(&error_stats->last_error_vblank, vblank);
+       atomic_inc(&error_stats->num_errors);
+}
+

Do we already have a lock we could use to make sure the status printed
is consistent? I know this is "only debug", but it could be annoying
if we can't trust that the last_error_status does actually match the
last_error_vblank.

+void malidp_error_stats_dump(const char *prefix,
+                            struct malidp_error_stats *error_stats,
+                            struct seq_file *m)
+{
+       seq_printf(m, "[%s] num_errors : %d\n", prefix,
+                  atomic_read(&error_stats->num_errors));
+       seq_printf(m, "[%s] last_error_status  : 0x%08x\n", prefix,
+                  atomic_read(&error_stats->last_error_status));
+       seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix,
+                  atomic64_read(&error_stats->last_error_vblank));
+}
+
+static int malidp_show_stats(struct seq_file *m, void *arg)
+{
+       struct drm_info_node *node = (struct drm_info_node *)m->private;
+       struct drm_device *drm = node->minor->dev;
+       struct malidp_drm *malidp = drm->dev_private;
+
+       malidp_error_stats_dump("DE", &malidp->de_errors, m);
+       malidp_error_stats_dump("SE", &malidp->se_errors, m);
+       return 0;
+}
+
+static struct drm_info_list malidp_debugfs_list[] = {
+       { "debug", malidp_show_stats, 0 },
+};

I think it would be pretty useful to have a way to reset the counters.
Maybe by just writing anything to the file?

Thanks,
-Brian

+
+static int malidp_debugfs_init(struct drm_minor *minor)
+{
+       struct malidp_drm *malidp = minor->dev->dev_private;
+
+       malidp_error_stats_init(&malidp->de_errors);
+       malidp_error_stats_init(&malidp->se_errors);
+       return drm_debugfs_create_files(malidp_debugfs_list,
+               ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor);
+}
+
+#endif //CONFIG_DEBUG_FS
+
static struct drm_driver malidp_driver = {
        .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
                           DRIVER_PRIME,
@@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = {
        .gem_prime_vmap = drm_gem_cma_prime_vmap,
        .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
        .gem_prime_mmap = drm_gem_cma_prime_mmap,
+#ifdef CONFIG_DEBUG_FS
+       .debugfs_init = malidp_debugfs_init,
+#endif
        .fops = &fops,
        .name = "mali-dp",
        .desc = "ARM Mali Display Processor driver",
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index c70989b..c49056c 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -18,6 +18,12 @@
#include <drm/drmP.h>
#include "malidp_hw.h"

+struct malidp_error_stats {
+       atomic_t num_errors;
+       atomic_t last_error_status;
+       atomic64_t last_error_vblank;
+};
+
struct malidp_drm {
        struct malidp_hw_device *dev;
        struct drm_crtc crtc;
@@ -25,6 +31,10 @@ struct malidp_drm {
        struct drm_pending_vblank_event *event;
        atomic_t config_valid;
        u32 core_id;
+#ifdef CONFIG_DEBUG_FS
+       struct malidp_error_stats de_errors;
+       struct malidp_error_stats se_errors;
+#endif
};

#define crtc_to_malidp_device(x) container_of(x, struct malidp_drm, crtc)
@@ -62,6 +72,11 @@ struct malidp_crtc_state {
int malidp_de_planes_init(struct drm_device *drm);
int malidp_crtc_init(struct drm_device *drm);

+#ifdef CONFIG_DEBUG_FS
+void malidp_error(struct malidp_error_stats *error_stats, u32 status,
+                 u64 vblank);
+#endif
+
/* often used combination of rotational bits */
#define MALIDP_ROTATED_MASK     (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270)

diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index d789b46..ec40a44 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -632,10 +632,16 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] 
= {
                                            MALIDP500_DE_IRQ_VSYNC |
                                            MALIDP500_DE_IRQ_GLOBAL,
                                .vsync_irq = MALIDP500_DE_IRQ_VSYNC,
+                               .err_mask = MALIDP_DE_IRQ_UNDERRUN |
+                                           MALIDP500_DE_IRQ_AXI_ERR |
+                                           MALIDP500_DE_IRQ_SATURATION,
                        },
                        .se_irq_map = {
                                .irq_mask = MALIDP500_SE_IRQ_CONF_MODE,
                                .vsync_irq = 0,
+                               .err_mask = MALIDP500_SE_IRQ_INIT_BUSY |
+                                           MALIDP500_SE_IRQ_AXI_ERROR |
+                                           MALIDP500_SE_IRQ_OVERRUN,
                        },
                        .dc_irq_map = {
                                .irq_mask = MALIDP500_DE_IRQ_CONF_VALID,
@@ -669,10 +675,15 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] 
= {
                                .irq_mask = MALIDP_DE_IRQ_UNDERRUN |
                                            MALIDP550_DE_IRQ_VSYNC,
                                .vsync_irq = MALIDP550_DE_IRQ_VSYNC,
+                               .err_mask = MALIDP_DE_IRQ_UNDERRUN |
+                                           MALIDP550_DE_IRQ_SATURATION |
+                                           MALIDP550_DE_IRQ_AXI_ERR,
                        },
                        .se_irq_map = {
-                               .irq_mask = MALIDP550_SE_IRQ_EOW |
-                                           MALIDP550_SE_IRQ_AXI_ERR,
+                               .irq_mask = MALIDP550_SE_IRQ_EOW,
+                               .err_mask  = MALIDP550_SE_IRQ_AXI_ERR |
+                                            MALIDP550_SE_IRQ_OVR |
+                                            MALIDP550_SE_IRQ_IBSY,
                        },
                        .dc_irq_map = {
                                .irq_mask = MALIDP550_DC_IRQ_CONF_VALID,
@@ -707,10 +718,20 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] 
= {
                                            MALIDP650_DE_IRQ_DRIFT |
                                            MALIDP550_DE_IRQ_VSYNC,
                                .vsync_irq = MALIDP550_DE_IRQ_VSYNC,
+                               .err_mask = MALIDP_DE_IRQ_UNDERRUN |
+                                           MALIDP650_DE_IRQ_DRIFT |
+                                           MALIDP550_DE_IRQ_SATURATION |
+                                           MALIDP550_DE_IRQ_AXI_ERR |
+                                           MALIDP650_DE_IRQ_ACEV1 |
+                                           MALIDP650_DE_IRQ_ACEV2 |
+                                           MALIDP650_DE_IRQ_ACEG |
+                                           MALIDP650_DE_IRQ_AXIEP,
                        },
                        .se_irq_map = {
-                               .irq_mask = MALIDP550_SE_IRQ_EOW |
-                                           MALIDP550_SE_IRQ_AXI_ERR,
+                               .irq_mask = MALIDP550_SE_IRQ_EOW,
+                               .err_mask = MALIDP550_SE_IRQ_AXI_ERR |
+                                           MALIDP550_SE_IRQ_OVR |
+                                           MALIDP550_SE_IRQ_IBSY,
                        },
                        .dc_irq_map = {
                                .irq_mask = MALIDP550_DC_IRQ_CONF_VALID,
@@ -799,10 +820,17 @@ static irqreturn_t malidp_de_irq(int irq, void *arg)
                return ret;

        mask = malidp_hw_read(hwdev, MALIDP_REG_MASKIRQ);
-       status &= mask;
+       /* keep the status of the enabled interrupts, plus the error bits */
+       status &= (mask | de->err_mask);
        if ((status & de->vsync_irq) && malidp->crtc.enabled)
                drm_crtc_handle_vblank(&malidp->crtc);

+#ifdef CONFIG_DEBUG_FS
+       if (status & de->err_mask) {
+               malidp_error(&malidp->de_errors, status,
+                            drm_crtc_vblank_count(&malidp->crtc));
+       }
+#endif
        malidp_hw_clear_irq(hwdev, MALIDP_DE_BLOCK, status);

        return (ret == IRQ_NONE) ? IRQ_HANDLED : ret;
@@ -878,11 +906,15 @@ static irqreturn_t malidp_se_irq(int irq, void *arg)
                return IRQ_NONE;

        status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS);
-       if (!(status & se->irq_mask))
+       if (!(status & (se->irq_mask | se->err_mask)))
                return IRQ_NONE;

+#ifdef CONFIG_DEBUG_FS
+       if (status & se->err_mask)
+               malidp_error(&malidp->se_errors, status,
+                            drm_crtc_vblank_count(&malidp->crtc));
+#endif
        mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ);
-       status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS);
        status &= mask;
        /* ToDo: status decoding and firing up of VSYNC and page flip events */

diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h
index b5dd6c7..b45f99f 100644
--- a/drivers/gpu/drm/arm/malidp_hw.h
+++ b/drivers/gpu/drm/arm/malidp_hw.h
@@ -52,6 +52,7 @@ struct malidp_format_id {
struct malidp_irq_map {
        u32 irq_mask;           /* mask of IRQs that can be enabled in the 
block */
        u32 vsync_irq;          /* IRQ bit used for signaling during VSYNC */
+       u32 err_mask;           /* mask of bits that represent errors */
};

struct malidp_layer {
diff --git a/drivers/gpu/drm/arm/malidp_regs.h 
b/drivers/gpu/drm/arm/malidp_regs.h
index 149024f..7c37390 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -53,6 +53,8 @@
#define MALIDP550_DE_IRQ_AXI_ERR                (1 << 16)
#define MALIDP550_SE_IRQ_EOW                    (1 << 0)
#define MALIDP550_SE_IRQ_AXI_ERR                (1 << 16)
+#define MALIDP550_SE_IRQ_OVR                   (1 << 17)
+#define MALIDP550_SE_IRQ_IBSY                  (1 << 18)
#define MALIDP550_DC_IRQ_CONF_VALID             (1 << 0)
#define MALIDP550_DC_IRQ_CONF_MODE              (1 << 4)
#define MALIDP550_DC_IRQ_CONF_ACTIVE            (1 << 16)
@@ -60,6 +62,10 @@
#define MALIDP550_DC_IRQ_SE                     (1 << 24)

#define MALIDP650_DE_IRQ_DRIFT                  (1 << 4)
+#define MALIDP650_DE_IRQ_ACEV1                 (1 << 17)
+#define MALIDP650_DE_IRQ_ACEV2                 (1 << 18)
+#define MALIDP650_DE_IRQ_ACEG                  (1 << 19)
+#define MALIDP650_DE_IRQ_AXIEP                 (1 << 28)

/* bit masks that are common between products */
#define   MALIDP_CFG_VALID              (1 << 0)
--
2.7.4

_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to