The 'i915_gpu_state' debugfs entry can be used to
capture the current gpu state. This is similar to
what one would get from 'i915_error_state' if a gpu
error state had been captured.

The motivation for this was to enhance our toolbox
so that we can direct bug reporters to do things like:

'grep -i suspend /sys/kernel/debug/dri/0/i915_gpu_state'

pre- and post-mortem to gain insight when triaging, and to
save ourselves from writing new debugfs entries when
the information is already available in our error state.

v2: - use symmetrical put/get (Chris)
    - document the api (Daniel)
    - take a mutex when capturing

Signed-off-by: Mika Kuoppala <[email protected]>
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 129 ++++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_drv.h       |   5 ++
 drivers/gpu/drm/i915/i915_gpu_error.c |  57 +++++++++++++--
 3 files changed, 141 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d0e445e..30f56f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -888,58 +888,41 @@ static int i915_hws_info(struct seq_file *m, void *data)
        return 0;
 }
 
-static ssize_t
-i915_error_state_write(struct file *filp,
-                      const char __user *ubuf,
-                      size_t cnt,
-                      loff_t *ppos)
-{
-       struct i915_error_state_file_priv *error_priv = filp->private_data;
-       struct drm_device *dev = error_priv->dev;
-       int ret;
-
-       DRM_DEBUG_DRIVER("Resetting error state\n");
-
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
-       if (ret)
-               return ret;
-
-       i915_destroy_error_state(dev);
-       mutex_unlock(&dev->struct_mutex);
-
-       return cnt;
-}
-
-static int i915_error_state_open(struct inode *inode, struct file *file)
+static int i915_gpu_state_open(struct inode *inode, struct file *file)
 {
        struct drm_device *dev = inode->i_private;
-       struct i915_error_state_file_priv *error_priv;
+       struct i915_error_state_file_priv *state_priv;
+       int ret = 0;
 
-       error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
-       if (!error_priv)
+       state_priv = kzalloc(sizeof(*state_priv), GFP_KERNEL);
+       if (!state_priv)
                return -ENOMEM;
 
-       error_priv->dev = dev;
-
-       i915_error_state_get(dev, error_priv);
-
-       file->private_data = error_priv;
+       state_priv->dev = dev;
 
-       return 0;
-}
+       ret = mutex_lock_interruptible(&dev->struct_mutex);
+       if (ret)
+               return ret;
 
-static int i915_error_state_release(struct inode *inode, struct file *file)
-{
-       struct i915_error_state_file_priv *error_priv = file->private_data;
+       ret = i915_gpu_idle(dev);
+       if (ret)
+               goto unlock;
 
-       i915_error_state_put(error_priv);
-       kfree(error_priv);
+       state_priv->error = i915_gpu_state_capture(dev);
+       if (state_priv->error == NULL) {
+               kfree(state_priv);
+               ret = -ENOMEM;
+               goto unlock;
+       }
 
-       return 0;
+       file->private_data = state_priv;
+unlock:
+       mutex_unlock(&dev->struct_mutex);
+       return ret;
 }
 
-static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
-                                    size_t count, loff_t *pos)
+static ssize_t i915_gpu_state_read(struct file *file, char __user *userbuf,
+                                  size_t count, loff_t *pos)
 {
        struct i915_error_state_file_priv *error_priv = file->private_data;
        struct drm_i915_error_state_buf error_str;
@@ -968,13 +951,72 @@ out:
        return ret ?: ret_count;
 }
 
+static int i915_gpu_state_release(struct inode *inode, struct file *file)
+{
+       struct i915_error_state_file_priv *state_priv = file->private_data;
+
+       i915_gpu_state_put(state_priv->error);
+       kfree(state_priv);
+
+       return 0;
+}
+
+static const struct file_operations i915_gpu_state_fops = {
+       .owner = THIS_MODULE,
+       .open = i915_gpu_state_open,
+       .read = i915_gpu_state_read,
+       .write = NULL,
+       .llseek = default_llseek,
+       .release = i915_gpu_state_release,
+};
+
+static int i915_error_state_open(struct inode *inode, struct file *file)
+{
+       struct drm_device *dev = inode->i_private;
+       struct i915_error_state_file_priv *error_priv;
+
+       error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
+       if (!error_priv)
+               return -ENOMEM;
+
+       error_priv->dev = dev;
+
+       i915_error_state_get(dev, error_priv);
+
+       file->private_data = error_priv;
+
+       return 0;
+}
+
+static ssize_t
+i915_error_state_write(struct file *filp,
+                      const char __user *ubuf,
+                      size_t cnt,
+                      loff_t *ppos)
+{
+       struct i915_error_state_file_priv *error_priv = filp->private_data;
+       struct drm_device *dev = error_priv->dev;
+       int ret;
+
+       DRM_DEBUG_DRIVER("Resetting error state\n");
+
+       ret = mutex_lock_interruptible(&dev->struct_mutex);
+       if (ret)
+               return ret;
+
+       i915_destroy_error_state(dev);
+       mutex_unlock(&dev->struct_mutex);
+
+       return cnt;
+}
+
 static const struct file_operations i915_error_state_fops = {
        .owner = THIS_MODULE,
        .open = i915_error_state_open,
-       .read = i915_error_state_read,
+       .read = i915_gpu_state_read,
        .write = i915_error_state_write,
        .llseek = default_llseek,
-       .release = i915_error_state_release,
+       .release = i915_gpu_state_release,
 };
 
 static int
@@ -4367,6 +4409,7 @@ static const struct i915_debugfs_files {
        {"i915_ring_test_irq", &i915_ring_test_irq_fops},
        {"i915_gem_drop_caches", &i915_drop_caches_fops},
        {"i915_error_state", &i915_error_state_fops},
+       {"i915_gpu_state", &i915_gpu_state_fops},
        {"i915_next_seqno", &i915_next_seqno_fops},
        {"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
        {"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11e85cb..ca487b4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2866,6 +2866,11 @@ static inline void i915_error_state_buf_release(
 {
        kfree(eb->buf);
 }
+
+struct drm_i915_error_state * __must_check
+i915_gpu_state_capture(struct drm_device *dev);
+void i915_gpu_state_put(struct drm_i915_error_state *error);
+
 void i915_capture_error_state(struct drm_device *dev, bool wedge,
                              const char *error_msg);
 void i915_error_state_get(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index dcea1fa..0cefeebc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -540,7 +540,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
        kfree(obj);
 }
 
-static void i915_error_state_free(struct kref *error_ref)
+static void i915_gpu_state_free(struct kref *error_ref)
 {
        struct drm_i915_error_state *error = container_of(error_ref,
                                                          typeof(*error), ref);
@@ -1266,7 +1266,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 }
 
 static struct drm_i915_error_state *
-__i915_capture_gpu_state(struct drm_device *dev)
+__i915_gpu_state_capture(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_error_state *error;
@@ -1295,6 +1295,49 @@ __i915_capture_gpu_state(struct drm_device *dev)
 }
 
 /**
+ * i915_gpu_state_capture - capture a gpu hardware state
+ * @dev: drm device
+ *
+ * This will capture the current gpu hw state, identical to what
+ * i915_capture_error_state() does, but without any error condition.
+ * The returned state is reference counted and the caller is responsible
+ * for releasing the state by calling i915_gpu_state_put().
+ */
+struct drm_i915_error_state *
+i915_gpu_state_capture(struct drm_device *dev)
+{
+       struct drm_i915_error_state *gpu_state;
+
+       gpu_state = __i915_gpu_state_capture(dev);
+       if (gpu_state == NULL)
+               return NULL;
+
+       scnprintf(gpu_state->error_msg, sizeof(gpu_state->error_msg),
+                 "GPU hw state snapshot\n");
+
+       return gpu_state;
+}
+
+static void i915_gpu_state_get(struct drm_i915_error_state *state)
+{
+       WARN_ON(!state);
+       kref_get(&state->ref);
+}
+
+/**
+ * i915_gpu_state_put - release a drm_i915_error_state
+ * @state: gpu state
+ *
+ * This will release the reference to the drm_i915_error_state,
+ * possibly freeing the state if the reference count reaches zero.
+ */
+void i915_gpu_state_put(struct drm_i915_error_state *state)
+{
+       if (state)
+               kref_put(&state->ref, i915_gpu_state_free);
+}
+
+/**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
  *
@@ -1311,7 +1354,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
        struct drm_i915_error_state *error;
        unsigned long flags;
 
-       error = __i915_capture_gpu_state(dev);
+       error = __i915_gpu_state_capture(dev);
        if (error == NULL)
                return;
 
@@ -1326,7 +1369,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
        spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
 
        if (error) {
-               i915_error_state_free(&error->ref);
+               i915_gpu_state_free(&error->ref);
                return;
        }
 
@@ -1348,7 +1391,7 @@ void i915_error_state_get(struct drm_device *dev,
        spin_lock_irq(&dev_priv->gpu_error.lock);
        error_priv->error = dev_priv->gpu_error.first_error;
        if (error_priv->error)
-               kref_get(&error_priv->error->ref);
+               i915_gpu_state_get(error_priv->error);
        spin_unlock_irq(&dev_priv->gpu_error.lock);
 
 }
@@ -1356,7 +1399,7 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
 {
        if (error_priv->error)
-               kref_put(&error_priv->error->ref, i915_error_state_free);
+               i915_gpu_state_put(error_priv->error);
 }
 
 void i915_destroy_error_state(struct drm_device *dev)
@@ -1370,7 +1413,7 @@ void i915_destroy_error_state(struct drm_device *dev)
        spin_unlock_irq(&dev_priv->gpu_error.lock);
 
        if (error)
-               kref_put(&error->ref, i915_error_state_free);
+               i915_gpu_state_put(error);
 }
 
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to