On Tue, May 19, 2020 at 09:37:13AM -0700, Christoph Hellwig wrote:
> I don't think we need any of that symlink stuff.  Even if we want it
> (which I don't), it should not be in a bug fix patch.
> 
> In fact to fix the blktrace race I think we only need something like
> this fairly trivial patch (completely untested so far) below.
> 
> (and with that we can also drop the previous patch, as blk-debugfs.c
> becomes rather pointless)

You forgot to deal with partitions. Putting similar lipstick on the pig,
this is what I end up with, let me know if this seems agreeable:

diff --git a/block/blk-core.c b/block/blk-core.c
index 2096373bd16d..67cd9ddac822 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -51,9 +51,7 @@
 #include "blk-pm.h"
 #include "blk-rq-qos.h"
 
-#ifdef CONFIG_DEBUG_FS
 struct dentry *blk_debugfs_root;
-#endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -1877,9 +1875,7 @@ int __init blk_dev_init(void)
        blk_requestq_cachep = kmem_cache_create("request_queue",
                        sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-#ifdef CONFIG_DEBUG_FS
        blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
 
        return 0;
 }
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 96b7a35c898a..08edc3a54114 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -822,9 +822,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
-                                           blk_debugfs_root);
-
        debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
        /*
@@ -855,9 +852,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
 
 void blk_mq_debugfs_unregister(struct request_queue *q)
 {
-       debugfs_remove_recursive(q->debugfs_dir);
        q->sched_debugfs_dir = NULL;
-       q->debugfs_dir = NULL;
 }
 
 static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 561624d4cc4e..5babb6547f48 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-cgroup.h>
+#include <linux/debugfs.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -918,6 +919,7 @@ static void blk_release_queue(struct kobject *kobj)
 
        blk_trace_shutdown(q);
 
+       debugfs_remove_recursive(q->debugfs_dir);
        if (queue_is_mq(q))
                blk_mq_debugfs_unregister(q);
 
@@ -989,6 +991,28 @@ int blk_register_queue(struct gendisk *disk)
                goto unlock;
        }
 
+       /*
+        * Blktrace needs a debugsfs name even for queues that don't register
+        * a gendisk, so it lazily registers the debugfs directory.  But that
+        * can get us into a situation where a SCSI device is found, with no
+        * driver for it (yet).  Then blktrace is used on the device, creating
+        * the debugfs directory, and only after that a drivers is loaded. In
+        * that case we might already have a debugfs directory registered here.
+        * Even worse we could be racing with blktrace to register it.
+        */
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+       mutex_lock(&q->blk_trace_mutex);
+       if (!q->debugfs_dir) {
+               q->debugfs_dir =
+                       debugfs_create_dir(kobject_name(q->kobj.parent),
+                               blk_debugfs_root);
+       }
+       mutex_unlock(&q->blk_trace_mutex);
+#else
+       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+                                           blk_debugfs_root);
+#endif
+
        if (queue_is_mq(q)) {
                __blk_mq_register_dev(dev, q);
                blk_mq_debugfs_register(q);
diff --git a/block/blk.h b/block/blk.h
index 5db4ec1e85f7..f11f79295419 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,9 +14,7 @@
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT                (5 * HZ)
 
-#ifdef CONFIG_DEBUG_FS
 extern struct dentry *blk_debugfs_root;
-#endif
 
 struct blk_flush_queue {
        unsigned int            flush_pending_idx:1;
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 297004fd2264..95f9019aac83 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -10,6 +10,7 @@
 #include <linux/vmalloc.h>
 #include <linux/blktrace_api.h>
 #include <linux/raid/detect.h>
+#include <linux/debugfs.h>
 #include "check.h"
 
 static int (*check_part[])(struct parsed_partitions *) = {
@@ -322,6 +323,7 @@ void delete_partition(struct gendisk *disk, struct 
hd_struct *part)
        get_device(disk_to_dev(part_to_disk(part)));
        rcu_assign_pointer(ptbl->part[part->partno], NULL);
        kobject_put(part->holder_dir);
+       debugfs_remove_recursive(part->debugfs_dir);
        device_del(part_to_dev(part));
 
        /*
@@ -443,6 +445,7 @@ static struct hd_struct *add_partition(struct gendisk 
*disk, int partno,
        if (!p->holder_dir)
                goto out_del;
 
+       p->debugfs_dir = debugfs_create_dir(dev_name(pdev), blk_debugfs_root);
        dev_set_uevent_suppress(pdev, 0);
        if (flags & ADDPART_FLAG_WHOLEDISK) {
                err = device_create_file(pdev, &dev_attr_whole_disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 20e378b428b8..737467c29a31 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -574,8 +574,8 @@ struct request_queue {
        struct list_head        tag_set_list;
        struct bio_set          bio_split;
 
-#ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *debugfs_dir;
+#ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *sched_debugfs_dir;
        struct dentry           *rqos_debugfs_dir;
 #endif
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3b6ff5902edc..eb6db276e293 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -22,7 +22,6 @@ struct blk_trace {
        u64 end_lba;
        u32 pid;
        u32 dev;
-       struct dentry *dir;
        struct dentry *dropped_file;
        struct dentry *msg_file;
        struct list_head running_list;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a9384449465a..7ff4c4c06140 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -89,6 +89,7 @@ struct hd_struct {
        int make_it_fail;
 #endif
        struct rcu_work rcu_work;
+       struct dentry *debugfs_dir;
 };
 
 /**
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ea47f2084087..8209d41dec18 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -311,7 +311,6 @@ static void blk_trace_free(struct blk_trace *bt)
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
        relay_close(bt->rchan);
-       debugfs_remove(bt->dir);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
        kfree(bt);
@@ -482,9 +481,6 @@ static int do_blk_trace_setup(struct request_queue *q, char 
*name, dev_t dev,
        if (!buts->buf_size || !buts->buf_nr)
                return -EINVAL;
 
-       if (!blk_debugfs_root)
-               return -ENOENT;
-
        strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
        buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
 
@@ -494,6 +490,38 @@ static int do_blk_trace_setup(struct request_queue *q, 
char *name, dev_t dev,
         */
        strreplace(buts->name, '/', '_');
 
+       /*
+        * We also have to use a partition directory if a partition is
+        * being worked on, even though the same request_queue is shared.
+        */
+       if (bdev && bdev != bdev->bd_contains)
+               dir = bdev->bd_part->debugfs_dir;
+       else {
+               /*
+                * For queues that do not have a gendisk attached to them, the
+                * debugfs directory will not have been created at setup time.
+                * Create it here lazily, it will only be removed when the
+                * queue is torn down.
+                */
+               if (!q->debugfs_dir) {
+                       q->debugfs_dir =
+                               debugfs_create_dir(buts->name,
+                                                  blk_debugfs_root);
+               }
+               dir = q->debugfs_dir;
+       }
+
+       /*
+        * As blktrace relies on debugfs for its interface the debugfs directory
+        * is required, contrary to the usual mantra of not checking for debugfs
+        * files or directories.
+        */
+       if (IS_ERR_OR_NULL(q->debugfs_dir)) {
+               pr_warn("debugfs_dir not present for %s so skipping\n",
+                       buts->name);
+               return -ENOENT;
+       }
+
        bt = kzalloc(sizeof(*bt), GFP_KERNEL);
        if (!bt)
                return -ENOMEM;
@@ -507,12 +535,6 @@ static int do_blk_trace_setup(struct request_queue *q, 
char *name, dev_t dev,
        if (!bt->msg_data)
                goto err;
 
-       ret = -ENOENT;
-
-       dir = debugfs_lookup(buts->name, blk_debugfs_root);
-       if (!dir)
-               bt->dir = dir = debugfs_create_dir(buts->name, 
blk_debugfs_root);
-
        bt->dev = dev;
        atomic_set(&bt->dropped, 0);
        INIT_LIST_HEAD(&bt->running_list);
@@ -551,8 +573,6 @@ static int do_blk_trace_setup(struct request_queue *q, char 
*name, dev_t dev,
 
        ret = 0;
 err:
-       if (dir && !bt->dir)
-               dput(dir);
        if (ret)
                blk_trace_free(bt);
        return ret;
-- 
2.26.2

Reply via email to