From: "Steven Rostedt (VMware)" <[email protected]>

The event_mutex is a high-level lock. It should never be taken while
get_online_cpus() is held. Perf is the only user that does so. Take
event_mutex before get_online_cpus() instead, which fixes the lock
ordering.
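
Roughly, the ordering on the perf side changes like this (an
illustrative sketch of the intent, not the literal code):

	/* before: event_mutex nested inside get_online_cpus() */
	get_online_cpus();
	mutex_lock(&event_mutex);	/* e.g. via perf_trace_init() */
	...
	mutex_unlock(&event_mutex);
	put_online_cpus();

	/* after: event_mutex is always the outer lock */
	mutex_lock(&event_mutex);
	get_online_cpus();
	...
	put_online_cpus();
	mutex_unlock(&event_mutex);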

Signed-off-by: Steven Rostedt (VMware) <[email protected]>
---
 include/linux/trace_events.h    | 2 ++
 kernel/events/core.c            | 9 +++++++++
 kernel/trace/trace.h            | 1 -
 kernel/trace/trace_event_perf.c | 8 ++++----
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..0a30d2b5df51 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -50,6 +50,8 @@ struct trace_event;
 int trace_raw_output_prep(struct trace_iterator *iter,
                          struct trace_event *event);
 
+extern struct mutex event_mutex;
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dba870ccda63..c65c3b5a92f4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4021,12 +4021,14 @@ static void unaccount_event(struct perf_event *event)
 
 static void perf_sched_delayed(struct work_struct *work)
 {
+       mutex_lock(&event_mutex);
        get_online_cpus();
        mutex_lock(&perf_sched_mutex);
        if (atomic_dec_and_test(&perf_sched_count))
                static_branch_disable_cpuslocked(&perf_sched_events);
        mutex_unlock(&perf_sched_mutex);
        put_online_cpus();
+       mutex_unlock(&event_mutex);
 }
 
 /*
@@ -4231,7 +4233,9 @@ static void put_event(struct perf_event *event)
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       mutex_lock(&event_mutex);
        _free_event(event);
+       mutex_unlock(&event_mutex);
 }
 
 /*
@@ -8917,6 +8921,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
        pmu->hrtimer_interval_ms = timer;
 
        /* update all cpuctx for this PMU */
+       mutex_lock(&event_mutex);
        get_online_cpus();
        for_each_online_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
@@ -8927,6 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
                        (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
        }
        put_online_cpus();
+       mutex_unlock(&event_mutex);
        mutex_unlock(&mux_interval_mutex);
 
        return count;
@@ -9879,6 +9885,7 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_task;
        }
 
+       mutex_lock(&event_mutex);
        get_online_cpus();
 
        if (task) {
@@ -10160,6 +10167,7 @@ SYSCALL_DEFINE5(perf_event_open,
        }
 
        put_online_cpus();
+       mutex_unlock(&event_mutex);
 
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
@@ -10196,6 +10204,7 @@ SYSCALL_DEFINE5(perf_event_open,
                mutex_unlock(&task->signal->cred_guard_mutex);
 err_cpus:
        put_online_cpus();
+       mutex_unlock(&event_mutex);
 err_task:
        if (task)
                put_task_struct(task);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..4df0a8e9d000 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1430,7 +1430,6 @@ static inline void *event_file_data(struct file *filp)
        return ACCESS_ONCE(file_inode(filp)->i_private);
 }
 
-extern struct mutex event_mutex;
 extern struct list_head ftrace_events;
 
 extern const struct file_operations event_trigger_fops;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 562fa69df5d3..b8c90a024e99 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -213,7 +213,8 @@ int perf_trace_init(struct perf_event *p_event)
        u64 event_id = p_event->attr.config;
        int ret = -EINVAL;
 
-       mutex_lock(&event_mutex);
+       lockdep_assert_held(&event_mutex);
+
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
@@ -224,17 +225,16 @@ int perf_trace_init(struct perf_event *p_event)
                        break;
                }
        }
-       mutex_unlock(&event_mutex);
 
        return ret;
 }
 
 void perf_trace_destroy(struct perf_event *p_event)
 {
-       mutex_lock(&event_mutex);
+       lockdep_assert_held(&event_mutex);
+
        perf_trace_event_close(p_event);
        perf_trace_event_unreg(p_event);
-       mutex_unlock(&event_mutex);
 }
 
 int perf_trace_add(struct perf_event *p_event, int flags)
-- 
2.10.2

