Much of the complexity in irqentry_{enter,exit}() is due to #PF being
the sole exception that can schedule from kernel context.

One additional wrinkle with #PF is that it is non-maskable; it can
happen _anywhere_. Due to this, and the wonders of tracing, we can get
the 'normal' NMI nesting problem vs TRACE_IRQFLAGS:

        local_irq_disable()
          raw_local_irq_disable();
          trace_hardirqs_off();

        local_irq_enable();
          trace_hardirqs_on();
          <#PF>
            trace_hardirqs_off()
            ...
            if (!regs_irqs_disabled(regs))
              trace_hardirqs_on();
          </#PF>
          // WHOOPS -- lockdep thinks IRQs are disabled again!
          raw_local_irq_enable();

Rework irqentry_{enter,exit}() to save/restore the software state.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 include/linux/entry-common.h |    1 
 kernel/entry/common.c        |   52 ++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 27 deletions(-)

--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -310,6 +310,7 @@ void irqentry_exit_to_user_mode(struct p
 #ifndef irqentry_state
 typedef struct irqentry_state {
        bool    exit_rcu;
+       bool    irqs_enabled;
 } irqentry_state_t;
 #endif
 
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -260,6 +260,7 @@ noinstr irqentry_state_t irqentry_enter(
 {
        irqentry_state_t ret = {
                .exit_rcu = false,
+               .irqs_enabled = lockdep_hardirqs_enabled(),
        };
 
        if (user_mode(regs)) {
@@ -340,35 +341,32 @@ noinstr void irqentry_exit(struct pt_reg
        /* Check whether this returns to user mode */
        if (user_mode(regs)) {
                irqentry_exit_to_user_mode(regs);
-       } else if (!regs_irqs_disabled(regs)) {
-               /*
-                * If RCU was not watching on entry this needs to be done
-                * carefully and needs the same ordering of lockdep/tracing
-                * and RCU as the return to user mode path.
-                */
-               if (state.exit_rcu) {
-                       instrumentation_begin();
-                       /* Tell the tracer that IRET will enable interrupts */
-                       trace_hardirqs_on_prepare();
-                       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-                       instrumentation_end();
-                       rcu_irq_exit();
-                       lockdep_hardirqs_on(CALLER_ADDR0);
-                       return;
-               }
+               return;
+       }
 
-               instrumentation_begin();
+       instrumentation_begin();
+       /*
+        * When returning to interrupts enabled, and RCU was watching see if we
+        * need preemption.
+        */
+       if (!regs_irqs_disabled(regs) && !state.exit_rcu) {
                if (IS_ENABLED(CONFIG_PREEMPTION))
                        irqentry_exit_cond_resched();
-               /* Covers both tracing and lockdep */
-               trace_hardirqs_on();
-               instrumentation_end();
-       } else {
-               /*
-                * IRQ flags state is correct already. Just tell RCU if it
-                * was not watching on entry.
-                */
-               if (state.exit_rcu)
-                       rcu_irq_exit();
        }
+
+       /*
+        * Return the TRACE_IRQFLAGS state to what we found on entry.
+        * Observe the correct order vs RCU.
+        */
+       if (state.irqs_enabled) {
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       }
+       instrumentation_end();
+
+       if (state.exit_rcu)
+               rcu_irq_exit();
+
+       if (state.irqs_enabled)
+               lockdep_hardirqs_on(CALLER_ADDR0);
 }


Reply via email to