NeHuang created this revision.
NeHuang added reviewers: nemanjai, stefanp, amyk, PowerPC.
NeHuang added a project: LLVM.
Herald added subscribers: shchenz, kbarton, hiraditya.
NeHuang requested review of this revision.

This patch adds the fix for undef virtual register reading failure when trap 
optimization is enabled.

Failure scenario as below:

1. In a machine basic block A, the definition of a virtual register MI was 
eliminated due to trap optimization (TRAP inserted before the MI)
2. The same virtual register is still used in another machine basic block B 
(dominated by A) will trigger undef vr reading failure

Idea of the fix

1. Detect and set all virtual register definition after the conditional trap to 
`IMPLICIT_DEF`
2. Remove all the other machine instructions after the conditional trap and 
change the terminator machine instruction to an unconditional trap


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D117355

Files:
  llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
  llvm/test/CodeGen/PowerPC/mi-peephole-trap-opt-dominated-block.mir

Index: llvm/test/CodeGen/PowerPC/mi-peephole-trap-opt-dominated-block.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/mi-peephole-trap-opt-dominated-block.mir
@@ -0,0 +1,142 @@
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \
+# RUN:   -verify-machineinstrs -start-before=ppc-mi-peepholes \
+# RUN:   -stop-after=ppc-mi-peepholes -ppc-opt-conditional-trap \
+# RUN:   | FileCheck --check-prefix=CHECK-MIR %s
+
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \
+# RUN:   -verify-machineinstrs -start-before=ppc-mi-peepholes \
+# RUN:   -ppc-opt-conditional-trap | FileCheck %s
+
+
+--- |
+  define dso_local signext i32 @callee(i32 signext %ic, i32 signext %id) {
+  entry:
+    %add = add nsw i32 %id, %ic
+    ret i32 %add
+  }
+  define dso_local signext i32 @test_MBB_dominated(i32 signext %ia, i32 signext %ib) {
+  entry:
+    tail call void @llvm.ppc.MBB_dominated(i32 3, i32 3, i32 4)
+    %cmp.not = icmp slt i32 %ia, %ib
+    br i1 %cmp.not, label %cleanup, label %if.then
+  if.then:                                          ; preds = %entry
+    %add = add nsw i32 %ia, 1
+    %call = tail call signext i32 @callee(i32 signext %add, i32 signext %ib)
+    %add1 = add nsw i32 %call, 1
+    br label %cleanup
+  cleanup:                                          ; preds = %if.then, %entry
+    %retval.0 = phi i32 [ %add1, %if.then ], [ 0, %entry ]
+    ret i32 %retval.0
+  }
+  declare void @llvm.ppc.MBB_dominated(i32, i32, i32 immarg) #2
+...
+
+---
+name:            callee
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3, $x4
+    %1:g8rc = COPY $x4
+    %0:g8rc = COPY $x3
+    %2:gprc = COPY %0.sub_32
+    %3:gprc = COPY %1.sub_32
+    %4:gprc = nsw ADD4 killed %3, killed %2
+    %5:g8rc = EXTSW_32_64 killed %4
+    $x3 = COPY %5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+...
+
+---
+name:            test_MBB_dominated
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $x3, $x4
+    %3:g8rc = COPY $x4
+    %2:g8rc = COPY $x3
+    %5:gprc = COPY %3.sub_32
+    %4:gprc_and_gprc_nor0 = COPY %2.sub_32
+    %7:gprc = LI 3
+    TW 4, %7, %7
+    %6:gprc = LI 0
+    %8:crrc = CMPW %4, %5
+    BCC 12, killed %8, %bb.2
+    B %bb.1
+  bb.1.if.then:
+    successors: %bb.2(0x80000000)
+    %9:gprc = nsw ADDI %4, 1
+    ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+    %10:g8rc = EXTSW_32_64 %5
+    %11:g8rc = EXTSW_32_64 killed %9
+    $x3 = COPY %11
+    $x4 = COPY %10
+    BL8 @callee, csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit-def $r1, implicit-def $x3
+    ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+    %12:g8rc = COPY $x3
+    %13:gprc_and_gprc_nor0 = COPY %12.sub_32
+    %0:gprc = nsw ADDI killed %13, 1
+  bb.2.cleanup:
+    %1:gprc = PHI %6, %bb.0, %0, %bb.1
+    %14:g8rc = EXTSW_32_64 %1
+    $x3 = COPY %14
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+    # CHECK-MIR-LABEL: test_MBB_dominated
+    # CHECK-MIR:       bb.0.entry:
+    # CHECK-MIR:       successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    # CHECK-MIR:       liveins: $x3, $x4
+    # CHECK-MIR:       %0:g8rc = COPY $x4
+    # CHECK-MIR-NEXT:  %1:g8rc = COPY $x3
+    # CHECK-MIR-NEXT:  %2:gprc = COPY %0.sub_32
+    # CHECK-MIR-NEXT:  %3:gprc_and_gprc_nor0 = COPY %1.sub_32
+    # CHECK-MIR-NEXT:  %4:gprc = LI 3
+    # CHECK-MIR-NEXT:  %5:gprc = IMPLICIT_DEF
+    # CHECK-MIR-NEXT:  %6:crrc = IMPLICIT_DEF
+    # CHECK-MIR-NEXT:  TRAP
+
+    # CHECK-MIR:       bb.1.if.then:
+    # CHECK-MIR-NEXT:  successors: %bb.2(0x80000000)
+    # CHECK-MIR:       %7:gprc = nsw ADDI %3, 1
+    # CHECK-MIR-NEXT:  ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+    # CHECK-MIR-NEXT:  %8:g8rc = EXTSW_32_64 %2
+    # CHECK-MIR-NEXT:  %9:g8rc = EXTSW_32_64 killed %7
+    # CHECK-MIR-NEXT:  $x3 = COPY %9
+    # CHECK-MIR-NEXT:  $x4 = COPY %8
+    # CHECK-MIR-NEXT:  BL8 @callee,
+    # CHECK-MIR-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+    # CHECK-MIR-NEXT:  %10:g8rc = COPY $x3
+    # CHECK-MIR-NEXT:  %11:gprc_and_gprc_nor0 = COPY %10.sub_32
+    # CHECK-MIR-NEXT:  %12:gprc = nsw ADDI killed %11, 1
+
+    # CHECK-MIR:       bb.2.cleanup:
+    # CHECK-MIR-NEXT:  %13:gprc = PHI %5, %bb.0, %12, %bb.1
+    # CHECK-MIR-NEXT:  %14:g8rc = EXTSW_32_64 %13
+    # CHECK-MIR-NEXT:  $x3 = COPY %14
+    # CHECK-MIR-NEXT:  BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+
+    # CHECK-LABEL: test_MBB_dominated
+    # CHECK: # %bb.0: # %entry
+    # CHECK:       trap
+    # CHECK-NEXT:  .LBB1_1: # %if.then
+    # CHECK-NEXT:  mflr 0
+    # CHECK-NEXT:  std 0, 16(1)
+    # CHECK-NEXT:  stdu 1, -128(1)
+    # CHECK:       addi 3, 3, 1
+    # CHECK-NEXT:  extsw 4, 4
+    # CHECK-NEXT:  std 2, 120(1)
+    # CHECK-NEXT:  extsw 3, 3
+    # CHECK-NEXT:  bl callee
+    # CHECK-NEXT:  ld 2, 120(1)
+    # CHECK-NEXT:  addi 5, 3, 1
+    # CHECK-NEXT:  addi 1, 1, 128
+    # CHECK-NEXT:  ld 0, 16(1)
+    # CHECK-NEXT:  mtlr 0
+    # CHECK-NEXT:  .LBB1_2:  # %cleanup
+    # CHECK-NEXT:  extsw 3, 5
+    # CHECK-NEXT:  blr
Index: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -425,10 +425,17 @@
         ToErase->eraseFromParent();
         ToErase = nullptr;
       }
-      // If a conditional trap instruction got optimized to an
-      // unconditional trap, eliminate all the instructions after
-      // the trap.
+      // If a conditional trap instruction got optimized to an unconditional
+      // trap, set all vreg definitions after the conditional trap to
+      // IMPLICIT_DEF and eliminate all the other instructions after the trap.
       if (EnableTrapOptimization && TrapOpt) {
+        bool IsVReg =
+            (MI.getNumOperands() && MI.getOperand(0).isReg())
+                ? Register::isVirtualRegister(MI.getOperand(0).getReg())
+                : false;
+        if (IsVReg)
+          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
+                  MI.getOperand(0).getReg());
         ToErase = &MI;
         continue;
       }
@@ -1044,8 +1051,8 @@
         auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
                                                : LiMI2->getOperand(1).getImm();
 
-        // We will replace the MI with an unconditional trap if it will always
-        // trap.
+        // We will remove the MI and change the terminator to an unconditional
+        // trap if it will always trap.
         if ((ImmOperand0 == 31) ||
             ((ImmOperand0 & 0x10) &&
              ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
@@ -1056,7 +1063,6 @@
             ((ImmOperand0 & 0x1) &&
              ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
             ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
-          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
           TrapOpt = true;
         }
         // We will delete the MI if it will never trap.
@@ -1066,6 +1072,13 @@
       }
       }
     }
+    // Change the terminator to an unconditional trap and reset TrapOpt to false
+    // at the end of the basic block.
+    if (EnableTrapOptimization && TrapOpt) {
+      if (ToErase)
+        BuildMI(MBB, ToErase, ToErase->getDebugLoc(), TII->get(PPC::TRAP));
+      TrapOpt = false;
+    }
 
     // If the last instruction was marked for elimination,
     // remove it now.
@@ -1073,9 +1086,6 @@
       ToErase->eraseFromParent();
       ToErase = nullptr;
     }
-    // Reset TrapOpt to false at the end of the basic block.
-    if (EnableTrapOptimization)
-      TrapOpt = false;
   }
 
   // Eliminate all the TOC save instructions which are redundant.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D117355: [PowerPC] Fi... Victor Huang via Phabricator via cfe-commits

Reply via email to