[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

Albion Fung via Phabricator via cfe-commits Wed, 03 Feb 2021 12:11:33 -0800

Conanap updated this revision to Diff 321184.
Conanap added a comment.

Updated to ensure the shortcircuit protects against the destructive function.



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95458/new/

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:       plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:       xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:       xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:       blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:       plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:       blr
+; CHECK-O:       xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
     ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
@@ -42,9 +45,6 @@
 ; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
 ; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-NEXT:    lbz r3, 0(r3)
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 ; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
@@ -61,9 +64,6 @@
 ; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
 ; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:    lhz r3, 0(r3)
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 ; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
@@ -144,7 +144,8 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:    lfd f0, 0(r3)
 ; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
@@ -286,8 +287,7 @@
 
 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
 ; CHECK-LABEL: FuncPtrCall:
-; CHECK:         .localentry FuncPtrCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
@@ -317,8 +317,7 @@
 
 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
 ; CHECK-LABEL: VecMultiUse:
-; CHECK:         .localentry VecMultiUse, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
@@ -355,8 +354,7 @@
 
 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: UseAddr:
-; CHECK:         .localentry UseAddr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r0, 16(r1)
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE:     add r2, r2, r12
 ; CHECK-S-NOT:       .localentry
 ; CHECK-ALL:       # %bb.0: # %entry
-; CHECK-S-NEXT:    plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:    xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:    xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:    blr
 entry:
   ret double 0x404124A4EBDD334C
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -122,19 +122,23 @@
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
@@ -145,7 +149,9 @@
 ;
 ; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
@@ -186,19 +192,23 @@
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
@@ -209,7 +219,9 @@
 ;
 ; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
Index: llvm/test/CodeGen/PowerPC/constant-pool.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -39,8 +39,10 @@
  define ppc_fp128 @LongDoubleConstantPool() {
 ; CHECK-LABEL: LongDoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    plfd f2, .LCPI2_1@PCREL(0), 1
+; CHECK-NEXT:    plfd f2, .LCPI2_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 56623104
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -609716532
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: LongDoubleConstantPool:
@@ -185,9 +187,11 @@
 define double @two_constants(double %a) {
 ; CHECK-LABEL: two_constants:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI11_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI11_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -212,11 +216,15 @@
 ; CHECK-NEXT:    cmplwi r3, 0
 ; CHECK-NEXT:    beq cr0, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    plfd f1, .LCPI12_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -343597384
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB12_2: # %if.end
-; CHECK-NEXT:    plfd f0, .LCPI12_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1076085391
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 1546188227
 ; CHECK-NEXT:    xsadddp f1, f1, f0
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: two_constants_two_bb:
@@ -248,11 +256,14 @@
 define double @three_constants_f64(double %a, double %c) {
 ; CHECK-LABEL: three_constants_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI13_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI13_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f0, f1
-; CHECK-NEXT:    plfd f1, .LCPI13_2@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073948393
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 2027224564
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -340,23 +351,29 @@
 
 define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-LABEL: three_constants_ppcf128:
-; CHECK:         .localentry three_constants_ppcf128, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    plfd f3, .LCPI16_0@PCREL(0), 1
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    stxv vs3, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_1@PCREL(0), 1
+; CHECK-NEXT:    lxv vs3, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_2@PCREL(0), 1
+; CHECK-NEXT:    lxv vs3, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 1, 8724152
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2606,6 +2606,21 @@
            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                              VSFRC)>;
 
+// To replace constant pool with XXSPLI32DX for scalars.
+def : Pat<(f32 nzFPImmAsi64
+           : $A),
+          (COPY_TO_REGCLASS(XXSPLTI32DX(XXSPLTI32DX(IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
+def : Pat<(f64 nzFPImmAsi64
+           : $A),
+          (COPY_TO_REGCLASS(XXSPLTI32DX(XXSPLTI32DX(IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
   // Anonymous patterns for XXEVAL
   // AND
   // and(A, B, C)
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -398,6 +398,32 @@
                                    SDLoc(N), MVT::i32);
 }]>;
 
+// Check if the value can be converted to be single precision immediate, which
+// can be exploited by XXSPLTIDP. Ensure that it cannot be converted to single
+// precision before exploiting with XXSPLTI32DX.
+def nzFPImmAsi64 : PatLeaf<(fpimm), [{
+  APFloat APFloatOfN = N->getValueAPF();
+  return !N->isExactlyValue(+0.0) && checkNonDenormCannotConvertToSingle(APFloatOfN);
+}]>;
+
+// Get the Hi bits of a 64 bit immediate.
+def getFPAs64BitIntHi : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  checkNonDenormCannotConvertToSingle(APFloatOfN);
+  uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &
+                            0xFFFFFFFF00000000LL) >> 32);
+  return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
+}]>;
+
+// Get the Lo bits of a 64 bit immediate.
+def getFPAs64BitIntLo : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  checkNonDenormCannotConvertToSingle(APFloatOfN);
+  uint32_t Lo = (uint32_t)(APFloatOfN.bitcastToAPInt().getZExtValue() &
+                           0xFFFFFFFF);
+  return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
+}]>;
+
 def imm34 : PatLeaf<(imm), [{
   return isInt<34>(N->getSExtValue());
 }]>;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1318,6 +1318,8 @@
 
   bool convertToNonDenormSingle(APInt &ArgAPInt);
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkNonDenormCannotConvertToSingle(APInt &ArgAPInt);
+  bool checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat);
 
 } // end namespace llvm
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8583,6 +8583,25 @@
   return Success;
 }
 
+// Check that the APFloat cannot be exploited with XXSPLTIDP
+// which can only exploit values that can fit in 32 bits. This
+// allows us to exploit with XXSPLTI32DX instead.
+bool llvm::checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat) {
+  // Only convert if it loses info, since XXSPLTIDP should
+  // handle the other case
+  return !ArgAPFloat.isDenormal() &&
+         !convertToNonDenormSingle(ArgAPFloat);
+}
+
+// Check that the APInt cannot be exploited with XXSPLTIDP
+// which can only exploit values that can fit in 32 bits. This
+// allows us to exploit with XXSPLTI32DX instead.
+bool llvm::checkNonDenormCannotConvertToSingle(APInt &ArgAPInt) {
+  double DpValue = ArgAPInt.bitsToDouble();
+  APFloat APFloatDp(DpValue);
+  return checkNonDenormCannotConvertToSingle(APFloatDp);
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -15833,7 +15852,8 @@
       // With prefixed instructions, we can materialize anything that can be
       // represented with a 32-bit immediate, not just positive zero.
       APFloat APFloatOfImm = Imm;
-      return convertToNonDenormSingle(APFloatOfImm);
+      return convertToNonDenormSingle(APFloatOfImm) ||
+             checkNonDenormCannotConvertToSingle(APFloatOfImm);
     }
     LLVM_FALLTHROUGH;
   case MVT::ppcf128:

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

Reply via email to