[llvm-branch-commits] [llvm] 1150bfa - [PowerPC] Add missing negate for VPERMXOR on little endian subtargets

2021-01-25 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2021-01-25T12:23:33-06:00
New Revision: 1150bfa6bb099f9a85a140f66fde7b7f7aa54e60

URL: 
https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60
DIFF: 
https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60.diff

LOG: [PowerPC] Add missing negate for VPERMXOR on little endian subtargets

This intrinsic is supposed to have the permute control vector complemented on
little endian systems (as the ABI specifies and GCC implements). With the
current code gen, the result vector is byte-reversed.

Differential revision: https://reviews.llvm.org/D95004

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCInstrAltivec.td
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/crypto_bifs.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td 
b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index c029ecb63e72..1a34aa09315b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
  int_ppc_altivec_crypto_vpmsumw, v4i32>;
 def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
  int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
- int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, 
vrrc:$VC),
+"vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
 
 // Vector doubleword integer pack and unpack.
 let hasSideEffects = 1 in {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td 
b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2e45d731c953..db6e00c71b89 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2408,6 +2408,8 @@ def MrgWords {
 // arbitrarily chosen to be Big, Little.
 //
 // Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
 // [HasVSX]
 // [HasVSX, IsBigEndian]
 // [HasVSX, IsLittleEndian]
@@ -2436,6 +2438,18 @@ def MrgWords {
 // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
 
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only for little endian system).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+v16i8:$b, v16i8:$c)),
+(v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+ (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+v16i8:$b, v16i8:$c)),
+(v16i8 (VPERMXOR $a, $b, $c))>;
+
 let AddedComplexity = 400 in {
 // Valid for any VSX subtarget, regardless of endianness.
 let Predicates = [HasVSX] in {

diff  --git a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll 
b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
index b34482abfcd0..e38fe90ecc57 100644
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
@@ -1,7 +1,11 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu 
-mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu 
-mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu 
-mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu 
-mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < 
%s | FileCheck %s
 ; FIXME: The original intent was to add a check-next for the blr after every 
check.
 ; However, this currently fails since we don't eliminate stores of the unused
@@ -103,6 +107,7 @@ entry:
   %2 = load <16 x i8>,  <16 x i8>* %c, align 16
   %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x 
i8> %1, <16 x i8> %2)
   ret <16 x i8> %3
+; CHECK-LE: xxlnor
 ; C

[llvm-branch-commits] [llvm] 61f6915 - [PowerPC] Sign extend comparison operand for signed atomic comparisons

2021-01-18 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2021-01-18T21:19:25-06:00
New Revision: 61f69153e8dd7956d03ce46e30257c5bb3e41873

URL: 
https://github.com/llvm/llvm-project/commit/61f69153e8dd7956d03ce46e30257c5bb3e41873
DIFF: 
https://github.com/llvm/llvm-project/commit/61f69153e8dd7956d03ce46e30257c5bb3e41873.diff

LOG: [PowerPC] Sign extend comparison operand for signed atomic comparisons

As of 8dacca943af8a53a23b1caf3142d10fb4a77b645, we sign extend the atomic loaded
operand for signed subword comparisons. However, the assumption that the other
operand is correctly sign extended doesn't always hold. This patch sign extends
the other operand if it needs to be sign extended.

This is a second fix for https://bugs.llvm.org/show_bug.cgi?id=30451

Differential revision: https://reviews.llvm.org/D94058

Added: 
llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll

Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/atomics-regression.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c0dca4af1cf7..d6dd70fb1a0c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10593,17 +10593,88 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, 
MachineBasicBlock *BB,
   return BB;
 }
 
+static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
+  switch(MI.getOpcode()) {
+  default:
+return false;
+  case PPC::COPY:
+return TII->isSignExtended(MI);
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHAU:
+  case PPC::LHAU8:
+  case PPC::LHAUX:
+  case PPC::LHAUX8:
+  case PPC::LHAX:
+  case PPC::LHAX8:
+  case PPC::LWA:
+  case PPC::LWAUX:
+  case PPC::LWAX:
+  case PPC::LWAX_32:
+  case PPC::LWA_32:
+  case PPC::PLHA:
+  case PPC::PLHA8:
+  case PPC::PLHA8pc:
+  case PPC::PLHApc:
+  case PPC::PLWA:
+  case PPC::PLWA8:
+  case PPC::PLWA8pc:
+  case PPC::PLWApc:
+  case PPC::EXTSB:
+  case PPC::EXTSB8:
+  case PPC::EXTSB8_32_64:
+  case PPC::EXTSB8_rec:
+  case PPC::EXTSB_rec:
+  case PPC::EXTSH:
+  case PPC::EXTSH8:
+  case PPC::EXTSH8_32_64:
+  case PPC::EXTSH8_rec:
+  case PPC::EXTSH_rec:
+  case PPC::EXTSW:
+  case PPC::EXTSWSLI:
+  case PPC::EXTSWSLI_32_64:
+  case PPC::EXTSWSLI_32_64_rec:
+  case PPC::EXTSWSLI_rec:
+  case PPC::EXTSW_32:
+  case PPC::EXTSW_32_64:
+  case PPC::EXTSW_32_64_rec:
+  case PPC::EXTSW_rec:
+  case PPC::SRAW:
+  case PPC::SRAWI:
+  case PPC::SRAWI_rec:
+  case PPC::SRAW_rec:
+return true;
+  }
+  return false;
+}
+
 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
 MachineInstr &MI, MachineBasicBlock *BB,
 bool is8bit, // operation
 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
+  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
+
+  // If this is a signed comparison and the value being compared is not known
+  // to be sign extended, sign extend it here.
+  DebugLoc dl = MI.getDebugLoc();
+  MachineFunction *F = BB->getParent();
+  MachineRegisterInfo &RegInfo = F->getRegInfo();
+  Register incr = MI.getOperand(3).getReg();
+  bool IsSignExtended = Register::isVirtualRegister(incr) &&
+isSignExtended(*RegInfo.getVRegDef(incr), TII);
+
+  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
+Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
+.addReg(MI.getOperand(3).getReg());
+MI.getOperand(3).setReg(ValueReg);
+  }
   // If we support part-word atomic mnemonics, just use them
   if (Subtarget.hasPartwordAtomics())
 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
 CmpPred);
 
-  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   // In 64 bit mode we have to use 64 bits for addresses, even though the
   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
   // registers without caring whether they're 32 or 64, but here we're
@@ -10613,14 +10684,11 @@ MachineBasicBlock 
*PPCTargetLowering::EmitPartwordAtomicBinary(
   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *F = BB->getParent();
   MachineFunction::iterator It = ++BB->getIterator();
 
   Register dest = MI.getOperand(0).getReg();
   Register ptrA = MI.getOperand(1).getReg();
   Register ptrB = MI.getOperand(2).getReg();
-  Register incr = MI.getOperand(3).getReg();
-  DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *loop2MBB =
@@ -10634,7 +10702,6 @@ MachineBasicBlock 
*PPCTargetLowering::EmitPartwordAtomicBinary(
   std::next(Machin

[llvm-branch-commits] [llvm] 4f568fb - [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit

2020-12-28 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-28T20:36:16-06:00
New Revision: 4f568fbd21636c7c8d071f1901084cc0ae87f3ee

URL: 
https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee
DIFF: 
https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee.diff

LOG: [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit

If any PHI nodes in loop exit blocks have incoming values from the
loop that are accesses of TLS variables with local dynamic or general
dynamic TLS model, the address will be computed inside the loop. Since
this includes a call to __tls_get_addr, this will in turn cause the
CTR loops verifier to complain.
Disable CTR loops in such cases.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527

Added: 
llvm/test/CodeGen/PowerPC/pr48527.ll

Modified: 
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 011056c21b13..4de1f2aba416 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   return BaseT::getUserCost(U, Operands, CostKind);
 }
 
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+   SmallPtrSetImpl<const Value *> &Visited) {
+  // No need to traverse again if we already checked this operand.
+  if (!Visited.insert(MemAddr).second)
+return false;
+  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+  if (!GV) {
+// Recurse to check for constants that refer to TLS global variables.
+if (const auto *CV = dyn_cast<Constant>(MemAddr))
+  for (const auto &CO : CV->operands())
+if (memAddrUsesCTR(CO, TM, Visited))
+  return true;
+return false;
+  }
+
+  if (!GV->isThreadLocal())
+return false;
+  TLSModel::Model Model = TM.getTLSModel(GV);
+  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
  SmallPtrSetImpl<const Value *> &Visited) {
   const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, 
TargetLibraryInfo *LibInfo,
 return false;
   };
 
-  // Determining the address of a TLS variable results in a function call in
-  // certain TLS models.
-  std::function<bool(const Value *)> memAddrUsesCTR =
-  [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
-// No need to traverse again if we already checked this operand.
-if (!Visited.insert(MemAddr).second)
-  return false;
-const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-if (!GV) {
-  // Recurse to check for constants that refer to TLS global variables.
-  if (const auto *CV = dyn_cast<Constant>(MemAddr))
-for (const auto &CO : CV->operands())
-  if (memAddrUsesCTR(CO))
-return true;
-
-  return false;
-}
-
-if (!GV->isThreadLocal())
-  return false;
-TLSModel::Model Model = TM.getTLSModel(GV);
-return Model == TLSModel::GeneralDynamic ||
-  Model == TLSModel::LocalDynamic;
-  };
-
   auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
 if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
   return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, 
TargetLibraryInfo *LibInfo,
 }
 
 for (Value *Operand : J->operands())
-  if (memAddrUsesCTR(Operand))
+  if (memAddrUsesCTR(Operand, TM, Visited))
 return true;
   }
 
@@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, 
ScalarEvolution &SE,
 }
   }
 
+  // If an exit block has a PHI that accesses a TLS variable as one of the
+  // incoming values from the loop, we cannot produce a CTR loop because the
+  // address for that value will be computed in the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  for (auto &BB : ExitBlocks) {
+for (auto &PHI : BB->phis()) {
+  for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+   Idx++) {
+const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+const Value *IncomingValue = PHI.getIncomingValue(Idx);
+if (L->contains(IncomingBB) &&
+memAddrUsesCTR(IncomingValue, TM, Visited))
+  return false;
+  }
+}
+  }
+
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CountType = TM.isPPC64() ?
 Type::getInt64Ty(C) : Type::getInt32Ty(C);

diff  --git a/llvm/test/CodeGen/PowerPC/pr48527.ll 
b/llvm/test/CodeGen/PowerPC/pr48527.ll
new file mode 100644
index ..eaff15ce071e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr

[llvm-branch-commits] [llvm] 0a19fc3 - [PowerPC] Disable CTR loops containing operations on half-precision

2020-12-29 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-29T05:12:50-06:00
New Revision: 0a19fc3088f58f9a73fdb39a373cba7885be557f

URL: 
https://github.com/llvm/llvm-project/commit/0a19fc3088f58f9a73fdb39a373cba7885be557f
DIFF: 
https://github.com/llvm/llvm-project/commit/0a19fc3088f58f9a73fdb39a373cba7885be557f.diff

LOG: [PowerPC] Disable CTR loops containing operations on half-precision

On subtargets prior to Power9, conversions to/from half precision
are lowered to libcalls. This makes loops containing such operations
invalid candidates for HW loops.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519

Added: 
llvm/test/CodeGen/PowerPC/pr48519.ll

Modified: 
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 4de1f2aba416..71f867a617c8 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -629,6 +629,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, 
TargetLibraryInfo *LibInfo,
   isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
   isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
 return true;
+  if (!ST->isISA3_0() &&
+  (CI->getSrcTy()->getScalarType()->isHalfTy() ||
+   CI->getDestTy()->getScalarType()->isHalfTy()))
+return true;
 } else if (isLargeIntegerTy(!TM.isPPC64(),
 J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||

diff  --git a/llvm/test/CodeGen/PowerPC/pr48519.ll 
b/llvm/test/CodeGen/PowerPC/pr48519.ll
new file mode 100644
index ..777874e91c26
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define void @julia__typed_vcat_20() #0 {
+; CHECK-LABEL: julia__typed_vcat_20:
+; CHECK:   # %bb.0: # %top
+; CHECK-NEXT:mflr r0
+; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:std r0, 16(r1)
+; CHECK-NEXT:stdu r1, -48(r1)
+; CHECK-NEXT:li r3, 1
+; CHECK-NEXT:li r30, 0
+; CHECK-NEXT:.p2align 4
+; CHECK-NEXT:  .LBB0_1: # %L139
+; CHECK-NEXT:#
+; CHECK-NEXT:addi r3, r3, -1
+; CHECK-NEXT:mtfprd f0, r3
+; CHECK-NEXT:xscvsxdsp f1, f0
+; CHECK-NEXT:bl __gnu_f2h_ieee
+; CHECK-NEXT:nop
+; CHECK-NEXT:bl __gnu_h2f_ieee
+; CHECK-NEXT:nop
+; CHECK-NEXT:addi r30, r30, -1
+; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:cmpldi r30, 0
+; CHECK-NEXT:bne+ cr0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %pass.1
+; CHECK-NEXT:bl __gnu_f2h_ieee
+; CHECK-NEXT:nop
+; CHECK-NEXT:sth r3, 0(r3)
+top:
+  %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8
+  %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, 
i64 0)
+  %1 = extractvalue { i64, i1 } %0, 0
+  br label %L139
+
+L139: ; preds = %L139, %top
+  %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ]
+  %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ]
+  %2 = add nsw i64 %value_phi23, -1
+  %3 = add i64 %2, 0
+  %4 = sitofp i64 %3 to half
+  store half %4, half addrspace(13)* undef, align 2
+  %.not101.not = icmp eq i64 %value_phi21, 0
+  %5 = add i64 %value_phi21, 1
+  br i1 %.not101.not, label %pass.1, label %L139
+
+pass.1:   ; preds = %L139
+  unreachable
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0
+
+attributes #0 = { nounwind }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 7486de1 - [PowerPC] Provide patterns for permuted scalar to vector for pre-P8

2020-12-29 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-29T06:49:25-06:00
New Revision: 7486de1b2eced27b0b95598e9ab45039d700

URL: 
https://github.com/llvm/llvm-project/commit/7486de1b2eced27b0b95598e9ab45039d700
DIFF: 
https://github.com/llvm/llvm-project/commit/7486de1b2eced27b0b95598e9ab45039d700.diff

LOG: [PowerPC] Provide patterns for permuted scalar to vector for pre-P8

We will emit these permuted nodes on all VSX little endian subtargets
but don't have the patterns available to match them on subtargets
that don't have direct moves.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=47916

Added: 
llvm/test/CodeGen/PowerPC/pr47916.ll

Modified: 
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td 
b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 4e086366af24..136a53e66d62 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2412,6 +2412,7 @@ def MrgWords {
 // [HasVSX, IsBigEndian]
 // [HasVSX, IsLittleEndian]
 // [HasVSX, NoP9Vector]
+// [HasVSX, NoP9Vector, IsLittleEndian]
 // [HasVSX, HasOnlySwappingMemOps]
 // [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
 // [HasVSX, HasP8Vector]
@@ -3005,6 +3006,19 @@ defm : ScalToVecWPermute<
 VSFRC)), sub_64)>;
 } // HasVSX, NoP9Vector
 
+// Any little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in {
+// Load-and-splat using only X-Form VSX loads.
+defm : ScalToVecWPermute<
+  v2i64, (i64 (load xoaddr:$src)),
+  (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+  (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v2f64, (f64 (load xoaddr:$src)),
+  (XXPERMDIs (XFLOADf64 xoaddr:$src), 2),
+  (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
+} // HasVSX, NoP9Vector, IsLittleEndian
+
 // Any VSX subtarget that only has loads and stores that load in big endian
 // order regardless of endianness. This is really pre-Power9 subtargets.
 let Predicates = [HasVSX, HasOnlySwappingMemOps] in {

diff  --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp 
b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index b7ed8ce9f144..ff251f55afff 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -254,10 +254,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
 if (!MO.isReg())
   continue;
 Register Reg = MO.getReg();
-if (isAnyVecReg(Reg, Partial)) {
+// All operands need to be checked because there are instructions that
+// operate on a partial register and produce a full register (such as
+// XXPERMDIs).
+if (isAnyVecReg(Reg, Partial))
   RelevantInstr = true;
-  break;
-}
   }
 
   if (!RelevantInstr)

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll 
b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index cdd04b33318e..35b590dec1b1 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -8,6 +8,9 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN: -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names \
 ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-NOVSX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P7
 
 define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) 
local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: testmrghb:
@@ -24,6 +27,11 @@ define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x 
i8> %b) local_unnamed_
 ; CHECK-NOVSX:   # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:vmrghb v2, v3, v2
 ; CHECK-NOVSX-NEXT:blr
+;
+; CHECK-P7-LABEL: testmrghb:
+; CHECK-P7:   # %bb.0: # %entry
+; CHECK-P7-NEXT:vmrghb v2, v3, v2
+; CHECK-P7-NEXT:blr
 entry:
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> 
   ret <16 x i8> %shuffle
@@ -46,6 +54,11 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x 
i8> %b) local_unnamed
 ; CHECK-NOVSX-NEXT:lvx v4, 0, r3
 ; CHECK-NOVSX-NEXT:vperm v2, v3, v2, v4
 ; CHECK-NOVSX-NEXT:blr
+;
+; CHECK-P7-LABEL: testmrghb2:
+; CHECK-P7:   # %bb.0: # %entry
+; CHECK-P7-NEXT:vmrghb v2, v2, v3
+; CHECK-P7-NEXT:blr
 entry:
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> 
   ret <16 x i8> %shuffle
@@ -65,6 +78,11 @@ define dso_local <16 x i8> @testmrghh(<16 x i8> %a, <16 x 
i8> %b) local_unnamed_
 ; CHECK-NOVSX:   # %bb.0: # %entry
 ; CH

[llvm-branch-commits] [clang] 3f7b4ce - [PowerPC] Add support for embedded devices with EFPU2

2021-01-12 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2021-01-12T09:47:00-06:00
New Revision: 3f7b4ce96065eea66bf4344973173e76ec1a4255

URL: 
https://github.com/llvm/llvm-project/commit/3f7b4ce96065eea66bf4344973173e76ec1a4255
DIFF: 
https://github.com/llvm/llvm-project/commit/3f7b4ce96065eea66bf4344973173e76ec1a4255.diff

LOG: [PowerPC] Add support for embedded devices with EFPU2

PowerPC cores like e200z759n3 [1] using an efpu2 only support single precision
hardware floating point instructions. The single precision instructions efs*
and evfs* are identical to the spe float instructions while efd* and evfd*
instructions trigger a not implemented exception.

This patch introduces a new command line option -mefpu2 which leads to
single-hardware / double-software code generation.

[1] Core reference:
  https://www.nxp.com/files-static/32bit/doc/ref_manual/e200z759CRM.pdf

Differential revision: https://reviews.llvm.org/D92935

Added: 


Modified: 
clang/docs/ClangCommandLineReference.rst
clang/include/clang/Driver/Options.td
clang/lib/Basic/Targets/PPC.cpp
clang/test/Driver/ppc-features.cpp
llvm/lib/Target/PowerPC/PPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCSubtarget.cpp
llvm/lib/Target/PowerPC/PPCSubtarget.h
llvm/test/CodeGen/PowerPC/spe.ll

Removed: 




diff  --git a/clang/docs/ClangCommandLineReference.rst 
b/clang/docs/ClangCommandLineReference.rst
index b46008970f57..ac97f6fed935 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -3145,6 +3145,8 @@ PowerPC
 
 .. option:: -mdirect-move, -mno-direct-move
 
+.. option:: -mefpu2
+
 .. option:: -mfloat128, -mno-float128
 
 .. option:: -mfprnd, -mno-fprnd

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 35643701f97e..d9586e086a9c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3040,6 +3040,7 @@ def mpcrel: Flag<["-"], "mpcrel">, 
Group<m_ppc_Features_Group>;
 def mno_pcrel: Flag<["-"], "mno-pcrel">, Group<m_ppc_Features_Group>;
 def mspe : Flag<["-"], "mspe">, Group<m_ppc_Features_Group>;
 def mno_spe : Flag<["-"], "mno-spe">, Group<m_ppc_Features_Group>;
+def mefpu2 : Flag<["-"], "mefpu2">, Group<m_ppc_Features_Group>;
 def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group, 
Flags<[CC1Option]>,
   HelpText<"Enable the extended Altivec ABI on AIX (AIX only). Uses volatile 
and nonvolatile vector registers">;
 def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group, 
Flags<[CC1Option]>,

diff  --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 2be7555102f8..cfede6e6e756 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -56,7 +56,7 @@ bool 
 PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   HasP10Vector = true;
 } else if (Feature == "+pcrelative-memops") {
   HasPCRelativeMemops = true;
-} else if (Feature == "+spe") {
+} else if (Feature == "+spe" || Feature == "+efpu2") {
   HasSPE = true;
   LongDoubleWidth = LongDoubleAlign = 64;
   LongDoubleFormat = &llvm::APFloat::IEEEdouble();
@@ -402,6 +402,8 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
 void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
   StringRef Name, bool Enabled) const {
   if (Enabled) {
+if (Name == "efpu2")
+  Features["spe"] = true;
 // If we're enabling any of the vsx based features then enable vsx and
 // altivec. We'll diagnose any problems later.
 bool FeatureHasVSX = llvm::StringSwitch<bool>(Name)
@@ -425,6 +427,8 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap 
&Features,
 else
   Features[Name] = true;
   } else {
+if (Name == "spe")
+  Features["efpu2"] = false;
 // If we're disabling altivec or vsx go ahead and disable all of the vsx
 // features.
 if ((Name == "altivec") || (Name == "vsx"))

diff  --git a/clang/test/Driver/ppc-features.cpp 
b/clang/test/Driver/ppc-features.cpp
index 85060951aa16..def96c351b34 100644
--- a/clang/test/Driver/ppc-features.cpp
+++ b/clang/test/Driver/ppc-features.cpp
@@ -155,6 +155,9 @@
 // CHECK-SPE: "-target-feature" "+spe"
 // CHECK-NOSPE: "-target-feature" "-spe"
 
+// RUN: %clang -target powerpc %s -mefpu2 -c -### 2>&1 | FileCheck 
-check-prefix=CHECK-EFPU2 %s
+// CHECK-EFPU2: "-target-feature" "+efpu2"
+
 // Assembler features
 // RUN: %clang -target powerpc-unknown-linux-gnu %s -### -o %t.o 
-no-integrated-as 2>&1 | FileCheck -check-prefix=CHECK_32_BE_AS_ARGS %s
 // CHECK_32_BE_AS_ARGS: "-mppc"

diff  --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 2975ae161aaa..06403f5e55a2 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -72,6 +72,9 @@ def FeatureAltivec   : 
SubtargetFeature<"altivec","HasAltivec", "true",
 def FeatureSPE   : SubtargetFeature<"spe","HasSPE"

[llvm-branch-commits] [llvm] bfdc19e - [PowerPC] Restore stack ptr from frame ptr with setjmp

2020-12-14 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-14T11:34:16-06:00
New Revision: bfdc19e77868b849b5c636bf0512970264aef571

URL: 
https://github.com/llvm/llvm-project/commit/bfdc19e77868b849b5c636bf0512970264aef571
DIFF: 
https://github.com/llvm/llvm-project/commit/bfdc19e77868b849b5c636bf0512970264aef571.diff

LOG: [PowerPC] Restore stack ptr from frame ptr with setjmp

If a function happens to:

- call setjmp
- do a 16-byte stack allocation
- call a function that sets up a stack frame and longjmp's back

The stack pointer that is restores by setjmp will no longer point to a valid
back chain. According to the ABI, stack accesses in such a function are to be
frame pointer based - so it is an error (quite obviously) to restore the stack
from the back chain.
We already restore the stack from the frame pointer when there are calls to
fast_cc functions. We just need to also do that when there are calls to setjmp.
This patch simply does that.

This was pointed out by the Julia team.

Differential revision: https://reviews.llvm.org/D92906

Added: 
llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll

Modified: 
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 7df2f6dc9252..b93322c15534 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -375,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) 
const {
 return false;
 
   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
-MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
-(MF.getTarget().Options.GuaranteedTailCallOpt &&
- MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() 
||
+ MF.exposesReturnsTwice() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+  MF.getInfo<PPCFunctionInfo>()->hasFastCall());
 }
 
 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
@@ -584,8 +585,8 @@ bool 
PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
   // Frame pointers and base pointers complicate matters so don't do anything
   // if we have them. For example having a frame pointer will sometimes require
   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
-  // 
diff icult.
-  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+  // 
diff icult. Similar situation exists with setjmp.
+  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
 return false;
 
   // Calls to fast_cc functions use 
diff erent rules for passing parameters on
@@ -1646,8 +1647,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
 // If this function contained a fastcc call and GuaranteedTailCallOpt is
 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
 // call which invalidates the stack pointer value in SP(0). So we use the
-// value of R31 in this case.
-if (FI->hasFastCall()) {
+// value of R31 in this case. Similar situation exists with setjmp.
+if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
   assert(HasFP && "Expecting a valid frame pointer.");
   if (!HasRedZone)
 RBReg = FPReg;

diff  --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll 
b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
new file mode 100644
index ..9928a111734b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-- -verify-machineinstrs | FileCheck %s \
+; RUN:   --check-prefix=BE
+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] }
+%struct.__sigset_t = type { [16 x i64] }
+
+@.str = private unnamed_addr constant [33 x i8] c"Successfully returned from 
main\0A\00", align 1
+
+; Function Attrs: nounwind
+define dso_local signext i32 @main(i32 signext %argc, i8** nocapture readnone 
%argv) local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfocrf 12, 32
+; CHECK-NEXT:mflr 0
+; CHECK-NEXT:std 31, -8(1)
+; CHECK-NEXT:std 0, 16(1)
+; CHECK-NEXT:stw 12, 8(1)
+; CHECK-NEXT:stdu 1, -784(1)
+; CHECK-NEXT:# kill: def $r3 killed $r3 killed $x3
+; CHECK-NEXT:cmpwi 2, 3, 2
+; CHECK-NEXT:mr 31, 1
+; CHECK-NEXT:li 3, 0
+; CHECK-NEXT:blt 2, .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:addi 3, 31, 112
+; CHECK-NEXT:bl _setjmp
+; CHECK-NEXT:nop
+; CHECK-NEXT:crmove 20, 10
+; CHECK-NEXT:# kill: def $r3 killed $r3 killed $x3
+; CHECK-NEXT:cmpwi 3, 0
+; CHECK-NEXT:crorc 20, 10, 2
+

[llvm-branch-commits] [compiler-rt] eed0b9a - [PowerPC] Temporarily disable asan longjmp tests

2020-12-14 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-14T18:22:08-06:00
New Revision: eed0b9acdfe4409fb90b356d58c996f12cfd733f

URL: 
https://github.com/llvm/llvm-project/commit/eed0b9acdfe4409fb90b356d58c996f12cfd733f
DIFF: 
https://github.com/llvm/llvm-project/commit/eed0b9acdfe4409fb90b356d58c996f12cfd733f.diff

LOG: [PowerPC] Temporarily disable asan longjmp tests

Commit bfdc19e77868b849b5c636bf0512970264aef571 seems to have broken
some PPC bots with a couple of asan test cases. Disable those test
cases for now until I can resolve the issue.

Added: 


Modified: 
compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
compiler-rt/test/asan/TestCases/longjmp.cpp

Removed: 




diff  --git 
a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp 
b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
index 4774993cdf328..9da47facac276 100644
--- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
+++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
@@ -1,3 +1,4 @@
+// UNSUPPORTED: powerpc64
 // Tests that __asan_handle_no_return properly unpoisons the signal alternate
 // stack.
 

diff  --git a/compiler-rt/test/asan/TestCases/longjmp.cpp 
b/compiler-rt/test/asan/TestCases/longjmp.cpp
index 8e9f2ae195c71..bc4165ffd8139 100644
--- a/compiler-rt/test/asan/TestCases/longjmp.cpp
+++ b/compiler-rt/test/asan/TestCases/longjmp.cpp
@@ -1,3 +1,4 @@
+// UNSUPPORTED: powerpc64
 // RUN: %clangxx_asan -O %s -o %t && %run %t
 
 #include 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] ba1202a - [PowerPC] Restore stack ptr from base ptr when available

2020-12-22 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-22T05:44:03-06:00
New Revision: ba1202a1e4f75d8f234d01730ac65a913e9baa01

URL: 
https://github.com/llvm/llvm-project/commit/ba1202a1e4f75d8f234d01730ac65a913e9baa01
DIFF: 
https://github.com/llvm/llvm-project/commit/ba1202a1e4f75d8f234d01730ac65a913e9baa01.diff

LOG: [PowerPC] Restore stack ptr from base ptr when available

On subtargets that have a red zone, we will copy the stack pointer to the base
pointer in the prologue prior to updating the stack pointer. There are no other
updates to the base pointer after that. This suggests that we should be able to
restore the stack pointer from the base pointer rather than loading it from the
back chain or adding the frame size back to either the stack pointer or the
frame pointer.
This came about because functions that call setjmp need to restore the SP from
the FP because the back chain might have been clobbered
(see https://reviews.llvm.org/D92906). However, if the stack is realigned, the
restored SP might be incorrect (which is what caused the failures in the two
ASan test cases).

This patch was tested quite extensively both with sanitizer runtimes and
general code.

Differential revision: https://reviews.llvm.org/D93327

Added: 


Modified: 
compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
compiler-rt/test/asan/TestCases/longjmp.cpp
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
llvm/test/CodeGen/PowerPC/pr46759.ll
llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
llvm/test/CodeGen/PowerPC/stack-realign.ll

Removed: 




diff  --git 
a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp 
b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
index 9da47facac27..4774993cdf32 100644
--- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
+++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp
@@ -1,4 +1,3 @@
-// UNSUPPORTED: powerpc64
 // Tests that __asan_handle_no_return properly unpoisons the signal alternate
 // stack.
 

diff  --git a/compiler-rt/test/asan/TestCases/longjmp.cpp 
b/compiler-rt/test/asan/TestCases/longjmp.cpp
index bc4165ffd813..8e9f2ae195c7 100644
--- a/compiler-rt/test/asan/TestCases/longjmp.cpp
+++ b/compiler-rt/test/asan/TestCases/longjmp.cpp
@@ -1,4 +1,3 @@
-// UNSUPPORTED: powerpc64
 // RUN: %clangxx_asan -O %s -o %t && %run %t
 
 #include 

diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index b93322c15534..50ce11b8374f 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1644,11 +1644,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
 // zone add this offset back now.
 
+// If the function has a base pointer, the stack pointer has been copied
+// to it so we can restore it by copying in the other direction.
+if (HasRedZone && HasBP) {
+  BuildMI(MBB, MBBI, dl, OrInst, RBReg).
+addReg(BPReg).
+addReg(BPReg);
+}
 // If this function contained a fastcc call and GuaranteedTailCallOpt is
 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
 // call which invalidates the stack pointer value in SP(0). So we use the
 // value of R31 in this case. Similar situation exists with setjmp.
-if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
+else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
   assert(HasFP && "Expecting a valid frame pointer.");
   if (!HasRedZone)
 RBReg = FPReg;

diff  --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll 
b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
index 2b1cc0c45db4..c6e1107d4738 100644
--- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
@@ -27,7 +27,7 @@ declare void @callee(i32*)
 ; 32BIT: stwux 1, 1, 0
 ; 32BIT: addi 3, 1, 64
 ; 32BIT: bl .callee
-; 32BIT: lwz 1, 0(1)
+; 32BIT: mr 1, 30
 ; 32BIT: lwz 30, -16(1)
 
 ; 64BIT-LABEL: .caller:
@@ -38,5 +38,5 @@ declare void @callee(i32*)
 ; 64BIT: stdux 1, 1, 0
 ; 64BIT: addi 3, 1, 128
 ; 64BIT: bl .callee
-; 64BIT: ld 1, 0(1)
+; 64BIT: mr 1, 30
 ; 64BIT: ld 30, -24(1)

diff  --git a/llvm/test/CodeGen/PowerPC/pr46759.ll 
b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 716e050cdbee..33b44b720b6e 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -61,7 +61,7 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:  .LBB0_6: # %entry
 ; CHECK-LE-NEXT:addi r3, r1, 2048
 ; CHECK-LE-NEXT:lbz r3, 0(r3)
-; CHECK-LE-NEXT:ld r1, 0(r1)
+; CHECK-LE-NEXT:mr r1, r30
 ; CHECK-

[llvm-branch-commits] [llvm] e73f885 - [PowerPC] Remove redundant COPY_TO_REGCLASS introduced by 8a58f21f5b6c

2020-12-28 Thread Nemanja Ivanovic via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2020-12-28T09:26:51-06:00
New Revision: e73f885c988d7b94fcad64ddfa6a825e15e77a8f

URL: 
https://github.com/llvm/llvm-project/commit/e73f885c988d7b94fcad64ddfa6a825e15e77a8f
DIFF: 
https://github.com/llvm/llvm-project/commit/e73f885c988d7b94fcad64ddfa6a825e15e77a8f.diff

LOG: [PowerPC] Remove redundant COPY_TO_REGCLASS introduced by 8a58f21f5b6c

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td 
b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index e7fa2affb730..2f29811b20d8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2561,13 +2561,13 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, 
IsLittleEndian] in {
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
 (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
   def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
-(STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>;
+(STXVRWX $src, xoaddr:$dst)>;
   def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
-(STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>;
+(STXVRWX $src, xoaddr:$dst)>;
   def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
-(STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>;
+(STXVRDX $src, xoaddr:$dst)>;
   def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
-(STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>;
+(STXVRDX $src, xoaddr:$dst)>;
  }
 
 class xxevalPattern  imm> :



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PowerPC: Fix using long double libm functions for f128 intrinsics (PR #144382)

2025-06-17 Thread Nemanja Ivanovic via llvm-branch-commits

https://github.com/nemanjai approved this pull request.

LGTM.
I believe that the finite functions are provided by GLIBC on PPC for F128, but 
perhaps someone from IBM can confirm (@lei137 @w2yehia @RolandF77).

https://github.com/llvm/llvm-project/pull/144382
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits