[llvm-branch-commits] [llvm] 1150bfa - [PowerPC] Add missing negate for VPERMXOR on little endian subtargets
Author: Nemanja Ivanovic Date: 2021-01-25T12:23:33-06:00 New Revision: 1150bfa6bb099f9a85a140f66fde7b7f7aa54e60 URL: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60 DIFF: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60.diff LOG: [PowerPC] Add missing negate for VPERMXOR on little endian subtargets This intrinsic is supposed to have the permute control vector complemented on little endian systems (as the ABI specifies and GCC implements). With the current code gen, the result vector is byte-reversed. Differential revision: https://reviews.llvm.org/D95004 Added: Modified: llvm/lib/Target/PowerPC/PPCInstrAltivec.td llvm/lib/Target/PowerPC/PPCInstrVSX.td llvm/test/CodeGen/PowerPC/crypto_bifs.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index c029ecb63e72..1a34aa09315b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", int_ppc_altivec_crypto_vpmsumw, v4i32>; def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", int_ppc_altivec_crypto_vpmsumd, v2i64>; -def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", - int_ppc_altivec_crypto_vpermxor, v16i8>; +def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC), +"vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>; // Vector doubleword integer pack and unpack. let hasSideEffects = 1 in { diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 2e45d731c953..db6e00c71b89 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2408,6 +2408,8 @@ def MrgWords { // arbitrarily chosen to be Big, Little. // // Predicate combinations available: +// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr. +// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr. 
// [HasVSX] // [HasVSX, IsBigEndian] // [HasVSX, IsLittleEndian] @@ -2436,6 +2438,18 @@ def MrgWords { // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] +// These Altivec patterns are here because we need a VSX instruction to match +// the intrinsic (but only for little endian system). +let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, +v16i8:$b, v16i8:$c)), +(v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC), + (COPY_TO_REGCLASS $c, VSRC>; +let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, +v16i8:$b, v16i8:$c)), +(v16i8 (VPERMXOR $a, $b, $c))>; + let AddedComplexity = 400 in { // Valid for any VSX subtarget, regardless of endianness. let Predicates = [HasVSX] in { diff --git a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll index b34482abfcd0..e38fe90ecc57 100644 --- a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll +++ b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll @@ -1,7 +1,11 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s +; RUN: llc -verify-machineinstrs 
-mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s ; FIXME: The original intent was to add a check-next for the blr after every check. ; However, this currently fails since we don't eliminate stores of the unused @@ -103,6 +107,7 @@ entry: %2 = load <16 x i8>, <16 x i8>* %c, align 16 %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) ret <16 x i8> %3 +; CHECK-LE: xxlnor ; C
[llvm-branch-commits] [llvm] 61f6915 - [PowerPC] Sign extend comparison operand for signed atomic comparisons
Author: Nemanja Ivanovic Date: 2021-01-18T21:19:25-06:00 New Revision: 61f69153e8dd7956d03ce46e30257c5bb3e41873 URL: https://github.com/llvm/llvm-project/commit/61f69153e8dd7956d03ce46e30257c5bb3e41873 DIFF: https://github.com/llvm/llvm-project/commit/61f69153e8dd7956d03ce46e30257c5bb3e41873.diff LOG: [PowerPC] Sign extend comparison operand for signed atomic comparisons As of 8dacca943af8a53a23b1caf3142d10fb4a77b645, we sign extend the atomic loaded operand for signed subword comparisons. However, the assumption that the other operand is correctly sign extended doesn't always hold. This patch sign extends the other operand if it needs to be sign extended. This is a second fix for https://bugs.llvm.org/show_bug.cgi?id=30451 Differential revision: https://reviews.llvm.org/D94058 Added: llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/test/CodeGen/PowerPC/atomics-regression.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index c0dca4af1cf7..d6dd70fb1a0c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10593,17 +10593,88 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { + switch(MI.getOpcode()) { + default: +return false; + case PPC::COPY: +return TII->isSignExtended(MI); + case PPC::LHA: + case PPC::LHA8: + case PPC::LHAU: + case PPC::LHAU8: + case PPC::LHAUX: + case PPC::LHAUX8: + case PPC::LHAX: + case PPC::LHAX8: + case PPC::LWA: + case PPC::LWAUX: + case PPC::LWAX: + case PPC::LWAX_32: + case PPC::LWA_32: + case PPC::PLHA: + case PPC::PLHA8: + case PPC::PLHA8pc: + case PPC::PLHApc: + case PPC::PLWA: + case PPC::PLWA8: + case PPC::PLWA8pc: + case PPC::PLWApc: + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + case PPC::EXTSB8_rec: + case PPC::EXTSB_rec: 
+ case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: + case PPC::EXTSH8_rec: + case PPC::EXTSH_rec: + case PPC::EXTSW: + case PPC::EXTSWSLI: + case PPC::EXTSWSLI_32_64: + case PPC::EXTSWSLI_32_64_rec: + case PPC::EXTSWSLI_rec: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: + case PPC::EXTSW_32_64_rec: + case PPC::EXTSW_rec: + case PPC::SRAW: + case PPC::SRAWI: + case PPC::SRAWI_rec: + case PPC::SRAW_rec: +return true; + } + return false; +} + MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const PPCInstrInfo *TII = Subtarget.getInstrInfo(); + + // If this is a signed comparison and the value being compared is not known + // to be sign extended, sign extend it here. + DebugLoc dl = MI.getDebugLoc(); + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + Register incr = MI.getOperand(3).getReg(); + bool IsSignExtended = Register::isVirtualRegister(incr) && +isSignExtended(*RegInfo.getVRegDef(incr), TII); + + if (CmpOpcode == PPC::CMPW && !IsSignExtended) { +Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); +BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) +.addReg(MI.getOperand(3).getReg()); +MI.getOperand(3).setReg(ValueReg); + } // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, CmpPred); - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. 
With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're @@ -10613,14 +10684,11 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); - Register incr = MI.getOperand(3).getReg(); - DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = @@ -10634,7 +10702,6 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( std::next(Machin
[llvm-branch-commits] [llvm] 4f568fb - [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit
Author: Nemanja Ivanovic Date: 2020-12-28T20:36:16-06:00 New Revision: 4f568fbd21636c7c8d071f1901084cc0ae87f3ee URL: https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee DIFF: https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee.diff LOG: [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit If any PHI nodes in loop exit blocks have incoming values from the loop that are accesses of TLS variables with local dynamic or general dynamic TLS model, the address will be computed inside the loop. Since this includes a call to __tls_get_addr, this will in turn cause the CTR loops verifier to complain. Disable CTR loops in such cases. Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527 Added: llvm/test/CodeGen/PowerPC/pr48527.ll Modified: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 011056c21b13..4de1f2aba416 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, return BaseT::getUserCost(U, Operands, CostKind); } +// Determining the address of a TLS variable results in a function call in +// certain TLS models. +static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, + SmallPtrSetImpl &Visited) { + // No need to traverse again if we already checked this operand. + if (!Visited.insert(MemAddr).second) +return false; + const auto *GV = dyn_cast(MemAddr); + if (!GV) { +// Recurse to check for constants that refer to TLS global variables. 
+if (const auto *CV = dyn_cast(MemAddr)) + for (const auto &CO : CV->operands()) +if (memAddrUsesCTR(CO, TM, Visited)) + return true; +return false; + } + + if (!GV->isThreadLocal()) +return false; + TLSModel::Model Model = TM.getTLSModel(GV); + return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; +} + bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, SmallPtrSetImpl &Visited) { const PPCTargetMachine &TM = ST->getTargetMachine(); @@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, return false; }; - // Determining the address of a TLS variable results in a function call in - // certain TLS models. - std::function memAddrUsesCTR = - [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool { -// No need to traverse again if we already checked this operand. -if (!Visited.insert(MemAddr).second) - return false; -const auto *GV = dyn_cast(MemAddr); -if (!GV) { - // Recurse to check for constants that refer to TLS global variables. - if (const auto *CV = dyn_cast(MemAddr)) -for (const auto &CO : CV->operands()) - if (memAddrUsesCTR(CO)) -return true; - - return false; -} - -if (!GV->isThreadLocal()) - return false; -TLSModel::Model Model = TM.getTLSModel(GV); -return Model == TLSModel::GeneralDynamic || - Model == TLSModel::LocalDynamic; - }; - auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) { if (IntegerType *ITy = dyn_cast(Ty)) return ITy->getBitWidth() > (Is32Bit ? 
32U : 64U); @@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, } for (Value *Operand : J->operands()) - if (memAddrUsesCTR(Operand)) + if (memAddrUsesCTR(Operand, TM, Visited)) return true; } @@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, } } + // If an exit block has a PHI that accesses a TLS variable as one of the + // incoming values from the loop, we cannot produce a CTR loop because the + // address for that value will be computed in the loop. + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (auto &BB : ExitBlocks) { +for (auto &PHI : BB->phis()) { + for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx; + Idx++) { +const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx); +const Value *IncomingValue = PHI.getIncomingValue(Idx); +if (L->contains(IncomingBB) && +memAddrUsesCTR(IncomingValue, TM, Visited)) + return false; + } +} + } + LLVMContext &C = L->getHeader()->getContext(); HWLoopInfo.CountType = TM.isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); diff --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll new file mode 100644 index ..eaff15ce071e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr
[llvm-branch-commits] [llvm] 0a19fc3 - [PowerPC] Disable CTR loops containing operations on half-precision
Author: Nemanja Ivanovic Date: 2020-12-29T05:12:50-06:00 New Revision: 0a19fc3088f58f9a73fdb39a373cba7885be557f URL: https://github.com/llvm/llvm-project/commit/0a19fc3088f58f9a73fdb39a373cba7885be557f DIFF: https://github.com/llvm/llvm-project/commit/0a19fc3088f58f9a73fdb39a373cba7885be557f.diff LOG: [PowerPC] Disable CTR loops containing operations on half-precision On subtargets prior to Power9, conversions to/from half precision are lowered to libcalls. This makes loops containing such operations invalid candidates for HW loops. Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519 Added: llvm/test/CodeGen/PowerPC/pr48519.ll Modified: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 4de1f2aba416..71f867a617c8 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -629,6 +629,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) return true; + if (!ST->isISA3_0() && + (CI->getSrcTy()->getScalarType()->isHalfTy() || + CI->getDestTy()->getScalarType()->isHalfTy())) +return true; } else if (isLargeIntegerTy(!TM.isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll new file mode 100644 index ..777874e91c26 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +define void @julia__typed_vcat_20() #0 { +; CHECK-LABEL: julia__typed_vcat_20: +; CHECK: # %bb.0: # %top +; 
CHECK-NEXT:mflr r0 +; CHECK-NEXT:std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT:std r0, 16(r1) +; CHECK-NEXT:stdu r1, -48(r1) +; CHECK-NEXT:li r3, 1 +; CHECK-NEXT:li r30, 0 +; CHECK-NEXT:.p2align 4 +; CHECK-NEXT: .LBB0_1: # %L139 +; CHECK-NEXT:# +; CHECK-NEXT:addi r3, r3, -1 +; CHECK-NEXT:mtfprd f0, r3 +; CHECK-NEXT:xscvsxdsp f1, f0 +; CHECK-NEXT:bl __gnu_f2h_ieee +; CHECK-NEXT:nop +; CHECK-NEXT:bl __gnu_h2f_ieee +; CHECK-NEXT:nop +; CHECK-NEXT:addi r30, r30, -1 +; CHECK-NEXT:li r3, 0 +; CHECK-NEXT:cmpldi r30, 0 +; CHECK-NEXT:bne+ cr0, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %pass.1 +; CHECK-NEXT:bl __gnu_f2h_ieee +; CHECK-NEXT:nop +; CHECK-NEXT:sth r3, 0(r3) +top: + %.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8 + %0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0) + %1 = extractvalue { i64, i1 } %0, 0 + br label %L139 + +L139: ; preds = %L139, %top + %value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ] + %value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ] + %2 = add nsw i64 %value_phi23, -1 + %3 = add i64 %2, 0 + %4 = sitofp i64 %3 to half + store half %4, half addrspace(13)* undef, align 2 + %.not101.not = icmp eq i64 %value_phi21, 0 + %5 = add i64 %value_phi21, 1 + br i1 %.not101.not, label %pass.1, label %L139 + +pass.1: ; preds = %L139 + unreachable +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0 + +attributes #0 = { nounwind } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 7486de1 - [PowerPC] Provide patterns for permuted scalar to vector for pre-P8
Author: Nemanja Ivanovic Date: 2020-12-29T06:49:25-06:00 New Revision: 7486de1b2eced27b0b95598e9ab45039d700 URL: https://github.com/llvm/llvm-project/commit/7486de1b2eced27b0b95598e9ab45039d700 DIFF: https://github.com/llvm/llvm-project/commit/7486de1b2eced27b0b95598e9ab45039d700.diff LOG: [PowerPC] Provide patterns for permuted scalar to vector for pre-P8 We will emit these permuted nodes on all VSX little endian subtargets but don't have the patterns available to match them on subtargets that don't have direct moves. Fixes: https://bugs.llvm.org/show_bug.cgi?id=47916 Added: llvm/test/CodeGen/PowerPC/pr47916.ll Modified: llvm/lib/Target/PowerPC/PPCInstrVSX.td llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll llvm/test/CodeGen/PowerPC/load-and-splat.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 4e086366af24..136a53e66d62 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2412,6 +2412,7 @@ def MrgWords { // [HasVSX, IsBigEndian] // [HasVSX, IsLittleEndian] // [HasVSX, NoP9Vector] +// [HasVSX, NoP9Vector, IsLittleEndian] // [HasVSX, HasOnlySwappingMemOps] // [HasVSX, HasOnlySwappingMemOps, IsBigEndian] // [HasVSX, HasP8Vector] @@ -3005,6 +3006,19 @@ defm : ScalToVecWPermute< VSFRC)), sub_64)>; } // HasVSX, NoP9Vector +// Any little endian pre-Power9 VSX subtarget. +let Predicates = [HasVSX, NoP9Vector, IsLittleEndian] in { +// Load-and-splat using only X-Form VSX loads. 
+defm : ScalToVecWPermute< + v2i64, (i64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +defm : ScalToVecWPermute< + v2f64, (f64 (load xoaddr:$src)), + (XXPERMDIs (XFLOADf64 xoaddr:$src), 2), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; +} // HasVSX, NoP9Vector, IsLittleEndian + // Any VSX subtarget that only has loads and stores that load in big endian // order regardless of endianness. This is really pre-Power9 subtargets. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index b7ed8ce9f144..ff251f55afff 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -254,10 +254,11 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (!MO.isReg()) continue; Register Reg = MO.getReg(); -if (isAnyVecReg(Reg, Partial)) { +// All operands need to be checked because there are instructions that +// operate on a partial register and produce a full register (such as +// XXPERMDIs). 
+if (isAnyVecReg(Reg, Partial)) RelevantInstr = true; - break; -} } if (!RelevantInstr) diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index cdd04b33318e..35b590dec1b1 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -8,6 +8,9 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-NOVSX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P7 define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghb: @@ -24,6 +27,11 @@ define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT:vmrghb v2, v3, v2 ; CHECK-NOVSX-NEXT:blr +; +; CHECK-P7-LABEL: testmrghb: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT:vmrghb v2, v3, v2 +; CHECK-P7-NEXT:blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -46,6 +54,11 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-NOVSX-NEXT:lvx v4, 0, r3 ; CHECK-NOVSX-NEXT:vperm v2, v3, v2, v4 ; CHECK-NOVSX-NEXT:blr +; +; CHECK-P7-LABEL: testmrghb2: +; CHECK-P7: # %bb.0: # %entry +; CHECK-P7-NEXT:vmrghb v2, v2, v3 +; CHECK-P7-NEXT:blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -65,6 +78,11 @@ define dso_local <16 x i8> @testmrghh(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-NOVSX: # %bb.0: # %entry ; CH
[llvm-branch-commits] [clang] 3f7b4ce - [PowerPC] Add support for embedded devices with EFPU2
Author: Nemanja Ivanovic Date: 2021-01-12T09:47:00-06:00 New Revision: 3f7b4ce96065eea66bf4344973173e76ec1a4255 URL: https://github.com/llvm/llvm-project/commit/3f7b4ce96065eea66bf4344973173e76ec1a4255 DIFF: https://github.com/llvm/llvm-project/commit/3f7b4ce96065eea66bf4344973173e76ec1a4255.diff LOG: [PowerPC] Add support for embedded devices with EFPU2 PowerPC cores like e200z759n3 [1] using an efpu2 only support single precision hardware floating point instructions. The single precision instructions efs* and evfs* are identical to the spe float instructions while efd* and evfd* instructions trigger a not implemented exception. This patch introduces a new command line option -mefpu2 which leads to single-hardware / double-software code generation. [1] Core reference: https://www.nxp.com/files-static/32bit/doc/ref_manual/e200z759CRM.pdf Differential revision: https://reviews.llvm.org/D92935 Added: Modified: clang/docs/ClangCommandLineReference.rst clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/PPC.cpp clang/test/Driver/ppc-features.cpp llvm/lib/Target/PowerPC/PPC.td llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCSubtarget.cpp llvm/lib/Target/PowerPC/PPCSubtarget.h llvm/test/CodeGen/PowerPC/spe.ll Removed: diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index b46008970f57..ac97f6fed935 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -3145,6 +3145,8 @@ PowerPC .. option:: -mdirect-move, -mno-direct-move +.. option:: -mefpu2 + .. option:: -mfloat128, -mno-float128 .. 
option:: -mfprnd, -mno-fprnd diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 35643701f97e..d9586e086a9c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3040,6 +3040,7 @@ def mpcrel: Flag<["-"], "mpcrel">, Group; def mno_pcrel: Flag<["-"], "mno-pcrel">, Group; def mspe : Flag<["-"], "mspe">, Group; def mno_spe : Flag<["-"], "mno-spe">, Group; +def mefpu2 : Flag<["-"], "mefpu2">, Group; def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group, Flags<[CC1Option]>, HelpText<"Enable the extended Altivec ABI on AIX (AIX only). Uses volatile and nonvolatile vector registers">; def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 2be7555102f8..cfede6e6e756 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -56,7 +56,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, HasP10Vector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; -} else if (Feature == "+spe") { +} else if (Feature == "+spe" || Feature == "+efpu2") { HasSPE = true; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); @@ -402,6 +402,8 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const { void PPCTargetInfo::setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const { if (Enabled) { +if (Name == "efpu2") + Features["spe"] = true; // If we're enabling any of the vsx based features then enable vsx and // altivec. We'll diagnose any problems later. bool FeatureHasVSX = llvm::StringSwitch(Name) @@ -425,6 +427,8 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap &Features, else Features[Name] = true; } else { +if (Name == "spe") + Features["efpu2"] = false; // If we're disabling altivec or vsx go ahead and disable all of the vsx // features. 
if ((Name == "altivec") || (Name == "vsx")) diff --git a/clang/test/Driver/ppc-features.cpp b/clang/test/Driver/ppc-features.cpp index 85060951aa16..def96c351b34 100644 --- a/clang/test/Driver/ppc-features.cpp +++ b/clang/test/Driver/ppc-features.cpp @@ -155,6 +155,9 @@ // CHECK-SPE: "-target-feature" "+spe" // CHECK-NOSPE: "-target-feature" "-spe" +// RUN: %clang -target powerpc %s -mefpu2 -c -### 2>&1 | FileCheck -check-prefix=CHECK-EFPU2 %s +// CHECK-EFPU2: "-target-feature" "+efpu2" + // Assembler features // RUN: %clang -target powerpc-unknown-linux-gnu %s -### -o %t.o -no-integrated-as 2>&1 | FileCheck -check-prefix=CHECK_32_BE_AS_ARGS %s // CHECK_32_BE_AS_ARGS: "-mppc" diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 2975ae161aaa..06403f5e55a2 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -72,6 +72,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", def FeatureSPE : SubtargetFeature<"spe","HasSPE"
[llvm-branch-commits] [llvm] bfdc19e - [PowerPC] Restore stack ptr from frame ptr with setjmp
Author: Nemanja Ivanovic Date: 2020-12-14T11:34:16-06:00 New Revision: bfdc19e77868b849b5c636bf0512970264aef571 URL: https://github.com/llvm/llvm-project/commit/bfdc19e77868b849b5c636bf0512970264aef571 DIFF: https://github.com/llvm/llvm-project/commit/bfdc19e77868b849b5c636bf0512970264aef571.diff LOG: [PowerPC] Restore stack ptr from frame ptr with setjmp If a function happens to: - call setjmp - do a 16-byte stack allocation - call a function that sets up a stack frame and longjmp's back The stack pointer that is restores by setjmp will no longer point to a valid back chain. According to the ABI, stack accesses in such a function are to be frame pointer based - so it is an error (quite obviously) to restore the stack from the back chain. We already restore the stack from the frame pointer when there are calls to fast_cc functions. We just need to also do that when there are calls to setjmp. This patch simply does that. This was pointed out by the Julia team. Differential revision: https://reviews.llvm.org/D92906 Added: llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll Modified: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 7df2f6dc9252..b93322c15534 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -375,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || -MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || -(MF.getTarget().Options.GuaranteedTailCallOpt && - MF.getInfo()->hasFastCall()); + MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || + MF.exposesReturnsTwice() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo()->hasFastCall()); } void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { @@ -584,8 +585,8 @@ 
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { // Frame pointers and base pointers complicate matters so don't do anything // if we have them. For example having a frame pointer will sometimes require // a copy of r1 into r31 and that makes keeping track of updates to r1 more - // difficult. - if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + // difficult. Similar situation exists with setjmp. + if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) return false; // Calls to fast_cc functions use different rules for passing parameters on @@ -1646,8 +1647,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the -// value of R31 in this case. -if (FI->hasFastCall()) { +// value of R31 in this case. Similar situation exists with setjmp.
+if (FI->hasFastCall() || MF.exposesReturnsTwice()) { assert(HasFP && "Expecting a valid frame pointer."); if (!HasRedZone) RBReg = FPReg; diff --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll new file mode 100644 index ..9928a111734b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll @@ -0,0 +1,156 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-- -verify-machineinstrs | FileCheck %s \ +; RUN: --check-prefix=BE +%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] } +%struct.__sigset_t = type { [16 x i64] } + +@.str = private unnamed_addr constant [33 x i8] c"Successfully returned from main\0A\00", align 1 + +; Function Attrs: nounwind +define dso_local signext i32 @main(i32 signext %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:mfocrf 12, 32 +; CHECK-NEXT:mflr 0 +; CHECK-NEXT:std 31, -8(1) +; CHECK-NEXT:std 0, 16(1) +; CHECK-NEXT:stw 12, 8(1) +; CHECK-NEXT:stdu 1, -784(1) +; CHECK-NEXT:# kill: def $r3 killed $r3 killed $x3 +; CHECK-NEXT:cmpwi 2, 3, 2 +; CHECK-NEXT:mr 31, 1 +; CHECK-NEXT:li 3, 0 +; CHECK-NEXT:blt 2, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT:addi 3, 31, 112 +; CHECK-NEXT:bl _setjmp +; CHECK-NEXT:nop +; CHECK-NEXT:crmove 20, 10 +; CHECK-NEXT:# kill: def $r3 killed $r3 killed $x3 +; CHECK-NEXT:cmpwi 3, 0 +; CHECK-NEXT:crorc 20, 10, 2 +
[llvm-branch-commits] [compiler-rt] eed0b9a - [PowerPC] Temporarily disable asan longjmp tests
Author: Nemanja Ivanovic Date: 2020-12-14T18:22:08-06:00 New Revision: eed0b9acdfe4409fb90b356d58c996f12cfd733f URL: https://github.com/llvm/llvm-project/commit/eed0b9acdfe4409fb90b356d58c996f12cfd733f DIFF: https://github.com/llvm/llvm-project/commit/eed0b9acdfe4409fb90b356d58c996f12cfd733f.diff LOG: [PowerPC] Temporarily disable asan longjmp tests Commit bfdc19e77868b849b5c636bf0512970264aef571 seems to have broken some PPC bots with a couple of asan test cases. Disable those test cases for now until I can resolve the issue. Added: Modified: compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp compiler-rt/test/asan/TestCases/longjmp.cpp Removed: diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp index 4774993cdf328..9da47facac276 100644 --- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp @@ -1,3 +1,4 @@ +// UNSUPPORTED: powerpc64 // Tests that __asan_handle_no_return properly unpoisons the signal alternate // stack. diff --git a/compiler-rt/test/asan/TestCases/longjmp.cpp b/compiler-rt/test/asan/TestCases/longjmp.cpp index 8e9f2ae195c71..bc4165ffd8139 100644 --- a/compiler-rt/test/asan/TestCases/longjmp.cpp +++ b/compiler-rt/test/asan/TestCases/longjmp.cpp @@ -1,3 +1,4 @@ +// UNSUPPORTED: powerpc64 // RUN: %clangxx_asan -O %s -o %t && %run %t #include ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] ba1202a - [PowerPC] Restore stack ptr from base ptr when available
Author: Nemanja Ivanovic Date: 2020-12-22T05:44:03-06:00 New Revision: ba1202a1e4f75d8f234d01730ac65a913e9baa01 URL: https://github.com/llvm/llvm-project/commit/ba1202a1e4f75d8f234d01730ac65a913e9baa01 DIFF: https://github.com/llvm/llvm-project/commit/ba1202a1e4f75d8f234d01730ac65a913e9baa01.diff LOG: [PowerPC] Restore stack ptr from base ptr when available On subtargets that have a red zone, we will copy the stack pointer to the base pointer in the prologue prior to updating the stack pointer. There are no other updates to the base pointer after that. This suggests that we should be able to restore the stack pointer from the base pointer rather than loading it from the back chain or adding the frame size back to either the stack pointer or the frame pointer. This came about because functions that call setjmp need to restore the SP from the FP because the back chain might have been clobbered (see https://reviews.llvm.org/D92906). However, if the stack is realigned, the restored SP might be incorrect (which is what caused the failures in the two ASan test cases). This patch was tested quite extensively both with sanitizer runtimes and general code.
Differential revision: https://reviews.llvm.org/D93327 Added: Modified: compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp compiler-rt/test/asan/TestCases/longjmp.cpp llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/test/CodeGen/PowerPC/aix-base-pointer.ll llvm/test/CodeGen/PowerPC/pr46759.ll llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll llvm/test/CodeGen/PowerPC/stack-realign.ll Removed: diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp index 9da47facac27..4774993cdf32 100644 --- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: powerpc64 // Tests that __asan_handle_no_return properly unpoisons the signal alternate // stack. diff --git a/compiler-rt/test/asan/TestCases/longjmp.cpp b/compiler-rt/test/asan/TestCases/longjmp.cpp index bc4165ffd813..8e9f2ae195c7 100644 --- a/compiler-rt/test/asan/TestCases/longjmp.cpp +++ b/compiler-rt/test/asan/TestCases/longjmp.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: powerpc64 // RUN: %clangxx_asan -O %s -o %t && %run %t #include diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index b93322c15534..50ce11b8374f 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1644,11 +1644,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red // zone add this offset back now. +// If the function has a base pointer, the stack pointer has been copied +// to it so we can restore it by copying in the other direction. +if (HasRedZone && HasBP) { + BuildMI(MBB, MBBI, dl, OrInst, RBReg). +addReg(BPReg). 
+addReg(BPReg); +} // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. Similar situation exists with setjmp. -if (FI->hasFastCall() || MF.exposesReturnsTwice()) { +else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { assert(HasFP && "Expecting a valid frame pointer."); if (!HasRedZone) RBReg = FPReg; diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll index 2b1cc0c45db4..c6e1107d4738 100644 --- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll @@ -27,7 +27,7 @@ declare void @callee(i32*) ; 32BIT: stwux 1, 1, 0 ; 32BIT: addi 3, 1, 64 ; 32BIT: bl .callee -; 32BIT: lwz 1, 0(1) +; 32BIT: mr 1, 30 ; 32BIT: lwz 30, -16(1) ; 64BIT-LABEL: .caller: @@ -38,5 +38,5 @@ declare void @callee(i32*) ; 64BIT: stdux 1, 1, 0 ; 64BIT: addi 3, 1, 128 ; 64BIT: bl .callee -; 64BIT: ld 1, 0(1) +; 64BIT: mr 1, 30 ; 64BIT: ld 30, -24(1) diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index 716e050cdbee..33b44b720b6e 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -61,7 +61,7 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-NEXT: .LBB0_6: # %entry ; CHECK-LE-NEXT:addi r3, r1, 2048 ; CHECK-LE-NEXT:lbz r3, 0(r3) -; CHECK-LE-NEXT:ld r1, 0(r1) +; CHECK-LE-NEXT:mr r1, r30 ; CHECK-
[llvm-branch-commits] [llvm] e73f885 - [PowerPC] Remove redundant COPY_TO_REGCLASS introduced by 8a58f21f5b6c
Author: Nemanja Ivanovic Date: 2020-12-28T09:26:51-06:00 New Revision: e73f885c988d7b94fcad64ddfa6a825e15e77a8f URL: https://github.com/llvm/llvm-project/commit/e73f885c988d7b94fcad64ddfa6a825e15e77a8f DIFF: https://github.com/llvm/llvm-project/commit/e73f885c988d7b94fcad64ddfa6a825e15e77a8f.diff LOG: [PowerPC] Remove redundant COPY_TO_REGCLASS introduced by 8a58f21f5b6c Added: Modified: llvm/lib/Target/PowerPC/PPCInstrPrefix.td Removed: diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index e7fa2affb730..2f29811b20d8 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2561,13 +2561,13 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in { def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst), (STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>; def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst), -(STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>; +(STXVRWX $src, xoaddr:$dst)>; def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst), -(STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>; +(STXVRWX $src, xoaddr:$dst)>; def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst), -(STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>; +(STXVRDX $src, xoaddr:$dst)>; def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst), -(STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>; +(STXVRDX $src, xoaddr:$dst)>; } class xxevalPattern imm> : ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PowerPC: Fix using long double libm functions for f128 intrinsics (PR #144382)
https://github.com/nemanjai approved this pull request. LGTM. I believe that the finite functions are provided by GLIBC on PPC for F128, but perhaps someone from IBM can confirm (@lei137 @w2yehia @RolandF77). https://github.com/llvm/llvm-project/pull/144382 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits