[llvm-branch-commits] [llvm] release/18.x: [PowerPC] Mask constant operands in ValueBit tracking (#67653) (PR #82301)
https://github.com/bzEq approved this pull request. LG. https://github.com/llvm/llvm-project/pull/82301 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f904d50 - [PowerPC] Remaining KnownBits should be constant when performing non-sign comparison
Author: Kai Luo Date: 2020-12-30T02:00:47Z New Revision: f904d50c29f23510cdbae0579085ae7ffebc1f63 URL: https://github.com/llvm/llvm-project/commit/f904d50c29f23510cdbae0579085ae7ffebc1f63 DIFF: https://github.com/llvm/llvm-project/commit/f904d50c29f23510cdbae0579085ae7ffebc1f63.diff LOG: [PowerPC] Remaining KnownBits should be constant when performing non-sign comparison In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says ``` // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. ``` the original check ``` if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) return SDValue(); ``` is not strong enough. For example, ``` Op1Known = 111x000x; Op2Known = 111x000x; ``` Bit 4, besides bit 0, is still unknown and affects the final result. This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388. Reviewed By: nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D93092 Added: llvm/test/CodeGen/PowerPC/pr48388.ll Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 18e35f5a0850f..e951679f92fa0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -13237,11 +13237,13 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); // We don't really care about what is known about the first bit (if - // anything), so clear it in all masks prior to comparing them. - Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); - Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); + // anything), so pretend that it is known zero for both to ensure they can + // be compared as constants. 
+ Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); + Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); - if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) + if (!Op1Known.isConstant() || !Op2Known.isConstant() || + Op1Known.getConstant() != Op2Known.getConstant()) return SDValue(); } } diff --git a/llvm/test/CodeGen/PowerPC/pr48388.ll b/llvm/test/CodeGen/PowerPC/pr48388.ll new file mode 100644 index 0..822e5d8523171 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr48388.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \ +; RUN: < %s | FileCheck %s + +define i64 @julia_div_i64(i64 %0, i64 %1) local_unnamed_addr #0 { +; CHECK-LABEL: julia_div_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:divd r6, r3, r4 +; CHECK-NEXT:lis r5, -1592 +; CHECK-NEXT:ori r7, r5, 21321 +; CHECK-NEXT:ori r5, r5, 65519 +; CHECK-NEXT:cmpdi r3, 0 +; CHECK-NEXT:rldic r7, r7, 4, 17 +; CHECK-NEXT:rldic r5, r5, 4, 17 +; CHECK-NEXT:iselgt r9, r5, r7 +; CHECK-NEXT:cmpdi r4, 0 +; CHECK-NEXT:mulld r8, r6, r4 +; CHECK-NEXT:iselgt r4, r5, r7 +; CHECK-NEXT:xor r4, r9, r4 +; CHECK-NEXT:cntlzd r4, r4 +; CHECK-NEXT:rldicl r4, r4, 58, 63 +; CHECK-NEXT:xor r3, r8, r3 +; CHECK-NEXT:addic r5, r3, -1 +; CHECK-NEXT:subfe r3, r5, r3 +; CHECK-NEXT:and r3, r4, r3 +; CHECK-NEXT:add r3, r6, r3 +; CHECK-NEXT:blr +entry: + %2 = sdiv i64 %0, %1 + %3 = icmp sgt i64 %0, 0 + %4 = icmp sgt i64 %1, 0 + %5 = select i1 %3, i64 140735820070640, i64 140735819363472 + %6 = select i1 %4, i64 140735820070640, i64 140735819363472 + %7 = icmp eq i64 %5, %6 + %8 = mul i64 %2, %1 + %9 = icmp ne i64 %8, %0 + %10 = and i1 %7, %9 + %11 = zext i1 %10 to i64 + %12 = add i64 %2, %11 + ret i64 %12 +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] e3e25cf - [PowerPC] Add mir test to show effect of `optimizeCompareInstr` when `equalityOnly` is true. NFC.
Author: Kai Luo Date: 2020-12-30T02:23:05Z New Revision: e3e25cfb44bc2a35e3b53d62d37c27b7d13157b6 URL: https://github.com/llvm/llvm-project/commit/e3e25cfb44bc2a35e3b53d62d37c27b7d13157b6 DIFF: https://github.com/llvm/llvm-project/commit/e3e25cfb44bc2a35e3b53d62d37c27b7d13157b6.diff LOG: [PowerPC] Add mir test to show effect of `optimizeCompareInstr` when `equalityOnly` is true. NFC. Added: llvm/test/CodeGen/PowerPC/peephole-cmp-eq.mir Modified: Removed: diff --git a/llvm/test/CodeGen/PowerPC/peephole-cmp-eq.mir b/llvm/test/CodeGen/PowerPC/peephole-cmp-eq.mir new file mode 100644 index ..bac0a6988c53 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/peephole-cmp-eq.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=powerpc64le -simplify-mir -verify-machineinstrs \ +# RUN: -run-pass=peephole-opt %s -o - | FileCheck %s +# Test to show effect of `optimizeCompareInstr` when `equalityOnly` is true. +--- +name:h +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc } + - { id: 1, class: g8rc } + - { id: 2, class: g8rc_and_g8rc_nox0 } + - { id: 3, class: crrc } + - { id: 4, class: g8rc_and_g8rc_nox0 } + - { id: 5, class: g8rc } +liveins: + - { reg: '$x3', virtual-reg: '%0' } + - { reg: '$x4', virtual-reg: '%1' } +frameInfo: + maxAlignment:1 +machineFunctionInfo: {} +body: | + bb.0: +liveins: $x3, $x4 + +; CHECK-LABEL: name: h +; CHECK: liveins: $x3, $x4 +; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x4 +; CHECK: [[COPY1:%[0-9]+]]:g8rc = COPY $x3 +; CHECK: [[SUBF8_rec:%[0-9]+]]:g8rc_and_g8rc_nox0 = SUBF8_rec [[COPY]], [[COPY1]], implicit-def $cr0 +; CHECK: [[COPY2:%[0-9]+]]:crrc = COPY killed $cr0 +; CHECK: [[ISEL8_:%[0-9]+]]:g8rc = ISEL8 $zero8, [[SUBF8_rec]], [[COPY2]].sub_eq +; CHECK: $x3 = COPY [[ISEL8_]] +; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 +%1:g8rc = COPY $x4 +%0:g8rc = COPY $x3 +%2:g8rc_and_g8rc_nox0 = SUBF8 %1, %0 +%3:crrc = CMPLD %0, %1 +%4:g8rc_and_g8rc_nox0 = 
LI8 0 +%5:g8rc = ISEL8 %4, %2, %3.sub_eq +$x3 = COPY %5 +BLR8 implicit $lr8, implicit $rm, implicit $x3 + +... ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f6515b0 - [PowerPC] Do not fold `cmp(d|w)` and `subf` instruction to `subf.` if `nsw` is not present
Author: Kai Luo Date: 2021-01-04T07:54:15Z New Revision: f6515b05205d4324d174dd1f7455c6c8e6671e6b URL: https://github.com/llvm/llvm-project/commit/f6515b05205d4324d174dd1f7455c6c8e6671e6b DIFF: https://github.com/llvm/llvm-project/commit/f6515b05205d4324d174dd1f7455c6c8e6671e6b.diff LOG: [PowerPC] Do not fold `cmp(d|w)` and `subf` instruction to `subf.` if `nsw` is not present In `PPCInstrInfo::optimizeCompareInstr` we seek opportunities to fold `cmp(d|w)` and `subf` into a `subf.`. However, if `subf.` overflows, `cr0` can't reflect the correct order, violating the semantics of `cmp(d|w)`. Fixes https://bugs.llvm.org/show_bug.cgi?id=47830. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D90156 Added: Modified: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/test/CodeGen/PowerPC/pr47830.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index deac690a7611..75a498b807cd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2141,6 +2141,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, if (NewOpC == -1) return false; + // This transformation should not be performed if `nsw` is missing and is not + // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in + // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in + // CRReg can reflect if compared values are equal, this optz is still valid. + if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) && + Sub && !Sub->getFlag(MachineInstr::NoSWrap)) +return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. 
If it is safe to remove CmpInstr, the diff --git a/llvm/test/CodeGen/PowerPC/pr47830.ll b/llvm/test/CodeGen/PowerPC/pr47830.ll index be61a81462c2..bd320907a341 100644 --- a/llvm/test/CodeGen/PowerPC/pr47830.ll +++ b/llvm/test/CodeGen/PowerPC/pr47830.ll @@ -5,8 +5,9 @@ define i64 @f(i64 %a, i64 %b) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: -; CHECK-NEXT:sub. r3, r3, r4 -; CHECK-NEXT:isellt r3, 0, r3 +; CHECK-NEXT:sub r5, r3, r4 +; CHECK-NEXT:cmpd r3, r4 +; CHECK-NEXT:isellt r3, 0, r5 ; CHECK-NEXT:blr %c = icmp slt i64 %a, %b %d = sub i64 %a, %b ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 8e6d920 - [DAG][PowerPC] Fix dropped `nsw` flag in `SimplifySetCC` by adding `doesNodeExist` helper
Author: Kai Luo Date: 2020-11-25T04:39:03Z New Revision: 8e6d92026c624386b85675a4664e2666225fcfac URL: https://github.com/llvm/llvm-project/commit/8e6d92026c624386b85675a4664e2666225fcfac DIFF: https://github.com/llvm/llvm-project/commit/8e6d92026c624386b85675a4664e2666225fcfac.diff LOG: [DAG][PowerPC] Fix dropped `nsw` flag in `SimplifySetCC` by adding `doesNodeExist` helper `SimplifySetCC` invokes `getNodeIfExists` without passing `Flags` argument and `getNodeIfExists` uses a default `SDNodeFlags` to intersect the original flags; as a consequence, flags like `nsw` are dropped. Added a new helper function `doesNodeExist` to check if a node exists without modifying its flags. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D89938 Added: Modified: llvm/include/llvm/CodeGen/SelectionDAG.h llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/test/CodeGen/PowerPC/setcc-sub-flag.ll Removed: diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 8966e7f51dd9..cbd2e8b043a0 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1520,6 +1520,9 @@ class SelectionDAG { SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef Ops); + /// Check if a node exists without modifying its flags. + bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef Ops); + /// Creates a SDDbgValue node. 
SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, bool IsIndirect, const DebugLoc &DL, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 489651e987ac..eee80cc4bc70 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8326,6 +8326,19 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return nullptr; } +/// doesNodeExist - Check if a node exists without modifying its flags. +bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList, + ArrayRef Ops) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { +FoldingSetNodeID ID; +AddNodeIDNode(ID, Opcode, VTList, Ops); +void *IP = nullptr; +if (FindNodeOrInsertPos(ID, SDLoc(), IP)) + return true; + } + return false; +} + /// getDbgValue - Creates a SDDbgValue node. /// /// SDNode diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1d51773dc2d8..93df88b3f6d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3476,8 +3476,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && - DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) && - !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )) + DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && + !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); if (auto *N1C = isConstOrConstSplat(N1)) { diff --git a/llvm/test/CodeGen/PowerPC/setcc-sub-flag.ll b/llvm/test/CodeGen/PowerPC/setcc-sub-flag.ll index ee4697b874e2..3d89fea12216 100644 --- a/llvm/test/CodeGen/PowerPC/setcc-sub-flag.ll +++ 
b/llvm/test/CodeGen/PowerPC/setcc-sub-flag.ll @@ -10,7 +10,7 @@ define void @f(i64 %a, i64 %b) { ; CHECK: liveins: $x3, $x4 ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x4 ; CHECK: [[COPY1:%[0-9]+]]:g8rc = COPY $x3 - ; CHECK: [[SUBF8_:%[0-9]+]]:g8rc = SUBF8 [[COPY1]], [[COPY]] + ; CHECK: [[SUBF8_:%[0-9]+]]:g8rc = nsw SUBF8 [[COPY1]], [[COPY]] %c = sub nsw i64 %b, %a call void @foo(i64 %c) %d = icmp slt i64 %a, %b ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 97e7ce3 - [PowerPC] Probe the gap between stackptr and realigned stackptr
Author: Kai Luo Date: 2020-11-25T07:01:45Z New Revision: 97e7ce3b15ccaf3e121a666122a5b282a5a6607d URL: https://github.com/llvm/llvm-project/commit/97e7ce3b15ccaf3e121a666122a5b282a5a6607d DIFF: https://github.com/llvm/llvm-project/commit/97e7ce3b15ccaf3e121a666122a5b282a5a6607d.diff LOG: [PowerPC] Probe the gap between stackptr and realigned stackptr While reviewing https://reviews.llvm.org/D84419, @efriedma mentioned that the gap between the realigned stack pointer and the original stack pointer should be probed too, whatever the alignment is. This patch fixes the issue for PPC64. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D88078 Added: Modified: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/test/CodeGen/PowerPC/pr46759.ll llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 95bcace21f81..7df2f6dc9252 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -525,6 +525,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, // register is available, we can adjust for that by not overlapping the spill // code. However, if we need to realign the stack (i.e. have a base pointer) // and the stack frame is large, we need two scratch registers. +// Also, stack probe requires two scratch registers, one for old sp, one for +// large frame and large probe size. 
bool PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -536,8 +538,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); Align MaxAlign = MFI.getMaxAlign(); bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); + const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); - return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; + return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || + TLI.hasInlineStackProbe(MF); } bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { @@ -676,12 +680,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); // Using the same bool variable as below to suppress compiler warnings. - // Stack probe requires two scratch registers, one for old sp, one for large - // frame and large probe size. 
bool SingleScratchReg = findScratchRegister( - &MBB, false, - twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), - &ScratchReg, &TempReg); + &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); assert(SingleScratchReg && "Required number of registers not available in this block"); @@ -1202,10 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, if (StackAllocMIPos == PrologMBB.end()) return; const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); + MachineBasicBlock *CurrentMBB = &PrologMBB; DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); MachineInstr &MI = *StackAllocMIPos; int64_t NegFrameSize = MI.getOperand(2).getImm(); - int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); + unsigned ProbeSize = TLI.getStackProbeSize(MF); + int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); int64_t NumBlocks = NegFrameSize / NegProbeSize; int64_t NegResidualSize = NegFrameSize % NegProbeSize; @@ -1214,10 +1216,9 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, Register FPReg = MI.getOperand(1).getReg(); const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); + Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); - // Initialize current frame pointer. const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); - BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register Reg) { @@ -1257,89 +1258,218 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, // Subroutine to store frame pointer and decrease stack pointer by probe size. 
auto allocateAndProbe = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t NegSize, - Register NegSizeReg, bool UseDForm) { + Register NegSizeReg, bool UseDForm, + Register StoreReg) { if (UseDForm) BuildMI(M
[llvm-branch-commits] [llvm] f5d5291 - [PowerPC] Pre-commit neg abs test for vector. NFC.
Author: Kai Luo Date: 2020-12-04T06:52:05Z New Revision: f5d52916ce34f68a2fb4de69844f1b51b6bd0a13 URL: https://github.com/llvm/llvm-project/commit/f5d52916ce34f68a2fb4de69844f1b51b6bd0a13 DIFF: https://github.com/llvm/llvm-project/commit/f5d52916ce34f68a2fb4de69844f1b51b6bd0a13.diff LOG: [PowerPC] Pre-commit neg abs test for vector. NFC. Added: Modified: llvm/test/CodeGen/PowerPC/neg-abs.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/neg-abs.ll b/llvm/test/CodeGen/PowerPC/neg-abs.ll index bbe27fdd057c..c23423ad8ddb 100644 --- a/llvm/test/CodeGen/PowerPC/neg-abs.ll +++ b/llvm/test/CodeGen/PowerPC/neg-abs.ll @@ -4,6 +4,10 @@ ; RUN: -check-prefix=CHECK-LE %s declare i64 @llvm.abs.i64(i64, i1 immarg) +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) define i64@neg_abs(i64 %x) { ; CHECK-LE-LABEL: neg_abs: @@ -16,3 +20,60 @@ define i64@neg_abs(i64 %x) { %neg = sub nsw i64 0, %abs ret i64 %neg } + +define <2 x i64> @neg_abs_v2i64(<2 x i64> %0) { +; CHECK-LE-LABEL: neg_abs_v2i64: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT:addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-NEXT:addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-NEXT:lxvd2x vs0, 0, r3 +; CHECK-LE-NEXT:xxswapd vs35, vs0 +; CHECK-LE-NEXT:vsrad v3, v2, v3 +; CHECK-LE-NEXT:xxlxor vs34, vs34, vs35 +; CHECK-LE-NEXT:vsubudm v2, v3, v2 +; CHECK-LE-NEXT:blr + %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %0, i1 true) + %neg.abs = sub <2 x i64> zeroinitializer, %abs + ret <2 x i64> %neg.abs +} + +define <4 x i32> @neg_abs_v4i32(<4 x i32> %0) { +; CHECK-LE-LABEL: neg_abs_v4i32: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT:vspltisw v3, -16 +; CHECK-LE-NEXT:vspltisw v4, 15 +; CHECK-LE-NEXT:vsubuwm v3, v4, v3 +; CHECK-LE-NEXT:vsraw v3, v2, v3 +; CHECK-LE-NEXT:xxlxor vs34, vs34, vs35 +; CHECK-LE-NEXT:vsubuwm v2, v3, v2 +; CHECK-LE-NEXT:blr + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %0, i1 
true) + %neg.abs = sub <4 x i32> zeroinitializer, %abs + ret <4 x i32> %neg.abs +} + +define <8 x i16> @neg_abs_v8i16(<8 x i16> %0) { +; CHECK-LE-LABEL: neg_abs_v8i16: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT:vspltish v3, 15 +; CHECK-LE-NEXT:vsrah v3, v2, v3 +; CHECK-LE-NEXT:xxlxor vs34, vs34, vs35 +; CHECK-LE-NEXT:vsubuhm v2, v3, v2 +; CHECK-LE-NEXT:blr + %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %0, i1 true) + %neg.abs = sub <8 x i16> zeroinitializer, %abs + ret <8 x i16> %neg.abs +} + +define <16 x i8> @neg_abs_v16i8(<16 x i8> %0) { +; CHECK-LE-LABEL: neg_abs_v16i8: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT:vspltisb v3, 7 +; CHECK-LE-NEXT:vsrab v3, v2, v3 +; CHECK-LE-NEXT:xxlxor vs34, vs34, vs35 +; CHECK-LE-NEXT:vsububm v2, v3, v2 +; CHECK-LE-NEXT:blr + %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %0, i1 true) + %neg.abs = sub <16 x i8> zeroinitializer, %abs + ret <16 x i8> %neg.abs +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 44bd8ea - [DAGCombine][PowerPC] Simplify nabs by using legal `smin` operation
Author: Kai Luo Date: 2020-12-08T03:24:07Z New Revision: 44bd8ea167f2138de9317196a7b199840e29fb59 URL: https://github.com/llvm/llvm-project/commit/44bd8ea167f2138de9317196a7b199840e29fb59 DIFF: https://github.com/llvm/llvm-project/commit/44bd8ea167f2138de9317196a7b199840e29fb59.diff LOG: [DAGCombine][PowerPC] Simplify nabs by using legal `smin` operation Convert `0 - abs(x)` to `smin (x, -x)` if `smin` is a legal operation. Verification: https://alive2.llvm.org/ce/z/vpquFR Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D92637 Added: Modified: llvm/include/llvm/CodeGen/TargetLowering.h llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/test/CodeGen/PowerPC/neg-abs.ll Removed: diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 4aeefd980d7a..3dce96d1c064 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4393,8 +4393,10 @@ class TargetLowering : public TargetLoweringBase { /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) /// \param N Node to expand /// \param Result output after conversion + /// \param IsNegative indicate negated abs /// \returns True, if the expansion was successful, false otherwise - bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG, + bool IsNegative = false) const; /// Turn load of vector type into a load of the individual elements. /// \param LD load to expand diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 51de545d1db9..6d5a54198c0b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3193,18 +3193,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N1; } -// Convert 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)). 
+// Convert 0 - abs(x). +SDValue Result; if (N1->getOpcode() == ISD::ABS && -!TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - SDValue X = N1->getOperand(0); - SDValue Shift = - DAG.getNode(ISD::SRA, DL, VT, X, - DAG.getConstant(BitWidth - 1, DL, getShiftAmountTy(VT))); - SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Shift); - AddToWorklist(Shift.getNode()); - AddToWorklist(Xor.getNode()); - return DAG.getNode(ISD::SUB, DL, VT, Shift, Xor); -} +!TLI.isOperationLegalOrCustom(ISD::ABS, VT) && +TLI.expandABS(N1.getNode(), Result, DAG, true)) + return Result; } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d27ada4c4b38..3897dce20a19 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6816,14 +6816,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, } bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG) const { + SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Op = N->getOperand(0); // abs(x) -> smax(x,sub(0,x)) - if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) { + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMAX, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); Result = DAG.getNode(ISD::SMAX, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -6831,24 +6832,42 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, } // abs(x) -> umin(x,sub(0,x)) - if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); Result = DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); return true; } + // 0 - abs(x) -> 
smin(x, sub(0,x)) + if (IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMIN, VT)) { +SDValue Zero = DAG.getConstant(0, dl, VT); +Result = DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); +return true; + } + // Only expand vector types if we have the appropriate vector operations. - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) || -!isOperationLegalOrCustom(ISD::ADD, VT) || -