https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67301
>From 92abb76631594dfc2ca586c46c38031610be0548 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 25 Sep 2023 17:08:59 +0800 Subject: [PATCH 1/5] [Legalizer] Expand fmaximum and fminimum According to langref, llvm.maximum/minimum has -0.0 < +0.0 semantics and propagates NaN. Expand the nodes on targets not supporting the operation, by adding extra check for NaN and using is_fpclass to check zero signs. --- llvm/include/llvm/CodeGen/TargetLowering.h | 3 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 + .../SelectionDAG/LegalizeVectorOps.cpp | 7 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 58 ++ llvm/lib/Target/ARM/ARMISelLowering.cpp | 14 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +- .../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 28 +- .../CodeGen/PowerPC/fminimum-fmaximum-f128.ll | 97 ++ .../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 847 ++++++++++++++++++ 9 files changed, 1039 insertions(+), 31 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll create mode 100644 llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c6a7aa17146dd4..429cfd72af2e6e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5089,6 +5089,9 @@ class TargetLowering : public TargetLoweringBase { /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand fminimum/fmaximum into multiple comparison with selects. + SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. 
/// \param N Node to expand /// \returns The expansion result diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f19beea3a3ed8b..33f6354d558454 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3540,6 +3540,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Expanded); break; } + case ISD::FMINIMUM: + case ISD::FMAXIMUM: { + if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) + Results.push_back(Expanded); + break; + } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index dec81475f3a88f..db132035adcf29 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -949,6 +949,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 39489e0bf142eb..23de9829b5e9ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8177,6 +8177,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } +SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + unsigned Opc = N->getOpcode(); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + bool NoNaN = (N->getFlags().hasNoNaNs() || + 
(DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))); + bool NoZeroSign = + (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) || + DAG.isKnownNeverZeroFloat(RHS)); + bool IsMax = Opc == ISD::FMAXIMUM; + + if (VT.isVector() && + isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType())) + return SDValue(); + + SDValue MinMax; + if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, + VT)) + MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT, + LHS, RHS); + else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT)) + MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS); + else + MinMax = DAG.getSelect( + DL, VT, + DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS, + RHS); + + // Propagate any NaN of both operands + if (!NoNaN) { + ConstantFP *FPNaN = ConstantFP::get( + *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT))); + MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO), + DAG.getConstantFP(*FPNaN, DL, VT), MinMax); + } + + // fminimum/fmaximum requires -0.0 less than +0.0 + if (!NoZeroSign) { + SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, + DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ); + SDValue TestZero = + DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); + SDValue LCmp = DAG.getSelect( + DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, + MinMax); + SDValue RCmp = DAG.getSelect( + DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, + LCmp); + MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax); + } + + return MinMax; +} + /// Returns a true value if if this FPClassTest can be performed with an ordered /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns /// std::nullopt if it cannot be performed as a compare with 0. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 69ef942df1f6e7..9eac62175ee5fc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1549,15 +1549,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasNEON()) { // vmin and vmax aren't available in a scalar form, so we can use - // a NEON instruction with an undef lane instead. This has a performance - // penalty on some cores, so we don't do this unless we have been - // asked to by the core tuning model. - if (Subtarget->useNEONForSinglePrecisionFP()) { - setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); - setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); - } + // a NEON instruction with an undef lane instead. + setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); + setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 8e6644821031c1..2c2f3926fc93c5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -440,8 +440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) setOperationAction(ISD::FPOWI, MVT::i32, Custom); - if (!Subtarget.hasStdExtZfa()) - setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, + Subtarget.hasStdExtZfa() ? 
Legal : Custom); } if (Subtarget.hasStdExtFOrZfinx()) { @@ -464,9 +464,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); - if (Subtarget.hasStdExtZfa()) + if (Subtarget.hasStdExtZfa()) { setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - else + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); + } else setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); } @@ -481,6 +482,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::BITCAST, MVT::f64, Custom); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal); } else { if (Subtarget.is64Bit()) setOperationAction(FPRndMode, MVT::f64, Custom); diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll index be741f536ac757..528bfe0411730a 100644 --- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -46,12 +46,10 @@ define float @fminnum32_intrinsic(float %x, float %y) { define float @fminnum32_nsz_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fminnum32_nsz_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovlt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vmin.f32 d0, d1, d0 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fminnum32_nsz_intrinsic: @@ -78,9 +76,7 @@ define float @fminnum32_non_zero_intrinsic(float %x) { ; ARMV7: @ %bb.0: ; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00 ; ARMV7-NEXT: vmov s2, r0 -; ARMV7-NEXT: vcmp.f32 s2, s0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: 
vmovlt.f32 s0, s2 +; ARMV7-NEXT: vmin.f32 d0, d1, d0 ; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; @@ -136,12 +132,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) { define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fmaxnum32_nsz_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vmax.f32 d0, d1, d0 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fmaxnum32_nsz_intrinsic: @@ -210,9 +204,7 @@ define float @fmaxnum32_non_zero_intrinsic(float %x) { ; ARMV7: @ %bb.0: ; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00 ; ARMV7-NEXT: vmov s2, r0 -; ARMV7-NEXT: vcmp.f32 s2, s0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s0, s2 +; ARMV7-NEXT: vmax.f32 d0, d1, d0 ; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll new file mode 100644 index 00000000000000..6d9eb133768274 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +define fp128 @f128_minimum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_minimum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: vmr 4, 2 +; CHECK-NEXT: bge 0, .LBB0_8 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bun 0, .LBB0_9 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: xststdcqp 0, 2, 4 +; CHECK-NEXT: bc 4, 2, .LBB0_10 +; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: xststdcqp 0, 3, 4 +; CHECK-NEXT: bc 12, 2, .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: .LBB0_5: # %entry +; 
CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: xscmpuqp 0, 4, 2 +; CHECK-NEXT: beq 0, .LBB0_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: vmr 3, 4 +; CHECK-NEXT: .LBB0_7: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_8: # %entry +; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: bnu 0, .LBB0_2 +; CHECK-NEXT: .LBB0_9: +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: xststdcqp 0, 2, 4 +; CHECK-NEXT: bc 12, 2, .LBB0_3 +; CHECK-NEXT: .LBB0_10: # %entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xststdcqp 0, 3, 4 +; CHECK-NEXT: bc 4, 2, .LBB0_4 +; CHECK-NEXT: b .LBB0_5 +entry: + %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +define fp128 @f128_maximum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_maximum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: vmr 4, 2 +; CHECK-NEXT: ble 0, .LBB1_8 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bun 0, .LBB1_9 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: xststdcqp 0, 2, 8 +; CHECK-NEXT: bc 4, 2, .LBB1_10 +; CHECK-NEXT: .LBB1_3: # %entry +; CHECK-NEXT: xststdcqp 0, 3, 8 +; CHECK-NEXT: bc 12, 2, .LBB1_5 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: .LBB1_5: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: xscmpuqp 0, 4, 2 +; CHECK-NEXT: beq 0, .LBB1_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: vmr 3, 4 +; CHECK-NEXT: .LBB1_7: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_8: # %entry +; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: bnu 0, .LBB1_2 +; CHECK-NEXT: .LBB1_9: +; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: xststdcqp 0, 2, 8 +; CHECK-NEXT: bc 12, 2, .LBB1_3 +; CHECK-NEXT: .LBB1_10: # 
%entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xststdcqp 0, 3, 8 +; CHECK-NEXT: bc 4, 2, .LBB1_4 +; CHECK-NEXT: b .LBB1_5 +entry: + %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +declare fp128 @llvm.minimum.f128(fp128, fp128) +declare fp128 @llvm.maximum.f128(fp128, fp128) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll new file mode 100644 index 00000000000000..24fa7c716ea295 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -0,0 +1,847 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefix=NOVSX +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefix=VSX +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr8 < %s | FileCheck %s --check-prefix=AIX + +define float @f32_minimum(float %a, float %b) { +; NOVSX-LABEL: f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: bc 12, 0, .LBB0_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB0_2: # %entry +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: bc 4, 3, .LBB0_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) +; NOVSX-NEXT: .LBB0_4: # %entry +; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: lwz 3, -8(1) +; NOVSX-NEXT: bc 12, 2, .LBB0_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB0_6: # %entry +; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB0_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB0_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI0_1@toc@l(3) +; NOVSX-NEXT: fcmpu 
0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB0_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB0_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: xscvdpspn 3, 2 +; VSX-NEXT: mffprwz 3, 0 +; VSX-NEXT: bc 12, 3, .LBB0_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmindp 0, 1, 2 +; VSX-NEXT: b .LBB0_3 +; VSX-NEXT: .LBB0_2: +; VSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) +; VSX-NEXT: .LBB0_3: # %entry +; VSX-NEXT: xoris 3, 3, 32768 +; VSX-NEXT: cmplwi 3, 0 +; VSX-NEXT: mffprwz 3, 3 +; VSX-NEXT: bc 12, 2, .LBB0_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB0_5: # %entry +; VSX-NEXT: xoris 3, 3, 32768 +; VSX-NEXT: cmplwi 3, 0 +; VSX-NEXT: bc 12, 2, .LBB0_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB0_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB0_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB0_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: xscvdpspn 3, 2 +; AIX-NEXT: mffprwz 3, 0 +; AIX-NEXT: bc 12, 3, L..BB0_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 0, 1, 2 +; AIX-NEXT: b L..BB0_3 +; AIX-NEXT: L..BB0_2: +; AIX-NEXT: ld 4, L..C0(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB0_3: # %entry +; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: cmplwi 3, 0 +; AIX-NEXT: mffprwz 3, 3 +; AIX-NEXT: bc 12, 2, L..BB0_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB0_5: # %entry +; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: cmplwi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB0_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB0_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; 
AIX-NEXT: bc 12, 2, L..BB0_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB0_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call float @llvm.minimum.f32(float %a, float %b) + ret float %m +} + +define float @f32_maximum(float %a, float %b) { +; NOVSX-LABEL: f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: bc 12, 1, .LBB1_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB1_2: # %entry +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: bc 4, 3, .LBB1_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) +; NOVSX-NEXT: .LBB1_4: # %entry +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: lwz 3, -8(1) +; NOVSX-NEXT: bc 12, 2, .LBB1_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB1_6: # %entry +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB1_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB1_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI1_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB1_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB1_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: xscvdpspn 3, 2 +; VSX-NEXT: mffprwz 3, 0 +; VSX-NEXT: bc 12, 3, .LBB1_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 0, 1, 2 +; VSX-NEXT: b .LBB1_3 +; VSX-NEXT: .LBB1_2: +; VSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) +; VSX-NEXT: .LBB1_3: # %entry +; VSX-NEXT: cmpwi 3, 0 +; VSX-NEXT: mffprwz 3, 3 +; VSX-NEXT: bc 12, 2, .LBB1_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB1_5: # %entry +; VSX-NEXT: cmpwi 3, 0 +; VSX-NEXT: bc 
12, 2, .LBB1_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB1_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB1_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB1_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: xscvdpspn 3, 2 +; AIX-NEXT: mffprwz 3, 0 +; AIX-NEXT: bc 12, 3, L..BB1_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 0, 1, 2 +; AIX-NEXT: b L..BB1_3 +; AIX-NEXT: L..BB1_2: +; AIX-NEXT: ld 4, L..C1(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB1_3: # %entry +; AIX-NEXT: cmpwi 3, 0 +; AIX-NEXT: mffprwz 3, 3 +; AIX-NEXT: bc 12, 2, L..BB1_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB1_5: # %entry +; AIX-NEXT: cmpwi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB1_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB1_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB1_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB1_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call float @llvm.maximum.f32(float %a, float %b) + ret float %m +} + +define double @f64_minimum(double %a, double %b) { +; NOVSX-LABEL: f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: bc 12, 0, .LBB2_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB2_2: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 4, 3, .LBB2_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) +; NOVSX-NEXT: .LBB2_4: # %entry +; NOVSX-NEXT: li 4, 1 +; NOVSX-NEXT: rldic 4, 4, 63, 0 +; NOVSX-NEXT: cmpd 3, 4 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 12, 2, .LBB2_6 +; 
NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB2_6: # %entry +; NOVSX-NEXT: cmpd 3, 4 +; NOVSX-NEXT: bc 12, 2, .LBB2_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB2_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI2_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB2_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB2_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: mffprd 3, 1 +; VSX-NEXT: bc 12, 3, .LBB2_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmindp 0, 1, 2 +; VSX-NEXT: b .LBB2_3 +; VSX-NEXT: .LBB2_2: +; VSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) +; VSX-NEXT: .LBB2_3: # %entry +; VSX-NEXT: li 4, 1 +; VSX-NEXT: rldic 4, 4, 63, 0 +; VSX-NEXT: cmpd 3, 4 +; VSX-NEXT: mffprd 3, 2 +; VSX-NEXT: bc 12, 2, .LBB2_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB2_5: # %entry +; VSX-NEXT: cmpd 3, 4 +; VSX-NEXT: bc 12, 2, .LBB2_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB2_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB2_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB2_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: mffprd 3, 1 +; AIX-NEXT: bc 12, 3, L..BB2_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 0, 1, 2 +; AIX-NEXT: b L..BB2_3 +; AIX-NEXT: L..BB2_2: +; AIX-NEXT: ld 4, L..C2(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB2_3: # %entry +; AIX-NEXT: li 4, 1 +; AIX-NEXT: rldic 4, 4, 63, 0 +; AIX-NEXT: cmpd 3, 4 +; AIX-NEXT: mffprd 3, 2 +; AIX-NEXT: bc 12, 2, L..BB2_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB2_5: # %entry +; 
AIX-NEXT: cmpd 3, 4 +; AIX-NEXT: bc 12, 2, L..BB2_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB2_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB2_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB2_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call double @llvm.minimum.f64(double %a, double %b) + ret double %m +} + +define double @f64_maximum(double %a, double %b) { +; NOVSX-LABEL: f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: bc 12, 1, .LBB3_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB3_2: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 4, 3, .LBB3_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) +; NOVSX-NEXT: .LBB3_4: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 12, 2, .LBB3_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB3_6: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB3_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB3_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI3_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB3_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB3_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: mffprd 3, 1 +; VSX-NEXT: bc 12, 3, .LBB3_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 0, 1, 2 +; VSX-NEXT: b .LBB3_3 +; VSX-NEXT: .LBB3_2: +; VSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) +; VSX-NEXT: .LBB3_3: # %entry +; VSX-NEXT: cmpdi 3, 0 +; VSX-NEXT: mffprd 3, 2 +; 
VSX-NEXT: bc 12, 2, .LBB3_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB3_5: # %entry +; VSX-NEXT: cmpdi 3, 0 +; VSX-NEXT: bc 12, 2, .LBB3_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB3_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB3_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB3_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: mffprd 3, 1 +; AIX-NEXT: bc 12, 3, L..BB3_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 0, 1, 2 +; AIX-NEXT: b L..BB3_3 +; AIX-NEXT: L..BB3_2: +; AIX-NEXT: ld 4, L..C3(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB3_3: # %entry +; AIX-NEXT: cmpdi 3, 0 +; AIX-NEXT: mffprd 3, 2 +; AIX-NEXT: bc 12, 2, L..BB3_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB3_5: # %entry +; AIX-NEXT: cmpdi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB3_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB3_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB3_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB3_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call double @llvm.maximum.f64(double %a, double %b) + ret double %m +} + +define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: vcmpeqfp 5, 3, 3 +; NOVSX-NEXT: vspltisb 4, -1 +; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; NOVSX-NEXT: vcmpeqfp 0, 2, 2 +; NOVSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; NOVSX-NEXT: vcmpgtfp 1, 3, 2 +; NOVSX-NEXT: vslw 4, 4, 4 +; NOVSX-NEXT: vnot 5, 5 +; NOVSX-NEXT: vnot 0, 0 +; NOVSX-NEXT: vsel 1, 3, 2, 1 +; NOVSX-NEXT: vor 5, 0, 5 +; NOVSX-NEXT: lvx 0, 0, 3 +; NOVSX-NEXT: vsel 5, 1, 0, 5 +; NOVSX-NEXT: vcmpequw 0, 2, 4 +; NOVSX-NEXT: vcmpequw 4, 
3, 4 +; NOVSX-NEXT: vsel 2, 5, 2, 0 +; NOVSX-NEXT: vxor 0, 0, 0 +; NOVSX-NEXT: vsel 2, 2, 3, 4 +; NOVSX-NEXT: vcmpeqfp 3, 5, 0 +; NOVSX-NEXT: vsel 2, 5, 2, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xxleqv 36, 36, 36 +; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: vslw 4, 4, 4 +; VSX-NEXT: xvminsp 2, 34, 35 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: vcmpequw 5, 2, 4 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: vcmpequw 4, 3, 4 +; VSX-NEXT: xxsel 0, 2, 3, 0 +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: xxsel 2, 0, 34, 37 +; VSX-NEXT: xvcmpeqsp 1, 0, 1 +; VSX-NEXT: xxsel 2, 2, 35, 36 +; VSX-NEXT: xxsel 34, 0, 2, 1 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xxleqv 36, 36, 36 +; AIX-NEXT: ld 3, L..C4(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: vslw 4, 4, 4 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: xvminsp 2, 34, 35 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: vcmpequw 5, 2, 4 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: vcmpequw 4, 3, 4 +; AIX-NEXT: xxsel 0, 2, 3, 0 +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: xxsel 2, 0, 34, 37 +; AIX-NEXT: xvcmpeqsp 1, 0, 1 +; AIX-NEXT: xxsel 2, 2, 35, 36 +; AIX-NEXT: xxsel 34, 0, 2, 1 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: vcmpeqfp 4, 3, 3 +; NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; NOVSX-NEXT: vcmpeqfp 5, 2, 2 +; NOVSX-NEXT: addi 3, 3, .LCPI5_0@toc@l +; NOVSX-NEXT: vcmpgtfp 0, 2, 3 +; NOVSX-NEXT: lvx 1, 0, 3 +; NOVSX-NEXT: vnot 4, 4 +; NOVSX-NEXT: vnot 5, 5 +; NOVSX-NEXT: vsel 0, 3, 2, 0 +; 
NOVSX-NEXT: vor 4, 5, 4 +; NOVSX-NEXT: vxor 5, 5, 5 +; NOVSX-NEXT: vsel 4, 0, 1, 4 +; NOVSX-NEXT: vcmpequw 0, 2, 5 +; NOVSX-NEXT: vsel 2, 4, 2, 0 +; NOVSX-NEXT: vcmpequw 0, 3, 5 +; NOVSX-NEXT: vsel 2, 2, 3, 0 +; NOVSX-NEXT: vcmpeqfp 3, 4, 5 +; NOVSX-NEXT: vsel 2, 4, 2, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI5_0@toc@l +; VSX-NEXT: xvmaxsp 2, 34, 35 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxlxor 36, 36, 36 +; VSX-NEXT: vcmpequw 5, 2, 4 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: vcmpequw 0, 3, 4 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: xxsel 0, 2, 3, 0 +; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: xvcmpeqsp 2, 0, 36 +; VSX-NEXT: xxsel 1, 1, 35, 32 +; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: ld 3, L..C5(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: xvmaxsp 2, 34, 35 +; AIX-NEXT: xxlxor 36, 36, 36 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: vcmpequw 5, 2, 4 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: vcmpequw 0, 3, 4 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: xxsel 0, 2, 3, 0 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: xvcmpeqsp 2, 0, 36 +; AIX-NEXT: xxsel 1, 1, 35, 32 +; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 6, 1 +; NOVSX-NEXT: stfd 4, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) +; NOVSX-NEXT: stfd 3, -32(1) +; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: bc 12, 0, .LBB6_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: .LBB6_2: 
# %entry +; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; NOVSX-NEXT: ld 4, -24(1) +; NOVSX-NEXT: lfs 0, .LCPI6_0@toc@l(3) +; NOVSX-NEXT: fmr 5, 0 +; NOVSX-NEXT: bc 12, 3, .LBB6_4 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: .LBB6_4: # %entry +; NOVSX-NEXT: li 3, 1 +; NOVSX-NEXT: rldic 3, 3, 63, 0 +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: ld 4, -32(1) +; NOVSX-NEXT: bc 12, 2, .LBB6_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: .LBB6_6: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: bc 12, 2, .LBB6_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: .LBB6_8: # %entry +; NOVSX-NEXT: addis 4, 2, .LCPI6_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI6_1@toc@l(4) +; NOVSX-NEXT: fcmpu 0, 5, 1 +; NOVSX-NEXT: bc 12, 2, .LBB6_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 3, 5 +; NOVSX-NEXT: .LBB6_10: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: fmr 5, 2 +; NOVSX-NEXT: bc 12, 0, .LBB6_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: .LBB6_12: # %entry +; NOVSX-NEXT: ld 4, -8(1) +; NOVSX-NEXT: bc 12, 3, .LBB6_14 +; NOVSX-NEXT: # %bb.13: # %entry +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: .LBB6_14: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: ld 4, -16(1) +; NOVSX-NEXT: bc 4, 2, .LBB6_19 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: bc 4, 2, .LBB6_20 +; NOVSX-NEXT: .LBB6_16: # %entry +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB6_18 +; NOVSX-NEXT: .LBB6_17: # %entry +; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: .LBB6_18: # %entry +; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB6_19: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: bc 12, 2, .LBB6_16 +; NOVSX-NEXT: .LBB6_20: # %entry +; NOVSX-NEXT: fmr 4, 2 +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 4, 2, .LBB6_17 +; NOVSX-NEXT: b .LBB6_18 +; +; VSX-LABEL: v2f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 36, 
35, 35 +; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha +; VSX-NEXT: xvcmpeqdp 37, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l +; VSX-NEXT: xvmindp 0, 34, 35 +; VSX-NEXT: lxvd2x 32, 0, 3 +; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: vcmpequd 1, 2, 0 +; VSX-NEXT: xxlnor 36, 36, 36 +; VSX-NEXT: xxlnor 37, 37, 37 +; VSX-NEXT: xxlor 2, 37, 36 +; VSX-NEXT: vcmpequd 4, 3, 0 +; VSX-NEXT: xxsel 0, 0, 1, 2 +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: xxsel 2, 0, 34, 33 +; VSX-NEXT: xvcmpeqdp 34, 0, 1 +; VSX-NEXT: xxsel 1, 2, 35, 36 +; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C6(2) # %const.1 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmindp 0, 34, 35 +; AIX-NEXT: lxvd2x 32, 0, 3 +; AIX-NEXT: ld 3, L..C7(2) # %const.0 +; AIX-NEXT: xxlnor 36, 36, 36 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: xxlnor 37, 37, 37 +; AIX-NEXT: vcmpequd 1, 2, 0 +; AIX-NEXT: xxlor 2, 37, 36 +; AIX-NEXT: vcmpequd 4, 3, 0 +; AIX-NEXT: xxsel 0, 0, 1, 2 +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: xxsel 2, 0, 34, 33 +; AIX-NEXT: xvcmpeqdp 34, 0, 1 +; AIX-NEXT: xxsel 1, 2, 35, 36 +; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 6, 1 +; NOVSX-NEXT: stfd 4, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) +; NOVSX-NEXT: stfd 3, -32(1) +; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: bc 12, 1, .LBB7_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: .LBB7_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; NOVSX-NEXT: ld 3, -24(1) +; NOVSX-NEXT: fmr 5, 0 +; NOVSX-NEXT: bc 12, 3, .LBB7_4 +; 
NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: .LBB7_4: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -32(1) +; NOVSX-NEXT: bc 12, 2, .LBB7_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: .LBB7_6: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB7_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: .LBB7_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI7_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI7_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 5, 1 +; NOVSX-NEXT: bc 12, 2, .LBB7_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 3, 5 +; NOVSX-NEXT: .LBB7_10: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: fmr 5, 2 +; NOVSX-NEXT: bc 12, 1, .LBB7_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: .LBB7_12: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 12, 3, .LBB7_14 +; NOVSX-NEXT: # %bb.13: # %entry +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: .LBB7_14: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 4, 2, .LBB7_19 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB7_20 +; NOVSX-NEXT: .LBB7_16: # %entry +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB7_18 +; NOVSX-NEXT: .LBB7_17: # %entry +; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: .LBB7_18: # %entry +; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB7_19: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB7_16 +; NOVSX-NEXT: .LBB7_20: # %entry +; NOVSX-NEXT: fmr 4, 2 +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 4, 2, .LBB7_17 +; NOVSX-NEXT: b .LBB7_18 +; +; VSX-LABEL: v2f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 37, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; VSX-NEXT: xvcmpeqdp 32, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l +; VSX-NEXT: xvmaxdp 0, 34, 35 +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: xxlxor 36, 36, 36 +; VSX-NEXT: vcmpequd 
1, 2, 4 +; VSX-NEXT: xxlnor 37, 37, 37 +; VSX-NEXT: xxlnor 32, 32, 32 +; VSX-NEXT: xxlor 2, 32, 37 +; VSX-NEXT: vcmpequd 5, 3, 4 +; VSX-NEXT: xxsel 0, 0, 1, 2 +; VSX-NEXT: xxsel 1, 0, 34, 33 +; VSX-NEXT: xvcmpeqdp 34, 0, 36 +; VSX-NEXT: xxsel 1, 1, 35, 37 +; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C8(2) # %const.0 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmaxdp 0, 34, 35 +; AIX-NEXT: xxlxor 32, 32, 32 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: vcmpequd 1, 2, 0 +; AIX-NEXT: xxlnor 36, 36, 36 +; AIX-NEXT: xxlnor 37, 37, 37 +; AIX-NEXT: xxlor 2, 37, 36 +; AIX-NEXT: vcmpequd 4, 3, 0 +; AIX-NEXT: xxsel 0, 0, 1, 2 +; AIX-NEXT: xxsel 1, 0, 34, 33 +; AIX-NEXT: xvcmpeqdp 34, 0, 32 +; AIX-NEXT: xxsel 1, 1, 35, 36 +; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +declare float @llvm.maximum.f32(float, float) +declare double @llvm.maximum.f64(double, double) +declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) + +declare float @llvm.minimum.f32(float, float) +declare double @llvm.minimum.f64(double, double) +declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) >From 4b7e71bbb003c96e09ecae35d4bfde2cf73740e4 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 10 Oct 2023 13:31:26 +0800 Subject: [PATCH 2/5] Add small comments and update test --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 2 + .../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 334 +++++++++--------- 2 files changed, 169 insertions(+), 167 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 
f49390ac29c0d0..4a1eae5d656d9f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8220,6 +8220,8 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType())) return SDValue(); + // First, implement comparison not propagating NaN. If no native fmin or fmax + // available, use plain select with setcc instead. SDValue MinMax; if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, VT)) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll index 24fa7c716ea295..c33875dbfee464 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -21,13 +21,13 @@ define float @f32_minimum(float %a, float %b) { ; NOVSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) ; NOVSX-NEXT: .LBB0_4: # %entry ; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: lwz 4, -8(1) ; NOVSX-NEXT: cmplwi 3, 0 -; NOVSX-NEXT: lwz 3, -8(1) ; NOVSX-NEXT: bc 12, 2, .LBB0_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB0_6: # %entry -; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: xoris 3, 4, 32768 ; NOVSX-NEXT: cmplwi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB0_8 ; NOVSX-NEXT: # %bb.7: # %entry @@ -45,8 +45,8 @@ define float @f32_minimum(float %a, float %b) { ; ; VSX-LABEL: f32_minimum: ; VSX: # %bb.0: # %entry -; VSX-NEXT: fcmpu 0, 1, 2 ; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: fcmpu 0, 1, 2 ; VSX-NEXT: xscvdpspn 3, 2 ; VSX-NEXT: mffprwz 3, 0 ; VSX-NEXT: bc 12, 3, .LBB0_2 @@ -58,13 +58,13 @@ define float @f32_minimum(float %a, float %b) { ; VSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) ; VSX-NEXT: .LBB0_3: # %entry ; VSX-NEXT: xoris 3, 3, 32768 +; VSX-NEXT: mffprwz 4, 3 ; VSX-NEXT: cmplwi 3, 0 -; VSX-NEXT: mffprwz 3, 3 ; VSX-NEXT: bc 12, 2, .LBB0_5 ; VSX-NEXT: # %bb.4: # %entry ; VSX-NEXT: fmr 1, 0 ; VSX-NEXT: .LBB0_5: # %entry -; VSX-NEXT: xoris 3, 3, 32768 +; 
VSX-NEXT: xoris 3, 4, 32768 ; VSX-NEXT: cmplwi 3, 0 ; VSX-NEXT: bc 12, 2, .LBB0_7 ; VSX-NEXT: # %bb.6: # %entry @@ -81,8 +81,8 @@ define float @f32_minimum(float %a, float %b) { ; ; AIX-LABEL: f32_minimum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: fcmpu 0, 1, 2 ; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: fcmpu 0, 1, 2 ; AIX-NEXT: xscvdpspn 3, 2 ; AIX-NEXT: mffprwz 3, 0 ; AIX-NEXT: bc 12, 3, L..BB0_2 @@ -94,13 +94,13 @@ define float @f32_minimum(float %a, float %b) { ; AIX-NEXT: lfs 0, 0(4) ; AIX-NEXT: L..BB0_3: # %entry ; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: mffprwz 4, 3 ; AIX-NEXT: cmplwi 3, 0 -; AIX-NEXT: mffprwz 3, 3 ; AIX-NEXT: bc 12, 2, L..BB0_5 ; AIX-NEXT: # %bb.4: # %entry ; AIX-NEXT: fmr 1, 0 ; AIX-NEXT: L..BB0_5: # %entry -; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: xoris 3, 4, 32768 ; AIX-NEXT: cmplwi 3, 0 ; AIX-NEXT: bc 12, 2, L..BB0_7 ; AIX-NEXT: # %bb.6: # %entry @@ -137,12 +137,12 @@ define float @f32_maximum(float %a, float %b) { ; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) ; NOVSX-NEXT: .LBB1_4: # %entry ; NOVSX-NEXT: cmpwi 3, 0 -; NOVSX-NEXT: lwz 3, -8(1) +; NOVSX-NEXT: lwz 4, -8(1) ; NOVSX-NEXT: bc 12, 2, .LBB1_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB1_6: # %entry -; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: cmpwi 4, 0 ; NOVSX-NEXT: bc 12, 2, .LBB1_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 2, 1 @@ -159,8 +159,8 @@ define float @f32_maximum(float %a, float %b) { ; ; VSX-LABEL: f32_maximum: ; VSX: # %bb.0: # %entry -; VSX-NEXT: fcmpu 0, 1, 2 ; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: fcmpu 0, 1, 2 ; VSX-NEXT: xscvdpspn 3, 2 ; VSX-NEXT: mffprwz 3, 0 ; VSX-NEXT: bc 12, 3, .LBB1_2 @@ -171,13 +171,13 @@ define float @f32_maximum(float %a, float %b) { ; VSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha ; VSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) ; VSX-NEXT: .LBB1_3: # %entry +; VSX-NEXT: mffprwz 4, 3 ; VSX-NEXT: cmpwi 3, 0 -; VSX-NEXT: mffprwz 3, 3 ; VSX-NEXT: bc 12, 2, .LBB1_5 ; VSX-NEXT: # %bb.4: # %entry ; VSX-NEXT: fmr 1, 0 ; VSX-NEXT: 
.LBB1_5: # %entry -; VSX-NEXT: cmpwi 3, 0 +; VSX-NEXT: cmpwi 4, 0 ; VSX-NEXT: bc 12, 2, .LBB1_7 ; VSX-NEXT: # %bb.6: # %entry ; VSX-NEXT: fmr 2, 1 @@ -193,8 +193,8 @@ define float @f32_maximum(float %a, float %b) { ; ; AIX-LABEL: f32_maximum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: fcmpu 0, 1, 2 ; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: fcmpu 0, 1, 2 ; AIX-NEXT: xscvdpspn 3, 2 ; AIX-NEXT: mffprwz 3, 0 ; AIX-NEXT: bc 12, 3, L..BB1_2 @@ -205,13 +205,13 @@ define float @f32_maximum(float %a, float %b) { ; AIX-NEXT: ld 4, L..C1(2) # %const.0 ; AIX-NEXT: lfs 0, 0(4) ; AIX-NEXT: L..BB1_3: # %entry +; AIX-NEXT: mffprwz 4, 3 ; AIX-NEXT: cmpwi 3, 0 -; AIX-NEXT: mffprwz 3, 3 ; AIX-NEXT: bc 12, 2, L..BB1_5 ; AIX-NEXT: # %bb.4: # %entry ; AIX-NEXT: fmr 1, 0 ; AIX-NEXT: L..BB1_5: # %entry -; AIX-NEXT: cmpwi 3, 0 +; AIX-NEXT: cmpwi 4, 0 ; AIX-NEXT: bc 12, 2, L..BB1_7 ; AIX-NEXT: # %bb.6: # %entry ; AIX-NEXT: fmr 2, 1 @@ -246,15 +246,15 @@ define double @f64_minimum(double %a, double %b) { ; NOVSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; NOVSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) ; NOVSX-NEXT: .LBB2_4: # %entry -; NOVSX-NEXT: li 4, 1 -; NOVSX-NEXT: rldic 4, 4, 63, 0 -; NOVSX-NEXT: cmpd 3, 4 -; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: li 5, 1 +; NOVSX-NEXT: ld 4, -16(1) +; NOVSX-NEXT: rldic 5, 5, 63, 0 +; NOVSX-NEXT: cmpd 3, 5 ; NOVSX-NEXT: bc 12, 2, .LBB2_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB2_6: # %entry -; NOVSX-NEXT: cmpd 3, 4 +; NOVSX-NEXT: cmpd 4, 5 ; NOVSX-NEXT: bc 12, 2, .LBB2_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 2, 1 @@ -281,15 +281,15 @@ define double @f64_minimum(double %a, double %b) { ; VSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; VSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) ; VSX-NEXT: .LBB2_3: # %entry -; VSX-NEXT: li 4, 1 -; VSX-NEXT: rldic 4, 4, 63, 0 -; VSX-NEXT: cmpd 3, 4 -; VSX-NEXT: mffprd 3, 2 +; VSX-NEXT: li 5, 1 +; VSX-NEXT: mffprd 4, 2 +; VSX-NEXT: rldic 5, 5, 63, 0 +; VSX-NEXT: cmpd 3, 5 ; VSX-NEXT: bc 12, 2, .LBB2_5 ; VSX-NEXT: # 
%bb.4: # %entry ; VSX-NEXT: fmr 1, 0 ; VSX-NEXT: .LBB2_5: # %entry -; VSX-NEXT: cmpd 3, 4 +; VSX-NEXT: cmpd 4, 5 ; VSX-NEXT: bc 12, 2, .LBB2_7 ; VSX-NEXT: # %bb.6: # %entry ; VSX-NEXT: fmr 2, 1 @@ -315,15 +315,15 @@ define double @f64_minimum(double %a, double %b) { ; AIX-NEXT: ld 4, L..C2(2) # %const.0 ; AIX-NEXT: lfs 0, 0(4) ; AIX-NEXT: L..BB2_3: # %entry -; AIX-NEXT: li 4, 1 -; AIX-NEXT: rldic 4, 4, 63, 0 -; AIX-NEXT: cmpd 3, 4 -; AIX-NEXT: mffprd 3, 2 +; AIX-NEXT: li 5, 1 +; AIX-NEXT: mffprd 4, 2 +; AIX-NEXT: rldic 5, 5, 63, 0 +; AIX-NEXT: cmpd 3, 5 ; AIX-NEXT: bc 12, 2, L..BB2_5 ; AIX-NEXT: # %bb.4: # %entry ; AIX-NEXT: fmr 1, 0 ; AIX-NEXT: L..BB2_5: # %entry -; AIX-NEXT: cmpd 3, 4 +; AIX-NEXT: cmpd 4, 5 ; AIX-NEXT: bc 12, 2, L..BB2_7 ; AIX-NEXT: # %bb.6: # %entry ; AIX-NEXT: fmr 2, 1 @@ -359,12 +359,12 @@ define double @f64_maximum(double %a, double %b) { ; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) ; NOVSX-NEXT: .LBB3_4: # %entry ; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: ld 4, -16(1) ; NOVSX-NEXT: bc 12, 2, .LBB3_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB3_6: # %entry -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: cmpdi 4, 0 ; NOVSX-NEXT: bc 12, 2, .LBB3_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 2, 1 @@ -391,13 +391,13 @@ define double @f64_maximum(double %a, double %b) { ; VSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; VSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) ; VSX-NEXT: .LBB3_3: # %entry +; VSX-NEXT: mffprd 4, 2 ; VSX-NEXT: cmpdi 3, 0 -; VSX-NEXT: mffprd 3, 2 ; VSX-NEXT: bc 12, 2, .LBB3_5 ; VSX-NEXT: # %bb.4: # %entry ; VSX-NEXT: fmr 1, 0 ; VSX-NEXT: .LBB3_5: # %entry -; VSX-NEXT: cmpdi 3, 0 +; VSX-NEXT: cmpdi 4, 0 ; VSX-NEXT: bc 12, 2, .LBB3_7 ; VSX-NEXT: # %bb.6: # %entry ; VSX-NEXT: fmr 2, 1 @@ -423,13 +423,13 @@ define double @f64_maximum(double %a, double %b) { ; AIX-NEXT: ld 4, L..C3(2) # %const.0 ; AIX-NEXT: lfs 0, 0(4) ; AIX-NEXT: L..BB3_3: # %entry +; AIX-NEXT: mffprd 4, 2 ; AIX-NEXT: cmpdi 3, 0 -; 
AIX-NEXT: mffprd 3, 2 ; AIX-NEXT: bc 12, 2, L..BB3_5 ; AIX-NEXT: # %bb.4: # %entry ; AIX-NEXT: fmr 1, 0 ; AIX-NEXT: L..BB3_5: # %entry -; AIX-NEXT: cmpdi 3, 0 +; AIX-NEXT: cmpdi 4, 0 ; AIX-NEXT: bc 12, 2, L..BB3_7 ; AIX-NEXT: # %bb.6: # %entry ; AIX-NEXT: fmr 2, 1 @@ -450,71 +450,71 @@ entry: define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; NOVSX-LABEL: v4f32_minimum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: vcmpeqfp 5, 3, 3 -; NOVSX-NEXT: vspltisb 4, -1 +; NOVSX-NEXT: vcmpeqfp 0, 3, 3 +; NOVSX-NEXT: vcmpeqfp 1, 2, 2 ; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; NOVSX-NEXT: vcmpeqfp 0, 2, 2 ; NOVSX-NEXT: addi 3, 3, .LCPI4_0@toc@l -; NOVSX-NEXT: vcmpgtfp 1, 3, 2 -; NOVSX-NEXT: vslw 4, 4, 4 -; NOVSX-NEXT: vnot 5, 5 ; NOVSX-NEXT: vnot 0, 0 -; NOVSX-NEXT: vsel 1, 3, 2, 1 -; NOVSX-NEXT: vor 5, 0, 5 -; NOVSX-NEXT: lvx 0, 0, 3 -; NOVSX-NEXT: vsel 5, 1, 0, 5 +; NOVSX-NEXT: vnot 1, 1 +; NOVSX-NEXT: vspltisb 4, -1 +; NOVSX-NEXT: vcmpgtfp 5, 3, 2 +; NOVSX-NEXT: vslw 4, 4, 4 +; NOVSX-NEXT: vor 0, 1, 0 +; NOVSX-NEXT: lvx 1, 0, 3 +; NOVSX-NEXT: vsel 5, 3, 2, 5 +; NOVSX-NEXT: vsel 5, 5, 1, 0 ; NOVSX-NEXT: vcmpequw 0, 2, 4 ; NOVSX-NEXT: vcmpequw 4, 3, 4 ; NOVSX-NEXT: vsel 2, 5, 2, 0 -; NOVSX-NEXT: vxor 0, 0, 0 ; NOVSX-NEXT: vsel 2, 2, 3, 4 -; NOVSX-NEXT: vcmpeqfp 3, 5, 0 +; NOVSX-NEXT: vxor 3, 3, 3 +; NOVSX-NEXT: vcmpeqfp 3, 5, 3 ; NOVSX-NEXT: vsel 2, 5, 2, 3 ; NOVSX-NEXT: blr ; ; VSX-LABEL: v4f32_minimum: ; VSX: # %bb.0: # %entry -; VSX-NEXT: xxleqv 36, 36, 36 +; VSX-NEXT: xvcmpeqsp 1, 35, 35 +; VSX-NEXT: xvcmpeqsp 2, 34, 34 ; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; VSX-NEXT: xvcmpeqsp 0, 35, 35 -; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l -; VSX-NEXT: xvcmpeqsp 1, 34, 34 -; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxleqv 36, 36, 36 +; VSX-NEXT: xvminsp 0, 34, 35 ; VSX-NEXT: vslw 4, 4, 4 -; VSX-NEXT: xvminsp 2, 34, 35 -; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l ; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: xxlnor 2, 2, 2 ; VSX-NEXT: vcmpequw 5, 2, 4 
-; VSX-NEXT: xxlor 0, 1, 0 -; VSX-NEXT: vcmpequw 4, 3, 4 -; VSX-NEXT: xxsel 0, 2, 3, 0 -; VSX-NEXT: xxlxor 1, 1, 1 -; VSX-NEXT: xxsel 2, 0, 34, 37 -; VSX-NEXT: xvcmpeqsp 1, 0, 1 -; VSX-NEXT: xxsel 2, 2, 35, 36 -; VSX-NEXT: xxsel 34, 0, 2, 1 +; VSX-NEXT: xxlor 1, 2, 1 +; VSX-NEXT: lxvd2x 2, 0, 3 +; VSX-NEXT: xxsel 0, 0, 2, 1 +; VSX-NEXT: xxlxor 2, 2, 2 +; VSX-NEXT: xvcmpeqsp 2, 0, 2 +; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: vcmpequw 2, 3, 4 +; VSX-NEXT: xxsel 1, 1, 35, 34 +; VSX-NEXT: xxsel 34, 0, 1, 2 ; VSX-NEXT: blr ; ; AIX-LABEL: v4f32_minimum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: xxleqv 36, 36, 36 +; AIX-NEXT: xvcmpeqsp 1, 35, 35 +; AIX-NEXT: xvcmpeqsp 2, 34, 34 ; AIX-NEXT: ld 3, L..C4(2) # %const.0 -; AIX-NEXT: xvcmpeqsp 0, 35, 35 -; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: xxleqv 36, 36, 36 +; AIX-NEXT: xvminsp 0, 34, 35 ; AIX-NEXT: vslw 4, 4, 4 -; AIX-NEXT: lxvw4x 3, 0, 3 -; AIX-NEXT: xvminsp 2, 34, 35 -; AIX-NEXT: xxlnor 0, 0, 0 ; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: xxlnor 2, 2, 2 ; AIX-NEXT: vcmpequw 5, 2, 4 -; AIX-NEXT: xxlor 0, 1, 0 -; AIX-NEXT: vcmpequw 4, 3, 4 -; AIX-NEXT: xxsel 0, 2, 3, 0 -; AIX-NEXT: xxlxor 1, 1, 1 -; AIX-NEXT: xxsel 2, 0, 34, 37 -; AIX-NEXT: xvcmpeqsp 1, 0, 1 -; AIX-NEXT: xxsel 2, 2, 35, 36 -; AIX-NEXT: xxsel 34, 0, 2, 1 +; AIX-NEXT: xxlor 1, 2, 1 +; AIX-NEXT: lxvw4x 2, 0, 3 +; AIX-NEXT: xxsel 0, 0, 2, 1 +; AIX-NEXT: xxlxor 2, 2, 2 +; AIX-NEXT: xvcmpeqsp 2, 0, 2 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: vcmpequw 2, 3, 4 +; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xxsel 34, 0, 1, 2 ; AIX-NEXT: blr entry: %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) @@ -524,18 +524,18 @@ entry: define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; NOVSX-LABEL: v4f32_maximum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: vcmpeqfp 4, 3, 3 +; NOVSX-NEXT: vcmpeqfp 5, 3, 3 +; NOVSX-NEXT: vcmpeqfp 0, 2, 2 ; NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; NOVSX-NEXT: vcmpeqfp 5, 2, 2 ; NOVSX-NEXT: addi 3, 3, 
.LCPI5_0@toc@l -; NOVSX-NEXT: vcmpgtfp 0, 2, 3 -; NOVSX-NEXT: lvx 1, 0, 3 -; NOVSX-NEXT: vnot 4, 4 ; NOVSX-NEXT: vnot 5, 5 -; NOVSX-NEXT: vsel 0, 3, 2, 0 -; NOVSX-NEXT: vor 4, 5, 4 +; NOVSX-NEXT: vnot 0, 0 +; NOVSX-NEXT: vcmpgtfp 4, 2, 3 +; NOVSX-NEXT: vor 5, 0, 5 +; NOVSX-NEXT: lvx 0, 0, 3 +; NOVSX-NEXT: vsel 4, 3, 2, 4 +; NOVSX-NEXT: vsel 4, 4, 0, 5 ; NOVSX-NEXT: vxor 5, 5, 5 -; NOVSX-NEXT: vsel 4, 0, 1, 4 ; NOVSX-NEXT: vcmpequw 0, 2, 5 ; NOVSX-NEXT: vsel 2, 4, 2, 0 ; NOVSX-NEXT: vcmpequw 0, 3, 5 @@ -546,42 +546,42 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; ; VSX-LABEL: v4f32_maximum: ; VSX: # %bb.0: # %entry -; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: xvcmpeqsp 1, 35, 35 +; VSX-NEXT: xvcmpeqsp 2, 34, 34 ; VSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; VSX-NEXT: xvcmpeqsp 1, 34, 34 ; VSX-NEXT: addi 3, 3, .LCPI5_0@toc@l -; VSX-NEXT: xvmaxsp 2, 34, 35 -; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: xxlnor 2, 2, 2 +; VSX-NEXT: xvmaxsp 0, 34, 35 ; VSX-NEXT: xxlxor 36, 36, 36 ; VSX-NEXT: vcmpequw 5, 2, 4 -; VSX-NEXT: xxlnor 0, 0, 0 -; VSX-NEXT: xxlnor 1, 1, 1 -; VSX-NEXT: vcmpequw 0, 3, 4 -; VSX-NEXT: xxlor 0, 1, 0 -; VSX-NEXT: xxsel 0, 2, 3, 0 -; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: xxlor 1, 2, 1 +; VSX-NEXT: lxvd2x 2, 0, 3 +; VSX-NEXT: xxsel 0, 0, 2, 1 ; VSX-NEXT: xvcmpeqsp 2, 0, 36 -; VSX-NEXT: xxsel 1, 1, 35, 32 +; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: vcmpequw 2, 3, 4 +; VSX-NEXT: xxsel 1, 1, 35, 34 ; VSX-NEXT: xxsel 34, 0, 1, 2 ; VSX-NEXT: blr ; ; AIX-LABEL: v4f32_maximum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: xvcmpeqsp 1, 35, 35 +; AIX-NEXT: xvcmpeqsp 2, 34, 34 ; AIX-NEXT: ld 3, L..C5(2) # %const.0 -; AIX-NEXT: xvcmpeqsp 1, 34, 34 -; AIX-NEXT: xvmaxsp 2, 34, 35 +; AIX-NEXT: xvmaxsp 0, 34, 35 ; AIX-NEXT: xxlxor 36, 36, 36 -; AIX-NEXT: lxvw4x 3, 0, 3 -; AIX-NEXT: vcmpequw 5, 2, 4 -; AIX-NEXT: xxlnor 0, 0, 0 ; AIX-NEXT: xxlnor 1, 1, 1 -; AIX-NEXT: vcmpequw 0, 3, 4 -; AIX-NEXT: 
xxlor 0, 1, 0 -; AIX-NEXT: xxsel 0, 2, 3, 0 -; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: xxlnor 2, 2, 2 +; AIX-NEXT: vcmpequw 5, 2, 4 +; AIX-NEXT: xxlor 1, 2, 1 +; AIX-NEXT: lxvw4x 2, 0, 3 +; AIX-NEXT: xxsel 0, 0, 2, 1 ; AIX-NEXT: xvcmpeqsp 2, 0, 36 -; AIX-NEXT: xxsel 1, 1, 35, 32 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: vcmpequw 2, 3, 4 +; AIX-NEXT: xxsel 1, 1, 35, 34 ; AIX-NEXT: xxsel 34, 0, 1, 2 ; AIX-NEXT: blr entry: @@ -611,14 +611,14 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; NOVSX-NEXT: fmr 5, 6 ; NOVSX-NEXT: .LBB6_4: # %entry ; NOVSX-NEXT: li 3, 1 +; NOVSX-NEXT: ld 5, -32(1) ; NOVSX-NEXT: rldic 3, 3, 63, 0 ; NOVSX-NEXT: cmpd 4, 3 -; NOVSX-NEXT: ld 4, -32(1) ; NOVSX-NEXT: bc 12, 2, .LBB6_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 5 ; NOVSX-NEXT: .LBB6_6: # %entry -; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: cmpd 5, 3 ; NOVSX-NEXT: bc 12, 2, .LBB6_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 3, 1 @@ -636,12 +636,12 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; NOVSX-NEXT: # %bb.11: # %entry ; NOVSX-NEXT: fmr 5, 4 ; NOVSX-NEXT: .LBB6_12: # %entry -; NOVSX-NEXT: ld 4, -8(1) +; NOVSX-NEXT: ld 5, -8(1) ; NOVSX-NEXT: bc 12, 3, .LBB6_14 ; NOVSX-NEXT: # %bb.13: # %entry ; NOVSX-NEXT: fmr 0, 5 ; NOVSX-NEXT: .LBB6_14: # %entry -; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: cmpd 5, 3 ; NOVSX-NEXT: ld 4, -16(1) ; NOVSX-NEXT: bc 4, 2, .LBB6_19 ; NOVSX-NEXT: # %bb.15: # %entry @@ -668,47 +668,47 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; ; VSX-LABEL: v2f64_minimum: ; VSX: # %bb.0: # %entry +; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; VSX-NEXT: xvcmpeqdp 36, 35, 35 -; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha ; VSX-NEXT: xvcmpeqdp 37, 34, 34 -; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l -; VSX-NEXT: xvmindp 0, 34, 35 -; VSX-NEXT: lxvd2x 32, 0, 3 -; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l -; VSX-NEXT: lxvd2x 1, 0, 3 -; VSX-NEXT: vcmpequd 1, 2, 0 
; VSX-NEXT: xxlnor 36, 36, 36 ; VSX-NEXT: xxlnor 37, 37, 37 -; VSX-NEXT: xxlor 2, 37, 36 -; VSX-NEXT: vcmpequd 4, 3, 0 -; VSX-NEXT: xxsel 0, 0, 1, 2 -; VSX-NEXT: xxlxor 1, 1, 1 -; VSX-NEXT: xxsel 2, 0, 34, 33 -; VSX-NEXT: xvcmpeqdp 34, 0, 1 -; VSX-NEXT: xxsel 1, 2, 35, 36 +; VSX-NEXT: xvmindp 0, 34, 35 +; VSX-NEXT: lxvd2x 2, 0, 3 +; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha +; VSX-NEXT: xxlor 1, 37, 36 +; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l +; VSX-NEXT: lxvd2x 36, 0, 3 +; VSX-NEXT: vcmpequd 5, 2, 4 +; VSX-NEXT: xxsel 0, 0, 2, 1 +; VSX-NEXT: xxlxor 2, 2, 2 +; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: vcmpequd 2, 3, 4 +; VSX-NEXT: xxsel 1, 1, 35, 34 +; VSX-NEXT: xvcmpeqdp 34, 0, 2 ; VSX-NEXT: xxsel 34, 0, 1, 34 ; VSX-NEXT: blr ; ; AIX-LABEL: v2f64_minimum: ; AIX: # %bb.0: # %entry +; AIX-NEXT: ld 3, L..C6(2) # %const.0 ; AIX-NEXT: xvcmpeqdp 36, 35, 35 -; AIX-NEXT: ld 3, L..C6(2) # %const.1 ; AIX-NEXT: xvcmpeqdp 37, 34, 34 -; AIX-NEXT: xvmindp 0, 34, 35 -; AIX-NEXT: lxvd2x 32, 0, 3 -; AIX-NEXT: ld 3, L..C7(2) # %const.0 +; AIX-NEXT: lxvd2x 2, 0, 3 +; AIX-NEXT: ld 3, L..C7(2) # %const.1 ; AIX-NEXT: xxlnor 36, 36, 36 -; AIX-NEXT: lxvd2x 1, 0, 3 ; AIX-NEXT: xxlnor 37, 37, 37 -; AIX-NEXT: vcmpequd 1, 2, 0 -; AIX-NEXT: xxlor 2, 37, 36 -; AIX-NEXT: vcmpequd 4, 3, 0 -; AIX-NEXT: xxsel 0, 0, 1, 2 -; AIX-NEXT: xxlxor 1, 1, 1 -; AIX-NEXT: xxsel 2, 0, 34, 33 -; AIX-NEXT: xvcmpeqdp 34, 0, 1 -; AIX-NEXT: xxsel 1, 2, 35, 36 +; AIX-NEXT: xvmindp 0, 34, 35 +; AIX-NEXT: xxlor 1, 37, 36 +; AIX-NEXT: lxvd2x 36, 0, 3 +; AIX-NEXT: vcmpequd 5, 2, 4 +; AIX-NEXT: xxsel 0, 0, 2, 1 +; AIX-NEXT: xxlxor 2, 2, 2 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: vcmpequd 2, 3, 4 +; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xvcmpeqdp 34, 0, 2 ; AIX-NEXT: xxsel 34, 0, 1, 34 ; AIX-NEXT: blr entry: @@ -729,21 +729,21 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr 6, 3 ; NOVSX-NEXT: .LBB7_2: # %entry -; NOVSX-NEXT: addis 3, 2, 
.LCPI7_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; NOVSX-NEXT: addis 4, 2, .LCPI7_0@toc@ha ; NOVSX-NEXT: ld 3, -24(1) +; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(4) ; NOVSX-NEXT: fmr 5, 0 ; NOVSX-NEXT: bc 12, 3, .LBB7_4 ; NOVSX-NEXT: # %bb.3: # %entry ; NOVSX-NEXT: fmr 5, 6 ; NOVSX-NEXT: .LBB7_4: # %entry ; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: ld 3, -32(1) +; NOVSX-NEXT: ld 4, -32(1) ; NOVSX-NEXT: bc 12, 2, .LBB7_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 5 ; NOVSX-NEXT: .LBB7_6: # %entry -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: cmpdi 4, 0 ; NOVSX-NEXT: bc 12, 2, .LBB7_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 3, 1 @@ -761,12 +761,12 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; NOVSX-NEXT: # %bb.11: # %entry ; NOVSX-NEXT: fmr 5, 4 ; NOVSX-NEXT: .LBB7_12: # %entry -; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: ld 4, -8(1) ; NOVSX-NEXT: bc 12, 3, .LBB7_14 ; NOVSX-NEXT: # %bb.13: # %entry ; NOVSX-NEXT: fmr 0, 5 ; NOVSX-NEXT: .LBB7_14: # %entry -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: cmpdi 4, 0 ; NOVSX-NEXT: ld 3, -16(1) ; NOVSX-NEXT: bc 4, 2, .LBB7_19 ; NOVSX-NEXT: # %bb.15: # %entry @@ -793,42 +793,42 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; ; VSX-LABEL: v2f64_maximum: ; VSX: # %bb.0: # %entry -; VSX-NEXT: xvcmpeqdp 37, 35, 35 ; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; VSX-NEXT: xvcmpeqdp 32, 34, 34 +; VSX-NEXT: xvcmpeqdp 36, 35, 35 +; VSX-NEXT: xvcmpeqdp 37, 34, 34 ; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l +; VSX-NEXT: xxlnor 36, 36, 36 +; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xvmaxdp 0, 34, 35 -; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: lxvd2x 2, 0, 3 +; VSX-NEXT: xxlor 1, 37, 36 ; VSX-NEXT: xxlxor 36, 36, 36 -; VSX-NEXT: vcmpequd 1, 2, 4 -; VSX-NEXT: xxlnor 37, 37, 37 -; VSX-NEXT: xxlnor 32, 32, 32 -; VSX-NEXT: xxlor 2, 32, 37 -; VSX-NEXT: vcmpequd 5, 3, 4 -; VSX-NEXT: xxsel 0, 0, 1, 2 -; VSX-NEXT: xxsel 1, 0, 34, 33 +; VSX-NEXT: vcmpequd 5, 2, 4 +; VSX-NEXT: xxsel 0, 0, 2, 1 +; 
VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: vcmpequd 2, 3, 4 +; VSX-NEXT: xxsel 1, 1, 35, 34 ; VSX-NEXT: xvcmpeqdp 34, 0, 36 -; VSX-NEXT: xxsel 1, 1, 35, 37 ; VSX-NEXT: xxsel 34, 0, 1, 34 ; VSX-NEXT: blr ; ; AIX-LABEL: v2f64_maximum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: xvcmpeqdp 36, 35, 35 ; AIX-NEXT: ld 3, L..C8(2) # %const.0 +; AIX-NEXT: xvcmpeqdp 36, 35, 35 ; AIX-NEXT: xvcmpeqdp 37, 34, 34 -; AIX-NEXT: xvmaxdp 0, 34, 35 -; AIX-NEXT: xxlxor 32, 32, 32 -; AIX-NEXT: lxvd2x 1, 0, 3 -; AIX-NEXT: vcmpequd 1, 2, 0 +; AIX-NEXT: lxvd2x 2, 0, 3 ; AIX-NEXT: xxlnor 36, 36, 36 ; AIX-NEXT: xxlnor 37, 37, 37 -; AIX-NEXT: xxlor 2, 37, 36 -; AIX-NEXT: vcmpequd 4, 3, 0 -; AIX-NEXT: xxsel 0, 0, 1, 2 -; AIX-NEXT: xxsel 1, 0, 34, 33 -; AIX-NEXT: xvcmpeqdp 34, 0, 32 -; AIX-NEXT: xxsel 1, 1, 35, 36 +; AIX-NEXT: xvmaxdp 0, 34, 35 +; AIX-NEXT: xxlor 1, 37, 36 +; AIX-NEXT: xxlxor 36, 36, 36 +; AIX-NEXT: vcmpequd 5, 2, 4 +; AIX-NEXT: xxsel 0, 0, 2, 1 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: vcmpequd 2, 3, 4 +; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xvcmpeqdp 34, 0, 36 ; AIX-NEXT: xxsel 34, 0, 1, 34 ; AIX-NEXT: blr entry: >From 1a5aa06efab33fc6f614729b727950d0d0b6f9ff Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 6 Nov 2023 17:22:41 +0800 Subject: [PATCH 3/5] Address comments --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 327d8061030810..acba34f4ba15a6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8270,11 +8270,6 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, unsigned Opc = N->getOpcode(); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); - bool NoNaN = (N->getFlags().hasNoNaNs() || - (DAG.isKnownNeverNaN(LHS) && 
DAG.isKnownNeverNaN(RHS))); - bool NoZeroSign = - (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) || - DAG.isKnownNeverZeroFloat(RHS)); bool IsMax = Opc == ISD::FMAXIMUM; if (VT.isVector() && @@ -8285,19 +8280,21 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, // available, use plain select with setcc instead. SDValue MinMax; if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, - VT)) + VT)) { MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT, LHS, RHS); - else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT)) + } else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, + VT)) { MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS); - else - MinMax = DAG.getSelect( - DL, VT, - DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS, - RHS); + } else { + SDValue Compare = + DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT); + MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS); + } // Propagate any NaN of both operands - if (!NoNaN) { + if (!N->getFlags().hasNoNaNs() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { ConstantFP *FPNaN = ConstantFP::get( *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT))); MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO), @@ -8305,7 +8302,8 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, } // fminimum/fmaximum requires -0.0 less than +0.0 - if (!NoZeroSign) { + if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(LHS) && + !DAG.isKnownNeverZeroFloat(RHS)) { SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ); SDValue TestZero = >From 2ec61a1ecf6a294faf6776d152be08a27314b591 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 19 Dec 2023 11:04:26 +0800 Subject: [PATCH 4/5] Fix style --- 
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 594362714332c0..01b2e8d18afb93 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -539,8 +539,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtZfa()) { setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); - } else + } else { setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); + } } if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit()) >From 8f897835d79f861d6d607beb42163d0425c39b6b Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Wed, 20 Dec 2023 14:50:59 +0800 Subject: [PATCH 5/5] Codestyle refactor --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 931b349bcfb25b..3b928f8bd9d694 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8327,14 +8327,14 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, // First, implement comparison not propagating NaN. If no native fmin or fmax // available, use plain select with setcc instead. SDValue MinMax; - if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, - VT)) { - MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT, - LHS, RHS); - } else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, - VT)) { - MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS); + unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; + unsigned CompOpc = IsMax ? 
ISD::FMAXNUM : ISD::FMINNUM; + if (isOperationLegalOrCustom(CompOpcIeee, VT)) { + MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS); + } else if (isOperationLegalOrCustom(CompOpc, VT)) { + MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS); } else { + // Any NaN is propagated later, so ordered vs. unordered doesn't matter. SDValue Compare = DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT); MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS); _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits