llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) <details> <summary>Changes</summary> --- Patch is 404.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114517.diff 4 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+100-3) - (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+5) - (modified) llvm/test/CodeGen/RISCV/memcmp-optsize.ll (+920-530) - (modified) llvm/test/CodeGen/RISCV/memcmp.ll (+4570-1843) ``````````diff diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3b3f8772a08940..89b4f22a1260db 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a +/// recognizable memcmp expansion. +static bool isOrXorXorTree(SDValue X, bool Root = true) { + if (X.getOpcode() == ISD::OR) + return isOrXorXorTree(X.getOperand(0), false) && + isOrXorXorTree(X.getOperand(1), false); + if (Root) + return false; + return X.getOpcode() == ISD::XOR; +} + +/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp +/// expansion. +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, + EVT VecVT, EVT CmpVT) { + SDValue Op0 = X.getOperand(0); + SDValue Op1 = X.getOperand(1); + if (X.getOpcode() == ISD::OR) { + SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT); + SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT); + if (VecVT != CmpVT) + return DAG.getNode(ISD::OR, DL, CmpVT, A, B); + return DAG.getNode(ISD::AND, DL, CmpVT, A, B); + } + if (X.getOpcode() == ISD::XOR) { + SDValue A = DAG.getBitcast(VecVT, Op0); + SDValue B = DAG.getBitcast(VecVT, Op1); + if (VecVT != CmpVT) + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + } + llvm_unreachable("Impossible"); +} + +/// Try to map a 128-bit or larger integer comparison to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, + const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); + + EVT OpVT = X.getValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + unsigned OpSize = OpVT.getSizeInBits(); + + // We're looking for an oversized integer equality comparison. + if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() || + OpSize < Subtarget.getRealMinVLen() || + OpSize > Subtarget.getRealMinVLen() * 8) + return SDValue(); + + bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); + if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { + X = peekThroughBitcasts(X); + return isa<ConstantSDNode>(X) || X.getValueType().isVector() || + X.getOpcode() == ISD::LOAD; + }; + if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) && + !IsOrXorXorTreeCCZero) + return SDValue(); + + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) { + unsigned VecSize = OpSize / 8; + EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize); + EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize); + + SDValue Cmp; + if (IsOrXorXorTreeCCZero) { + Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT); + } else { + SDValue VecX = DAG.getBitcast(VecVT, X); + SDValue VecY = DAG.getBitcast(VecVT, Y); + Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); + } + return DAG.getSetCC(DL, VT, + DAG.getNode(ISD::VECREDUCE_AND, DL, XLenVT, Cmp), + DAG.getConstant(0, DL, XLenVT), CC); + } + + return SDValue(); +} + // Replace (seteq (i64 (and X, 0xffffffff)), C1) with // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg // can become a sext.w instead of a shift pair. static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // Looking for an equality compare. + ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); + if (Cond == ISD::SETNE || Cond == ISD::SETEQ) { + if (SDValue V = combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, + Subtarget)) + return V; + } + if (OpVT != MVT::i64 || !Subtarget.is64Bit()) return SDValue(); @@ -14499,8 +14599,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) return SDValue(); - // Looking for an equality compare. - ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); if (!isIntEqualitySetCC(Cond)) return SDValue(); @@ -14512,7 +14610,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, const APInt &C1 = N1C->getAPIntValue(); - SDLoc dl(N); // If the constant is larger than 2^32 - 1 it is impossible for both sides // to be equal. if (C1.getActiveBits() > 32) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 5f5a18e2868730..d7b05001185f32 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2504,5 +2504,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {8, 4, 2, 1}; else Options.LoadSizes = {4, 2, 1}; + if (IsZeroCmp && ST->hasVInstructions()) { + unsigned RealMinVLen = ST->getRealMinVLen() / 8; + for (int LMUL = 1; LMUL <= 8; LMUL *= 2) + Options.LoadSizes.insert(Options.LoadSizes.begin(), RealMinVLen * LMUL); + } return Options; } diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll index 06fb88b02ea4a6..ba702b4921f098 100644 --- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll +++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll @@ -2910,190 +2910,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_16: ; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 3(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a1) -; CHECK-ALIGNED-RV32-V-NEXT: or a2, a4, a2 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 3(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a5, a3 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a6, a4 -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 4(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 5(a0) -; CHECK-ALIGNED-RV32-V-NEXT: xor a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 6(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 7(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a6, a3 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 4(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 5(a1) -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a3, a4 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 6(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 7(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a5, a6, a5 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a7, a4 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 8(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 9(a0) -; CHECK-ALIGNED-RV32-V-NEXT: xor a3, a3, a4 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 10(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 11(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a5, a6, a5 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a7, a4 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 8(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 9(a1) -; CHECK-ALIGNED-RV32-V-NEXT: or a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 10(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu t0, 11(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a6, a7, a6 -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli t0, t0, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a5, t0, a5 -; CHECK-ALIGNED-RV32-V-NEXT: or a5, a5, a6 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 12(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 13(a0) -; CHECK-ALIGNED-RV32-V-NEXT: xor a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 14(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 15(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a6, a7, a6 -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a5 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 12(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a7, 13(a1) -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a6 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a6, 14(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 15(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a5, a7, a5 -; CHECK-ALIGNED-RV32-V-NEXT: slli a6, a6, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a6 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a5 -; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-ALIGNED-RV32-V-NEXT: or a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a4, a0 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a2, a0 -; CHECK-ALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-ALIGNED-RV32-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-ALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-ALIGNED-RV32-V-NEXT: vmseq.vv v8, v8, v9 +; CHECK-ALIGNED-RV32-V-NEXT: vmnot.m v8, v8 +; CHECK-ALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-ALIGNED-RV32-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-V-NEXT: ret ; ; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_16: ; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 1(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 0(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 3(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a2, a2, a3 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 4(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 5(a0) -; CHECK-ALIGNED-RV64-V-NEXT: or a2, a4, a2 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 6(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 7(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a5, a3 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a6, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 32 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 0(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 1(a1) -; CHECK-ALIGNED-RV64-V-NEXT: or a2, a3, a2 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 3(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 4(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 5(a1) -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 6(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a7, 7(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a7, a7, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a7, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a4, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 32 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 8(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 9(a0) -; CHECK-ALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 10(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 11(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 12(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 13(a0) -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 14(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a0, 15(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 32 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 8(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 9(a1) -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 10(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 11(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a4, a5, a4 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a6, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 12(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a6, 13(a1) -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a3, a4 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 14(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a1, 15(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a6, a6, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a5, a6, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a5 -; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 32 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a3 -; CHECK-ALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a2, a0 -; CHECK-ALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-ALIGNED-RV64-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-ALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-ALIGNED-RV64-V-NEXT: vmseq.vv v8, v8, v9 +; CHECK-ALIGNED-RV64-V-NEXT: vmnot.m v8, v8 +; CHECK-ALIGNED-RV64-V-NEXT: vcpop.m a0, v8 +; CHECK-ALIGNED-RV64-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; ; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_16: @@ -3194,34 +3028,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_16: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a4, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmseq.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vmnot.m v8, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: seqz a0, a0 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_16: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmseq.vv v8, v8, v9 +; CHECK-UNALIGNED-RV64-V-NEXT: vmnot.m v8, v8 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV64-V-NEXT: seqz a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 16) @@ -3229,15 +3053,15 @@ entry: } define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { -; CHECK-RV32-LABEL: bcmp_size_31: -; CHECK-RV32: # %bb.0: # %entry -; CHECK-RV32-NEXT: addi sp, sp, -16 -; CHECK-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT: li a2, 31 -; CHECK-RV32-NEXT: call bcmp -; CHECK-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; CHECK-RV32-NEXT: addi sp, sp, 16 -; CHECK-RV32-NEXT: ret +; CHECK-ALIGNED-RV32-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV32: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, -16 +; CHECK-ALIGNED-RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-ALIGNED-RV32-NEXT: li a2, 31 +; CHECK-ALIGNED-RV32-NEXT: call bcmp +; CHECK-ALIGNED-RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-ALIGNED-RV32-NEXT: addi sp, sp, 16 +; CHECK-ALIGNED-RV32-NEXT: ret ; ; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31: ; CHECK-ALIGNED-RV64: # %bb.0: # %entry @@ -3249,6 +3073,16 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-NEXT: ret ; +; CHECK-ALIGNED-RV32-ZBB-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-ALIGNED-RV32-ZBB-NEXT: addi sp, sp, -16 +; CHECK-ALIGNED-RV32-ZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-ALIGNED-RV32-ZBB-NEXT: li a2, 31 +; CHECK-ALIGNED-RV32-ZBB-NEXT: call bcmp +; CHECK-ALIGNED-RV32-ZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-ALIGNED-RV32-ZBB-NEXT: addi sp, sp, 16 +; CHECK-ALIGNED-RV32-ZBB-NEXT: ret +; ; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31: ; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry ; CHECK... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/114517 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits