[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a +/// recognizable memcmp expansion. +static bool isOrXorXorTree(SDValue X, bool Root = true) { + if (X.getOpcode() == ISD::OR) +return isOrXorXorTree(X.getOperand(0), false) && + isOrXorXorTree(X.getOperand(1), false); + if (Root) +return false; + return X.getOpcode() == ISD::XOR; +} + +/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp +/// expansion. +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, +EVT VecVT, EVT CmpVT) { + SDValue Op0 = X.getOperand(0); + SDValue Op1 = X.getOperand(1); + if (X.getOpcode() == ISD::OR) { +SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT); +SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT); +if (VecVT != CmpVT) + return DAG.getNode(ISD::OR, DL, CmpVT, A, B); +return DAG.getNode(ISD::AND, DL, CmpVT, A, B); + } + if (X.getOpcode() == ISD::XOR) { +SDValue A = DAG.getBitcast(VecVT, Op0); +SDValue B = DAG.getBitcast(VecVT, Op1); +if (VecVT != CmpVT) + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); +return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + } + llvm_unreachable("Impossible"); +} + +/// Try to map a 128-bit or larger integer comparison to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); + + EVT OpVT = X.getValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + unsigned OpSize = OpVT.getSizeInBits(); + + // We're looking for an oversized integer equality comparison. + if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() || + OpSize < Subtarget.getRealMinVLen() || + OpSize > Subtarget.getRealMinVLen() * 8) topperc wrote: 8 here should be `Subtarget.getMaxLMULForFixedLengthVectors()` I think https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
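For illustration, a minimal sketch of what the guard would look like with this suggestion applied; this is only the quoted condition rewritten to take the bound from the subtarget, not the author's revision:

```cpp
// Sketch: derive the upper size limit from the subtarget's maximum LMUL for
// fixed-length vectors instead of hard-coding a factor of 8.
if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
    OpSize < Subtarget.getRealMinVLen() ||
    OpSize > Subtarget.getRealMinVLen() *
                 Subtarget.getMaxLMULForFixedLengthVectors())
  return SDValue();
```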
[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -3186,190 +3186,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { ; ; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_16: ; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-V-NEXT:lbu a2, 1(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 0(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 3(a0) -; CHECK-ALIGNED-RV32-V-NEXT:slli a2, a2, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a5, a4 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 0(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 1(a1) -; CHECK-ALIGNED-RV32-V-NEXT:or a2, a4, a2 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 2(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 3(a1) -; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a3, a5, a3 -; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a6, a4 -; CHECK-ALIGNED-RV32-V-NEXT:or a3, a4, a3 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 4(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 5(a0) -; CHECK-ALIGNED-RV32-V-NEXT:xor a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 6(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 7(a0) -; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a5, a4 -; CHECK-ALIGNED-RV32-V-NEXT:slli a3, a3, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a3, a6, a3 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 4(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 5(a1) -; CHECK-ALIGNED-RV32-V-NEXT:or a3, a3, a4 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 6(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 7(a1) -; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a5, a6, a5 -; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a7, a4 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 8(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 9(a0) -; CHECK-ALIGNED-RV32-V-NEXT:xor a3, a3, a4 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 10(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 11(a0) -; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a5, a6, a5 -; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a7, a4 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 8(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 9(a1) -; CHECK-ALIGNED-RV32-V-NEXT:or a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 10(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu t0, 11(a1) -; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a6, a7, a6 -; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli t0, t0, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a5, t0, a5 -; CHECK-ALIGNED-RV32-V-NEXT:or a5, a5, a6 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 12(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 13(a0) -; CHECK-ALIGNED-RV32-V-NEXT:xor a4, a4, a5 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 14(a0) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a0, 15(a0) -; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a6, a7, a6 -; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a0, a0, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a0, a0, a5 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 12(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 13(a1) -; CHECK-ALIGNED-RV32-V-NEXT:or a0, a0, a6 -; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 14(a1) -; CHECK-ALIGNED-RV32-V-NEXT:lbu a1, 15(a1) -; 
CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8 -; CHECK-ALIGNED-RV32-V-NEXT:or a5, a7, a5 -; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 16 -; CHECK-ALIGNED-RV32-V-NEXT:slli a1, a1, 24 -; CHECK-ALIGNED-RV32-V-NEXT:or a1, a1, a6 -; CHECK-ALIGNED-RV32-V-NEXT:or a1, a1, a5 -; CHECK-ALIGNED-RV32-V-NEXT:xor a0, a0, a1 -; CHECK-ALIGNED-RV32-V-NEXT:or a2, a2, a3 -; CHECK-ALIGNED-RV32-V-NEXT:or a0, a4, a0 -; CHECK-ALIGNED-RV32-V-NEXT:or a0, a2, a0 -; CHECK-ALIGNED-RV32-V-NEXT:snez a0, a0 +; CHECK-ALIGNED-RV32-V-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ALIGNED-RV32-V-NEXT:vle8.v v8, (a0) +; CHECK-ALIGNED-RV32-V-NEXT:vle8.v v9, (a1) +; CHECK-ALIGNED-RV32-V-NEXT:vmseq.vv v8, v8, v9 +; CHECK-ALIGNED-RV32-V-NEXT:vmnot.m v8, v8 topperc wrote: Missing combine to use vmsne? https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
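For illustration, one possible shape of the fold being asked about, sketched as a generic DAG combine; the surrounding `N`/`DAG` names and the hook it would live in are assumptions, not part of the patch:

```cpp
// Sketch: invert a one-use vector SETCC that feeds a logical NOT (xor with an
// all-ones splat), so the vmseq.vv + vmnot.m pair can select as vmsne.vv.
if (N->getOpcode() == ISD::XOR && isAllOnesOrAllOnesSplat(N->getOperand(1))) {
  SDValue Setcc = N->getOperand(0);
  if (Setcc.getOpcode() == ISD::SETCC && Setcc.hasOneUse() &&
      Setcc.getValueType().isVector()) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
    EVT SetccOpVT = Setcc.getOperand(0).getValueType();
    return DAG.getSetCC(SDLoc(N), Setcc.getValueType(), Setcc.getOperand(0),
                        Setcc.getOperand(1),
                        ISD::getSetCCInverse(CC, SetccOpVT));
  }
}
```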
[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a +/// recognizable memcmp expansion. +static bool isOrXorXorTree(SDValue X, bool Root = true) { + if (X.getOpcode() == ISD::OR) +return isOrXorXorTree(X.getOperand(0), false) && + isOrXorXorTree(X.getOperand(1), false); + if (Root) +return false; + return X.getOpcode() == ISD::XOR; +} + +/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp +/// expansion. +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, +EVT VecVT, EVT CmpVT) { + SDValue Op0 = X.getOperand(0); + SDValue Op1 = X.getOperand(1); + if (X.getOpcode() == ISD::OR) { +SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT); +SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT); +if (VecVT != CmpVT) + return DAG.getNode(ISD::OR, DL, CmpVT, A, B); +return DAG.getNode(ISD::AND, DL, CmpVT, A, B); + } + if (X.getOpcode() == ISD::XOR) { +SDValue A = DAG.getBitcast(VecVT, Op0); +SDValue B = DAG.getBitcast(VecVT, Op1); +if (VecVT != CmpVT) + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); +return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + } + llvm_unreachable("Impossible"); +} + +/// Try to map a 128-bit or larger integer comparison to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); + + EVT OpVT = X.getValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + unsigned OpSize = OpVT.getSizeInBits(); + + // We're looking for an oversized integer equality comparison. + if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() || + OpSize < Subtarget.getRealMinVLen() || + OpSize > Subtarget.getRealMinVLen() * 8) +return SDValue(); + + bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); + if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) +return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { +X = peekThroughBitcasts(X); +return isa(X) || X.getValueType().isVector() || + X.getOpcode() == ISD::LOAD; + }; + if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) && + !IsOrXorXorTreeCCZero) +return SDValue(); + + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) { topperc wrote: Isn't `Subtarget.hasVInstructions()` already checked earlier? https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
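A short sketch of the simplification the question implies, assuming the earlier bail-out already guarantees vector support:

```cpp
// Sketch: only the no-implicit-float check is still needed at this point.
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat))
  return SDValue();
// ... build VecVT/CmpVT and emit the vectorized compare unconditionally.
```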
[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -2504,5 +2504,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {8, 4, 2, 1}; else Options.LoadSizes = {4, 2, 1}; + if (IsZeroCmp && ST->hasVInstructions()) { +unsigned RealMinVLen = ST->getRealMinVLen() / 8; +for (int LMUL = 1; LMUL <= 8; LMUL *= 2) topperc wrote: Why do we want to limit this to LMUL>=1? Shouldn't we be able to do this for small vectors even when VLEN is large? https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
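For illustration, a sketch of the direction the question points at, allowing expansion sizes below one full VLEN register; the exact lower bound and the use of `getMaxLMULForFixedLengthVectors()` are assumptions made here, not taken from the patch:

```cpp
// Sketch: add power-of-two vector load sizes from just above the scalar
// sizes up to VLEN * max LMUL, keeping LoadSizes sorted largest-first.
if (IsZeroCmp && ST->hasVInstructions()) {
  unsigned XLenBytes = ST->getXLen() / 8;
  unsigned MaxBytes =
      (ST->getRealMinVLen() / 8) * ST->getMaxLMULForFixedLengthVectors();
  for (unsigned Size = 2 * XLenBytes; Size <= MaxBytes; Size *= 2)
    Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
}
```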
[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a +/// recognizable memcmp expansion. +static bool isOrXorXorTree(SDValue X, bool Root = true) { + if (X.getOpcode() == ISD::OR) +return isOrXorXorTree(X.getOperand(0), false) && + isOrXorXorTree(X.getOperand(1), false); + if (Root) +return false; + return X.getOpcode() == ISD::XOR; +} + +/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp +/// expansion. +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, +EVT VecVT, EVT CmpVT) { + SDValue Op0 = X.getOperand(0); + SDValue Op1 = X.getOperand(1); + if (X.getOpcode() == ISD::OR) { +SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT); +SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT); +if (VecVT != CmpVT) + return DAG.getNode(ISD::OR, DL, CmpVT, A, B); +return DAG.getNode(ISD::AND, DL, CmpVT, A, B); + } + if (X.getOpcode() == ISD::XOR) { +SDValue A = DAG.getBitcast(VecVT, Op0); +SDValue B = DAG.getBitcast(VecVT, Op1); +if (VecVT != CmpVT) + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); +return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + } + llvm_unreachable("Impossible"); +} + +/// Try to map a 128-bit or larger integer comparison to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); + + EVT OpVT = X.getValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + unsigned OpSize = OpVT.getSizeInBits(); + + // We're looking for an oversized integer equality comparison. + if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() || + OpSize < Subtarget.getRealMinVLen() || topperc wrote: Should this be `OpSize <= XLen` instead of `OpSize < Subtarget.getRealMinVLen()? Shouldn't we use fixed vectors for anything that doesn't fit in a scalar? https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
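A sketch of the relaxed entry condition being asked about, i.e. handing anything wider than a scalar register to the fixed-vector path; the rest of the quoted guard is kept unchanged and this is not the author's revision:

```cpp
// Sketch: lower bound tied to XLen rather than to a full VLEN worth of bits.
if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
    OpSize <= Subtarget.getXLen() ||
    OpSize > Subtarget.getRealMinVLen() * 8)
  return SDValue();
```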
[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)
@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D return true; } +/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a +/// recognizable memcmp expansion. +static bool isOrXorXorTree(SDValue X, bool Root = true) { + if (X.getOpcode() == ISD::OR) +return isOrXorXorTree(X.getOperand(0), false) && + isOrXorXorTree(X.getOperand(1), false); + if (Root) +return false; + return X.getOpcode() == ISD::XOR; +} + +/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp +/// expansion. +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, +EVT VecVT, EVT CmpVT) { + SDValue Op0 = X.getOperand(0); + SDValue Op1 = X.getOperand(1); + if (X.getOpcode() == ISD::OR) { +SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT); +SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT); +if (VecVT != CmpVT) + return DAG.getNode(ISD::OR, DL, CmpVT, A, B); +return DAG.getNode(ISD::AND, DL, CmpVT, A, B); + } + if (X.getOpcode() == ISD::XOR) { +SDValue A = DAG.getBitcast(VecVT, Op0); +SDValue B = DAG.getBitcast(VecVT, Op1); +if (VecVT != CmpVT) + return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); +return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + } + llvm_unreachable("Impossible"); +} + +/// Try to map a 128-bit or larger integer comparison to vector instructions +/// before type legalization splits it up into chunks. +static SDValue +combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, +const SDLoc &DL, SelectionDAG &DAG, +const RISCVSubtarget &Subtarget) { + assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); + + EVT OpVT = X.getValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + unsigned OpSize = OpVT.getSizeInBits(); + + // We're looking for an oversized integer equality comparison. + if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() || + OpSize < Subtarget.getRealMinVLen() || + OpSize > Subtarget.getRealMinVLen() * 8) +return SDValue(); + + bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); + if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) +return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { +X = peekThroughBitcasts(X); +return isa(X) || X.getValueType().isVector() || + X.getOpcode() == ISD::LOAD; + }; + if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) && + !IsOrXorXorTreeCCZero) +return SDValue(); + + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) { +unsigned VecSize = OpSize / 8; +EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize); +EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize); + +SDValue Cmp; +if (IsOrXorXorTreeCCZero) { + Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT); +} else { + SDValue VecX = DAG.getBitcast(VecVT, X); + SDValue VecY = DAG.getBitcast(VecVT, Y); + Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); +} +return DAG.getSetCC(DL, VT, +DAG.getNode(ISD::VECREDUCE_AND, DL, XLenVT, Cmp), +DAG.getConstant(0, DL, XLenVT), CC); + } + + return SDValue(); +} + // Replace (seteq (i64 (and X, 0x)), C1) with // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg // can become a sext.w instead of a shift pair. 
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // Looking for an equality compare. + ISD::CondCode Cond = cast(N->getOperand(2))->get(); + if (Cond == ISD::SETNE || Cond == ISD::SETEQ) { topperc wrote: Use `ISD::isIntEqualitySetCC(Cond)` https://github.com/llvm/llvm-project/pull/114517 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
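For illustration, the suggested tidy-up sketched against the quoted hunk; the call into `combineVectorSizedSetCCEquality` is assumed to sit directly under the predicate check as in the patch:

```cpp
// Sketch: query the predicate with the existing helper instead of spelling
// out both equality cases by hand.
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (ISD::isIntEqualitySetCC(Cond))
  if (SDValue V = combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG,
                                                  Subtarget))
    return V;
```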
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114726 >From 4e380599d038e8269c100f7a252331d5db9dffb7 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 3 Nov 2024 19:35:26 -0500 Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial state --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 105 +++--- .../annotate-kernel-features-hsa-call.ll | 46 .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 +- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 7 files changed, 117 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 7d51412730d4d5..85500b95eec1a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute if (!CallerInfo || !CallerInfo->isValidState()) return false; + /// When the caller AA is in its initial state, the state remains valid + /// but awaits propagation. We skip processing in this case. Note that we + /// must return true since the state is still considered valid. + if (CallerInfo->isAtInitialState()) { +LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller " + << Caller->getName() + << " is still at initial state. Skip the update.\n"); +return true; + } + Change |= clampStateAndIndicateChange(this->getState(), CallerInfo->getState()); @@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } + /// The initial state of `IntegerRangeState` represents an empty set, which + /// does not constitute a valid range. This empty state complicates + /// propagation, particularly for arithmetic operations like + /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the + /// initial state during processing. + bool isAtInitialState() const { +return isValidState() && getAssumed().isEmptySet(); + } + const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { Attributor &A); ChangeStatus manifest(Attributor &A) override { +if (isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] Still at initial state. No manifest.\n";); + return ChangeStatus::UNCHANGED; +} Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); return emitAttributeIfNotDefaultAfterClamp( @@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; +Function *F = getAssociatedFunction(); + +const auto *AAFlatWorkGroupSize = A.getAAFor( +*this, IRPosition::function(*F), DepClassTy::REQUIRED); +if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) { + LLVM_DEBUG( + dbgs() << '[' << getName() + << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n"); + return ChangeStatus::UNCHANGED; +} + +if (AAFlatWorkGroupSize->isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] AAAMDFlatWorkGroupSize is still at initial " + "state. 
Skip the update.\n"); + return ChangeStatus::UNCHANGED; +} + +auto CurrentWorkGroupSize = std::make_pair( +AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(), +AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1); + +auto DoUpdate = [&](std::pair WavesPerEU, +std::pair FlatWorkGroupSize) { + auto [Min, Max] = + InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize); + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + Change |= clampStateAndIndicateChange(this->getState(), IRS); +}; + +// // We need to clamp once if we are not at initial state, because +// // AAAMDFlatWorkGroupSize could be updated in last iteration. +if (!isAtInitialState()) { + auto CurrentWavesPerEU = + std::make_pair(getAssumed().getLower().getZExtValue(), + getAssumed().getUpper().getZExtValue() - 1); + DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize); +} + auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - Function *Func = getAssociatedFunctio
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes --- Patch is 31.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114726.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+87-18) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+22-24) - (modified) llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll (+1-2) - (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-2) - (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-6) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+1-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+1-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 7d51412730d4d5..85500b95eec1a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute if (!CallerInfo || !CallerInfo->isValidState()) return false; + /// When the caller AA is in its initial state, the state remains valid + /// but awaits propagation. We skip processing in this case. Note that we + /// must return true since the state is still considered valid. + if (CallerInfo->isAtInitialState()) { +LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller " + << Caller->getName() + << " is still at initial state. Skip the update.\n"); +return true; + } + Change |= clampStateAndIndicateChange(this->getState(), CallerInfo->getState()); @@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } + /// The initial state of `IntegerRangeState` represents an empty set, which + /// does not constitute a valid range. This empty state complicates + /// propagation, particularly for arithmetic operations like + /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the + /// initial state during processing. + bool isAtInitialState() const { +return isValidState() && getAssumed().isEmptySet(); + } + const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { Attributor &A); ChangeStatus manifest(Attributor &A) override { +if (isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] Still at initial state. No manifest.\n";); + return ChangeStatus::UNCHANGED; +} Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); return emitAttributeIfNotDefaultAfterClamp( @@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; +Function *F = getAssociatedFunction(); + +const auto *AAFlatWorkGroupSize = A.getAAFor( +*this, IRPosition::function(*F), DepClassTy::REQUIRED); +if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) { + LLVM_DEBUG( + dbgs() << '[' << getName() + << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n"); + return ChangeStatus::UNCHANGED; +} + +if (AAFlatWorkGroupSize->isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] AAAMDFlatWorkGroupSize is still at initial " + "state. 
Skip the update.\n"); + return ChangeStatus::UNCHANGED; +} + +auto CurrentWorkGroupSize = std::make_pair( +AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(), +AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1); + +auto DoUpdate = [&](std::pair WavesPerEU, +std::pair FlatWorkGroupSize) { + auto [Min, Max] = + InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize); + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + Change |= clampStateAndIndicateChange(this->getState(), IRS); +}; + +// // We need to clamp once if we are not at initial state, because +// // AAAMDFlatWorkGroupSize could be updated in last iteration. +if (!isAtInitialState()) { + auto CurrentWavesPerEU = + std::make_pair(getAssumed().getLower().getZExtValue(), + getAssumed().getUpper().getZExtValue() - 1); + DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize); +} + auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - Functi
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
shiltian wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

* **#114726** 👈
* **#114438**
* **#114357**
* `main`

This stack of pull requests is managed by Graphite (https://stacking.dev/).

https://github.com/llvm/llvm-project/pull/114726 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/114724 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/114746 >From dcf8feee9c8d410b42fa8bed29a15c14bb7d6d2e Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 4 Nov 2024 06:58:14 + Subject: [PATCH] [CodeGen] Move EnableSinkAndFold to TargetOptions --- llvm/include/llvm/CodeGen/TargetPassConfig.h | 8 llvm/include/llvm/Target/TargetOptions.h | 8 +++- llvm/lib/CodeGen/MachineSink.cpp | 5 - llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 4 ++-- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index 2f5951e3ec3bce..b395774b14c441 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -131,11 +131,6 @@ class TargetPassConfig : public ImmutablePass { /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge = true; - /// Enable sinking of instructions in MachineSink where a computation can be - /// folded into the addressing mode of a memory load/store instruction or - /// replace a copy. - bool EnableSinkAndFold = false; - /// Require processing of functions such that callees are generated before /// callers. bool RequireCodeGenSCCOrder = false; @@ -198,9 +193,6 @@ class TargetPassConfig : public ImmutablePass { bool getEnableTailMerge() const { return EnableTailMerge; } void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } - bool getEnableSinkAndFold() const { return EnableSinkAndFold; } - void setEnableSinkAndFold(bool Enable) { setOpt(EnableSinkAndFold, Enable); } - bool requiresCodeGenSCCOrder() const { return RequireCodeGenSCCOrder; } void setRequiresCodeGenSCCOrder(bool Enable = true) { setOpt(RequireCodeGenSCCOrder, Enable); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 88f253805ca99c..b16ad5b69ff05a 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -137,7 +137,8 @@ namespace llvm { ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), - EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), + EnableSinkAndFold(false), EnableFastISel(false), + EnableGlobalISel(false), UseInitArray(false), DisableIntegratedAS(false), FunctionSections(false), DataSections(false), IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), UniqueSectionNames(true), @@ -239,6 +240,11 @@ namespace llvm { /// they were generated. Default is true. unsigned StackSymbolOrdering : 1; +/// EnableSinkAndFold - Enable sinking of instructions in MachineSink where +/// a computation can be folded into the addressing mode of a memory +/// load/store instruction or replace a copy. +unsigned EnableSinkAndFold : 1; + /// EnableFastISel - This flag enables fast-path instruction selection /// which trades away generated code quality in favor of reducing /// compile time. 
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index a0e09398602e9e..a3a6b24f9be2d1 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -54,6 +54,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -729,7 +730,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); TargetPassConfig *PassConfig = &getAnalysis(); - EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); + auto &TM = PassConfig->getTM(); + EnableSinkAndFold = TM.Options.EnableSinkAndFold; + // EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); bool EverMadeChange = false; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index c7bd0390b65620..ee8aae4ee8bcc8 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -505,7 +505,7 @@ class AArch64PassConfig : public TargetPassConfig { : TargetPassConfig(TM, PM) { if (TM.getOptLevel() != CodeGenOptLevel::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); -setEnableSinkAndFold(EnableSinkFold); +TM.Options.EnableSinkAndFold = EnableSinkFold; } AArch64TargetMachine &getAArch64TargetMachine() const { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp i
[llvm-branch-commits] [llvm] [SimplifyLibCalls] Add initial support for non-8-bit bytes (PR #106542)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106542 >From 9859c07861131607e36d3de2ee0d2a9980b8d6da Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Wed, 28 Aug 2024 16:09:44 +0300 Subject: [PATCH] [SimplifyLibCalls] Add initial support for non-8-bit bytes The patch makes CharWidth argument of `getStringLength` mandatory and ensures the correct values are passed in most cases. This is *not* a complete support for unusual byte widths in SimplifyLibCalls since `getConstantStringInfo` returns false for those. The code guarded by `getConstantStringInfo` returning true is unchanged because the changes are currently not testable. --- llvm/include/llvm/Analysis/ValueTracking.h| 4 +- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 4 +- llvm/lib/Analysis/MemoryBuiltins.cpp | 3 +- llvm/lib/Analysis/ValueTracking.cpp | 40 ++-- .../InstCombine/InstCombineCalls.cpp | 12 +- .../InstCombine/InstructionCombining.cpp | 5 +- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 194 -- .../InstCombine/SimplifyLibCalls/fputs-b16.ll | 19 ++ .../SimplifyLibCalls/fwrite-b16.ll| 19 ++ .../SimplifyLibCalls/memchr-b16.ll| 34 +++ .../SimplifyLibCalls/memcmp-b32.ll| 32 +++ .../SimplifyLibCalls/memcpy-b16.ll| 69 +++ .../SimplifyLibCalls/memcpy_chk-b16.ll| 17 ++ .../SimplifyLibCalls/mempcpy-b16.ll | 17 ++ .../SimplifyLibCalls/memrchr-b16.ll | 20 ++ .../SimplifyLibCalls/memset-b16.ll| 66 ++ .../SimplifyLibCalls/stpcpy-b16.ll| 31 +++ .../SimplifyLibCalls/stpcpy_chk-b16.ll| 44 .../SimplifyLibCalls/stpncpy-b16.ll | 47 + .../SimplifyLibCalls/strcat-b16.ll| 20 ++ .../SimplifyLibCalls/strchr-b16.ll| 45 .../SimplifyLibCalls/strcmp-b32.ll| 50 + .../SimplifyLibCalls/strcpy-b16.ll| 18 ++ .../SimplifyLibCalls/strcpy_chk-b16.ll| 30 +++ .../SimplifyLibCalls/strlcpy-b16.ll | 18 ++ .../SimplifyLibCalls/strlen-b16.ll| 16 ++ .../SimplifyLibCalls/strncat-b16.ll | 20 ++ .../SimplifyLibCalls/strncmp-b32.ll | 34 +++ .../SimplifyLibCalls/strncpy-b16.ll | 43 .../SimplifyLibCalls/strndup-b16.ll | 17 ++ .../SimplifyLibCalls/strnlen-b16.ll | 18 ++ .../SimplifyLibCalls/wcslen-b16.ll| 19 ++ llvm/test/Transforms/InstCombine/bcmp-1.ll| 2 +- llvm/test/Transforms/InstCombine/memcmp-1.ll | 2 +- llvm/test/Transforms/InstCombine/strncmp-1.ll | 2 +- 35 files changed, 930 insertions(+), 101 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/fputs-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/fwrite-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memchr-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/mempcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memrchr-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memset-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpncpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcat-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strchr-b16.ll create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlen-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncat-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strndup-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strnlen-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/wcslen-b16.ll diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 76f
[llvm-branch-commits] [clang] [llvm] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (PR #114577)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114577 >From a931d1ad84429798fe01ec76dc77cd221f03d2d4 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 1 Nov 2024 12:39:52 -0400 Subject: [PATCH] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline --- clang/lib/CodeGen/BackendUtil.cpp | 22 + llvm/include/llvm/Passes/PassBuilder.h| 20 +++- llvm/lib/Passes/PassBuilderPipelines.cpp | 24 +++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++--- .../CodeGen/AMDGPU/print-pipeline-passes.ll | 1 + llvm/tools/opt/NewPMDriver.cpp| 4 ++-- 6 files changed, 57 insertions(+), 37 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 47a30f00612eb7..70035a5e069a90 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, // Ensure we lower KCFI operand bundles with -O0. PB.registerOptimizerLastEPCallback( - [&](ModulePassManager &MPM, OptimizationLevel Level) { + [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) { if (Level == OptimizationLevel::O0 && LangOpts.Sanitize.has(SanitizerKind::KCFI)) MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); @@ -693,8 +693,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - auto SanitizersCallback = [&](ModulePassManager &MPM, -OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level, +ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(SanitizerCoveragePass( @@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple, }; if (ClSanitizeOnOptimizerEarlyEP) { PB.registerOptimizerEarlyEPCallback( -[SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { +[SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { ModulePassManager NewMPM; - SanitizersCallback(NewMPM, Level); + SanitizersCallback(NewMPM, Level, Phase); if (!NewMPM.isEmpty()) { // Sanitizers can abandon. NewMPM.addPass(RequireAnalysisPass()); @@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // TODO: Consider passing the MemoryProfileOutput to the pass builder via // the PGOOptions, and set this up there. if (!CodeGenOpts.MemoryProfileOutput.empty()) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { -MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); -MPM.addPass(ModuleMemProfilerPass()); - }); + PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, +OptimizationLevel Level, +ThinOrFullLTOPhase) { +MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); +MPM.addPass(ModuleMemProfilerPass()); + }); } if (CodeGenOpts.FatLTO) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 565fd2ab2147e5..e7bc3a58f414f1 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -490,7 +490,8 @@ class PassBuilder { /// This extension point allows adding optimizations before the function /// optimization pipeline. 
void registerOptimizerEarlyEPCallback( - const std::function &C) { + const std::function &C) { OptimizerEarlyEPCallbacks.push_back(C); } @@ -499,7 +500,8 @@ class PassBuilder { /// This extension point allows adding optimizations at the very end of the /// function optimization pipeline. void registerOptimizerLastEPCallback( - const std::function &C) { + const std::function &C) { OptimizerLastEPCallbacks.push_back(C); } @@ -630,9 +632,11 @@ class PassBuilder { void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level); void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + OptimizationLevel Level, + ThinOrFullLTOPhase Phase); void invo
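For illustration, a hedged usage sketch of the new callback shape this patch introduces; `MyModulePass` is a hypothetical pass and the phase filter is only an example of what the extra parameter enables:

```cpp
// Sketch: extension-point callbacks now receive the LTO phase, so a client
// can skip work in the pre-link phases and run only on the final module.
PB.registerOptimizerLastEPCallback(
    [](ModulePassManager &MPM, OptimizationLevel Level,
       ThinOrFullLTOPhase Phase) {
      if (Level != OptimizationLevel::O0 &&
          Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
          Phase != ThinOrFullLTOPhase::FullLTOPreLink)
        MPM.addPass(MyModulePass());
    });
```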
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114438 >From 79f88836a79e63069eb6b7b58fa376bcd2b32303 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 31 Oct 2024 12:49:07 -0400 Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 81 +++ .../annotate-kernel-features-hsa-call.ll | 46 ++- .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 59 +++--- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 ++- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 8 files changed, 113 insertions(+), 93 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 376f2067582f32..7d51412730d4d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -201,6 +201,19 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { +auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); +if (!Val) + return std::nullopt; +if (!Val->second) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); +} +return std::make_pair(Val->first, *(Val->second)); + } + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -771,22 +784,6 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { -// Don't add the attribute if it's the implied default. 
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) - return ChangeStatus::UNCHANGED; - -Function *F = getAssociatedFunction(); -LLVMContext &Ctx = F->getContext(); -SmallString<10> Buffer; -raw_svector_ostream OS(Buffer); -OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; -return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, AttrName, OS.str())}, - /*ForceReplace=*/true); - } - const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -883,29 +880,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { -return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); -if (const auto *AssumedGroupSize = A.getAAFor( -*this, IRPosition::function(*F), DepClassTy::REQUIRED); -AssumedGroupSize->isValidState()) { +auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); +}; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); +std::pair MaxWavesPerEURange{ +1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); +// If the attribute exists, we will honor it if it is not the default. +if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { +TakeRange(*Attr); +return; + } } -if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); +// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the +// calculation of waves per EU involves flat work group size, we can't +// simply use an assumed flat work group size as a start point, because the +// update of flat work group size is in an inverse direction of waves per +// EU. However, we can still do something if it is an entry function. Since +// an entry function is a terminal node, and flat work group size either +// from attribute or default will be used anyway, we can take that value and +// calculate the waves per EU based on it. This resu
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/114726 None >From b7612eddae0b0808f82bb8bc2e6fd6e34361ae5c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 3 Nov 2024 19:35:26 -0500 Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial state --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 105 +++--- .../annotate-kernel-features-hsa-call.ll | 46 .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../AMDGPU/remove-no-kernel-id-attribute.ll | 11 +- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 7 files changed, 118 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 7d51412730d4d5..85500b95eec1a4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute if (!CallerInfo || !CallerInfo->isValidState()) return false; + /// When the caller AA is in its initial state, the state remains valid + /// but awaits propagation. We skip processing in this case. Note that we + /// must return true since the state is still considered valid. + if (CallerInfo->isAtInitialState()) { +LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller " + << Caller->getName() + << " is still at initial state. Skip the update.\n"); +return true; + } + Change |= clampStateAndIndicateChange(this->getState(), CallerInfo->getState()); @@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } + /// The initial state of `IntegerRangeState` represents an empty set, which + /// does not constitute a valid range. This empty state complicates + /// propagation, particularly for arithmetic operations like + /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the + /// initial state during processing. + bool isAtInitialState() const { +return isValidState() && getAssumed().isEmptySet(); + } + const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { Attributor &A); ChangeStatus manifest(Attributor &A) override { +if (isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] Still at initial state. No manifest.\n";); + return ChangeStatus::UNCHANGED; +} Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); return emitAttributeIfNotDefaultAfterClamp( @@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; +Function *F = getAssociatedFunction(); + +const auto *AAFlatWorkGroupSize = A.getAAFor( +*this, IRPosition::function(*F), DepClassTy::REQUIRED); +if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) { + LLVM_DEBUG( + dbgs() << '[' << getName() + << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n"); + return ChangeStatus::UNCHANGED; +} + +if (AAFlatWorkGroupSize->isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] AAAMDFlatWorkGroupSize is still at initial " + "state. 
Skip the update.\n"); + return ChangeStatus::UNCHANGED; +} + +auto CurrentWorkGroupSize = std::make_pair( +AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(), +AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1); + +auto DoUpdate = [&](std::pair WavesPerEU, +std::pair FlatWorkGroupSize) { + auto [Min, Max] = + InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize); + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + Change |= clampStateAndIndicateChange(this->getState(), IRS); +}; + +// // We need to clamp once if we are not at initial state, because +// // AAAMDFlatWorkGroupSize could be updated in last iteration. +if (!isAtInitialState()) { + auto CurrentWavesPerEU = + std::make_pair(getAssumed().getLower().getZExtValue(), + getAssumed().getUpper().getZExtValue() - 1); + DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize); +} + auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - Function *Func = getAssociatedF
[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)
@@ -315,967 +3233,10985 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-RV32: # %bb.0: # %entry ; CHECK-RV32-NEXT:addi sp, sp, -16 ; CHECK-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT:li a2, 31 +; CHECK-RV32-NEXT:li a2, 31 +; CHECK-RV32-NEXT:call bcmp +; CHECK-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT:addi sp, sp, 16 +; CHECK-RV32-NEXT:ret +; +; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-NEXT:call bcmp +; CHECK-ALIGNED-RV64-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-NEXT:ret +; +; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-ZBB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-ZBB-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-ZBB-NEXT:call bcmp +; CHECK-ALIGNED-RV64-ZBB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-ZBB-NEXT:ret +; +; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-ZBKB-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:call bcmp +; CHECK-ALIGNED-RV64-ZBKB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:ret +; +; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-V-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-V-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-V-NEXT:call bcmp +; CHECK-ALIGNED-RV64-V-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-V-NEXT:ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-NEXT:xor a4, a4, a7 +; CHECK-UNALIGNED-RV64-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT:or a0, a4, a0 +; CHECK-UNALIGNED-RV64-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT:snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT:ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a4, a4, a7 +; 
CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a4, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a4, a4, a7 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a0, a4, a
[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)
@@ -315,967 +3233,10985 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-RV32: # %bb.0: # %entry ; CHECK-RV32-NEXT:addi sp, sp, -16 ; CHECK-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill -; CHECK-RV32-NEXT:li a2, 31 +; CHECK-RV32-NEXT:li a2, 31 +; CHECK-RV32-NEXT:call bcmp +; CHECK-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT:addi sp, sp, 16 +; CHECK-RV32-NEXT:ret +; +; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-NEXT:call bcmp +; CHECK-ALIGNED-RV64-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-NEXT:ret +; +; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-ZBB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-ZBB-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-ZBB-NEXT:call bcmp +; CHECK-ALIGNED-RV64-ZBB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-ZBB-NEXT:ret +; +; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-ZBKB-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:call bcmp +; CHECK-ALIGNED-RV64-ZBKB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT:ret +; +; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_31: +; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, -16 +; CHECK-ALIGNED-RV64-V-NEXT:sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-ALIGNED-RV64-V-NEXT:li a2, 31 +; CHECK-ALIGNED-RV64-V-NEXT:call bcmp +; CHECK-ALIGNED-RV64-V-NEXT:ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, 16 +; CHECK-ALIGNED-RV64-V-NEXT:ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-NEXT:xor a4, a4, a7 +; CHECK-UNALIGNED-RV64-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT:or a0, a4, a0 +; CHECK-UNALIGNED-RV64-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT:snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT:ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a4, a4, a7 +; 
CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a4, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT:ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_31: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a3, 8(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a4, 16(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a0, 23(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a5, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a6, 8(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a7, 16(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a1, 23(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a2, a2, a5 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a3, a3, a6 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a4, a4, a7 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a0, a4, a
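For readers skimming the CHECK lines above: on subtargets with fast unaligned access, the `bcmp_size_31` expansion boils down to four 8-byte loads per buffer (offsets 0, 8, 16 and an overlapping load at 23), pairwise XORs, an OR-reduce and a single `snez`. A rough, illustrative C++ sketch of the equivalent scalar computation (not the code the backend emits, only what it computes):

```cpp
#include <cstdint>
#include <cstring>

// Illustrative sketch of what the expanded bcmp(s1, s2, 31) computes on a
// target with fast unaligned loads; the offsets mirror the
// CHECK-UNALIGNED-RV64 output above. bcmp only has to return zero/non-zero,
// hence the final comparison instead of a byte-wise result.
static int bcmp_size_31_expanded(const void *s1, const void *s2) {
  auto load64 = [](const void *p, unsigned off) {
    uint64_t v;
    std::memcpy(&v, static_cast<const char *>(p) + off, sizeof(v));
    return v;
  };
  uint64_t x0 = load64(s1, 0) ^ load64(s2, 0);
  uint64_t x1 = load64(s1, 8) ^ load64(s2, 8);
  uint64_t x2 = load64(s1, 16) ^ load64(s2, 16);
  uint64_t x3 = load64(s1, 23) ^ load64(s2, 23); // bytes 23..30, overlaps byte 23
  return ((x0 | x1) | (x2 | x3)) != 0;           // maps to the or/or/or + snez sequence
}
```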
[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)
[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/114724 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (PR #114577)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/114577 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)
https://github.com/wangleiat created https://github.com/llvm/llvm-project/pull/114742 This patch adds the desired feature flags in the JIT compiler to enable hard-float instructions if the target supports them, and allows floats and doubles to be used in lldb expressions. Fixed tests: lldb-shell :: Expr/TestAnonNamespaceParamFunc.cpp lldb-shell :: Expr/TestIRMemoryMap.test lldb-shell :: Expr/TestStringLiteralExpr.test lldb-shell :: SymbolFile/DWARF/debug-types-expressions.test Similar to #99336 Depends on: https://github.com/llvm/llvm-project/pull/114741 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
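A minimal sketch of the idea, for context: the LoongArch ELF e_flags carry the floating-point ABI, and the JIT picks its subtarget features from them. The helper name and exact mapping below are illustrative assumptions, not code lifted from the patch:

```cpp
#include "lldb/Utility/ArchSpec.h"
#include <cstdint>
#include <string>

// Illustrative assumption of how the e_flags-derived ArchSpec flags could be
// turned into JIT target features; the actual patch may differ in details.
static std::string
GetLoongArchHardFloatFeatures(const lldb_private::ArchSpec &arch) {
  uint32_t abi = arch.GetFlags() & lldb_private::ArchSpec::eLoongArch_abi_mask;
  if (abi == lldb_private::ArchSpec::eLoongArch_abi_double_float)
    return "+f,+d"; // double-float ABI: enable both F and D extensions
  if (abi == lldb_private::ArchSpec::eLoongArch_abi_single_float)
    return "+f";    // single-float ABI: enable F only
  return "";        // soft-float ABI: no FP features for the JIT
}
```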
[llvm-branch-commits] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)
llvmbot wrote: @llvm/pr-subscribers-lldb Author: wanglei (wangleiat) Changes This patch adds desired feature flags in JIT compiler to enable hard-float instructions if target supports them and allows to use floats and doubles in lldb expressions. Fited tests: lldb-shell :: Expr/TestAnonNamespaceParamFunc.cpp lldb-shell :: Expr/TestIRMemoryMap.test lldb-shell :: Expr/TestStringLiteralExpr.test lldb-shell :: SymbolFile/DWARF/debug-types-expressions.test Similar as #99336 Depens on: https://github.com/llvm/llvm-project/pull/114741 --- Patch is 31.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114742.diff 7 Files Affected: - (modified) lldb/include/lldb/Utility/ArchSpec.h (+9) - (modified) lldb/source/Plugins/ABI/CMakeLists.txt (+1-1) - (added) lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp (+664) - (added) lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h (+104) - (added) lldb/source/Plugins/ABI/LoongArch/CMakeLists.txt (+12) - (modified) lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp (+21) - (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (+17) ``diff diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h index 5990f984b09e2d..2a74058673bae5 100644 --- a/lldb/include/lldb/Utility/ArchSpec.h +++ b/lldb/include/lldb/Utility/ArchSpec.h @@ -107,6 +107,15 @@ class ArchSpec { eRISCVSubType_riscv64, }; + enum LoongArcheflags { +eLoongArch_abi_soft_float = 0x, /// soft float +eLoongArch_abi_single_float = +0x0001, /// single precision floating point, +f +eLoongArch_abi_double_float = +0x0002, /// double precision floating point, +d +eLoongArch_abi_mask = 0x0003, + }; + enum LoongArchSubType { eLoongArchSubType_unknown, eLoongArchSubType_loongarch32, diff --git a/lldb/source/Plugins/ABI/CMakeLists.txt b/lldb/source/Plugins/ABI/CMakeLists.txt index 9241a2487d522f..e33ac87354860e 100644 --- a/lldb/source/Plugins/ABI/CMakeLists.txt +++ b/lldb/source/Plugins/ABI/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(target AArch64 ARM ARC Hexagon Mips MSP430 PowerPC RISCV SystemZ X86) +foreach(target AArch64 ARM ARC Hexagon LoongArch Mips MSP430 PowerPC RISCV SystemZ X86) if (${target} IN_LIST LLVM_TARGETS_TO_BUILD) add_subdirectory(${target}) endif() diff --git a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp new file mode 100644 index 00..cd8270c01113f7 --- /dev/null +++ b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp @@ -0,0 +1,664 @@ +//===-- ABISysV_loongarch.cpp--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "ABISysV_loongarch.h" + +#include +#include +#include + +#include "llvm/IR/DerivedTypes.h" + +#include "Utility/LoongArch_DWARF_Registers.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Value.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/StackFrame.h" +#include "lldb/Target/Thread.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/RegisterValue.h" +#include "lldb/ValueObject/ValueObjectConstResult.h" + +#define DEFINE_REG_NAME(reg_num) ConstString(#reg_num).GetCString() +#define DEFINE_REG_NAME_STR(reg_name) ConstString(reg_name).GetCString() + +// The ABI is not a source of such information as size, offset, encoding, etc. +// of a register. 
Just provides correct dwarf and eh_frame numbers. + +#define DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, generic_num) \ + { \ + DEFINE_REG_NAME(dwarf_num), \ + DEFINE_REG_NAME_STR(str_name), \ + 0, \ + 0, \ + eEncodingInvalid, \ + eFormatDefault, \ + {dwarf_num, dwarf_num, generic_num, LLDB_INVALID_REGNUM, dwarf_num}, \ + nullptr, \ + nullptr, \ + nullptr, \ + } + +#define DEFINE_REGISTER_STUB(dwarf_num, str_name) \ + DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, LLDB_INVALID_REGNUM
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/114745 None >From 431e6371f161d0f85c598c789902976e3fa74162 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 30 Oct 2024 04:59:30 + Subject: [PATCH] [CodeGen][NewPM] Port MachineCycleInfo to NPM --- .../llvm/CodeGen/MachineCycleAnalysis.h | 18 ++ llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 3 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 34 ++- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/X86/cycle-info.mir | 2 ++ 7 files changed, 50 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index 1888dd053ce65ee..f740a9599edf292 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,6 +16,7 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineSSAContext.h" namespace llvm { @@ -46,6 +47,23 @@ class MachineCycleInfoWrapperPass : public MachineFunctionPass { // version. bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); +class MachineCycleAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = MachineCycleInfo; + + Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); +}; + +class MachineCycleInfoPrinterPass : public PassInfoMixin { + raw_ostream &OS; + public: +explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {} +PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 54c070401ec8a40..b040e7c096d1f51 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -191,7 +191,7 @@ void initializeMachineCFGPrinterPass(PassRegistry &); void initializeMachineCSELegacyPass(PassRegistry &); void initializeMachineCombinerPass(PassRegistry &); void initializeMachineCopyPropagationPass(PassRegistry &); -void initializeMachineCycleInfoPrinterPassPass(PassRegistry &); +void initializeMachineCycleInfoPrinterLegacyPass(PassRegistry &); void initializeMachineCycleInfoWrapperPassPass(PassRegistry &); void initializeMachineDominanceFrontierPass(PassRegistry &); void initializeMachineDominatorTreeWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 9d12a120ff7ac6d..497bc97455a1b55 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -101,6 +101,7 @@ MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-block-freq", MachineBlockFrequencyAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-branch-prob", MachineBranchProbabilityAnalysis()) +MACHINE_FUNCTION_ANALYSIS("machine-cycles", MachineCycleAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-opt-remark-emitter", @@ -151,6 +152,7 @@ MACHINE_FUNCTION_PASS("print", MACHINE_FUNCTION_PASS("print", MachineDominatorTreePrinterPass(dbgs())) 
MACHINE_FUNCTION_PASS("print", MachineLoopPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) @@ -241,7 +243,6 @@ DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass) DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass) DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass) DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass) -DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 39fba1d0b527ef6..adddb8daaa0e914 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -78,7 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCSELegacyPass(Registry); ini
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests). Stack: * **#114746** * **#114745** 👈 (this PR) * **#114027** * `main` This stack of pull requests is managed by Graphite (https://stacking.dev/). https://github.com/llvm/llvm-project/pull/114745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/114746 None >From 40df066d3c32cdeab9927787f201e0b8a72af0bb Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 4 Nov 2024 06:58:14 + Subject: [PATCH] [CodeGen] Move EnableSinkAndFold to TargetOptions --- llvm/include/llvm/CodeGen/TargetPassConfig.h | 8 llvm/include/llvm/Target/TargetOptions.h | 8 +++- llvm/lib/CodeGen/MachineSink.cpp | 5 - llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 5 +++-- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index 2f5951e3ec3bce..b395774b14c441 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -131,11 +131,6 @@ class TargetPassConfig : public ImmutablePass { /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge = true; - /// Enable sinking of instructions in MachineSink where a computation can be - /// folded into the addressing mode of a memory load/store instruction or - /// replace a copy. - bool EnableSinkAndFold = false; - /// Require processing of functions such that callees are generated before /// callers. bool RequireCodeGenSCCOrder = false; @@ -198,9 +193,6 @@ class TargetPassConfig : public ImmutablePass { bool getEnableTailMerge() const { return EnableTailMerge; } void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } - bool getEnableSinkAndFold() const { return EnableSinkAndFold; } - void setEnableSinkAndFold(bool Enable) { setOpt(EnableSinkAndFold, Enable); } - bool requiresCodeGenSCCOrder() const { return RequireCodeGenSCCOrder; } void setRequiresCodeGenSCCOrder(bool Enable = true) { setOpt(RequireCodeGenSCCOrder, Enable); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 88f253805ca99c..b16ad5b69ff05a 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -137,7 +137,8 @@ namespace llvm { ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), - EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), + EnableSinkAndFold(false), EnableFastISel(false), + EnableGlobalISel(false), UseInitArray(false), DisableIntegratedAS(false), FunctionSections(false), DataSections(false), IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), UniqueSectionNames(true), @@ -239,6 +240,11 @@ namespace llvm { /// they were generated. Default is true. unsigned StackSymbolOrdering : 1; +/// EnableSinkAndFold - Enable sinking of instructions in MachineSink where +/// a computation can be folded into the addressing mode of a memory +/// load/store instruction or replace a copy. +unsigned EnableSinkAndFold : 1; + /// EnableFastISel - This flag enables fast-path instruction selection /// which trades away generated code quality in favor of reducing /// compile time. 
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index a0e09398602e9e..a3a6b24f9be2d1 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -54,6 +54,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -729,7 +730,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); TargetPassConfig *PassConfig = &getAnalysis(); - EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); + auto &TM = PassConfig->getTM(); + EnableSinkAndFold = TM.Options.EnableSinkAndFold; + // EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); bool EverMadeChange = false; diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index c7bd0390b65620..b077f85fc760b8 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -505,7 +505,7 @@ class AArch64PassConfig : public TargetPassConfig { : TargetPassConfig(TM, PM) { if (TM.getOptLevel() != CodeGenOptLevel::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); -setEnableSinkAndFold(EnableSinkFold); +getAArch64TargetMachine().Options.EnableSinkAndFold = true; } AArch64TargetMachine &getAArch64TargetMachine() const { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISC
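The user-visible effect of the move is that the knob now travels with `TargetOptions` rather than `TargetPassConfig`; a minimal sketch of what client code can do after this change, mirroring the AArch64 hunk above:

```cpp
#include "llvm/Target/TargetOptions.h"

// Minimal sketch: after this change, any code that assembles TargetOptions
// (e.g. when constructing a TargetMachine) can request sink-and-fold directly,
// instead of subclassing TargetPassConfig and calling setEnableSinkAndFold().
llvm::TargetOptions makeOptionsWithSinkAndFold() {
  llvm::TargetOptions Opts;
  Opts.EnableSinkAndFold = true;
  return Opts;
}
```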
[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests). Stack: * **#114746** 👈 (this PR) * **#114745** * **#114027** * `main` This stack of pull requests is managed by Graphite (https://stacking.dev/). https://github.com/llvm/llvm-project/pull/114746 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff dbedca0302d5624554ed7ff4b45c019ad5972166 431e6371f161d0f85c598c789902976e3fa74162 --extensions h,cpp -- llvm/include/llvm/CodeGen/MachineCycleAnalysis.h llvm/include/llvm/InitializePasses.h llvm/lib/CodeGen/CodeGen.cpp llvm/lib/CodeGen/MachineCycleAnalysis.cpp llvm/lib/Passes/PassBuilder.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index f740a9599e..64cf30e6dd 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -57,11 +57,14 @@ public: Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); }; -class MachineCycleInfoPrinterPass : public PassInfoMixin { +class MachineCycleInfoPrinterPass +: public PassInfoMixin { raw_ostream &OS; - public: -explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {} -PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + +public: + explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 82d7aadaca..6e58439960 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -56,7 +56,9 @@ void MachineCycleInfoWrapperPass::releaseMemory() { AnalysisKey MachineCycleAnalysis::Key; -MachineCycleInfo MachineCycleAnalysis::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { +MachineCycleInfo +MachineCycleAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { MachineCycleInfo MCI; MCI.compute(MF); return MCI; @@ -99,7 +101,9 @@ bool MachineCycleInfoPrinterLegacy::runOnMachineFunction(MachineFunction &F) { return false; } -PreservedAnalyses MachineCycleInfoPrinterPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { +PreservedAnalyses +MachineCycleInfoPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { OS << "MachineCycleInfo for function: " << MF.getName() << "\n"; auto &MCI = MFAM.getResult(MF); `` https://github.com/llvm/llvm-project/pull/114745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: Vitaly Buka (vitalybuka) Changes In C++ it's UB to use undeclared values as enum. And there is support `__ATOMIC_HLE_ACQUIRE` and `__ATOMIC_HLE_RELEASE` need such values. Internal implementation was switched to `class enum`, where that behavior is defined. But interface is C, so we just switch to `int`. --- Patch is 32.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114724.diff 4 Files Affected: - (modified) compiler-rt/include/sanitizer/tsan_interface_atomic.h (+82-87) - (modified) compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp (+59-57) - (modified) compiler-rt/lib/tsan/rtl/tsan_interface.h (+8-8) - (modified) compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp (+46-32) ``diff diff --git a/compiler-rt/include/sanitizer/tsan_interface_atomic.h b/compiler-rt/include/sanitizer/tsan_interface_atomic.h index de3a1c3936097d..74ed91efade040 100644 --- a/compiler-rt/include/sanitizer/tsan_interface_atomic.h +++ b/compiler-rt/include/sanitizer/tsan_interface_atomic.h @@ -43,183 +43,178 @@ typedef enum { } __tsan_memory_order; __tsan_atomic8 SANITIZER_CDECL -__tsan_atomic8_load(const volatile __tsan_atomic8 *a, __tsan_memory_order mo); +__tsan_atomic8_load(const volatile __tsan_atomic8 *a, int mo); __tsan_atomic16 SANITIZER_CDECL -__tsan_atomic16_load(const volatile __tsan_atomic16 *a, __tsan_memory_order mo); +__tsan_atomic16_load(const volatile __tsan_atomic16 *a, int mo); __tsan_atomic32 SANITIZER_CDECL -__tsan_atomic32_load(const volatile __tsan_atomic32 *a, __tsan_memory_order mo); +__tsan_atomic32_load(const volatile __tsan_atomic32 *a, int mo); __tsan_atomic64 SANITIZER_CDECL -__tsan_atomic64_load(const volatile __tsan_atomic64 *a, __tsan_memory_order mo); +__tsan_atomic64_load(const volatile __tsan_atomic64 *a, int mo); #if __TSAN_HAS_INT128 -__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_load( -const volatile __tsan_atomic128 *a, __tsan_memory_order mo); +__tsan_atomic128 SANITIZER_CDECL +__tsan_atomic128_load(const volatile __tsan_atomic128 *a, int mo); #endif void SANITIZER_CDECL __tsan_atomic8_store(volatile __tsan_atomic8 *a, - __tsan_atomic8 v, - __tsan_memory_order mo); + __tsan_atomic8 v, int mo); void SANITIZER_CDECL __tsan_atomic16_store(volatile __tsan_atomic16 *a, - __tsan_atomic16 v, - __tsan_memory_order mo); + __tsan_atomic16 v, int mo); void SANITIZER_CDECL __tsan_atomic32_store(volatile __tsan_atomic32 *a, - __tsan_atomic32 v, - __tsan_memory_order mo); + __tsan_atomic32 v, int mo); void SANITIZER_CDECL __tsan_atomic64_store(volatile __tsan_atomic64 *a, - __tsan_atomic64 v, - __tsan_memory_order mo); + __tsan_atomic64 v, int mo); #if __TSAN_HAS_INT128 void SANITIZER_CDECL __tsan_atomic128_store(volatile __tsan_atomic128 *a, -__tsan_atomic128 v, -__tsan_memory_order mo); +__tsan_atomic128 v, int mo); #endif -__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_exchange( -volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); +__tsan_atomic8 SANITIZER_CDECL +__tsan_atomic8_exchange(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_exchange( -volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo); +volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo); __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_exchange( -volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo); +volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo); __tsan_atomic64 SANITIZER_CDECL 
__tsan_atomic64_exchange( -volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo); +volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo); #if __TSAN_HAS_INT128 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_exchange( -volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo); +volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo); #endif -__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_add( -volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo); +__tsan_atomic8 SANITIZER_CDECL +__tsan_atomic8_fetch_add(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo); __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_add( -volatile __tsan_atomic16
[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/114724 In C++ it is UB to use values outside an enum's declared range as that enum type, and supporting `__ATOMIC_HLE_ACQUIRE` and `__ATOMIC_HLE_RELEASE` requires exactly such values. The internal implementation was switched to a scoped `enum class`, where that behavior is defined. But the interface is C, so we just switch it to `int`. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
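A simplified sketch of the problem and the fix (illustrative only, not the actual tsan runtime code; the identifiers are renamed, and the HLE macro values shown are the usual GCC/Clang ones):

```cpp
#include <cstdio>

// Interface enum as seen by C callers: enumerators only cover 0..5.
typedef enum {
  my_memory_order_relaxed = 0,
  my_memory_order_seq_cst = 5,
} my_memory_order;

// GCC/Clang encode HLE hints as high bits OR'ed into the memory order,
// e.g. __ATOMIC_HLE_ACQUIRE is typically (1 << 16). Converting such a value
// to my_memory_order would be UB in C++, because the enum (with no fixed
// underlying type) cannot represent it. Taking plain `int` avoids that:
void my_atomic32_store(volatile int *a, int v, int mo) {
  int order = mo & 0xffff;  // the real implementation can mask the hint bits
  int hle_bits = mo >> 16;  // and use a scoped `enum class` internally
  std::printf("order=%d hle=%d\n", order, hle_bits);
  *a = v;
}

int main() {
  volatile int x = 0;
  my_atomic32_store(&x, 1, my_memory_order_seq_cst | (1 << 16)); // HLE-style hint
  return 0;
}
```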