[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   return true;
 }
 
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+  if (X.getOpcode() == ISD::OR)
+return isOrXorXorTree(X.getOperand(0), false) &&
+   isOrXorXorTree(X.getOperand(1), false);
+  if (Root)
+return false;
+  return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+EVT VecVT, EVT CmpVT) {
+  SDValue Op0 = X.getOperand(0);
+  SDValue Op1 = X.getOperand(1);
+  if (X.getOpcode() == ISD::OR) {
+SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT);
+SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT);
+if (VecVT != CmpVT)
+  return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+  }
+  if (X.getOpcode() == ISD::XOR) {
+SDValue A = DAG.getBitcast(VecVT, Op0);
+SDValue B = DAG.getBitcast(VecVT, Op1);
+if (VecVT != CmpVT)
+  return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+  }
+  llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue
+combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
+const SDLoc &DL, SelectionDAG &DAG,
+const RISCVSubtarget &Subtarget) {
+  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+  EVT OpVT = X.getValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned OpSize = OpVT.getSizeInBits();
+
+  // We're looking for an oversized integer equality comparison.
+  if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
+  OpSize < Subtarget.getRealMinVLen() ||
+  OpSize > Subtarget.getRealMinVLen() * 8)

topperc wrote:

8 here should be `Subtarget.getMaxLMULForFixedLengthVectors()` I think
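
A minimal sketch of the revised guard (an editorial illustration, not part of the
original message; it assumes `getMaxLMULForFixedLengthVectors()` returns the
LMUL factor as an unsigned):

```cpp
// Sketch: tie the upper bound to the subtarget's maximum LMUL for
// fixed-length vectors instead of a hard-coded factor of 8.
if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
    OpSize < Subtarget.getRealMinVLen() ||
    OpSize > Subtarget.getRealMinVLen() *
                 Subtarget.getMaxLMULForFixedLengthVectors())
  return SDValue();
```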

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -3186,190 +3186,24 @@ define i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind {
 ;
 ; CHECK-ALIGNED-RV32-V-LABEL: bcmp_size_16:
 ; CHECK-ALIGNED-RV32-V:   # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a2, 1(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 0(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 2(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 3(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a2, a2, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 0(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 1(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:or a2, a4, a2
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 2(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 3(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a3, a5, a3
-; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a6, a4
-; CHECK-ALIGNED-RV32-V-NEXT:or a3, a4, a3
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 4(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 5(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:xor a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a3, 6(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 7(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a5, a4
-; CHECK-ALIGNED-RV32-V-NEXT:slli a3, a3, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a3, a6, a3
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 4(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 5(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:or a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 6(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 7(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a5, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a7, a4
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 8(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 9(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:xor a3, a3, a4
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a4, 10(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 11(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a5, a6, a5
-; CHECK-ALIGNED-RV32-V-NEXT:slli a4, a4, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a7, a4
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 8(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 9(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:or a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 10(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu t0, 11(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a6, a7, a6
-; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli t0, t0, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a5, t0, a5
-; CHECK-ALIGNED-RV32-V-NEXT:or a5, a5, a6
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 12(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 13(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:xor a4, a4, a5
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 14(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a0, 15(a0)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a6, a7, a6
-; CHECK-ALIGNED-RV32-V-NEXT:slli a5, a5, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a0, a0, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a0, a0, a5
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a5, 12(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a7, 13(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:or a0, a0, a6
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a6, 14(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:lbu a1, 15(a1)
-; CHECK-ALIGNED-RV32-V-NEXT:slli a7, a7, 8
-; CHECK-ALIGNED-RV32-V-NEXT:or a5, a7, a5
-; CHECK-ALIGNED-RV32-V-NEXT:slli a6, a6, 16
-; CHECK-ALIGNED-RV32-V-NEXT:slli a1, a1, 24
-; CHECK-ALIGNED-RV32-V-NEXT:or a1, a1, a6
-; CHECK-ALIGNED-RV32-V-NEXT:or a1, a1, a5
-; CHECK-ALIGNED-RV32-V-NEXT:xor a0, a0, a1
-; CHECK-ALIGNED-RV32-V-NEXT:or a2, a2, a3
-; CHECK-ALIGNED-RV32-V-NEXT:or a0, a4, a0
-; CHECK-ALIGNED-RV32-V-NEXT:or a0, a2, a0
-; CHECK-ALIGNED-RV32-V-NEXT:snez a0, a0
+; CHECK-ALIGNED-RV32-V-NEXT:vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ALIGNED-RV32-V-NEXT:vle8.v v8, (a0)
+; CHECK-ALIGNED-RV32-V-NEXT:vle8.v v9, (a1)
+; CHECK-ALIGNED-RV32-V-NEXT:vmseq.vv v8, v8, v9
+; CHECK-ALIGNED-RV32-V-NEXT:vmnot.m v8, v8

topperc wrote:

Missing combine to use vmsne?
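
One way to get there, sketched at the DAG level (an illustration, not the actual
patch): emit the compare with the inverted predicate so selection can pick a
single `vmsne.vv` instead of `vmseq.vv` followed by `vmnot.m`:

```cpp
// Sketch: for the "any byte differs" test, build the mask with SETNE and
// reduce with OR, rather than SETEQ + reduce-AND + invert.
SDValue NE = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE); // vmsne.vv
SDValue AnyDiff = DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, NE);
```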

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   return true;
 }
 
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+  if (X.getOpcode() == ISD::OR)
+return isOrXorXorTree(X.getOperand(0), false) &&
+   isOrXorXorTree(X.getOperand(1), false);
+  if (Root)
+return false;
+  return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+EVT VecVT, EVT CmpVT) {
+  SDValue Op0 = X.getOperand(0);
+  SDValue Op1 = X.getOperand(1);
+  if (X.getOpcode() == ISD::OR) {
+SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT);
+SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT);
+if (VecVT != CmpVT)
+  return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+  }
+  if (X.getOpcode() == ISD::XOR) {
+SDValue A = DAG.getBitcast(VecVT, Op0);
+SDValue B = DAG.getBitcast(VecVT, Op1);
+if (VecVT != CmpVT)
+  return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+  }
+  llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue
+combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
+const SDLoc &DL, SelectionDAG &DAG,
+const RISCVSubtarget &Subtarget) {
+  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+  EVT OpVT = X.getValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned OpSize = OpVT.getSizeInBits();
+
+  // We're looking for an oversized integer equality comparison.
+  if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
+  OpSize < Subtarget.getRealMinVLen() ||
+  OpSize > Subtarget.getRealMinVLen() * 8)
+return SDValue();
+
+  bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
+  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+return SDValue();
+
+  // Don't perform this combine if constructing the vector will be expensive.
+  auto IsVectorBitCastCheap = [](SDValue X) {
+    X = peekThroughBitcasts(X);
+    return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+           X.getOpcode() == ISD::LOAD;
+  };
+  if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+  !IsOrXorXorTreeCCZero)
+return SDValue();
+
+  bool NoImplicitFloatOps =
+  DAG.getMachineFunction().getFunction().hasFnAttribute(
+  Attribute::NoImplicitFloat);
+  if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) {

topperc wrote:

Isn't `Subtarget.hasVInstructions()` already checked earlier?
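
If so, the early bail-out already guarantees V is available, and the branch here
could presumably collapse to the attribute check alone; a sketch under that
assumption:

```cpp
// Sketch: hasVInstructions() is implied by the guard at the top of the
// function, so only noimplicitfloat needs to be consulted here.
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat))
  return SDValue();
```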

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -2504,5 +2504,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
     Options.LoadSizes = {8, 4, 2, 1};
   else
     Options.LoadSizes = {4, 2, 1};
+  if (IsZeroCmp && ST->hasVInstructions()) {
+    unsigned RealMinVLen = ST->getRealMinVLen() / 8;
+    for (int LMUL = 1; LMUL <= 8; LMUL *= 2)

topperc wrote:

Why do we want to limit this to LMUL>=1? Shouldn't we be able to do this for 
small vectors even when VLEN is large?
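
A hedged sketch of what lifting the limit might look like; the exact cutoffs
(powers of two from just above XLEN bytes up to 8 x VLEN) are assumptions for
illustration, not part of the review:

```cpp
// Sketch: also offer power-of-two sizes below VLEN/8, so e.g. a 16-byte
// bcmp can still use vectors when VLEN is 512. Sizes are kept in
// decreasing order, as the expansion expects.
if (IsZeroCmp && ST->hasVInstructions()) {
  unsigned VLenInBytes = ST->getRealMinVLen() / 8;
  unsigned XLenInBytes = ST->getXLen() / 8;
  SmallVector<unsigned, 8> VectorSizes;
  for (unsigned Size = VLenInBytes * 8; Size > XLenInBytes; Size /= 2)
    VectorSizes.push_back(Size);
  Options.LoadSizes.insert(Options.LoadSizes.begin(), VectorSizes.begin(),
                           VectorSizes.end());
}
```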

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   return true;
 }
 
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+  if (X.getOpcode() == ISD::OR)
+return isOrXorXorTree(X.getOperand(0), false) &&
+   isOrXorXorTree(X.getOperand(1), false);
+  if (Root)
+return false;
+  return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+EVT VecVT, EVT CmpVT) {
+  SDValue Op0 = X.getOperand(0);
+  SDValue Op1 = X.getOperand(1);
+  if (X.getOpcode() == ISD::OR) {
+SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT);
+SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT);
+if (VecVT != CmpVT)
+  return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+  }
+  if (X.getOpcode() == ISD::XOR) {
+SDValue A = DAG.getBitcast(VecVT, Op0);
+SDValue B = DAG.getBitcast(VecVT, Op1);
+if (VecVT != CmpVT)
+  return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+  }
+  llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue
+combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
+const SDLoc &DL, SelectionDAG &DAG,
+const RISCVSubtarget &Subtarget) {
+  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+  EVT OpVT = X.getValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned OpSize = OpVT.getSizeInBits();
+
+  // We're looking for an oversized integer equality comparison.
+  if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
+  OpSize < Subtarget.getRealMinVLen() ||

topperc wrote:

Should this be `OpSize <= XLen` instead of `OpSize < Subtarget.getRealMinVLen()`?
Shouldn't we use fixed vectors for anything that doesn't fit in a scalar?
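
Reconstructed for illustration, the loosened guard would read roughly as follows
(a sketch of the reviewer's suggestion, not the committed code):

```cpp
// Sketch: bail only when the value fits in a scalar register; anything
// wider is a candidate for fixed vectors, even below a full VLEN.
if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
    OpSize <= Subtarget.getXLen() ||
    OpSize > Subtarget.getRealMinVLen() * 8)
  return SDValue();
```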

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [RISCV] Support memcmp expansion for vectors (PR #114517)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -14474,17 +14475,116 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   return true;
 }
 
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+  if (X.getOpcode() == ISD::OR)
+return isOrXorXorTree(X.getOperand(0), false) &&
+   isOrXorXorTree(X.getOperand(1), false);
+  if (Root)
+return false;
+  return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+EVT VecVT, EVT CmpVT) {
+  SDValue Op0 = X.getOperand(0);
+  SDValue Op1 = X.getOperand(1);
+  if (X.getOpcode() == ISD::OR) {
+SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT);
+SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT);
+if (VecVT != CmpVT)
+  return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+  }
+  if (X.getOpcode() == ISD::XOR) {
+SDValue A = DAG.getBitcast(VecVT, Op0);
+SDValue B = DAG.getBitcast(VecVT, Op1);
+if (VecVT != CmpVT)
+  return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+  }
+  llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue
+combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
+const SDLoc &DL, SelectionDAG &DAG,
+const RISCVSubtarget &Subtarget) {
+  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+  EVT OpVT = X.getValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned OpSize = OpVT.getSizeInBits();
+
+  // We're looking for an oversized integer equality comparison.
+  if (!Subtarget.hasVInstructions() || !OpVT.isScalarInteger() ||
+  OpSize < Subtarget.getRealMinVLen() ||
+  OpSize > Subtarget.getRealMinVLen() * 8)
+return SDValue();
+
+  bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
+  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+return SDValue();
+
+  // Don't perform this combine if constructing the vector will be expensive.
+  auto IsVectorBitCastCheap = [](SDValue X) {
+    X = peekThroughBitcasts(X);
+    return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+           X.getOpcode() == ISD::LOAD;
+  };
+  if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+  !IsOrXorXorTreeCCZero)
+return SDValue();
+
+  bool NoImplicitFloatOps =
+  DAG.getMachineFunction().getFunction().hasFnAttribute(
+  Attribute::NoImplicitFloat);
+  if (!NoImplicitFloatOps && Subtarget.hasVInstructions()) {
+    unsigned VecSize = OpSize / 8;
+    EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
+    EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
+
+    SDValue Cmp;
+    if (IsOrXorXorTreeCCZero) {
+      Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT);
+    } else {
+      SDValue VecX = DAG.getBitcast(VecVT, X);
+      SDValue VecY = DAG.getBitcast(VecVT, Y);
+      Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
+    }
+    return DAG.getSetCC(DL, VT,
+                        DAG.getNode(ISD::VECREDUCE_AND, DL, XLenVT, Cmp),
+                        DAG.getConstant(0, DL, XLenVT), CC);
+  }
+
+  return SDValue();
+}
+
 // Replace (seteq (i64 (and X, 0x)), C1) with
 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
 // can become a sext.w instead of a shift pair.
 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+  SDLoc dl(N);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
   EVT OpVT = N0.getValueType();
 
+  // Looking for an equality compare.
+  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {

topperc wrote:

Use `ISD::isIntEqualitySetCC(Cond)`
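
The suggested predicate already exists in `ISD`; as a one-line sketch, the check
would become:

```cpp
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (ISD::isIntEqualitySetCC(Cond)) {
  // equality-only combines go here
}
```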

https://github.com/llvm/llvm-project/pull/114517


[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)

2024-11-03 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114726

>From 4e380599d038e8269c100f7a252331d5db9dffb7 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 3 Nov 2024 19:35:26 -0500
Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial
 state

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 105 +++---
 .../annotate-kernel-features-hsa-call.ll  |  46 
 .../AMDGPU/attributor-loop-issue-58639.ll |   3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|   3 +-
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |   9 +-
 .../AMDGPU/uniform-work-group-multistep.ll|   3 +-
 .../uniform-work-group-recursion-test.ll  |   2 +-
 7 files changed, 117 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7d51412730d4d5..85500b95eec1a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute
   if (!CallerInfo || !CallerInfo->isValidState())
 return false;
 
+  /// When the caller AA is in its initial state, the state remains valid
+  /// but awaits propagation. We skip processing in this case. Note that we
+  /// must return true since the state is still considered valid.
+  if (CallerInfo->isAtInitialState()) {
+LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+  << Caller->getName()
+  << " is still at initial state. Skip the update.\n");
+return true;
+  }
+
   Change |=
   clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
 
@@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
+  /// The initial state of `IntegerRangeState` represents an empty set, which
+  /// does not constitute a valid range. This empty state complicates
+  /// propagation, particularly for arithmetic operations like
+  /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
+  /// initial state during processing.
+  bool isAtInitialState() const {
+return isValidState() && getAssumed().isEmptySet();
+  }
+
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
Attributor &A);
 
   ChangeStatus manifest(Attributor &A) override {
+if (isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] Still at initial state. No manifest.\n";);
+  return ChangeStatus::UNCHANGED;
+}
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 return emitAttributeIfNotDefaultAfterClamp(
@@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 ChangeStatus Change = ChangeStatus::UNCHANGED;
 
+Function *F = getAssociatedFunction();
+
+    const auto *AAFlatWorkGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+        *this, IRPosition::function(*F), DepClassTy::REQUIRED);
+if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
+  LLVM_DEBUG(
+  dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+if (AAFlatWorkGroupSize->isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] AAAMDFlatWorkGroupSize is still at initial "
+   "state. Skip the update.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+auto CurrentWorkGroupSize = std::make_pair(
+AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
+AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
+
+    auto DoUpdate = [&](std::pair<unsigned, unsigned> WavesPerEU,
+                        std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+  auto [Min, Max] =
+  InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
+  ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState IRS(CR);
+  Change |= clampStateAndIndicateChange(this->getState(), IRS);
+};
+
+// // We need to clamp once if we are not at initial state, because
+// // AAAMDFlatWorkGroupSize could be updated in last iteration.
+if (!isAtInitialState()) {
+  auto CurrentWavesPerEU =
+  std::make_pair(getAssumed().getLower().getZExtValue(),
+ getAssumed().getUpper().getZExtValue() - 1);
+  DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
+}
+
 auto CheckCallSite = [&](AbstractCallSite CS) {
   Function *Caller = CS.getInstruction()->getFunction();
-  Function *Func = getAssociatedFunctio

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)

2024-11-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)


Changes



---

Patch is 31.50 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/114726.diff


7 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+87-18) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll 
(+22-24) 
- (modified) llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll (+1-2) 
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-2) 
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-6) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+1-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll 
(+1-1) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7d51412730d4d5..85500b95eec1a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute
   if (!CallerInfo || !CallerInfo->isValidState())
 return false;
 
+  /// When the caller AA is in its initial state, the state remains valid
+  /// but awaits propagation. We skip processing in this case. Note that we
+  /// must return true since the state is still considered valid.
+  if (CallerInfo->isAtInitialState()) {
+LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+  << Caller->getName()
+  << " is still at initial state. Skip the update.\n");
+return true;
+  }
+
   Change |=
   clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
 
@@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
+  /// The initial state of `IntegerRangeState` represents an empty set, which
+  /// does not constitute a valid range. This empty state complicates
+  /// propagation, particularly for arithmetic operations like
+  /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
+  /// initial state during processing.
+  bool isAtInitialState() const {
+return isValidState() && getAssumed().isEmptySet();
+  }
+
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
Attributor &A);
 
   ChangeStatus manifest(Attributor &A) override {
+if (isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] Still at initial state. No manifest.\n";);
+  return ChangeStatus::UNCHANGED;
+}
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 return emitAttributeIfNotDefaultAfterClamp(
@@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 ChangeStatus Change = ChangeStatus::UNCHANGED;
 
+Function *F = getAssociatedFunction();
+
+    const auto *AAFlatWorkGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+        *this, IRPosition::function(*F), DepClassTy::REQUIRED);
+if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
+  LLVM_DEBUG(
+  dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+if (AAFlatWorkGroupSize->isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] AAAMDFlatWorkGroupSize is still at initial "
+   "state. Skip the update.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+auto CurrentWorkGroupSize = std::make_pair(
+AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
+AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
+
+    auto DoUpdate = [&](std::pair<unsigned, unsigned> WavesPerEU,
+                        std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+  auto [Min, Max] =
+  InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
+  ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState IRS(CR);
+  Change |= clampStateAndIndicateChange(this->getState(), IRS);
+};
+
+// // We need to clamp once if we are not at initial state, because
+// // AAAMDFlatWorkGroupSize could be updated in last iteration.
+if (!isAtInitialState()) {
+  auto CurrentWavesPerEU =
+  std::make_pair(getAssumed().getLower().getZExtValue(),
+ getAssumed().getUpper().getZExtValue() - 1);
+  DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
+}
+
 auto CheckCallSite = [&](AbstractCallSite CS) {
   Function *Caller = CS.getInstruction()->getFunction();
-  Functi

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)

2024-11-03 Thread Shilei Tian via llvm-branch-commits

shiltian wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/114726
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#114726** 👈 (this PR)
* **#114438**
* **#114357**
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/



https://github.com/llvm/llvm-project/pull/114726


[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)

2024-11-03 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/114724



[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)

2024-11-03 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/114746

>From dcf8feee9c8d410b42fa8bed29a15c14bb7d6d2e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 4 Nov 2024 06:58:14 +
Subject: [PATCH] [CodeGen] Move EnableSinkAndFold to TargetOptions

---
 llvm/include/llvm/CodeGen/TargetPassConfig.h | 8 
 llvm/include/llvm/Target/TargetOptions.h | 8 +++-
 llvm/lib/CodeGen/MachineSink.cpp | 5 -
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +-
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 4 ++--
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 2f5951e3ec3bce..b395774b14c441 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -131,11 +131,6 @@ class TargetPassConfig : public ImmutablePass {
   /// Default setting for -enable-tail-merge on this target.
   bool EnableTailMerge = true;
 
-  /// Enable sinking of instructions in MachineSink where a computation can be
-  /// folded into the addressing mode of a memory load/store instruction or
-  /// replace a copy.
-  bool EnableSinkAndFold = false;
-
   /// Require processing of functions such that callees are generated before
   /// callers.
   bool RequireCodeGenSCCOrder = false;
@@ -198,9 +193,6 @@ class TargetPassConfig : public ImmutablePass {
   bool getEnableTailMerge() const { return EnableTailMerge; }
   void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); }
 
-  bool getEnableSinkAndFold() const { return EnableSinkAndFold; }
-  void setEnableSinkAndFold(bool Enable) { setOpt(EnableSinkAndFold, Enable); }
-
   bool requiresCodeGenSCCOrder() const { return RequireCodeGenSCCOrder; }
   void setRequiresCodeGenSCCOrder(bool Enable = true) {
 setOpt(RequireCodeGenSCCOrder, Enable);
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 88f253805ca99c..b16ad5b69ff05a 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -137,7 +137,8 @@ namespace llvm {
   ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false),
   HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
   GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
-  EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
+  EnableSinkAndFold(false), EnableFastISel(false),
+  EnableGlobalISel(false), UseInitArray(false),
   DisableIntegratedAS(false), FunctionSections(false),
   DataSections(false), IgnoreXCOFFVisibility(false),
   XCOFFTracebackTable(true), UniqueSectionNames(true),
@@ -239,6 +240,11 @@ namespace llvm {
 /// they were generated. Default is true.
 unsigned StackSymbolOrdering : 1;
 
+/// EnableSinkAndFold - Enable sinking of instructions in MachineSink where
+/// a computation can be folded into the addressing mode of a memory
+/// load/store instruction or replace a copy.
+unsigned EnableSinkAndFold : 1;
+
 /// EnableFastISel - This flag enables fast-path instruction selection
 /// which trades away generated code quality in favor of reducing
 /// compile time.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index a0e09398602e9e..a3a6b24f9be2d1 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include 
 #include 
 #include 
@@ -729,7 +730,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
-  EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
+  auto &TM = PassConfig->getTM<TargetMachine>();
+  EnableSinkAndFold = TM.Options.EnableSinkAndFold;
+  // EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
 
   bool EverMadeChange = false;
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c7bd0390b65620..ee8aae4ee8bcc8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -505,7 +505,7 @@ class AArch64PassConfig : public TargetPassConfig {
   : TargetPassConfig(TM, PM) {
 if (TM.getOptLevel() != CodeGenOptLevel::None)
   substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
-setEnableSinkAndFold(EnableSinkFold);
+TM.Options.EnableSinkAndFold = EnableSinkFold;
   }
 
   AArch64TargetMachine &getAArch64TargetMachine() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
i

[llvm-branch-commits] [llvm] [SimplifyLibCalls] Add initial support for non-8-bit bytes (PR #106542)

2024-11-03 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/106542

>From 9859c07861131607e36d3de2ee0d2a9980b8d6da Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Wed, 28 Aug 2024 16:09:44 +0300
Subject: [PATCH] [SimplifyLibCalls] Add initial support for non-8-bit bytes

The patch makes CharWidth argument of `getStringLength` mandatory
and ensures the correct values are passed in most cases.
This is *not* a complete support for unusual byte widths in
SimplifyLibCalls since `getConstantStringInfo` returns false for those.
The code guarded by `getConstantStringInfo` returning true is unchanged
because the changes are currently not testable.
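
For illustration, a call site under the new contract might look like the sketch
below; `DataLayout::getByteWidth()` is assumed from the wider non-8-bit-byte
series and is not shown in this excerpt:

```cpp
// Sketch: the character width is now passed explicitly instead of relying
// on an implicit default of 8 bits.
const DataLayout &DL = CI->getModule()->getDataLayout();
uint64_t Len = getStringLength(CI->getArgOperand(0), DL.getByteWidth());
```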
---
 llvm/include/llvm/Analysis/ValueTracking.h|   4 +-
 .../llvm/Transforms/Utils/SimplifyLibCalls.h  |   4 +-
 llvm/lib/Analysis/MemoryBuiltins.cpp  |   3 +-
 llvm/lib/Analysis/ValueTracking.cpp   |  40 ++--
 .../InstCombine/InstCombineCalls.cpp  |  12 +-
 .../InstCombine/InstructionCombining.cpp  |   5 +-
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 194 --
 .../InstCombine/SimplifyLibCalls/fputs-b16.ll |  19 ++
 .../SimplifyLibCalls/fwrite-b16.ll|  19 ++
 .../SimplifyLibCalls/memchr-b16.ll|  34 +++
 .../SimplifyLibCalls/memcmp-b32.ll|  32 +++
 .../SimplifyLibCalls/memcpy-b16.ll|  69 +++
 .../SimplifyLibCalls/memcpy_chk-b16.ll|  17 ++
 .../SimplifyLibCalls/mempcpy-b16.ll   |  17 ++
 .../SimplifyLibCalls/memrchr-b16.ll   |  20 ++
 .../SimplifyLibCalls/memset-b16.ll|  66 ++
 .../SimplifyLibCalls/stpcpy-b16.ll|  31 +++
 .../SimplifyLibCalls/stpcpy_chk-b16.ll|  44 
 .../SimplifyLibCalls/stpncpy-b16.ll   |  47 +
 .../SimplifyLibCalls/strcat-b16.ll|  20 ++
 .../SimplifyLibCalls/strchr-b16.ll|  45 
 .../SimplifyLibCalls/strcmp-b32.ll|  50 +
 .../SimplifyLibCalls/strcpy-b16.ll|  18 ++
 .../SimplifyLibCalls/strcpy_chk-b16.ll|  30 +++
 .../SimplifyLibCalls/strlcpy-b16.ll   |  18 ++
 .../SimplifyLibCalls/strlen-b16.ll|  16 ++
 .../SimplifyLibCalls/strncat-b16.ll   |  20 ++
 .../SimplifyLibCalls/strncmp-b32.ll   |  34 +++
 .../SimplifyLibCalls/strncpy-b16.ll   |  43 
 .../SimplifyLibCalls/strndup-b16.ll   |  17 ++
 .../SimplifyLibCalls/strnlen-b16.ll   |  18 ++
 .../SimplifyLibCalls/wcslen-b16.ll|  19 ++
 llvm/test/Transforms/InstCombine/bcmp-1.ll|   2 +-
 llvm/test/Transforms/InstCombine/memcmp-1.ll  |   2 +-
 llvm/test/Transforms/InstCombine/strncmp-1.ll |   2 +-
 35 files changed, 930 insertions(+), 101 deletions(-)
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/fputs-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/fwrite-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memchr-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcmp-b32.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy_chk-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/mempcpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memrchr-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/memset-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy_chk-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpncpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcat-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strchr-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcmp-b32.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy_chk-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlcpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlen-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncat-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncmp-b32.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncpy-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strndup-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strnlen-b16.ll
 create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/wcslen-b16.ll

diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 76f

[llvm-branch-commits] [clang] [llvm] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (PR #114577)

2024-11-03 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114577

>From a931d1ad84429798fe01ec76dc77cd221f03d2d4 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Fri, 1 Nov 2024 12:39:52 -0400
Subject: [PATCH] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline

---
 clang/lib/CodeGen/BackendUtil.cpp | 22 +
 llvm/include/llvm/Passes/PassBuilder.h| 20 +++-
 llvm/lib/Passes/PassBuilderPipelines.cpp  | 24 +++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++---
 .../CodeGen/AMDGPU/print-pipeline-passes.ll   |  1 +
 llvm/tools/opt/NewPMDriver.cpp|  4 ++--
 6 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 47a30f00612eb7..70035a5e069a90 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
 
   // Ensure we lower KCFI operand bundles with -O0.
   PB.registerOptimizerLastEPCallback(
-  [&](ModulePassManager &MPM, OptimizationLevel Level) {
+  [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) {
 if (Level == OptimizationLevel::O0 &&
 LangOpts.Sanitize.has(SanitizerKind::KCFI))
   MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
@@ -693,8 +693,8 @@ static void addSanitizers(const Triple &TargetTriple, const LangOptions &LangOpts,
 static void addSanitizers(const Triple &TargetTriple,
   const CodeGenOptions &CodeGenOpts,
   const LangOptions &LangOpts, PassBuilder &PB) {
-  auto SanitizersCallback = [&](ModulePassManager &MPM,
-OptimizationLevel Level) {
+  auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level,
+ThinOrFullLTOPhase) {
 if (CodeGenOpts.hasSanitizeCoverage()) {
   auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
   MPM.addPass(SanitizerCoveragePass(
@@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple,
   };
   if (ClSanitizeOnOptimizerEarlyEP) {
 PB.registerOptimizerEarlyEPCallback(
-[SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
+[SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
   ModulePassManager NewMPM;
-  SanitizersCallback(NewMPM, Level);
+  SanitizersCallback(NewMPM, Level, Phase);
   if (!NewMPM.isEmpty()) {
 // Sanitizers can abandon<GlobalsAA>.
 NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
@@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 // TODO: Consider passing the MemoryProfileOutput to the pass builder via
 // the PGOOptions, and set this up there.
 if (!CodeGenOpts.MemoryProfileOutput.empty()) {
-  PB.registerOptimizerLastEPCallback(
-  [](ModulePassManager &MPM, OptimizationLevel Level) {
-MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-MPM.addPass(ModuleMemProfilerPass());
-  });
+  PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM,
+OptimizationLevel Level,
+ThinOrFullLTOPhase) {
+MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+MPM.addPass(ModuleMemProfilerPass());
+  });
 }
 
 if (CodeGenOpts.FatLTO) {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 565fd2ab2147e5..e7bc3a58f414f1 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -490,7 +490,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations before the function
   /// optimization pipeline.
   void registerOptimizerEarlyEPCallback(
-  const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+  const std::function<void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C) {
 OptimizerEarlyEPCallbacks.push_back(C);
   }
 
@@ -499,7 +500,8 @@ class PassBuilder {
   /// This extension point allows adding optimizations at the very end of the
   /// function optimization pipeline.
   void registerOptimizerLastEPCallback(
-  const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+  const std::function<void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C) {
 OptimizerLastEPCallbacks.push_back(C);
   }
 
@@ -630,9 +632,11 @@ class PassBuilder {
   void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
 OptimizationLevel Level);
   void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
-   OptimizationLevel Level);
+   OptimizationLevel Level,
+   ThinOrFullLTOPhase Phase);
   void invo

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)

2024-11-03 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114438

>From 79f88836a79e63069eb6b7b58fa376bcd2b32303 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 31 Oct 2024 12:49:07 -0400
Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor
 existing attribute

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 81 +++
 .../annotate-kernel-features-hsa-call.ll  | 46 ++-
 .../AMDGPU/attributor-loop-issue-58639.ll |  3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|  3 +-
 .../CodeGen/AMDGPU/propagate-waves-per-eu.ll  | 59 +++---
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |  9 ++-
 .../AMDGPU/uniform-work-group-multistep.ll|  3 +-
 .../uniform-work-group-recursion-test.ll  |  2 +-
 8 files changed, 113 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 376f2067582f32..7d51412730d4d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -201,6 +201,19 @@ class AMDGPUInformationCache : public InformationCache {
 return ST.getWavesPerEU(F, FlatWorkGroupSize);
   }
 
+  std::optional<std::pair<unsigned, unsigned>>
+  getWavesPerEUAttr(const Function &F) {
+auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
+   /*OnlyFirstRequired=*/true);
+if (!Val)
+  return std::nullopt;
+if (!Val->second) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  Val->second = ST.getMaxWavesPerEU();
+}
+return std::make_pair(Val->first, *(Val->second));
+  }
+
   std::pair<unsigned, unsigned>
   getEffectiveWavesPerEU(const Function &F,
  std::pair<unsigned, unsigned> WavesPerEU,
@@ -771,22 +784,6 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
-  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
- unsigned Max) {
-// Don't add the attribute if it's the implied default.
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
-  return ChangeStatus::UNCHANGED;
-
-Function *F = getAssociatedFunction();
-LLVMContext &Ctx = F->getContext();
-SmallString<10> Buffer;
-raw_svector_ostream OS(Buffer);
-OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
-return A.manifestAttrs(getIRPosition(),
-   {Attribute::get(Ctx, AttrName, OS.str())},
-   /*ForceReplace=*/true);
-  }
-
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -883,29 +880,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
   : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
 
-  bool isValidState() const override {
-return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
-  }
-
   void initialize(Attributor &A) override {
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 
-    if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
-*this, IRPosition::function(*F), DepClassTy::REQUIRED);
-AssumedGroupSize->isValidState()) {
+    auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
+  auto [Min, Max] = R;
+  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState RangeState(Range);
+  clampStateAndIndicateChange(this->getState(), RangeState);
+  indicateOptimisticFixpoint();
+};
 
-  unsigned Min, Max;
-  std::tie(Min, Max) = InfoCache.getWavesPerEU(
-  *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-   AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+    std::pair<unsigned, unsigned> MaxWavesPerEURange{
+1U, InfoCache.getMaxWavesPerEU(*F)};
 
-  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
-  intersectKnown(Range);
+// If the attribute exists, we will honor it if it is not the default.
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
+  if (*Attr != MaxWavesPerEURange) {
+TakeRange(*Attr);
+return;
+  }
 }
 
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
-  indicatePessimisticFixpoint();
+// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
+// calculation of waves per EU involves flat work group size, we can't
+// simply use an assumed flat work group size as a start point, because the
+// update of flat work group size is in an inverse direction of waves per
+// EU. However, we can still do something if it is an entry function. Since
+// an entry function is a terminal node, and flat work group size either
+// from attribute or default will be used anyway, we can take that value 
and
+// calculate the waves per EU based on it. This resu

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)

2024-11-03 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian created 
https://github.com/llvm/llvm-project/pull/114726

None

>From b7612eddae0b0808f82bb8bc2e6fd6e34361ae5c Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 3 Nov 2024 19:35:26 -0500
Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial
 state

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 105 +++---
 .../annotate-kernel-features-hsa-call.ll  |  46 
 .../AMDGPU/attributor-loop-issue-58639.ll |   3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|   3 +-
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |  11 +-
 .../AMDGPU/uniform-work-group-multistep.ll|   3 +-
 .../uniform-work-group-recursion-test.ll  |   2 +-
 7 files changed, 118 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7d51412730d4d5..85500b95eec1a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute
   if (!CallerInfo || !CallerInfo->isValidState())
 return false;
 
+  /// When the caller AA is in its initial state, the state remains valid
+  /// but awaits propagation. We skip processing in this case. Note that we
+  /// must return true since the state is still considered valid.
+  if (CallerInfo->isAtInitialState()) {
+LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+  << Caller->getName()
+  << " is still at initial state. Skip the update.\n");
+return true;
+  }
+
   Change |=
   clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
 
@@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
+  /// The initial state of `IntegerRangeState` represents an empty set, which
+  /// does not constitute a valid range. This empty state complicates
+  /// propagation, particularly for arithmetic operations like
+  /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
+  /// initial state during processing.
+  bool isAtInitialState() const {
+return isValidState() && getAssumed().isEmptySet();
+  }
+
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public 
AAAMDSizeRangeAttribute {
Attributor &A);
 
   ChangeStatus manifest(Attributor &A) override {
+if (isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] Still at initial state. No manifest.\n";);
+  return ChangeStatus::UNCHANGED;
+}
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 return emitAttributeIfNotDefaultAfterClamp(
@@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 ChangeStatus Change = ChangeStatus::UNCHANGED;
 
+Function *F = getAssociatedFunction();
+
+    const auto *AAFlatWorkGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+        *this, IRPosition::function(*F), DepClassTy::REQUIRED);
+if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
+  LLVM_DEBUG(
+  dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+if (AAFlatWorkGroupSize->isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] AAAMDFlatWorkGroupSize is still at initial "
+   "state. Skip the update.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+auto CurrentWorkGroupSize = std::make_pair(
+AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
+AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
+
+    auto DoUpdate = [&](std::pair<unsigned, unsigned> WavesPerEU,
+                        std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+  auto [Min, Max] =
+  InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
+  ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState IRS(CR);
+  Change |= clampStateAndIndicateChange(this->getState(), IRS);
+};
+
+// // We need to clamp once if we are not at initial state, because
+// // AAAMDFlatWorkGroupSize could be updated in last iteration.
+if (!isAtInitialState()) {
+  auto CurrentWavesPerEU =
+  std::make_pair(getAssumed().getLower().getZExtValue(),
+ getAssumed().getUpper().getZExtValue() - 1);
+  DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
+}
+
 auto CheckCallSite = [&](AbstractCallSite CS) {
   Function *Caller = CS.getInstruction()->getFunction();
-  Function *Func = getAssociatedF

[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-11-03 Thread Pengcheng Wang via llvm-branch-commits


@@ -315,967 +3233,10985 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
 ; CHECK-RV32:   # %bb.0: # %entry
 ; CHECK-RV32-NEXT:addi sp, sp, -16
 ; CHECK-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:li a2, 31
+; CHECK-RV32-NEXT:li a2, 31
+; CHECK-RV32-NEXT:call bcmp
+; CHECK-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:addi sp, sp, 16
+; CHECK-RV32-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBB:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBB-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-ZBB-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-ZBB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBKB:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-V:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-V-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-V-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-V-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-V-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-V-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:or a0, a4, a0
+; CHECK-UNALIGNED-RV64-NEXT:or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBB:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBKB:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a0, a4, a

[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-11-03 Thread Craig Topper via llvm-branch-commits


@@ -315,967 +3233,10985 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind 
optsize {
 ; CHECK-RV32:   # %bb.0: # %entry
 ; CHECK-RV32-NEXT:addi sp, sp, -16
 ; CHECK-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT:li a2, 31
+; CHECK-RV32-NEXT:li a2, 31
+; CHECK-RV32-NEXT:call bcmp
+; CHECK-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:addi sp, sp, 16
+; CHECK-RV32-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBB:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBB-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-ZBB-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-ZBB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBB-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBB-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-ZBKB:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-ZBKB-NEXT:ret
+;
+; CHECK-ALIGNED-RV64-V-LABEL: bcmp_size_31:
+; CHECK-ALIGNED-RV64-V:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, -16
+; CHECK-ALIGNED-RV64-V-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-ALIGNED-RV64-V-NEXT:li a2, 31
+; CHECK-ALIGNED-RV64-V-NEXT:call bcmp
+; CHECK-ALIGNED-RV64-V-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-ALIGNED-RV64-V-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-V-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-NEXT:or a0, a4, a0
+; CHECK-UNALIGNED-RV64-NEXT:or a0, a2, a0
+; CHECK-UNALIGNED-RV64-NEXT:snez a0, a0
+; CHECK-UNALIGNED-RV64-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBB:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a4, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:or a0, a2, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:snez a0, a0
+; CHECK-UNALIGNED-RV64-ZBB-NEXT:ret
+;
+; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_31:
+; CHECK-UNALIGNED-RV64-ZBKB:   # %bb.0: # %entry
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a3, 8(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a4, 16(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a0, 23(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a5, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a6, 8(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a7, 16(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:ld a1, 23(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a2, a2, a5
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a3, a3, a6
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a4, a4, a7
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:xor a0, a0, a1
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a2, a2, a3
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT:or a0, a4, a

[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)

2024-11-03 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka edited 
https://github.com/llvm/llvm-project/pull/114724
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (PR #114577)

2024-11-03 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/114577
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)

2024-11-03 Thread via llvm-branch-commits

https://github.com/wangleiat created 
https://github.com/llvm/llvm-project/pull/114742

This patch adds the desired feature flags in the JIT compiler to enable
hard-float instructions if the target supports them, allowing floats and
doubles to be used in lldb expressions.

Fixed tests:
lldb-shell :: Expr/TestAnonNamespaceParamFunc.cpp
lldb-shell :: Expr/TestIRMemoryMap.test
lldb-shell :: Expr/TestStringLiteralExpr.test
lldb-shell :: SymbolFile/DWARF/debug-types-expressions.test

Similar to #99336
Depends on: https://github.com/llvm/llvm-project/pull/114741
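
Not part of the patch: a minimal sketch of the idea (mapping the ELF ABI
flags to JIT target features). The helper name and exact feature strings
below are assumptions; the flag values mirror the ArchSpec hunk in the diff.

``cpp
#include <cstdint>
#include <string>

// Hypothetical mirror of the ArchSpec eflags added by this patch.
enum LoongArchEflags : uint32_t {
  eLoongArch_abi_soft_float = 0x0000,   // soft float
  eLoongArch_abi_single_float = 0x0001, // single-precision FP, +f
  eLoongArch_abi_double_float = 0x0002, // double-precision FP, +d
  eLoongArch_abi_mask = 0x0003,
};

// Sketch: derive LLVM target features from the ELF ABI flags so the JIT
// can emit hard-float instructions when the target supports them.
std::string FeaturesForLoongArch(uint32_t EFlags) {
  switch (EFlags & eLoongArch_abi_mask) {
  case eLoongArch_abi_single_float:
    return "+f";
  case eLoongArch_abi_double_float:
    return "+f,+d";
  default:
    return ""; // soft float: no FP features
  }
}
``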



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb][LoongArch] Function calls support in lldb expressions (PR #114742)

2024-11-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lldb

Author: wanglei (wangleiat)


Changes

This patch adds the desired feature flags in the JIT compiler to enable
hard-float instructions if the target supports them, allowing floats and
doubles to be used in lldb expressions.

Fixed tests:
lldb-shell :: Expr/TestAnonNamespaceParamFunc.cpp
lldb-shell :: Expr/TestIRMemoryMap.test
lldb-shell :: Expr/TestStringLiteralExpr.test
lldb-shell :: SymbolFile/DWARF/debug-types-expressions.test

Similar to #99336
Depends on: https://github.com/llvm/llvm-project/pull/114741


---

Patch is 31.85 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/114742.diff


7 Files Affected:

- (modified) lldb/include/lldb/Utility/ArchSpec.h (+9) 
- (modified) lldb/source/Plugins/ABI/CMakeLists.txt (+1-1) 
- (added) lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp (+664) 
- (added) lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.h (+104) 
- (added) lldb/source/Plugins/ABI/LoongArch/CMakeLists.txt (+12) 
- (modified) 
lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp (+21) 
- (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (+17) 


``diff
diff --git a/lldb/include/lldb/Utility/ArchSpec.h 
b/lldb/include/lldb/Utility/ArchSpec.h
index 5990f984b09e2d..2a74058673bae5 100644
--- a/lldb/include/lldb/Utility/ArchSpec.h
+++ b/lldb/include/lldb/Utility/ArchSpec.h
@@ -107,6 +107,15 @@ class ArchSpec {
 eRISCVSubType_riscv64,
   };
 
+  enum LoongArcheflags {
+eLoongArch_abi_soft_float = 0x0000, /// soft float
+eLoongArch_abi_single_float =
+0x0001, /// single precision floating point, +f
+eLoongArch_abi_double_float =
+0x0002, /// double precision floating point, +d
+eLoongArch_abi_mask = 0x0003,
+  };
+
   enum LoongArchSubType {
 eLoongArchSubType_unknown,
 eLoongArchSubType_loongarch32,
diff --git a/lldb/source/Plugins/ABI/CMakeLists.txt 
b/lldb/source/Plugins/ABI/CMakeLists.txt
index 9241a2487d522f..e33ac87354860e 100644
--- a/lldb/source/Plugins/ABI/CMakeLists.txt
+++ b/lldb/source/Plugins/ABI/CMakeLists.txt
@@ -1,4 +1,4 @@
-foreach(target AArch64 ARM ARC Hexagon Mips MSP430 PowerPC RISCV SystemZ X86)
+foreach(target AArch64 ARM ARC Hexagon LoongArch Mips MSP430 PowerPC RISCV 
SystemZ X86)
   if (${target} IN_LIST LLVM_TARGETS_TO_BUILD)
 add_subdirectory(${target})
   endif()
diff --git a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp 
b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp
new file mode 100644
index 00..cd8270c01113f7
--- /dev/null
+++ b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp
@@ -0,0 +1,664 @@
+//===-- 
ABISysV_loongarch.cpp--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "ABISysV_loongarch.h"
+
+#include 
+#include 
+#include 
+
+#include "llvm/IR/DerivedTypes.h"
+
+#include "Utility/LoongArch_DWARF_Registers.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Core/Value.h"
+#include "lldb/Target/RegisterContext.h"
+#include "lldb/Target/StackFrame.h"
+#include "lldb/Target/Thread.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/RegisterValue.h"
+#include "lldb/ValueObject/ValueObjectConstResult.h"
+
+#define DEFINE_REG_NAME(reg_num) ConstString(#reg_num).GetCString()
+#define DEFINE_REG_NAME_STR(reg_name) ConstString(reg_name).GetCString()
+
+// The ABI is not a source of such information as size, offset, encoding, etc.
+// of a register. Just provides correct dwarf and eh_frame numbers.
+
+#define DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, generic_num) 
\
+  {
\
+  DEFINE_REG_NAME(dwarf_num),  
\
+  DEFINE_REG_NAME_STR(str_name),   
\
+  0,   
\
+  0,   
\
+  eEncodingInvalid,
\
+  eFormatDefault,  
\
+  {dwarf_num, dwarf_num, generic_num, LLDB_INVALID_REGNUM, dwarf_num}, 
\
+  nullptr, 
\
+  nullptr, 
\
+  nullptr, 
\
+  }
+
+#define DEFINE_REGISTER_STUB(dwarf_num, str_name)  
\
+  DEFINE_GENERIC_REGISTER_STUB(dwarf_num, str_name, LLDB_INVALID_REGNUM

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)

2024-11-03 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/114745

None

>From 431e6371f161d0f85c598c789902976e3fa74162 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 30 Oct 2024 04:59:30 +
Subject: [PATCH] [CodeGen][NewPM] Port MachineCycleInfo to NPM

---
 .../llvm/CodeGen/MachineCycleAnalysis.h   | 18 ++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  3 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 34 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/X86/cycle-info.mir  |  2 ++
 7 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h 
b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
index 1888dd053ce65ee..f740a9599edf292 100644
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/GenericCycleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineSSAContext.h"
 
 namespace llvm {
@@ -46,6 +47,23 @@ class MachineCycleInfoWrapperPass : public 
MachineFunctionPass {
 //   version.
 bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
 
+class MachineCycleAnalysis : public AnalysisInfoMixin<MachineCycleAnalysis> {
+  friend AnalysisInfoMixin<MachineCycleAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  using Result = MachineCycleInfo;
+
+  Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+class MachineCycleInfoPrinterPass : public PassInfoMixin<MachineCycleInfoPrinterPass> {
+  raw_ostream &OS;
+  public:
+explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
+PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager 
&MFAM);
+};
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 54c070401ec8a40..b040e7c096d1f51 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -191,7 +191,7 @@ void initializeMachineCFGPrinterPass(PassRegistry &);
 void initializeMachineCSELegacyPass(PassRegistry &);
 void initializeMachineCombinerPass(PassRegistry &);
 void initializeMachineCopyPropagationPass(PassRegistry &);
-void initializeMachineCycleInfoPrinterPassPass(PassRegistry &);
+void initializeMachineCycleInfoPrinterLegacyPass(PassRegistry &);
 void initializeMachineCycleInfoWrapperPassPass(PassRegistry &);
 void initializeMachineDominanceFrontierPass(PassRegistry &);
 void initializeMachineDominatorTreeWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 9d12a120ff7ac6d..497bc97455a1b55 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -101,6 +101,7 @@ MACHINE_FUNCTION_ANALYSIS("live-vars", 
LiveVariablesAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-block-freq", 
MachineBlockFrequencyAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-branch-prob",
   MachineBranchProbabilityAnalysis())
+MACHINE_FUNCTION_ANALYSIS("machine-cycles", MachineCycleAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-opt-remark-emitter",
@@ -151,6 +152,7 @@ MACHINE_FUNCTION_PASS("print",
 MACHINE_FUNCTION_PASS("print",
   MachineDominatorTreePrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("print", MachineLoopPrinterPass(dbgs()))
+MACHINE_FUNCTION_PASS("print-machine-cycles", 
MachineCycleInfoPrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("print",
   MachinePostDominatorTreePrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs()))
@@ -241,7 +243,6 @@ DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", 
PostRASchedulerPass)
 DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass)
 DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass)
-DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", 
MachineCycleInfoPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", 
MachineUniformityInfoPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
 DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 39fba1d0b527ef6..adddb8daaa0e914 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -78,7 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMachineCSELegacyPass(Registry);
   ini
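
For readers new to the new pass manager, a minimal sketch of consuming the
ported analysis. The consumer pass below is hypothetical; only
`MachineCycleAnalysis` and its registration come from the patch.

``cpp
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachinePassManager.h"

using namespace llvm;

// Hypothetical NPM machine function pass that queries cycle info.
struct CycleConsumerPass : PassInfoMixin<CycleConsumerPass> {
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
    // Cached result; recomputed only when invalidated.
    MachineCycleInfo &MCI = MFAM.getResult<MachineCycleAnalysis>(MF);
    for (const MachineCycle *C : MCI.toplevel_cycles())
      (void)C; // inspect top-level cycles here
    return PreservedAnalyses::all(); // read-only pass: keep all analyses
  }
};
``

With the MachinePassRegistry.def hunk above, the printer should also be
reachable as `llc -passes=print-machine-cycles`.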

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)

2024-11-03 Thread Akshat Oke via llvm-branch-commits

optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests

* **#114746** (https://app.graphite.dev/github/pr/llvm/llvm-project/114746)
* **#114745** (https://app.graphite.dev/github/pr/llvm/llvm-project/114745) 👈
* **#114027** (https://app.graphite.dev/github/pr/llvm/llvm-project/114027)
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/



https://github.com/llvm/llvm-project/pull/114745
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)

2024-11-03 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/114746

None

>From 40df066d3c32cdeab9927787f201e0b8a72af0bb Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 4 Nov 2024 06:58:14 +
Subject: [PATCH] [CodeGen] Move EnableSinkAndFold to TargetOptions

---
 llvm/include/llvm/CodeGen/TargetPassConfig.h | 8 
 llvm/include/llvm/Target/TargetOptions.h | 8 +++-
 llvm/lib/CodeGen/MachineSink.cpp | 5 -
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +-
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 5 +++--
 5 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h 
b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 2f5951e3ec3bce..b395774b14c441 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -131,11 +131,6 @@ class TargetPassConfig : public ImmutablePass {
   /// Default setting for -enable-tail-merge on this target.
   bool EnableTailMerge = true;
 
-  /// Enable sinking of instructions in MachineSink where a computation can be
-  /// folded into the addressing mode of a memory load/store instruction or
-  /// replace a copy.
-  bool EnableSinkAndFold = false;
-
   /// Require processing of functions such that callees are generated before
   /// callers.
   bool RequireCodeGenSCCOrder = false;
@@ -198,9 +193,6 @@ class TargetPassConfig : public ImmutablePass {
   bool getEnableTailMerge() const { return EnableTailMerge; }
   void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); }
 
-  bool getEnableSinkAndFold() const { return EnableSinkAndFold; }
-  void setEnableSinkAndFold(bool Enable) { setOpt(EnableSinkAndFold, Enable); }
-
   bool requiresCodeGenSCCOrder() const { return RequireCodeGenSCCOrder; }
   void setRequiresCodeGenSCCOrder(bool Enable = true) {
 setOpt(RequireCodeGenSCCOrder, Enable);
diff --git a/llvm/include/llvm/Target/TargetOptions.h 
b/llvm/include/llvm/Target/TargetOptions.h
index 88f253805ca99c..b16ad5b69ff05a 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -137,7 +137,8 @@ namespace llvm {
   ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false),
   HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
   GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
-  EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
+  EnableSinkAndFold(false), EnableFastISel(false),
+  EnableGlobalISel(false), UseInitArray(false),
   DisableIntegratedAS(false), FunctionSections(false),
   DataSections(false), IgnoreXCOFFVisibility(false),
   XCOFFTracebackTable(true), UniqueSectionNames(true),
@@ -239,6 +240,11 @@ namespace llvm {
 /// they were generated. Default is true.
 unsigned StackSymbolOrdering : 1;
 
+/// EnableSinkAndFold - Enable sinking of instructions in MachineSink where
+/// a computation can be folded into the addressing mode of a memory
+/// load/store instruction or replace a copy.
+unsigned EnableSinkAndFold : 1;
+
 /// EnableFastISel - This flag enables fast-path instruction selection
 /// which trades away generated code quality in favor of reducing
 /// compile time.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index a0e09398602e9e..a3a6b24f9be2d1 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include 
 #include 
 #include 
@@ -729,7 +730,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction 
&MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
-  EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
+  auto &TM = PassConfig->getTM<TargetMachine>();
+  EnableSinkAndFold = TM.Options.EnableSinkAndFold;
+  // EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
 
   bool EverMadeChange = false;
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp 
b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c7bd0390b65620..b077f85fc760b8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -505,7 +505,7 @@ class AArch64PassConfig : public TargetPassConfig {
   : TargetPassConfig(TM, PM) {
 if (TM.getOptLevel() != CodeGenOptLevel::None)
   substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
-setEnableSinkAndFold(EnableSinkFold);
+getAArch64TargetMachine().Options.EnableSinkAndFold = true;
   }
 
   AArch64TargetMachine &getAArch64TargetMachine() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp 
b/llvm/lib/Target/RISCV/RISC

[llvm-branch-commits] [llvm] [CodeGen] Move EnableSinkAndFold to TargetOptions (PR #114746)

2024-11-03 Thread Akshat Oke via llvm-branch-commits

optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests

* **#114746** (https://app.graphite.dev/github/pr/llvm/llvm-project/114746) 👈
* **#114745** (https://app.graphite.dev/github/pr/llvm/llvm-project/114745)
* **#114027** (https://app.graphite.dev/github/pr/llvm/llvm-project/114027)
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/



https://github.com/llvm/llvm-project/pull/114746
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port MachineCycleInfo to NPM (PR #114745)

2024-11-03 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff dbedca0302d5624554ed7ff4b45c019ad5972166 
431e6371f161d0f85c598c789902976e3fa74162 --extensions h,cpp -- 
llvm/include/llvm/CodeGen/MachineCycleAnalysis.h 
llvm/include/llvm/InitializePasses.h llvm/lib/CodeGen/CodeGen.cpp 
llvm/lib/CodeGen/MachineCycleAnalysis.cpp llvm/lib/Passes/PassBuilder.cpp
``





View the diff from clang-format here.


``diff
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h 
b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
index f740a9599e..64cf30e6dd 100644
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -57,11 +57,14 @@ public:
   Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
 };
 
-class MachineCycleInfoPrinterPass : public PassInfoMixin<MachineCycleInfoPrinterPass> {
+class MachineCycleInfoPrinterPass
+    : public PassInfoMixin<MachineCycleInfoPrinterPass> {
   raw_ostream &OS;
-  public:
-explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
-PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager 
&MFAM);
+
+public:
+  explicit MachineCycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp 
b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
index 82d7aadaca..6e58439960 100644
--- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -56,7 +56,9 @@ void MachineCycleInfoWrapperPass::releaseMemory() {
 
 AnalysisKey MachineCycleAnalysis::Key;
 
-MachineCycleInfo MachineCycleAnalysis::run(MachineFunction &MF, 
MachineFunctionAnalysisManager &MFAM) {
+MachineCycleInfo
+MachineCycleAnalysis::run(MachineFunction &MF,
+  MachineFunctionAnalysisManager &MFAM) {
   MachineCycleInfo MCI;
   MCI.compute(MF);
   return MCI;
@@ -99,7 +101,9 @@ bool 
MachineCycleInfoPrinterLegacy::runOnMachineFunction(MachineFunction &F) {
   return false;
 }
 
-PreservedAnalyses MachineCycleInfoPrinterPass::run(MachineFunction &MF, 
MachineFunctionAnalysisManager &MFAM) {
+PreservedAnalyses
+MachineCycleInfoPrinterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
   OS << "MachineCycleInfo for function: " << MF.getName() << "\n";
 
   auto &MCI = MFAM.getResult<MachineCycleAnalysis>(MF);

``




https://github.com/llvm/llvm-project/pull/114745
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)

2024-11-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes

In C++ it's UB to convert a value outside an enum's range of
representable values to that enum type, and supporting
`__ATOMIC_HLE_ACQUIRE` and `__ATOMIC_HLE_RELEASE` requires such values.

The internal implementation was switched to `enum class`, where that
behavior is well-defined. But the interface is C, so we just switch
to `int`.


---

Patch is 32.28 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/114724.diff


4 Files Affected:

- (modified) compiler-rt/include/sanitizer/tsan_interface_atomic.h (+82-87) 
- (modified) compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp (+59-57) 
- (modified) compiler-rt/lib/tsan/rtl/tsan_interface.h (+8-8) 
- (modified) compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp (+46-32) 


``diff
diff --git a/compiler-rt/include/sanitizer/tsan_interface_atomic.h 
b/compiler-rt/include/sanitizer/tsan_interface_atomic.h
index de3a1c3936097d..74ed91efade040 100644
--- a/compiler-rt/include/sanitizer/tsan_interface_atomic.h
+++ b/compiler-rt/include/sanitizer/tsan_interface_atomic.h
@@ -43,183 +43,178 @@ typedef enum {
 } __tsan_memory_order;
 
 __tsan_atomic8 SANITIZER_CDECL
-__tsan_atomic8_load(const volatile __tsan_atomic8 *a, __tsan_memory_order mo);
+__tsan_atomic8_load(const volatile __tsan_atomic8 *a, int mo);
 __tsan_atomic16 SANITIZER_CDECL
-__tsan_atomic16_load(const volatile __tsan_atomic16 *a, __tsan_memory_order 
mo);
+__tsan_atomic16_load(const volatile __tsan_atomic16 *a, int mo);
 __tsan_atomic32 SANITIZER_CDECL
-__tsan_atomic32_load(const volatile __tsan_atomic32 *a, __tsan_memory_order 
mo);
+__tsan_atomic32_load(const volatile __tsan_atomic32 *a, int mo);
 __tsan_atomic64 SANITIZER_CDECL
-__tsan_atomic64_load(const volatile __tsan_atomic64 *a, __tsan_memory_order 
mo);
+__tsan_atomic64_load(const volatile __tsan_atomic64 *a, int mo);
 #if __TSAN_HAS_INT128
-__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_load(
-const volatile __tsan_atomic128 *a, __tsan_memory_order mo);
+__tsan_atomic128 SANITIZER_CDECL
+__tsan_atomic128_load(const volatile __tsan_atomic128 *a, int mo);
 #endif
 
 void SANITIZER_CDECL __tsan_atomic8_store(volatile __tsan_atomic8 *a,
-  __tsan_atomic8 v,
-  __tsan_memory_order mo);
+  __tsan_atomic8 v, int mo);
 void SANITIZER_CDECL __tsan_atomic16_store(volatile __tsan_atomic16 *a,
-   __tsan_atomic16 v,
-   __tsan_memory_order mo);
+   __tsan_atomic16 v, int mo);
 void SANITIZER_CDECL __tsan_atomic32_store(volatile __tsan_atomic32 *a,
-   __tsan_atomic32 v,
-   __tsan_memory_order mo);
+   __tsan_atomic32 v, int mo);
 void SANITIZER_CDECL __tsan_atomic64_store(volatile __tsan_atomic64 *a,
-   __tsan_atomic64 v,
-   __tsan_memory_order mo);
+   __tsan_atomic64 v, int mo);
 #if __TSAN_HAS_INT128
 void SANITIZER_CDECL __tsan_atomic128_store(volatile __tsan_atomic128 *a,
-__tsan_atomic128 v,
-__tsan_memory_order mo);
+__tsan_atomic128 v, int mo);
 #endif
 
-__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_exchange(
-volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
+__tsan_atomic8 SANITIZER_CDECL
+__tsan_atomic8_exchange(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
 __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_exchange(
-volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
+volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
 __tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_exchange(
-volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
+volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
 __tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_exchange(
-volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
+volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
 #if __TSAN_HAS_INT128
 __tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_exchange(
-volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
+volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
 #endif
 
-__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_add(
-volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
+__tsan_atomic8 SANITIZER_CDECL
+__tsan_atomic8_fetch_add(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
 __tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_add(
-volatile __tsan_atomic16 

[llvm-branch-commits] [tsan] Don't use `enum __tsan_memory_order` in tsan interface (PR #114724)

2024-11-03 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/114724

In C++ it's UB to convert a value outside an enum's range of
representable values to that enum type, and supporting
`__ATOMIC_HLE_ACQUIRE` and `__ATOMIC_HLE_RELEASE` requires such values.

The internal implementation was switched to `enum class`, where that
behavior is well-defined. But the interface is C, so we just switch
to `int`.
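
Not part of the patch: a standalone sketch of the pitfall, with stand-in
names (the real motivation is `__ATOMIC_HLE_ACQUIRE`, which is 0x10000).

``cpp
#include <cstdio>

// Stand-in for a C-style memory order enum: the enumerators fit in
// 3 bits, so the enum's range of representable values is only 0..7.
typedef enum {
  mo_relaxed = 0,
  mo_acquire = 2,
  mo_seq_cst = 5,
} memory_order_c;

const int kHleAcquire = 0x10000; // stand-in for __ATOMIC_HLE_ACQUIRE

// Taking plain `int`, as the patched interface does, is always
// well-defined. Converting 0x10002 to memory_order_c would be UB in
// C++; an `enum class` has a fixed underlying type, which is why the
// internal implementation tolerates such values.
void atomic_op(int mo) {
  std::printf("order=%d hle=%d\n", mo & 0xff, (mo & kHleAcquire) != 0);
}

int main() {
  atomic_op(mo_acquire);               // a declared ordering value
  atomic_op(mo_acquire | kHleAcquire); // ordering | HLE bit: fine as int
}
``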



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits