[llvm-branch-commits] [libcxx] release/21.x: [libc++] Add checks for misused hardening macros (#150669) (PR #151582)

2025-08-10 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 approved this pull request.


https://github.com/llvm/llvm-project/pull/151582
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/21.x: [libc++] Add checks for misused hardening macros (#150669) (PR #151582)

2025-08-10 Thread Nikolas Klauser via llvm-branch-commits

philnik777 wrote:

> Who can review?
> 
> btw what's up with the buildkite ci never ending?

I don't know; I've pinged the owner. I don't think it should be blocking though.

https://github.com/llvm/llvm-project/pull/151582
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Renato Golin via llvm-branch-commits

https://github.com/rengolin commented:

Looks good, some nits, but I'll let others review and approve.

https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Renato Golin via llvm-branch-commits


@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2: 
tensor) -> tenso
 }
 // CHECK-LABEL: func @detensor_op_sequence
 // CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor)
-// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]]
+// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]]
 // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]]
-// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]], 
%[[arg2_val]]
-// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]], 
%[[detensored_res]]
+// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]]
+// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]], 
%[[arg2_val]]

rengolin wrote:

CHECK-DAG can come in any order, but the op here specifies them in a particular 
order, and the two `arg1` vals have the same pattern. This may randomly fail.

https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Renato Golin via llvm-branch-commits


@@ -551,8 +567,22 @@ struct LinalgDetensorize
 populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
 
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+  opsToDetensor.erase(op);
+  detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+  for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+  }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),

rengolin wrote:

Since the callback doesn't return anything, it won't change the success/failure 
result, so this should be fine.

https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Renato Golin via llvm-branch-commits

https://github.com/rengolin edited 
https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 90c9271 - Revert "[PatternMatch] Add `m_[Shift]OrSelf` matchers. (#152924)"

2025-08-10 Thread via llvm-branch-commits

Author: Yingwei Zheng
Date: 2025-08-11T11:38:27+08:00
New Revision: 90c9271c1304f5cf1f57f397a6cb9fce9649ed96

URL: 
https://github.com/llvm/llvm-project/commit/90c9271c1304f5cf1f57f397a6cb9fce9649ed96
DIFF: 
https://github.com/llvm/llvm-project/commit/90c9271c1304f5cf1f57f397a6cb9fce9649ed96.diff

LOG: Revert "[PatternMatch] Add `m_[Shift]OrSelf` matchers. (#152924)"

This reverts commit 1c499351d682aa46c2f087a6f757d22b01d18aa7.

Added: 


Modified: 
llvm/include/llvm/IR/PatternMatch.h
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/unittests/IR/PatternMatch.cpp

Removed: 




diff  --git a/llvm/include/llvm/IR/PatternMatch.h 
b/llvm/include/llvm/IR/PatternMatch.h
index 76482ad47c771..27c5d5ca08cd6 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1327,45 +1327,6 @@ inline BinaryOp_match 
m_AShr(const LHS &L,
   return BinaryOp_match(L, R);
 }
 
-template  struct ShiftLike_match {
-  LHS_t L;
-  uint64_t &R;
-
-  ShiftLike_match(const LHS_t &LHS, uint64_t &RHS) : L(LHS), R(RHS) {}
-
-  template  bool match(OpTy *V) const {
-if (auto *Op = dyn_cast(V)) {
-  if (Op->getOpcode() == Opcode)
-return m_ConstantInt(R).match(Op->getOperand(1)) &&
-   L.match(Op->getOperand(0));
-}
-// Interpreted as shiftop V, 0
-R = 0;
-return L.match(V);
-  }
-};
-
-/// Matches shl L, ConstShAmt or L itself.
-template 
-inline ShiftLike_match m_ShlOrSelf(const LHS &L,
-  uint64_t &R) {
-  return ShiftLike_match(L, R);
-}
-
-/// Matches lshr L, ConstShAmt or L itself.
-template 
-inline ShiftLike_match m_LShrOrSelf(const LHS &L,
-uint64_t &R) {
-  return ShiftLike_match(L, R);
-}
-
-/// Matches ashr L, ConstShAmt or L itself.
-template 
-inline ShiftLike_match m_AShrOrSelf(const LHS &L,
-uint64_t &R) {
-  return ShiftLike_match(L, R);
-}
-
 template 
 struct OverflowingBinaryOp_match {

diff  --git 
a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp 
b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index e3c31f96f86d9..40a7f8043034e 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -617,7 +617,7 @@ struct LoadOps {
   LoadInst *RootInsert = nullptr;
   bool FoundRoot = false;
   uint64_t LoadSize = 0;
-  uint64_t Shift = 0;
+  const APInt *Shift = nullptr;
   Type *ZextType;
   AAMDNodes AATags;
 };
@@ -627,15 +627,17 @@ struct LoadOps {
 // (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
 static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
AliasAnalysis &AA) {
-  uint64_t ShAmt2;
+  const APInt *ShAmt2 = nullptr;
   Value *X;
   Instruction *L1, *L2;
 
   // Go to the last node with loads.
-  if (match(V, m_OneUse(m_c_Or(m_Value(X),
-   m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(
-m_Instruction(L2,
-ShAmt2)) {
+  if (match(V, m_OneUse(m_c_Or(
+   m_Value(X),
+   
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2,
+  m_APInt(ShAmt2)) ||
+  match(V, m_OneUse(m_Or(m_Value(X),
+ m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2 
{
 if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
   // Avoid Partial chain merge.
   return false;
@@ -644,10 +646,11 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, 
const DataLayout &DL,
 
   // Check if the pattern has loads
   LoadInst *LI1 = LOps.Root;
-  uint64_t ShAmt1 = LOps.Shift;
+  const APInt *ShAmt1 = LOps.Shift;
   if (LOps.FoundRoot == false &&
-  match(X, m_OneUse(m_ShlOrSelf(
-   m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1, ShAmt1 {
+  (match(X, m_OneUse(m_ZExt(m_Instruction(L1 ||
+   match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1,
+   m_APInt(ShAmt1)) {
 LI1 = dyn_cast(L1);
   }
   LoadInst *LI2 = dyn_cast(L2);
@@ -723,6 +726,13 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, 
const DataLayout &DL,
   if (IsBigEndian)
 std::swap(ShAmt1, ShAmt2);
 
+  // Find Shifts values.
+  uint64_t Shift1 = 0, Shift2 = 0;
+  if (ShAmt1)
+Shift1 = ShAmt1->getZExtValue();
+  if (ShAmt2)
+Shift2 = ShAmt2->getZExtValue();
+
   // First load is always LI1. This is where we put the new load.
   // Use the merged load size available from LI1 for forward loads.
   

[llvm-branch-commits] [llvm] 5bb34ba - Revert "[NFC][MC] Removed unused switch case in `emitCATTR` (#152907)"

2025-08-10 Thread via llvm-branch-commits

Author: Abhishek Kaushik
Date: 2025-08-10T22:19:26+05:30
New Revision: 5bb34babaeb25cd133f3c1af79a5c7bb069c49d9

URL: 
https://github.com/llvm/llvm-project/commit/5bb34babaeb25cd133f3c1af79a5c7bb069c49d9
DIFF: 
https://github.com/llvm/llvm-project/commit/5bb34babaeb25cd133f3c1af79a5c7bb069c49d9.diff

LOG: Revert "[NFC][MC] Removed unused switch case in `emitCATTR` (#152907)"

This reverts commit 9247b51a02b5cf92195f6cd6e82e392619ae5235.

Added: 


Modified: 
llvm/lib/MC/MCAsmInfoGOFF.cpp

Removed: 




diff  --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp
index d35020429e346..0a5d1927b1a22 100644
--- a/llvm/lib/MC/MCAsmInfoGOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -71,6 +71,8 @@ static void emitCATTR(raw_ostream &OS, StringRef Name, 
GOFF::ESDRmode Rmode,
 case GOFF::ESD_RMODE_64:
   OS << "64";
   break;
+case GOFF::ESD_RMODE_None:
+  break;
 }
 OS << ')';
   }



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)

2025-08-10 Thread Kevin Sala Penades via llvm-branch-commits


@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t 
Bytes) {
   memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
 }
 
+struct DynCGroupMemTy {
+  void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
+Size = 0;
+Ptr = nullptr;
+IsFallback = false;

kevinsala wrote:

With the `clang::loader_uninitialized` attribute on the `DynCGroupMem` 
variable, I can't use field initializers or a constructor.

https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)

2025-08-10 Thread Kevin Sala Penades via llvm-branch-commits

https://github.com/kevinsala updated 
https://github.com/llvm/llvm-project/pull/152831

>From fa3c7425ae9e5ffea83841f2be61b0f494b99038 Mon Sep 17 00:00:00 2001
From: Kevin Sala 
Date: Fri, 8 Aug 2025 11:25:14 -0700
Subject: [PATCH 1/2] [OpenMP][Offload] Add offload runtime support for
 dyn_groupprivate clause

---
 offload/DeviceRTL/include/DeviceTypes.h   |   4 +
 offload/DeviceRTL/include/Interface.h |   2 +-
 offload/DeviceRTL/include/State.h |   2 +-
 offload/DeviceRTL/src/Kernel.cpp  |  14 +-
 offload/DeviceRTL/src/State.cpp   |  48 +-
 offload/include/Shared/APITypes.h |   6 +-
 offload/include/Shared/Environment.h  |   4 +-
 offload/include/device.h  |   3 +
 offload/include/omptarget.h   |   7 +-
 offload/libomptarget/OpenMP/API.cpp   |  14 ++
 offload/libomptarget/device.cpp   |   6 +
 offload/libomptarget/exports  |   1 +
 .../amdgpu/dynamic_hsa/hsa_ext_amd.h  |   1 +
 offload/plugins-nextgen/amdgpu/src/rtl.cpp|  34 +++--
 .../common/include/PluginInterface.h  |  33 +++-
 .../common/src/PluginInterface.cpp|  86 ---
 .../plugins-nextgen/cuda/dynamic_cuda/cuda.h  |   1 +
 offload/plugins-nextgen/cuda/src/rtl.cpp  |  37 +++--
 offload/plugins-nextgen/host/src/rtl.cpp  |   4 +-
 .../offloading/dyn_groupprivate_strict.cpp| 141 ++
 openmp/runtime/src/include/omp.h.var  |  10 ++
 openmp/runtime/src/kmp_csupport.cpp   |   9 ++
 openmp/runtime/src/kmp_stub.cpp   |  16 ++
 23 files changed, 418 insertions(+), 65 deletions(-)
 create mode 100644 offload/test/offloading/dyn_groupprivate_strict.cpp

diff --git a/offload/DeviceRTL/include/DeviceTypes.h 
b/offload/DeviceRTL/include/DeviceTypes.h
index 2e5d92380f040..a43b506d6879e 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
 
 ///}
 
+enum omp_access_t {
+  omp_access_cgroup = 0,
+};
+
 #endif
diff --git a/offload/DeviceRTL/include/Interface.h 
b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4..672afea206785 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
 int8_t __kmpc_is_spmd_exec_mode();
 
 int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
-   KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+   KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
 
 void __kmpc_target_deinit();
 
diff --git a/offload/DeviceRTL/include/State.h 
b/offload/DeviceRTL/include/State.h
index db396dae6e445..17c3c6f2d3e42 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -116,7 +116,7 @@ extern Local ThreadStates;
 
 /// Initialize the state machinery. Must be called by all threads.
 void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
-  KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+  KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
 
 /// Return the kernel and kernel launch environment associated with the current
 /// kernel. The former is static and contains compile time information that
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..58e9a09105a76 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
 };
 
 static void
-inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
-KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+  KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
   // Order is important here.
   synchronize::init(IsSPMD);
   mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
 /// \param Ident   Source location identification, can be NULL.
 ///
 int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
-   KernelLaunchEnvironmentTy &KernelLaunchEnvironment) 
{
+   KernelLaunchEnvironmentTy *KernelLaunchEnvironment) 
{
   ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
   bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
   bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
   if (IsSPMD) {
-inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
-KernelLaunchEnvironment);
+initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+  KernelLaunchEnvironment);
 synchronize::threadsAligned(atomic::relaxed);
   } else {
-inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
-KernelLaunchEnv

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-10 Thread Florian Hahn via llvm-branch-commits


@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
   // Map to keep track of created compares, The key is the pair of operands for
   // the compare, to allow detecting and re-using redundant compares.
   DenseMap, Value *> SeenCompares;
-  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+  Value *AliasLaneMask = nullptr;
+  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
 Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that 
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
-  continue;
+if (!VF.isScalar() && UseSafeEltsMask) {
+  Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+  Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);

fhahn wrote:

It doesn't look like there's much re-use going on here. Better to have a 
separate function than complicate the logic here?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-10 Thread Florian Hahn via llvm-branch-commits


@@ -479,7 +479,11 @@ class LoopVectorizationPlanner {
   /// Build VPlans for the specified \p UserVF and \p UserIC if they are
   /// non-zero or all applicable candidate VFs otherwise. If vectorization and
   /// interleaving should be avoided up-front, no plans are generated.
-  void plan(ElementCount UserVF, unsigned UserIC);
+  /// DiffChecks is a list of pointer pairs that should be checked for 
aliasing,

fhahn wrote:

Better to add the new recipes at the same place where the runtime checks are 
attached to VPlan, rather than threading through the general planning stage: 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp#L9359C32-L9359C51

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-10 Thread Florian Hahn via llvm-branch-commits


@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
   PartialReductionExtendKind OpBExtend, std::optional BinOp,
   TTI::TargetCostKind CostKind) const;
 
+  /// \return true if a mask should be formed that disables lanes that could
+  /// alias between two pointers. The mask is created by the
+  /// loop_dependence_{war,raw}_mask intrinsics.
+  LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;

fhahn wrote:

Could we just check the cost of the new intrinsic using the existing hooks?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Implement isLegalAddressingMode for lsx/lasx (PR #151917)

2025-08-10 Thread via llvm-branch-commits

https://github.com/zhaoqi5 converted_to_draft 
https://github.com/llvm/llvm-project/pull/151917
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)

2025-08-10 Thread Matt Arsenault via llvm-branch-commits


@@ -556,8 +563,45 @@ Error GenericKernelTy::launch(GenericDeviceTy 
&GenericDevice, void **ArgPtrs,
   llvm::SmallVector Args;
   llvm::SmallVector Ptrs;
 
+  uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0],
+KernelArgs.ThreadLimit[1],
+KernelArgs.ThreadLimit[2]};
+  uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
+   KernelArgs.NumTeams[2]};
+  if (!isBareMode()) {
+NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
+NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
+NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
+  }
+
+  uint32_t MaxBlockMemSize = GenericDevice.getMaxBlockSharedMemSize();
+  uint32_t DynBlockMemSize = KernelArgs.DynCGroupMem;
+  uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
+  if (StaticBlockMemSize > MaxBlockMemSize)
+return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Static block memory size exceeds maximum");

arsenm wrote:

Error messages should start with a lowercase letter

https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)

2025-08-10 Thread Matt Arsenault via llvm-branch-commits


@@ -556,8 +563,45 @@ Error GenericKernelTy::launch(GenericDeviceTy 
&GenericDevice, void **ArgPtrs,
   llvm::SmallVector Args;
   llvm::SmallVector Ptrs;
 
+  uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0],
+KernelArgs.ThreadLimit[1],
+KernelArgs.ThreadLimit[2]};
+  uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
+   KernelArgs.NumTeams[2]};
+  if (!isBareMode()) {
+NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
+NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
+NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
+  }
+
+  uint32_t MaxBlockMemSize = GenericDevice.getMaxBlockSharedMemSize();
+  uint32_t DynBlockMemSize = KernelArgs.DynCGroupMem;
+  uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
+  if (StaticBlockMemSize > MaxBlockMemSize)
+return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Static block memory size exceeds maximum");
+  else if (!KernelArgs.Flags.AllowDynCGroupMemFallback &&
+   TotalBlockMemSize > MaxBlockMemSize)
+return Plugin::error(
+ErrorCode::INVALID_ARGUMENT,
+"Static and dynamic block memory size exceeds maximum");

arsenm wrote:

Error messages should start with a lowercase letter

https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer edited 
https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-linalg

Author: Matthias Springer (matthias-springer)


Changes

The pass used to access erased operations and block arguments in the type 
converter. That is no longer supported in the new conversion driver.


---
Full diff: https://github.com/llvm/llvm-project/pull/152912.diff


2 Files Affected:

- (modified) mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp (+32-2) 
- (modified) mlir/test/Dialect/Linalg/detensorize_0d.mlir (+4-3) 


``diff
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 830905495e759..221f95a8d8f33 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -458,6 +458,22 @@ struct LinalgDetensorize
 }
   };
 
+  /// A listener that forwards notifyBlockErased and notifyOperationErased to
+  /// the given callbacks.
+  struct CallbackListener : public RewriterBase::Listener {
+CallbackListener(std::function onOperationErased,
+ std::function onBlockErased)
+: onOperationErased(onOperationErased), onBlockErased(onBlockErased) {}
+
+void notifyBlockErased(Block *block) override { onBlockErased(block); }
+void notifyOperationErased(Operation *op) override {
+  onOperationErased(op);
+}
+
+std::function onOperationErased;
+std::function onBlockErased;
+  };
+
   void runOnOperation() override {
 MLIRContext *context = &getContext();
 DetensorizeTypeConverter typeConverter;
@@ -551,8 +567,22 @@ struct LinalgDetensorize
 populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
 
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+  opsToDetensor.erase(op);
+  detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+  for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+  }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),
+   config)))
   signalPassFailure();
 
 RewritePatternSet canonPatterns(context);
diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir 
b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
index 74931cb0830bc..5c29b04630cad 100644
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2: 
tensor) -> tenso
 }
 // CHECK-LABEL: func @detensor_op_sequence
 // CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor)
-// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]]
+// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]]
 // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]]
-// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]], 
%[[arg2_val]]
-// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]], 
%[[detensored_res]]
+// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]]
+// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]], 
%[[arg2_val]]
+// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val_1]], 
%[[detensored_res]]
 // CHECK: %[[detensored_res3:.*]] = arith.divf %[[detensored_res]], 
%[[detensored_res2]]
 // CHECK: %[[new_tensor_res:.*]] = tensor.from_elements 
%[[detensored_res3]]
 // CHECK: return %[[new_tensor_res]]

``




https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)

2025-08-10 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer created 
https://github.com/llvm/llvm-project/pull/152912

The pass used to access erased operations and block arguments in the type 
converter. That is no longer supported in the new conversion driver.


>From c2e90f3a39148223619497eeff16ed810e3cab95 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sun, 10 Aug 2025 11:41:51 +
Subject: [PATCH] [mlir][linalg] Migrate Detensorize pass to new dialect
 conversion driver

---
 .../Dialect/Linalg/Transforms/Detensorize.cpp | 34 +--
 mlir/test/Dialect/Linalg/detensorize_0d.mlir  |  7 ++--
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 830905495e759..221f95a8d8f33 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -458,6 +458,22 @@ struct LinalgDetensorize
 }
   };
 
+  /// A listener that forwards notifyBlockErased and notifyOperationErased to
+  /// the given callbacks.
+  struct CallbackListener : public RewriterBase::Listener {
+CallbackListener(std::function onOperationErased,
+ std::function onBlockErased)
+: onOperationErased(onOperationErased), onBlockErased(onBlockErased) {}
+
+void notifyBlockErased(Block *block) override { onBlockErased(block); }
+void notifyOperationErased(Operation *op) override {
+  onOperationErased(op);
+}
+
+std::function onOperationErased;
+std::function onBlockErased;
+  };
+
   void runOnOperation() override {
 MLIRContext *context = &getContext();
 DetensorizeTypeConverter typeConverter;
@@ -551,8 +567,22 @@ struct LinalgDetensorize
 populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
 
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+  opsToDetensor.erase(op);
+  detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+  for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+  }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),
+   config)))
   signalPassFailure();
 
 RewritePatternSet canonPatterns(context);
diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir 
b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
index 74931cb0830bc..5c29b04630cad 100644
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2: 
tensor) -> tenso
 }
 // CHECK-LABEL: func @detensor_op_sequence
 // CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor)
-// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]]
+// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]]
 // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]]
-// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]], 
%[[arg2_val]]
-// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]], 
%[[detensored_res]]
+// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]]
+// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]], 
%[[arg2_val]]
+// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val_1]], 
%[[detensored_res]]
 // CHECK: %[[detensored_res3:.*]] = arith.divf %[[detensored_res]], 
%[[detensored_res2]]
 // CHECK: %[[new_tensor_res:.*]] = tensor.from_elements 
%[[detensored_res3]]
 // CHECK: return %[[new_tensor_res]]

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/152921

Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168

Requested by: @dtcxzyw

>From 7ef6f5bdc487cd277fcfa4ac3b4f812f657bbb66 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Sun, 10 Aug 2025 22:55:04 +0800
Subject: [PATCH] [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913)

Closes https://github.com/llvm/llvm-project/issues/152824.

(cherry picked from commit d8b1b46cd39c91830bcf49ed91d80f38f78c2168)
---
 .../IR/GenericFloatingPointPredicateUtils.h   | 24 +++--
 .../InstSimplify/floating-point-arithmetic.ll | 21 +--
 llvm/unittests/Analysis/ValueTrackingTest.cpp | 36 +++
 3 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h 
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template  class 
GenericFloatingPointPredicateUtils {
   if (Mode.Input != DenormalMode::IEEE)
 return {Invalid, fcAllFlags, fcAllFlags};
 
+  auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+  Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+  };
+
   switch (Pred) {
   case FCmpInst::FCMP_OEQ: // Match x == 0.0
 return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template  class 
GenericFloatingPointPredicateUtils {
   case FCmpInst::FCMP_UNO:
 return exactClass(Src, fcNan);
   case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
   case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | 
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
   case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
   case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
   case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
   case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | 
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
   case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
   case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
   default:
 llvm_unreachable("all compare types are handled");
   }
-
-  return {Invalid, fcAllFlags, fcAllFlags};
 }
 
 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
 
 define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, 0.0
   ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float 
nofpclass(inf nan) %f) {
 
 define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, -0.0
   ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double 
%unknown) {
   %cmp = fcmp nnan oge float %op, 0.0
   ret i1 %cmp
 }
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+  %abs1 = call half @llvm.fabs.f16(half %x)
+  %cmp = fcmp ogt half %a

[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168

Requested by: @dtcxzyw

---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff


3 Files Affected:

- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10) 
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
(+19-2) 
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36) 


```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h 
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template  class 
GenericFloatingPointPredicateUtils {
   if (Mode.Input != DenormalMode::IEEE)
 return {Invalid, fcAllFlags, fcAllFlags};
 
+  auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+  Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+  };
+
   switch (Pred) {
   case FCmpInst::FCMP_OEQ: // Match x == 0.0
 return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template  class 
GenericFloatingPointPredicateUtils {
   case FCmpInst::FCMP_UNO:
 return exactClass(Src, fcNan);
   case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
   case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | 
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
   case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
   case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
   case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
   case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | 
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
   case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
   case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
   default:
 llvm_unreachable("all compare types are handled");
   }
-
-  return {Invalid, fcAllFlags, fcAllFlags};
 }
 
 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
 
 define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, 0.0
   ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float 
nofpclass(inf nan) %f) {
 
 define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, -0.0
   ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double 
%unknown) {
   %cmp = fcmp nnan oge float %op, 0.0
   ret i1 %cmp
 }
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+  %abs1 = call half @llvm.fabs.f16(half %x)
+  %cmp = fcmp ogt half %abs1, 0xH
+  %sel = select i1 %cmp, half %x, half 0xH
+  %abs2 = call half @llvm.fabs.f16(half %sel)
+  ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp 
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a

[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: None (llvmbot)


Changes

Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168

Requested by: @dtcxzyw

---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff


3 Files Affected:

- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10) 
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
(+19-2) 
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36) 


```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h 
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template  class 
GenericFloatingPointPredicateUtils {
   if (Mode.Input != DenormalMode::IEEE)
 return {Invalid, fcAllFlags, fcAllFlags};
 
+  auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+  Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+  };
+
   switch (Pred) {
   case FCmpInst::FCMP_OEQ: // Match x == 0.0
 return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template  class 
GenericFloatingPointPredicateUtils {
   case FCmpInst::FCMP_UNO:
 return exactClass(Src, fcNan);
   case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
   case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | 
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
   case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
   case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
   case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
   case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | 
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
   case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
   case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
   default:
 llvm_unreachable("all compare types are handled");
   }
-
-  return {Invalid, fcAllFlags, fcAllFlags};
 }
 
 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
 
 define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, 0.0
   ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float 
nofpclass(inf nan) %f) {
 
 define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, -0.0
   ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double 
%unknown) {
   %cmp = fcmp nnan oge float %op, 0.0
   ret i1 %cmp
 }
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+  %abs1 = call half @llvm.fabs.f16(half %x)
+  %cmp = fcmp ogt half %abs1, 0xH
+  %sel = select i1 %cmp, half %x, half 0xH
+  %abs2 = call half @llvm.fabs.f16(half %sel)
+  ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp 
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a48

[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

llvmbot wrote:

@arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/152921
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/152921
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: None (llvmbot)


Changes

Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168

Requested by: @dtcxzyw

---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff


3 Files Affected:

- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10) 
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
(+19-2) 
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36) 


```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h 
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template  class 
GenericFloatingPointPredicateUtils {
   if (Mode.Input != DenormalMode::IEEE)
 return {Invalid, fcAllFlags, fcAllFlags};
 
+  auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+  Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+  };
+
   switch (Pred) {
   case FCmpInst::FCMP_OEQ: // Match x == 0.0
 return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template  class 
GenericFloatingPointPredicateUtils {
   case FCmpInst::FCMP_UNO:
 return exactClass(Src, fcNan);
   case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
   case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | 
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
   case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
   case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
   case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
   case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | 
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
   case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
   case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
   default:
 llvm_unreachable("all compare types are handled");
   }
-
-  return {Invalid, fcAllFlags, fcAllFlags};
 }
 
 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll 
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
 
 define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, 0.0
   ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float 
nofpclass(inf nan) %f) {
 
 define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
 ; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
 ;
   %r = fmul nsz float %f, -0.0
   ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double 
%unknown) {
   %cmp = fcmp nnan oge float %op, 0.0
   ret i1 %cmp
 }
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+  %abs1 = call half @llvm.fabs.f16(half %x)
+  %cmp = fcmp ogt half %abs1, 0xH
+  %sel = select i1 %cmp, half %x, half 0xH
+  %abs2 = call half @llvm.fabs.f16(half %sel)
+  ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp 
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a48105a1d

[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)

2025-08-10 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/152921
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits