[llvm-branch-commits] [libcxx] release/21.x: [libc++] Add checks for misused hardening macros (#150669) (PR #151582)
https://github.com/philnik777 approved this pull request. https://github.com/llvm/llvm-project/pull/151582 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/21.x: [libc++] Add checks for misused hardening macros (#150669) (PR #151582)
philnik777 wrote:
> Who can review?
>
> btw what's up with the buildkite ci never ending?

I don't know; I've pinged the owner. I don't think it should be blocking though.
https://github.com/llvm/llvm-project/pull/151582
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
https://github.com/rengolin commented: Looks good, some nits, but I'll let others review and approve. https://github.com/llvm/llvm-project/pull/152912 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2: tensor) -> tenso } // CHECK-LABEL: func @detensor_op_sequence // CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor) -// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]] +// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]] // CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]] -// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]], %[[arg2_val]] -// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]], %[[detensored_res]] +// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]] +// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]], %[[arg2_val]] rengolin wrote: CHECK-DAG can come in any order, but the op here specifies them in a particular order, and the two `arg1` vals have the same pattern. This may randomly fail. https://github.com/llvm/llvm-project/pull/152912 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
@@ -551,8 +567,22 @@ struct LinalgDetensorize
populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+ opsToDetensor.erase(op);
+ detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+ for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+ }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),
rengolin wrote:
Since the callback doesn't return anything, it won't change the success/failure
result, so this should be fine.
https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
https://github.com/rengolin edited https://github.com/llvm/llvm-project/pull/152912 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 90c9271 - Revert "[PatternMatch] Add `m_[Shift]OrSelf` matchers. (#152924)"
Author: Yingwei Zheng
Date: 2025-08-11T11:38:27+08:00
New Revision: 90c9271c1304f5cf1f57f397a6cb9fce9649ed96
URL:
https://github.com/llvm/llvm-project/commit/90c9271c1304f5cf1f57f397a6cb9fce9649ed96
DIFF:
https://github.com/llvm/llvm-project/commit/90c9271c1304f5cf1f57f397a6cb9fce9649ed96.diff
LOG: Revert "[PatternMatch] Add `m_[Shift]OrSelf` matchers. (#152924)"
This reverts commit 1c499351d682aa46c2f087a6f757d22b01d18aa7.
Added:
Modified:
llvm/include/llvm/IR/PatternMatch.h
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/unittests/IR/PatternMatch.cpp
Removed:
diff --git a/llvm/include/llvm/IR/PatternMatch.h
b/llvm/include/llvm/IR/PatternMatch.h
index 76482ad47c771..27c5d5ca08cd6 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1327,45 +1327,6 @@ inline BinaryOp_match
m_AShr(const LHS &L,
return BinaryOp_match(L, R);
}
-template struct ShiftLike_match {
- LHS_t L;
- uint64_t &R;
-
- ShiftLike_match(const LHS_t &LHS, uint64_t &RHS) : L(LHS), R(RHS) {}
-
- template bool match(OpTy *V) const {
-if (auto *Op = dyn_cast(V)) {
- if (Op->getOpcode() == Opcode)
-return m_ConstantInt(R).match(Op->getOperand(1)) &&
- L.match(Op->getOperand(0));
-}
-// Interpreted as shiftop V, 0
-R = 0;
-return L.match(V);
- }
-};
-
-/// Matches shl L, ConstShAmt or L itself.
-template
-inline ShiftLike_match m_ShlOrSelf(const LHS &L,
- uint64_t &R) {
- return ShiftLike_match(L, R);
-}
-
-/// Matches lshr L, ConstShAmt or L itself.
-template
-inline ShiftLike_match m_LShrOrSelf(const LHS &L,
-uint64_t &R) {
- return ShiftLike_match(L, R);
-}
-
-/// Matches ashr L, ConstShAmt or L itself.
-template
-inline ShiftLike_match m_AShrOrSelf(const LHS &L,
-uint64_t &R) {
- return ShiftLike_match(L, R);
-}
-
template
struct OverflowingBinaryOp_match {
diff --git
a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index e3c31f96f86d9..40a7f8043034e 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -617,7 +617,7 @@ struct LoadOps {
LoadInst *RootInsert = nullptr;
bool FoundRoot = false;
uint64_t LoadSize = 0;
- uint64_t Shift = 0;
+ const APInt *Shift = nullptr;
Type *ZextType;
AAMDNodes AATags;
};
@@ -627,15 +627,17 @@ struct LoadOps {
// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
AliasAnalysis &AA) {
- uint64_t ShAmt2;
+ const APInt *ShAmt2 = nullptr;
Value *X;
Instruction *L1, *L2;
// Go to the last node with loads.
- if (match(V, m_OneUse(m_c_Or(m_Value(X),
- m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(
-m_Instruction(L2,
-ShAmt2)) {
+ if (match(V, m_OneUse(m_c_Or(
+ m_Value(X),
+
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2,
+ m_APInt(ShAmt2)) ||
+ match(V, m_OneUse(m_Or(m_Value(X),
+ m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2
{
if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
// Avoid Partial chain merge.
return false;
@@ -644,10 +646,11 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps,
const DataLayout &DL,
// Check if the pattern has loads
LoadInst *LI1 = LOps.Root;
- uint64_t ShAmt1 = LOps.Shift;
+ const APInt *ShAmt1 = LOps.Shift;
if (LOps.FoundRoot == false &&
- match(X, m_OneUse(m_ShlOrSelf(
- m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1, ShAmt1 {
+ (match(X, m_OneUse(m_ZExt(m_Instruction(L1 ||
+ match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1,
+ m_APInt(ShAmt1)) {
LI1 = dyn_cast(L1);
}
LoadInst *LI2 = dyn_cast(L2);
@@ -723,6 +726,13 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps,
const DataLayout &DL,
if (IsBigEndian)
std::swap(ShAmt1, ShAmt2);
+ // Find Shifts values.
+ uint64_t Shift1 = 0, Shift2 = 0;
+ if (ShAmt1)
+Shift1 = ShAmt1->getZExtValue();
+ if (ShAmt2)
+Shift2 = ShAmt2->getZExtValue();
+
// First load is always LI1. This is where we put the new load.
// Use the merged load size available from LI1 for forward loads.
[llvm-branch-commits] [llvm] 5bb34ba - Revert "[NFC][MC] Removed unused switch case in `emitCATTR` (#152907)"
Author: Abhishek Kaushik Date: 2025-08-10T22:19:26+05:30 New Revision: 5bb34babaeb25cd133f3c1af79a5c7bb069c49d9 URL: https://github.com/llvm/llvm-project/commit/5bb34babaeb25cd133f3c1af79a5c7bb069c49d9 DIFF: https://github.com/llvm/llvm-project/commit/5bb34babaeb25cd133f3c1af79a5c7bb069c49d9.diff LOG: Revert "[NFC][MC] Removed unused switch case in `emitCATTR` (#152907)" This reverts commit 9247b51a02b5cf92195f6cd6e82e392619ae5235. Added: Modified: llvm/lib/MC/MCAsmInfoGOFF.cpp Removed: diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp index d35020429e346..0a5d1927b1a22 100644 --- a/llvm/lib/MC/MCAsmInfoGOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp @@ -71,6 +71,8 @@ static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode, case GOFF::ESD_RMODE_64: OS << "64"; break; +case GOFF::ESD_RMODE_None: + break; } OS << ')'; } ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)
@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t
Bytes) {
memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
}
+struct DynCGroupMemTy {
+ void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
+Size = 0;
+Ptr = nullptr;
+IsFallback = false;
kevinsala wrote:
With the `clang::loader_uninitialized` attribute on the `DynCGroupMem`
variable, I can't use field initializers or a constructor.
https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)
https://github.com/kevinsala updated
https://github.com/llvm/llvm-project/pull/152831
>From fa3c7425ae9e5ffea83841f2be61b0f494b99038 Mon Sep 17 00:00:00 2001
From: Kevin Sala
Date: Fri, 8 Aug 2025 11:25:14 -0700
Subject: [PATCH 1/2] [OpenMP][Offload] Add offload runtime support for
dyn_groupprivate clause
---
offload/DeviceRTL/include/DeviceTypes.h | 4 +
offload/DeviceRTL/include/Interface.h | 2 +-
offload/DeviceRTL/include/State.h | 2 +-
offload/DeviceRTL/src/Kernel.cpp | 14 +-
offload/DeviceRTL/src/State.cpp | 48 +-
offload/include/Shared/APITypes.h | 6 +-
offload/include/Shared/Environment.h | 4 +-
offload/include/device.h | 3 +
offload/include/omptarget.h | 7 +-
offload/libomptarget/OpenMP/API.cpp | 14 ++
offload/libomptarget/device.cpp | 6 +
offload/libomptarget/exports | 1 +
.../amdgpu/dynamic_hsa/hsa_ext_amd.h | 1 +
offload/plugins-nextgen/amdgpu/src/rtl.cpp| 34 +++--
.../common/include/PluginInterface.h | 33 +++-
.../common/src/PluginInterface.cpp| 86 ---
.../plugins-nextgen/cuda/dynamic_cuda/cuda.h | 1 +
offload/plugins-nextgen/cuda/src/rtl.cpp | 37 +++--
offload/plugins-nextgen/host/src/rtl.cpp | 4 +-
.../offloading/dyn_groupprivate_strict.cpp| 141 ++
openmp/runtime/src/include/omp.h.var | 10 ++
openmp/runtime/src/kmp_csupport.cpp | 9 ++
openmp/runtime/src/kmp_stub.cpp | 16 ++
23 files changed, 418 insertions(+), 65 deletions(-)
create mode 100644 offload/test/offloading/dyn_groupprivate_strict.cpp
diff --git a/offload/DeviceRTL/include/DeviceTypes.h
b/offload/DeviceRTL/include/DeviceTypes.h
index 2e5d92380f040..a43b506d6879e 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
///}
+enum omp_access_t {
+ omp_access_cgroup = 0,
+};
+
#endif
diff --git a/offload/DeviceRTL/include/Interface.h
b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4..672afea206785 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
int8_t __kmpc_is_spmd_exec_mode();
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
void __kmpc_target_deinit();
diff --git a/offload/DeviceRTL/include/State.h
b/offload/DeviceRTL/include/State.h
index db396dae6e445..17c3c6f2d3e42 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -116,7 +116,7 @@ extern Local ThreadStates;
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..58e9a09105a76 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
};
static void
-inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
-KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment)
{
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment)
{
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
if (IsSPMD) {
-inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
-KernelLaunchEnvironment);
+initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+ KernelLaunchEnvironment);
synchronize::threadsAligned(atomic::relaxed);
} else {
-inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
-KernelLaunchEnv
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
// Map to keep track of created compares, The key is the pair of operands for
// the compare, to allow detecting and re-using redundant compares.
DenseMap, Value *> SeenCompares;
- for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+ Value *AliasLaneMask = nullptr;
+ for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
- continue;
+if (!VF.isScalar() && UseSafeEltsMask) {
+ Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+ Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);
fhahn wrote:
It doesn't look like there's much re-use going on here. Better to have a
separate function than complicate the logic here?
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -479,7 +479,11 @@ class LoopVectorizationPlanner {
/// Build VPlans for the specified \p UserVF and \p UserIC if they are
/// non-zero or all applicable candidate VFs otherwise. If vectorization and
/// interleaving should be avoided up-front, no plans are generated.
- void plan(ElementCount UserVF, unsigned UserIC);
+ /// DiffChecks is a list of pointer pairs that should be checked for
aliasing,
fhahn wrote:
Better to add the new recipes at the same place where the runtime checks are
attached to VPlan, rather than threading through the general planning stage:
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp#L9359C32-L9359C51
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
PartialReductionExtendKind OpBExtend, std::optional BinOp,
TTI::TargetCostKind CostKind) const;
+ /// \return true if a mask should be formed that disables lanes that could
+ /// alias between two pointers. The mask is created by the
+ /// loop_dependence_{war,raw}_mask intrinsics.
+ LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;
fhahn wrote:
Could we just check the cost of the new intrinsic using the existing hooks?
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Implement isLegalAddressingMode for lsx/lasx (PR #151917)
https://github.com/zhaoqi5 converted_to_draft https://github.com/llvm/llvm-project/pull/151917 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)
@@ -556,8 +563,45 @@ Error GenericKernelTy::launch(GenericDeviceTy
&GenericDevice, void **ArgPtrs,
llvm::SmallVector Args;
llvm::SmallVector Ptrs;
+ uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0],
+KernelArgs.ThreadLimit[1],
+KernelArgs.ThreadLimit[2]};
+ uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
+ KernelArgs.NumTeams[2]};
+ if (!isBareMode()) {
+NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
+NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
+NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
+ }
+
+ uint32_t MaxBlockMemSize = GenericDevice.getMaxBlockSharedMemSize();
+ uint32_t DynBlockMemSize = KernelArgs.DynCGroupMem;
+ uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
+ if (StaticBlockMemSize > MaxBlockMemSize)
+return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Static block memory size exceeds maximum");
arsenm wrote:
Error messages should start with a lowercase letter.
https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)
@@ -556,8 +563,45 @@ Error GenericKernelTy::launch(GenericDeviceTy
&GenericDevice, void **ArgPtrs,
llvm::SmallVector Args;
llvm::SmallVector Ptrs;
+ uint32_t NumThreads[3] = {KernelArgs.ThreadLimit[0],
+KernelArgs.ThreadLimit[1],
+KernelArgs.ThreadLimit[2]};
+ uint32_t NumBlocks[3] = {KernelArgs.NumTeams[0], KernelArgs.NumTeams[1],
+ KernelArgs.NumTeams[2]};
+ if (!isBareMode()) {
+NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
+NumBlocks[0] = getNumBlocks(GenericDevice, NumBlocks, KernelArgs.Tripcount,
+NumThreads[0], KernelArgs.ThreadLimit[0] > 0);
+ }
+
+ uint32_t MaxBlockMemSize = GenericDevice.getMaxBlockSharedMemSize();
+ uint32_t DynBlockMemSize = KernelArgs.DynCGroupMem;
+ uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
+ if (StaticBlockMemSize > MaxBlockMemSize)
+return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Static block memory size exceeds maximum");
+ else if (!KernelArgs.Flags.AllowDynCGroupMemFallback &&
+ TotalBlockMemSize > MaxBlockMemSize)
+return Plugin::error(
+ErrorCode::INVALID_ARGUMENT,
+"Static and dynamic block memory size exceeds maximum");
arsenm wrote:
Error messages should start with a lowercase letter.
https://github.com/llvm/llvm-project/pull/152831
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/152912 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
llvmbot wrote:
@llvm/pr-subscribers-mlir-linalg
Author: Matthias Springer (matthias-springer)
Changes
The pass used to access erased operations and block arguments in the type
converter. That is no longer supported in the new conversion driver.
---
Full diff: https://github.com/llvm/llvm-project/pull/152912.diff
2 Files Affected:
- (modified) mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp (+32-2)
- (modified) mlir/test/Dialect/Linalg/detensorize_0d.mlir (+4-3)
``diff
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 830905495e759..221f95a8d8f33 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -458,6 +458,22 @@ struct LinalgDetensorize
}
};
+ /// A listener that forwards notifyBlockErased and notifyOperationErased to
+ /// the given callbacks.
+ struct CallbackListener : public RewriterBase::Listener {
+CallbackListener(std::function onOperationErased,
+ std::function onBlockErased)
+: onOperationErased(onOperationErased), onBlockErased(onBlockErased) {}
+
+void notifyBlockErased(Block *block) override { onBlockErased(block); }
+void notifyOperationErased(Operation *op) override {
+ onOperationErased(op);
+}
+
+std::function onOperationErased;
+std::function onBlockErased;
+ };
+
void runOnOperation() override {
MLIRContext *context = &getContext();
DetensorizeTypeConverter typeConverter;
@@ -551,8 +567,22 @@ struct LinalgDetensorize
populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+ opsToDetensor.erase(op);
+ detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+ for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+ }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),
+ config)))
signalPassFailure();
RewritePatternSet canonPatterns(context);
diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
index 74931cb0830bc..5c29b04630cad 100644
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2:
tensor) -> tenso
}
// CHECK-LABEL: func @detensor_op_sequence
// CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor)
-// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]]
+// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]]
// CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]]
-// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]],
%[[arg2_val]]
-// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]],
%[[detensored_res]]
+// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]]
+// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]],
%[[arg2_val]]
+// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val_1]],
%[[detensored_res]]
// CHECK: %[[detensored_res3:.*]] = arith.divf %[[detensored_res]],
%[[detensored_res2]]
// CHECK: %[[new_tensor_res:.*]] = tensor.from_elements
%[[detensored_res3]]
// CHECK: return %[[new_tensor_res]]
``
https://github.com/llvm/llvm-project/pull/152912
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Migrate Detensorize pass to new dialect conversion driver (PR #152912)
https://github.com/matthias-springer created
https://github.com/llvm/llvm-project/pull/152912
The pass used to access erased operations and block arguments in the type
converter. That is no longer supported in the new conversion driver.
>From c2e90f3a39148223619497eeff16ed810e3cab95 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Sun, 10 Aug 2025 11:41:51 +
Subject: [PATCH] [mlir][linalg] Migrate Detensorize pass to new dialect
conversion driver
---
.../Dialect/Linalg/Transforms/Detensorize.cpp | 34 +--
mlir/test/Dialect/Linalg/detensorize_0d.mlir | 7 ++--
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 830905495e759..221f95a8d8f33 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -458,6 +458,22 @@ struct LinalgDetensorize
}
};
+ /// A listener that forwards notifyBlockErased and notifyOperationErased to
+ /// the given callbacks.
+ struct CallbackListener : public RewriterBase::Listener {
+CallbackListener(std::function onOperationErased,
+ std::function onBlockErased)
+: onOperationErased(onOperationErased), onBlockErased(onBlockErased) {}
+
+void notifyBlockErased(Block *block) override { onBlockErased(block); }
+void notifyOperationErased(Operation *op) override {
+ onOperationErased(op);
+}
+
+std::function onOperationErased;
+std::function onBlockErased;
+ };
+
void runOnOperation() override {
MLIRContext *context = &getContext();
DetensorizeTypeConverter typeConverter;
@@ -551,8 +567,22 @@ struct LinalgDetensorize
populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter,
shouldConvertBranchOperand);
-if (failed(
-applyFullConversion(getOperation(), target, std::move(patterns
+ConversionConfig config;
+auto onOperationErased = [&](Operation *op) {
+ opsToDetensor.erase(op);
+ detensorableBranchOps.erase(op);
+};
+auto onBlockErased = [&](Block *block) {
+ for (BlockArgument arg : block->getArguments()) {
+blockArgsToDetensor.erase(arg);
+ }
+};
+CallbackListener listener(onOperationErased, onBlockErased);
+
+config.listener = &listener;
+config.allowPatternRollback = false;
+if (failed(applyFullConversion(getOperation(), target, std::move(patterns),
+ config)))
signalPassFailure();
RewritePatternSet canonPatterns(context);
diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
index 74931cb0830bc..5c29b04630cad 100644
--- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir
@@ -53,10 +53,11 @@ func.func @detensor_op_sequence(%arg1: tensor, %arg2:
tensor) -> tenso
}
// CHECK-LABEL: func @detensor_op_sequence
// CHECK-SAME:(%[[arg1:.*]]: tensor, %[[arg2:.*]]: tensor)
-// CHECK-DAG: %[[arg1_val:.*]] = tensor.extract %[[arg1]]
+// CHECK-DAG: %[[arg1_val_1:.*]] = tensor.extract %[[arg1]]
// CHECK-DAG: %[[arg2_val:.*]] = tensor.extract %[[arg2]]
-// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val]],
%[[arg2_val]]
-// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val]],
%[[detensored_res]]
+// CHECK-DAG: %[[arg1_val_2:.*]] = tensor.extract %[[arg1]]
+// CHECK: %[[detensored_res:.*]] = arith.addf %[[arg1_val_2]],
%[[arg2_val]]
+// CHECK: %[[detensored_res2:.*]] = arith.mulf %[[arg1_val_1]],
%[[detensored_res]]
// CHECK: %[[detensored_res3:.*]] = arith.divf %[[detensored_res]],
%[[detensored_res2]]
// CHECK: %[[new_tensor_res:.*]] = tensor.from_elements
%[[detensored_res3]]
// CHECK: return %[[new_tensor_res]]
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/152921
Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168
Requested by: @dtcxzyw
>From 7ef6f5bdc487cd277fcfa4ac3b4f812f657bbb66 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Sun, 10 Aug 2025 22:55:04 +0800
Subject: [PATCH] [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913)
Closes https://github.com/llvm/llvm-project/issues/152824.
(cherry picked from commit d8b1b46cd39c91830bcf49ed91d80f38f78c2168)
---
.../IR/GenericFloatingPointPredicateUtils.h | 24 +++--
.../InstSimplify/floating-point-arithmetic.ll | 21 +--
llvm/unittests/Analysis/ValueTrackingTest.cpp | 36 +++
3 files changed, 69 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template class
GenericFloatingPointPredicateUtils {
if (Mode.Input != DenormalMode::IEEE)
return {Invalid, fcAllFlags, fcAllFlags};
+ auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+ Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+ };
+
switch (Pred) {
case FCmpInst::FCMP_OEQ: // Match x == 0.0
return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template class
GenericFloatingPointPredicateUtils {
case FCmpInst::FCMP_UNO:
return exactClass(Src, fcNan);
case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf |
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf |
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
default:
llvm_unreachable("all compare types are handled");
}
-
- return {Invalid, fcAllFlags, fcAllFlags};
}
const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, 0.0
ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float
nofpclass(inf nan) %f) {
define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, -0.0
ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double
%unknown) {
%cmp = fcmp nnan oge float %op, 0.0
ret i1 %cmp
}
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH0000
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH0000
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+ %abs1 = call half @llvm.fabs.f16(half %x)
+ %cmp = fcmp ogt half %a
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: None (llvmbot)
Changes
Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168
Requested by: @dtcxzyw
---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10)
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
(+19-2)
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36)
```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template class
GenericFloatingPointPredicateUtils {
if (Mode.Input != DenormalMode::IEEE)
return {Invalid, fcAllFlags, fcAllFlags};
+ auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+ Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+ };
+
switch (Pred) {
case FCmpInst::FCMP_OEQ: // Match x == 0.0
return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template class
GenericFloatingPointPredicateUtils {
case FCmpInst::FCMP_UNO:
return exactClass(Src, fcNan);
case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf |
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf |
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
default:
llvm_unreachable("all compare types are handled");
}
-
- return {Invalid, fcAllFlags, fcAllFlags};
}
const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, 0.0
ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float
nofpclass(inf nan) %f) {
define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, -0.0
ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double
%unknown) {
%cmp = fcmp nnan oge float %op, 0.0
ret i1 %cmp
}
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH0000
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH0000
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+ %abs1 = call half @llvm.fabs.f16(half %x)
+ %cmp = fcmp ogt half %abs1, 0xH0000
+ %sel = select i1 %cmp, half %x, half 0xH0000
+ %abs2 = call half @llvm.fabs.f16(half %sel)
+ ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: None (llvmbot)
Changes
Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168
Requested by: @dtcxzyw
---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10)
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
(+19-2)
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36)
```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template class
GenericFloatingPointPredicateUtils {
if (Mode.Input != DenormalMode::IEEE)
return {Invalid, fcAllFlags, fcAllFlags};
+ auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+ Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+ };
+
switch (Pred) {
case FCmpInst::FCMP_OEQ: // Match x == 0.0
return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template class
GenericFloatingPointPredicateUtils {
case FCmpInst::FCMP_UNO:
return exactClass(Src, fcNan);
case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf |
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf |
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
default:
llvm_unreachable("all compare types are handled");
}
-
- return {Invalid, fcAllFlags, fcAllFlags};
}
const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, 0.0
ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float
nofpclass(inf nan) %f) {
define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, -0.0
ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double
%unknown) {
%cmp = fcmp nnan oge float %op, 0.0
ret i1 %cmp
}
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH0000
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH0000
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+ %abs1 = call half @llvm.fabs.f16(half %x)
+ %cmp = fcmp ogt half %abs1, 0xH0000
+ %sel = select i1 %cmp, half %x, half 0xH0000
+ %abs2 = call half @llvm.fabs.f16(half %sel)
+ ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a48
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
llvmbot wrote: @arsenm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/152921 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/152921 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
Author: None (llvmbot)
Changes
Backport d8b1b46cd39c91830bcf49ed91d80f38f78c2168
Requested by: @dtcxzyw
---
Full diff: https://github.com/llvm/llvm-project/pull/152921.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h (+14-10)
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
(+19-2)
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+36)
```diff
diff --git a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
index 8aac9d5b49dbb..448a6e913eb86 100644
--- a/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
+++ b/llvm/include/llvm/IR/GenericFloatingPointPredicateUtils.h
@@ -135,6 +135,12 @@ template class
GenericFloatingPointPredicateUtils {
if (Mode.Input != DenormalMode::IEEE)
return {Invalid, fcAllFlags, fcAllFlags};
+ auto ExactClass = [IsFabs, Src](FPClassTest Mask) {
+if (IsFabs)
+ Mask = llvm::inverse_fabs(Mask);
+return exactClass(Src, Mask);
+ };
+
switch (Pred) {
case FCmpInst::FCMP_OEQ: // Match x == 0.0
return exactClass(Src, fcZero);
@@ -151,26 +157,24 @@ template class
GenericFloatingPointPredicateUtils {
case FCmpInst::FCMP_UNO:
return exactClass(Src, fcNan);
case FCmpInst::FCMP_OGT: // x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf);
case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
-return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf |
fcNan);
+return ExactClass(fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
case FCmpInst::FCMP_OGE: // x >= 0
-return exactClass(Src, fcPositive | fcNegZero);
+return ExactClass(fcPositive | fcNegZero);
case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
-return exactClass(Src, fcPositive | fcNegZero | fcNan);
+return ExactClass(fcPositive | fcNegZero | fcNan);
case FCmpInst::FCMP_OLT: // x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf);
case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
-return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf |
fcNan);
+return ExactClass(fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
case FCmpInst::FCMP_OLE: // x <= 0
-return exactClass(Src, fcNegative | fcPosZero);
+return ExactClass(fcNegative | fcPosZero);
case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
-return exactClass(Src, fcNegative | fcPosZero | fcNan);
+return ExactClass(fcNegative | fcPosZero | fcNan);
default:
llvm_unreachable("all compare types are handled");
}
-
- return {Invalid, fcAllFlags, fcAllFlags};
}
const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index ab4448b460bfc..820fff433e9e0 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -213,7 +213,7 @@ define double @fmul_nnan_ninf_nneg_n0.0_commute(i127 %x) {
define float @fmul_ninf_nnan_mul_zero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_zero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, 0.0
ret float %r
@@ -221,7 +221,7 @@ define float @fmul_ninf_nnan_mul_zero_nsz(float
nofpclass(inf nan) %f) {
define float @fmul_ninf_nnan_mul_nzero_nsz(float nofpclass(inf nan) %f) {
; CHECK-LABEL: @fmul_ninf_nnan_mul_nzero_nsz(
-; CHECK-NEXT: ret float 0.00e+00
+; CHECK-NEXT:ret float 0.00e+00
;
%r = fmul nsz float %f, -0.0
ret float %r
@@ -1255,3 +1255,20 @@ define i1 @fptrunc_round_unknown_positive(double
%unknown) {
%cmp = fcmp nnan oge float %op, 0.0
ret i1 %cmp
}
+
+define half @fabs_select_fabs(half noundef %x) {
+; CHECK-LABEL: @fabs_select_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:[[ABS1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:[[CMP:%.*]] = fcmp ogt half [[ABS1]], 0xH0000
+; CHECK-NEXT:[[SEL:%.*]] = select i1 [[CMP]], half [[X]], half 0xH0000
+; CHECK-NEXT:[[ABS2:%.*]] = call half @llvm.fabs.f16(half [[SEL]])
+; CHECK-NEXT:ret half [[ABS2]]
+;
+entry:
+ %abs1 = call half @llvm.fabs.f16(half %x)
+ %cmp = fcmp ogt half %abs1, 0xH0000
+ %sel = select i1 %cmp, half %x, half 0xH0000
+ %abs2 = call half @llvm.fabs.f16(half %sel)
+ ret half %abs2
+}
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp
b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 7a48105a1d
[llvm-branch-commits] [llvm] release/21.x: [IR] Handle fabs LHS in `fcmpImpliesClass` (#152913) (PR #152921)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/152921 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
