[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits


@@ -14944,6 +14945,51 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
+   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N1.getOpcode() == ISD::ADD) {
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+//y is not, and (add y, z) is used only once.
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+//z is not, and (add y, z) is used only once.
+// The goal is to move constant offsets to the outermost ptradd, to create
+// more opportunities to fold offsets into memory instructions.
+// Together with the generic combines in DAGCombiner.cpp, this also
+// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
+//
+// This transform is here instead of in the general DAGCombiner as it can
+// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
+// AArch64's CPA.
+SDValue X = N0;
+SDValue Y = N1.getOperand(0);
+SDValue Z = N1.getOperand(1);
+bool N1OneUse = N1.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+if ((ZIsConstant != YIsConstant) && N1OneUse) {

ritter-x2a wrote:

Done.

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From ac6d5eb285b1f56b5c32133279224feb2b8bd8a9 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x1
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in SelectionDAGAddressAnalysis (PR #142778)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142778

>From af2d3ea3a17b2d7eec54fcf030ff89a1a0422e5a Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:48:02 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in
 SelectionDAGAddressAnalysis

This is used in a bunch of memory-related transforms.

For SWDEV-516125.
---
 .../SelectionDAGAddressAnalysis.cpp   |  6 ++--
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 ++-
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..da92aaa860b2b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -231,6 +231,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
   break;
 case ISD::ADD:
+case ISD::PTRADD:
   if (auto *C = dyn_cast(Base->getOperand(1))) {
 Offset += C->getSExtValue();
 Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
@@ -259,7 +260,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 break;
   }
 
-  if (Base->getOpcode() == ISD::ADD) {
+  if (Base->isAnyAdd()) {
 // TODO: The following code appears to be needless as it just
 //   bails on some Ptrs early, reducing the cases where we
 //   find equivalence. We should be able to remove this.
@@ -282,8 +283,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
 
 // Check if Index Offset pattern
-if (Index->getOpcode() != ISD::ADD ||
-!isa(Index->getOperand(1)))
+if (!Index->isAnyAdd() || !isa(Index->getOperand(1)))
   return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
 
 Offset += cast(Index->getOperand(1))->getSExtValue();
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d3242905ada64..2e76033a480f4 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -130,26 +130,14 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
 ; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
 ; SelectionDAGAddressAnalysis.
 define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
-; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_PTRADD:   ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_LEGACY:   ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+; GFX942-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942:   ; %bb.0: ; %entry
+; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:s_setpc_b64 s[30:31]
 entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][C++03] Remove XFAILs from the non-frozen libc++-specific tests (PR #144101)

2025-06-13 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 created 
https://github.com/llvm/llvm-project/pull/144101

The tests in `libcxx/test/libcxx` aren't run against the frozen headers 
anymore, so we can remove any XFAILs in them.

This is part of https://discourse.llvm.org/t/rfc-freezing-c-03-headers-in-libc.


>From e080572b8168260ecb4c8b2be39111d579056f74 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser 
Date: Fri, 13 Jun 2025 17:49:01 +0200
Subject: [PATCH] [libc++][C++03] Remove XFAILs from the non-frozen
 libc++-specific tests

---
 libcxx/test/libcxx/algorithms/half_positive.pass.cpp   | 2 --
 libcxx/test/libcxx/algorithms/vectorization.compile.pass.cpp   | 2 --
 .../assertions/customize_verbose_abort.link-time.pass.cpp  | 2 --
 libcxx/test/libcxx/assertions/default_verbose_abort.pass.cpp   | 2 --
 libcxx/test/libcxx/assertions/modes/none.pass.cpp  | 2 --
 libcxx/test/libcxx/assertions/single_expression.pass.cpp   | 2 --
 .../atomics.types.operations.req/atomic_fetch_add.verify.cpp   | 2 --
 .../atomic_fetch_add_explicit.verify.cpp   | 2 --
 .../atomics.types.operations.req/atomic_fetch_sub.verify.cpp   | 2 --
 .../atomic_fetch_sub_explicit.verify.cpp   | 2 --
 libcxx/test/libcxx/clang_modules_include.gen.py| 2 --
 libcxx/test/libcxx/clang_tidy.gen.py   | 3 ---
 .../containers/associative/tree_balance_after_insert.pass.cpp  | 2 --
 .../containers/associative/tree_key_value_traits.pass.cpp  | 2 --
 .../libcxx/containers/associative/tree_left_rotate.pass.cpp| 2 --
 libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp | 2 --
 .../libcxx/containers/associative/tree_right_rotate.pass.cpp   | 2 --
 .../containers/associative/unord.map/abi.compile.pass.cpp  | 2 --
 .../containers/associative/unord.set/abi.compile.pass.cpp  | 2 --
 .../test/libcxx/containers/container_traits.compile.pass.cpp   | 2 --
 libcxx/test/libcxx/containers/unord/key_value_traits.pass.cpp  | 2 --
 libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp | 2 --
 libcxx/test/libcxx/containers/unord/next_prime.pass.cpp| 2 --
 libcxx/test/libcxx/depr/depr.c.headers/extern_c.pass.cpp   | 2 --
 .../libcxx/experimental/fexperimental-library.compile.pass.cpp | 2 --
 libcxx/test/libcxx/header_inclusions.gen.py| 1 -
 .../string.streams/stringbuf/const_sso_buffer.pass.cpp | 2 --
 libcxx/test/libcxx/iterators/aliasing_iterator.pass.cpp| 2 --
 libcxx/test/libcxx/iterators/bounded_iter/arithmetic.pass.cpp  | 2 --
 libcxx/test/libcxx/iterators/bounded_iter/comparison.pass.cpp  | 2 --
 .../test/libcxx/iterators/bounded_iter/pointer_traits.pass.cpp | 2 --
 .../test/libcxx/iterators/bounded_iter/types.compile.pass.cpp  | 2 --
 .../iterators/contiguous_iterators.conv.compile.pass.cpp   | 2 --
 libcxx/test/libcxx/iterators/contiguous_iterators.verify.cpp   | 2 --
 .../iterator.primitives/iterator.operations/prev.verify.cpp| 2 --
 .../language.support/support.dynamic/libcpp_deallocate.sh.cpp  | 2 --
 libcxx/test/libcxx/memory/allocation_guard.pass.cpp| 2 --
 libcxx/test/libcxx/memory/swap_allocator.pass.cpp  | 2 --
 libcxx/test/libcxx/numerics/bit.ops.pass.cpp   | 2 --
 libcxx/test/libcxx/numerics/clamp_to_integral.pass.cpp | 2 --
 .../libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp | 2 --
 libcxx/test/libcxx/selftest/test_macros.pass.cpp   | 2 --
 .../strings/basic.string/string.capacity/max_size.pass.cpp | 2 --
 .../test/libcxx/strings/c.strings/constexpr_memmove.pass.cpp   | 2 --
 libcxx/test/libcxx/system_reserved_names.gen.py| 2 --
 libcxx/test/libcxx/transitive_includes.gen.py  | 2 --
 libcxx/test/libcxx/type_traits/datasizeof.compile.pass.cpp | 2 --
 libcxx/test/libcxx/type_traits/desugars_to.compile.pass.cpp| 2 --
 libcxx/test/libcxx/type_traits/is_constant_evaluated.pass.cpp  | 2 --
 libcxx/test/libcxx/type_traits/is_replaceable.compile.pass.cpp | 2 --
 .../type_traits/is_trivially_comparable.compile.pass.cpp   | 2 --
 .../type_traits/is_trivially_relocatable.compile.pass.cpp  | 2 --
 libcxx/test/libcxx/utilities/exception_guard.odr.sh.cpp| 2 --
 .../function.objects/refwrap/desugars_to.compile.pass.cpp  | 2 --
 libcxx/test/libcxx/utilities/is_pointer_in_range.pass.cpp  | 2 --
 libcxx/test/libcxx/utilities/is_valid_range.pass.cpp   | 2 --
 .../libcxx/utilities/meta/is_referenceable.compile.pass.cpp| 2 --
 libcxx/test/libcxx/utilities/meta/meta_base.pass.cpp   | 2 --
 libcxx/test/libcxx/utilities/no_destroy.pass.cpp   | 2 --
 libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp | 2 --
 .../utilities/utility/private_constructor_tag.compile.pass.cpp | 2 --
 61 files changed, 122 deletions(-)

diff --git a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp 
b/libcxx/test/libcxx/algorithms/half_positiv

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143880

>From 3f69917b67760c64fdafcb42b5783b8aaafb1406 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 45 +++
 1 file changed, 45 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) 
{
   store float 1.0, ptr addrspace(1) %p1
   ret void
 }
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+  ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %a_masked = and i64 %a, u0xf
+  %b_masked = and i64 %b, u0xf
+  %mul = mul i64 %a_masked, %b_masked
+  %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+  ret ptr %gep
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/142422

>From b950757c234900db941ed950ea3469b520d2e28a Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Mon, 2 Jun 2025 15:13:13 +
Subject: [PATCH 1/8] [MLIR] Fix incorrect slice contiguity inference in
 `vector::isContiguousSlice`

Previously, slices were sometimes marked as non-contiguous when
they were actually contiguous. This occurred when the vector type had
leading unit dimensions, e.g., `vector<1x1x...x1xd0xd1x...xdn-1xT>`.
In such cases, only the trailing n dimensions of the memref need to be
contiguous, not the entire vector rank.

This affects how `FlattenContiguousRowMajorTransfer{Read,Write}Pattern`
flattens `transfer_read` and `transfer_write` ops. The pattern used
to collapse a number of dimensions equal to the vector rank, which
may be incorrect when leading dimensions are unit-sized.

This patch fixes the issue by collapsing only as many trailing memref
dimensions as are actually contiguous.
---
 .../mlir/Dialect/Vector/Utils/VectorUtils.h   |  54 -
 .../Transforms/VectorTransferOpTransforms.cpp |   8 +-
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp |  25 ++--
 .../Vector/vector-transfer-flatten.mlir   | 108 +-
 4 files changed, 120 insertions(+), 75 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h 
b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
index 6609b28d77b6c..ed06d7a029494 100644
--- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
+++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
@@ -49,35 +49,37 @@ FailureOr> 
isTranspose2DSlice(vector::TransposeOp op);
 
 /// Return true if `vectorType` is a contiguous slice of `memrefType`.
 ///
-/// Only the N = vectorType.getRank() trailing dims of `memrefType` are
-/// checked (the other dims are not relevant). Note that for `vectorType` to be
-/// a contiguous slice of `memrefType`, the trailing dims of the latter have
-/// to be contiguous - this is checked by looking at the corresponding strides.
+/// The leading unit dimensions of the vector type are ignored as they
+/// are not relevant to the result. Let N be the number of the vector
+/// dimensions after ignoring a leading sequence of unit ones.
 ///
-/// There might be some restriction on the leading dim of `VectorType`:
+/// For `vectorType` to be a contiguous slice of `memrefType`
+///   a) the N trailing dimensions of the latter must be contiguous, and
+///   b) the trailing N dimensions of `vectorType` and `memrefType`,
+///  except the first of them, must match.
 ///
-/// Case 1. If all the trailing dims of `vectorType` match the trailing dims
-/// of `memrefType` then the leading dim of `vectorType` can be
-/// arbitrary.
-///
-///Ex. 1.1 contiguous slice, perfect match
-///  vector<4x3x2xi32> from memref<5x4x3x2xi32>
-///Ex. 1.2 contiguous slice, the leading dim does not match (2 != 4)
-///  vector<2x3x2xi32> from memref<5x4x3x2xi32>
-///
-/// Case 2. If an "internal" dim of `vectorType` does not match the
-/// corresponding trailing dim in `memrefType` then the remaining
-/// leading dims of `vectorType` have to be 1 (the first non-matching
-/// dim can be arbitrary).
+/// Examples:
 ///
-///Ex. 2.1 non-contiguous slice, 2 != 3 and the leading dim != <1>
-///  vector<2x2x2xi32> from memref<5x4x3x2xi32>
-///Ex. 2.2  contiguous slice, 2 != 3 and the leading dim == <1>
-///  vector<1x2x2xi32> from memref<5x4x3x2xi32>
-///Ex. 2.3. contiguous slice, 2 != 3 and the leading dims == <1x1>
-///  vector<1x1x2x2xi32> from memref<5x4x3x2xi32>
-///Ex. 2.4. non-contiguous slice, 2 != 3 and the leading dims != <1x1>
-/// vector<2x1x2x2xi32> from memref<5x4x3x2xi32>)
+///   Ex.1 contiguous slice, perfect match
+/// vector<4x3x2xi32> from memref<5x4x3x2xi32>
+///   Ex.2 contiguous slice, the leading dim does not match (2 != 4)
+/// vector<2x3x2xi32> from memref<5x4x3x2xi32>
+///   Ex.3 non-contiguous slice, 2 != 3
+/// vector<2x2x2xi32> from memref<5x4x3x2xi32>
+///   Ex.4 contiguous slice, leading unit dimension of the vector ignored,
+///2 != 3 (allowed)
+/// vector<1x2x2xi32> from memref<5x4x3x2xi32>
+///   Ex.5. contiguous slice, leading two unit dims of the vector ignored,
+/// 2 != 3 (allowed)
+/// vector<1x1x2x2xi32> from memref<5x4x3x2xi32>
+///   Ex.6. non-contiguous slice, 2 != 3, no leading sequence of unit dims
+/// vector<2x1x2x2xi32> from memref<5x4x3x2xi32>)
+///   Ex.7 contiguous slice, memref needs to be contiguous only on the last
+///dimension
+/// vector<1x1x2xi32> from memref<2x2x2xi32, strided<[8, 4, 1]>>
+///   Ex.8 non-contiguous slice, memref needs to be contiguous on the last
+///two dimensions, and it isn't
+/// vector<1x2x2xi32> from memref<2x2x2xi32, strided<[8, 4, 1]>>
 bool isContiguo

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143673

>From 10494be4478143e69a6116653228170195c00dc2 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines

This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.

For SWDEV-516125.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 139 
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 151 ++
 3 files changed, 160 insertions(+), 134 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, 
EVT VT,
 return SDValue();
   int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
-  case ISD::ADD: break;
+  case ISD::ADD:
+  case ISD::PTRADD:
+break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f645b09db734b..bd123fc4ffd1b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
 #include 
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "si-lower"
 
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, 
const SDLoc &SL,
 // instead of a tree.
 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::ADD);
+  assert(N->isAnyAdd());
 
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 for (SDNode *User : LHS->users()) {
   // There is a use that does not feed into addition, so the multiply can't
   // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
-  if (User->getOpcode() != ISD::ADD)
+  if (!User->isAnyAdd())
 return SDValue();
 
   // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@ 
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
 
 SDValue Hi = getHiHalf64(LHS, DAG);
 SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+  Opcode = ISD::ADD;
 SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, 
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
 
 SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,42 +14954,116 @@ SDValue 
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-if (N1.hasOneUse()) {
-  bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-  bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-  if (ZIsConstant != YIsConstant) {
-// If both additions in the original were NUW, the new ones are as 
well.
-

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142739

>From 6ea714e83e4714d9fe025e5e9fee48b41f223cb8 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 03:32:32 -0400
Subject: [PATCH 1/6] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines

This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one
that is closely connected.

The generic DAG combine is based on a part of PR #105669 by @rgwott, which was
adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello
LLVM tree. I added some parts and removed several disjuncts from the
reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly
  normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since
  they cause regressions in AMDGPU.

For SWDEV-516125.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  92 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  49 +
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |   1 +
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 194 ++
 4 files changed, 201 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..505cb264ae948 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -419,6 +419,7 @@ namespace {
 SDValue visitADDLike(SDNode *N);
 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
 SDNode *LocReference);
+SDValue visitPTRADD(SDNode *N);
 SDValue visitSUB(SDNode *N);
 SDValue visitADDSAT(SDNode *N);
 SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool 
DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
 return false;
 
   auto *C2 = dyn_cast(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:return visitTokenFactor(N);
   case ISD::MERGE_VALUES:   return visitMERGE_VALUES(N);
   case ISD::ADD:return visitADD(N);
+  case ISD::PTRADD: return visitPTRADD(N);
   case ISD::SUB:return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:return visitADDSAT(N);
@@ -2628,6 +2630,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd undef, y) -> undef
+  if (N0.isUndef())
+return N0;
+
+  // fold (ptradd x, undef) -> undef
+  if (N1.isUndef())
+return DAG.getUNDEF(PtrVT);
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0))
+return N1;
+
+  if (N0.getOpcode() == ISD::PTRADD &&
+  !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) 
{
+SDValue X = N0.getOperand(0);
+SDValue Y = N0.getOperand(1);
+SDValue Z = N1;
+bool N0OneUse = N0.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+//   * y is a constant and (ptradd x, y) has one use; or
+//   * y and z are both constants.
+if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N0->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;
+  SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+  AddToWorklist(Add.getNode());
+  return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+}
+
+// TODO: There is another possible fold here that was proven useful.
+// It would be this:
+//
+// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+//   * (ptradd x, y) has one use; and
+//   * y is a constant; and
+//   * z is not a constant.
+//
+// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
+// opportunity to select more complex instructions such as SUBPT and
+// MSUBPT. H

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142777

>From e8eccce3f9221dd52f15341873b03f220ef84739 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:30:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in
 SelectionDAGAddressAnalysis

Pre-committing test to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index b78dea1684545..d3242905ada64 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -126,3 +126,31 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
   store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
   ret void
 }
+
+; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
+; SelectionDAGAddressAnalysis.
+define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
+; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
+  ret void
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread Mircea Trofin via llvm-branch-commits


@@ -259,32 +306,40 @@ Error IR2VecVocabAnalysis::readVocabulary() {
 return createFileError(VocabFile, BufOrError.getError());
 
   auto Content = BufOrError.get()->getBuffer();
-  json::Path::Root Path("");
+
   Expected ParsedVocabValue = json::parse(Content);
   if (!ParsedVocabValue)
 return ParsedVocabValue.takeError();
 
-  bool Res = json::fromJSON(*ParsedVocabValue, Vocabulary, Path);
-  if (!Res)
-return createStringError(errc::illegal_byte_sequence,
- "Unable to parse the vocabulary");
+  ir2vec::Vocab OpcodeVocab, TypeVocab, ArgVocab;
+  unsigned OpcodeDim, TypeDim, ArgDim;

mtrofin wrote:

Initialize at declaration

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread Mircea Trofin via llvm-branch-commits


@@ -259,32 +306,40 @@ Error IR2VecVocabAnalysis::readVocabulary() {
 return createFileError(VocabFile, BufOrError.getError());
 
   auto Content = BufOrError.get()->getBuffer();
-  json::Path::Root Path("");
+
   Expected ParsedVocabValue = json::parse(Content);
   if (!ParsedVocabValue)
 return ParsedVocabValue.takeError();
 
-  bool Res = json::fromJSON(*ParsedVocabValue, Vocabulary, Path);
-  if (!Res)
-return createStringError(errc::illegal_byte_sequence,
- "Unable to parse the vocabulary");
+  ir2vec::Vocab OpcodeVocab, TypeVocab, ArgVocab;
+  unsigned OpcodeDim, TypeDim, ArgDim;
+  if (auto Err = parseVocabSection("Opcodes", *ParsedVocabValue, OpcodeVocab,

mtrofin wrote:

This changes the format, best to also update the doc.

Also, this means the sections must all be present, even if empty, correct? 
SGTM, just something worth spelling out.

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)

2025-06-13 Thread Alexis Engelke via llvm-branch-commits

aengelke wrote:

Reused an existing test case; this also shows the difference in the resulting 
block order. If preferred, I can also write a separate test case.

https://github.com/llvm/llvm-project/pull/142584
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits

https://github.com/momchil-velikov edited 
https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b81d5e0 - [InstCombine] Fold shuffles through all trivially vectorizable intrinsics (#141979)

2025-06-13 Thread via llvm-branch-commits

Author: Luke Lau
Date: 2025-06-13T18:25:07+01:00
New Revision: b81d5e06c7cba8c9f1f5380daed4b9ee139214ba

URL: 
https://github.com/llvm/llvm-project/commit/b81d5e06c7cba8c9f1f5380daed4b9ee139214ba
DIFF: 
https://github.com/llvm/llvm-project/commit/b81d5e06c7cba8c9f1f5380daed4b9ee139214ba.diff

LOG: [InstCombine] Fold shuffles through all trivially vectorizable intrinsics 
(#141979)

This addresses a TODO in foldShuffledIntrinsicOperands to use
isTriviallyVectorizable instead of a hardcoded list of intrinsics, which
in turn allows more intriniscs to be scalarized by VectorCombine.

>From what I can tell every intrinsic here should be speculatable so an
assertion was added.

Because this enables intrinsics like abs which have a scalar operand, we
need to also check isVectorIntrinsicWithScalarOpAtArg.

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/abs-1.ll
llvm/test/Transforms/InstCombine/fma.ll
llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
llvm/test/Transforms/InstCombine/powi.ll
llvm/test/Transforms/InstCombine/scmp.ll
llvm/test/Transforms/InstCombine/sqrt.ll
llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c169ab25b2106..8c8cc0859e4af 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1400,42 +1400,46 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst 
*II) {
 /// try to shuffle after the intrinsic.
 Instruction *
 InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
-  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
-  //   etc. Use llvm::isTriviallyVectorizable() and related to determine
-  //   which intrinsics are safe to shuffle?
-  switch (II->getIntrinsicID()) {
-  case Intrinsic::smax:
-  case Intrinsic::smin:
-  case Intrinsic::umax:
-  case Intrinsic::umin:
-  case Intrinsic::fma:
-  case Intrinsic::fshl:
-  case Intrinsic::fshr:
-break;
-  default:
+  if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
+  !II->getCalledFunction()->isSpeculatable())
+return nullptr;
+
+  // fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
+  // avoid undoing it.
+  if (match(II, m_FAbs(m_Value(
 return nullptr;
-  }
 
   Value *X;
   Constant *C;
   ArrayRef Mask;
-  auto *NonConstArg = find_if_not(II->args(), IsaPred);
+  auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
+return isa(Arg.get()) ||
+   isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+  Arg.getOperandNo(), nullptr);
+  });
   if (!NonConstArg ||
   !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask
 return nullptr;
 
-  // At least 1 operand must have 1 use because we are creating 2 instructions.
-  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+  // At least 1 operand must be a shuffle with 1 use because we are creating 2
+  // instructions.
+  if (none_of(II->args(), [](Value *V) {
+return isa(V) && V->hasOneUse();
+  }))
 return nullptr;
 
   // See if all arguments are shuffled with the same mask.
   SmallVector NewArgs;
   Type *SrcTy = X->getType();
-  for (Value *Arg : II->args()) {
-if (match(Arg, m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
-X->getType() == SrcTy)
+  for (Use &Arg : II->args()) {
+if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+   Arg.getOperandNo(), nullptr))
+  NewArgs.push_back(Arg);
+else if (match(&Arg,
+   m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
+ X->getType() == SrcTy)
   NewArgs.push_back(X);
-else if (match(Arg, m_ImmConstant(C))) {
+else if (match(&Arg, m_ImmConstant(C))) {
   // If it's a constant, try find the constant that would be shuffled to C.
   if (Constant *ShuffledC =
   unshuffleConstant(Mask, C, cast(SrcTy)))
@@ -1448,8 +1452,12 @@ 
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
 
   // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
   Instruction *FPI = isa(II) ? II : nullptr;
+  // Result type might be a 
diff erent vector width.
+  // TODO: Check that the result type isn't widened?
+  VectorType *ResTy =
+  VectorType::get(II->getType()->getScalarType(), cast(SrcTy));
   Value *NewIntrinsic =
-  Builder.CreateIntr

[llvm-branch-commits] [libc] fd43215 - [libc] Fix bugs found when testing with all headers (#144049)

2025-06-13 Thread via llvm-branch-commits

Author: William Huynh
Date: 2025-06-13T10:26:40-07:00
New Revision: fd432151a607a997c417f32cb70650fc7728629a

URL: 
https://github.com/llvm/llvm-project/commit/fd432151a607a997c417f32cb70650fc7728629a
DIFF: 
https://github.com/llvm/llvm-project/commit/fd432151a607a997c417f32cb70650fc7728629a.diff

LOG: [libc] Fix bugs found when testing with all headers (#144049)

Fixes a couple of bugs found when building. The PR to enable the headers
can be found here: #144114.

- math.yaml: float128 guard
- wchar.yaml: __restrict keyword order

Added: 


Modified: 
libc/include/math.yaml
libc/include/wchar.yaml
libc/test/src/stdio/printf_core/converter_test.cpp

Removed: 




diff  --git a/libc/include/math.yaml b/libc/include/math.yaml
index 466c08ade6fc4..11bead0745954 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -734,7 +734,7 @@ functions:
   - type: float128
   - type: float128
   - type: float128
-guards: LIBC_TYPES_HAS_FLOAT128
+guard: LIBC_TYPES_HAS_FLOAT128
   - name: ffmal
 standards:
   - stdc

diff  --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1af15a6c112b5..84db73d8f01ea 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -109,8 +109,8 @@ functions:
   - stdc
 return_type: wchar_t *
 arguments: 
-  - type: __restrict wchar_t *
-  - type: const __restrict wchar_t *
+  - type: wchar_t *__restrict 
+  - type: const wchar_t *__restrict
   - type: size_t
   - name: wmemmove
 standards:
@@ -125,16 +125,16 @@ functions:
   - stdc
 return_type: wchar_t *
 arguments:
-  - type: __restrict wchar_t *
-  - type: const __restrict wchar_t *
+  - type: wchar_t *__restrict
+  - type: const wchar_t *__restrict
   - type: size_t
   - name: wcscat
 standards:
   - stdc
 return_type: wchar_t *
 arguments: 
-  - type: __restrict wchar_t *
-  - type: const __restrict wchar_t *
+  - type: wchar_t *__restrict
+  - type: const wchar_t *__restrict
   - name: wcsstr
 standards:
   - stdc
@@ -147,13 +147,13 @@ functions:
   - stdc
 return_type: wchar_t *
 arguments:
-  - type: __restrict wchar_t *
-  - type: const __restrict wchar_t *
+  - type: wchar_t *__restrict
+  - type: const wchar_t *__restrict
   - type: size_t
   - name: wcscpy
 standards:
   - stdc
 return_type: wchar_t *
 arguments:
-  - type: __restrict wchar_t *
-  - type: const __restrict wchar_t *
+  - type: wchar_t *__restrict
+  - type: const wchar_t *__restrict

diff  --git a/libc/test/src/stdio/printf_core/converter_test.cpp 
b/libc/test/src/stdio/printf_core/converter_test.cpp
index 96a00ae598ec2..bf088937e4104 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -124,7 +124,7 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionSimple) 
{
 TEST_F(LlvmLibcPrintfConverterTest, StringConversionPrecisionHigh) {
   LIBC_NAMESPACE::printf_core::FormatSection high_precision_conv;
   high_precision_conv.has_conv = true;
-  high_precision_conv.raw_string = "%4s";
+  high_precision_conv.raw_string = "%.4s";
   high_precision_conv.conv_name = 's';
   high_precision_conv.precision = 4;
   high_precision_conv.conv_val_ptr = const_cast("456");



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] c609112 - Fix/reapply "[libc] Migrate stdio tests to ErrnoCheckingTest." (#143972)

2025-06-13 Thread via llvm-branch-commits

Author: Alexey Samsonov
Date: 2025-06-13T10:25:26-07:00
New Revision: c609112a5383c10272e3afceedd4d03f26437cf0

URL: 
https://github.com/llvm/llvm-project/commit/c609112a5383c10272e3afceedd4d03f26437cf0
DIFF: 
https://github.com/llvm/llvm-project/commit/c609112a5383c10272e3afceedd4d03f26437cf0.diff

LOG: Fix/reapply "[libc] Migrate stdio tests to ErrnoCheckingTest." (#143972)

This reverts commit a93e55e57ed00a55f822c64e3520c7c732b58480 and fixes
build and test failures:

* Proper include added to setvbuf_test.cpp
* fgetc/fgetc_unlocked/fgets tests are ported to ErrnoSetterMatcher and
are made more precise. This fixes inconsistencies between expectations
in regular and GPU builds - ErrnoSetterMatcher is configured to omit
errno matching on GPUs, as fgetc implementation on GPU doesn't set
errno, in contrast to Linux.

Added: 


Modified: 
libc/test/src/stdio/CMakeLists.txt
libc/test/src/stdio/fdopen_test.cpp
libc/test/src/stdio/fgetc_test.cpp
libc/test/src/stdio/fgetc_unlocked_test.cpp
libc/test/src/stdio/fgets_test.cpp
libc/test/src/stdio/fileop_test.cpp
libc/test/src/stdio/fopencookie_test.cpp
libc/test/src/stdio/remove_test.cpp
libc/test/src/stdio/rename_test.cpp
libc/test/src/stdio/setvbuf_test.cpp
libc/test/src/stdio/unlocked_fileop_test.cpp
libc/test/src/stdlib/StrtolTest.h
libc/test/src/stdlib/strtold_test.cpp

Removed: 




diff  --git a/libc/test/src/stdio/CMakeLists.txt 
b/libc/test/src/stdio/CMakeLists.txt
index ce2171f19597b..4aa8b95880018 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -20,6 +20,7 @@ add_libc_test(
 libc.src.stdio.fread
 libc.src.stdio.fseek
 libc.src.stdio.fwrite
+libc.test.UnitTest.ErrnoCheckingTest
 )
 
 add_libc_test(
@@ -68,6 +69,7 @@ add_libc_test(
 libc.src.stdio.fread
 libc.src.stdio.fwrite
 libc.src.stdio.setvbuf
+libc.test.UnitTest.ErrnoCheckingTest
 )
 
 add_libc_test(
@@ -88,6 +90,7 @@ add_libc_test(
 libc.src.stdio.fread_unlocked
 libc.src.stdio.funlockfile
 libc.src.stdio.fwrite_unlocked
+libc.test.UnitTest.ErrnoCheckingTest
 )
 
 add_libc_test(
@@ -109,6 +112,7 @@ add_libc_test(
 libc.src.stdio.fread
 libc.src.stdio.fseek
 libc.src.stdio.fwrite
+libc.test.UnitTest.ErrnoCheckingTest
   LINK_LIBRARIES
 LibcMemoryHelpers
 )
@@ -438,6 +442,7 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   libc.src.sys.stat.mkdirat
   libc.src.unistd.access
   libc.src.unistd.close
+  libc.test.UnitTest.ErrnoCheckingTest
   )
 
   add_libc_test(
@@ -452,6 +457,7 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   libc.src.stdio.rename
   libc.src.unistd.access
   libc.src.unistd.close
+  libc.test.UnitTest.ErrnoCheckingTest
   libc.test.UnitTest.ErrnoSetterMatcher
   )
 
@@ -468,6 +474,7 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   libc.src.stdio.fgets
   libc.src.stdio.fputs
   libc.src.unistd.close
+  libc.test.UnitTest.ErrnoCheckingTest
   libc.test.UnitTest.ErrnoSetterMatcher
   )
 endif()
@@ -488,6 +495,8 @@ add_libc_test(
 libc.src.stdio.fopen
 libc.src.stdio.fwrite
 libc.src.stdio.getc
+libc.test.UnitTest.ErrnoCheckingTest
+libc.test.UnitTest.ErrnoSetterMatcher
 )
 
 add_libc_test(
@@ -510,6 +519,8 @@ add_libc_test(
 libc.src.stdio.funlockfile
 libc.src.stdio.fwrite
 libc.src.stdio.getc_unlocked
+libc.test.UnitTest.ErrnoCheckingTest
+libc.test.UnitTest.ErrnoSetterMatcher
 )
 
 add_libc_test(
@@ -527,6 +538,8 @@ add_libc_test(
 libc.src.stdio.fgets
 libc.src.stdio.fopen
 libc.src.stdio.fwrite
+libc.test.UnitTest.ErrnoCheckingTest
+libc.test.UnitTest.ErrnoSetterMatcher
 )
 
 add_libc_test(

diff  --git a/libc/test/src/stdio/fdopen_test.cpp 
b/libc/test/src/stdio/fdopen_test.cpp
index 104fc478b100e..b53184c30be36 100644
--- a/libc/test/src/stdio/fdopen_test.cpp
+++ b/libc/test/src/stdio/fdopen_test.cpp
@@ -9,20 +9,21 @@
 #include "src/stdio/fdopen.h"
 
 #include "hdr/fcntl_macros.h"
-#include "src/__support/libc_errno.h"
 #include "src/fcntl/open.h"
 #include "src/stdio/fclose.h"
 #include "src/stdio/fgets.h"
 #include "src/stdio/fputs.h"
 #include "src/unistd/close.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
 #include  // For S_IRWXU
 
-TEST(LlvmLibcStdioFdopenTest, WriteAppendRead) {
+using LlvmLibcStdioFdopenTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcStdioFdopenTest, WriteAppendRead) {
   using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
-  libc_errno = 0;
   constexpr const char *TEST_FILE_NAME = "testdata/write_read_append.test";
   auto TEST_FILE = libc_make_test_file_path(TEST_FILE_NAME);
   int fd = LIBC_NAMESPACE::open(TEST_FILE, O_CREAT | O_TRUNC | O_RDWR, 
S_IRWXU);
@@ -52,8 +53,7 @@ TEST(LlvmLib

[llvm-branch-commits] [clang] 9a30822 - [CIR][NFC] Fix forrange.cpp test (#144123)

2025-06-13 Thread via llvm-branch-commits

Author: Andy Kaylor
Date: 2025-06-13T10:28:03-07:00
New Revision: 9a3082276d21873a37925d0c6ad89bd28d065cea

URL: 
https://github.com/llvm/llvm-project/commit/9a3082276d21873a37925d0c6ad89bd28d065cea
DIFF: 
https://github.com/llvm/llvm-project/commit/9a3082276d21873a37925d0c6ad89bd28d065cea.diff

LOG: [CIR][NFC] Fix forrange.cpp test (#144123)

A recent change has caused the begin and end iterators in the
forrange.cpp CIR codegen test to be marked as 'init', causing the test to
fail. This change fixes the checks in the test.

Added: 


Modified: 
clang/test/CIR/CodeGen/forrange.cpp

Removed: 




diff  --git a/clang/test/CIR/CodeGen/forrange.cpp 
b/clang/test/CIR/CodeGen/forrange.cpp
index 6b6ccc79e59dd..45e146e9091d0 100644
--- a/clang/test/CIR/CodeGen/forrange.cpp
+++ b/clang/test/CIR/CodeGen/forrange.cpp
@@ -115,8 +115,8 @@ void for_range3() {
 // CIR:%[[C_ADDR:.*]] = cir.alloca !rec_C3{{.*}} ["c"]
 // CIR:cir.scope {
 // CIR:  %[[RANGE_ADDR:.*]] = cir.alloca !cir.ptr{{.*}} 
["__range1", init, const]
-// CIR:  %[[BEGIN_ADDR:.*]] = cir.alloca !rec_Iterator, 
!cir.ptr{{.*}} ["__begin1"]
-// CIR:  %[[END_ADDR:.*]] = cir.alloca !rec_Iterator, 
!cir.ptr{{.*}} ["__end1"]
+// CIR:  %[[BEGIN_ADDR:.*]] = cir.alloca !rec_Iterator, 
!cir.ptr{{.*}} ["__begin1", init]
+// CIR:  %[[END_ADDR:.*]] = cir.alloca !rec_Iterator, 
!cir.ptr{{.*}} ["__end1", init]
 // CIR:  %[[E_ADDR:.*]] = cir.alloca !cir.ptr{{.*}} ["e", 
init, const]
 // CIR:  cir.store{{.*}} %[[C_ADDR]], %[[RANGE_ADDR]]
 // CIR:  cir.for : cond {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 62eea86 - [CIR] Update isSized with upstreamed types (#143960)

2025-06-13 Thread via llvm-branch-commits

Author: Amr Hesham
Date: 2025-06-13T19:29:21+02:00
New Revision: 62eea86424c4eacd38ad8a03f4bdae78687e3ade

URL: 
https://github.com/llvm/llvm-project/commit/62eea86424c4eacd38ad8a03f4bdae78687e3ade
DIFF: 
https://github.com/llvm/llvm-project/commit/62eea86424c4eacd38ad8a03f4bdae78687e3ade.diff

LOG: [CIR] Update isSized with upstreamed types (#143960)

Update `isSized` function with the upstreamed types

Added: 


Modified: 
clang/lib/CIR/CodeGen/CIRGenBuilder.h
clang/lib/CIR/CodeGen/CIRGenTypes.cpp
clang/test/CIR/CodeGen/array.cpp

Removed: 




diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h 
b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 36c89809b4d90..a4bc69619d60c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -139,8 +139,9 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
 
   bool isSized(mlir::Type ty) {
-if (mlir::isa(ty))
+if (mlir::isa(
+ty))
   return true;
 
 if (const auto vt = mlir::dyn_cast(ty))

diff  --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp 
b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index eaba3dfd1105e..bab47924dd719 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -419,6 +419,15 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
   case Type::ConstantArray: {
 const ConstantArrayType *arrTy = cast(ty);
 mlir::Type elemTy = convertTypeForMem(arrTy->getElementType());
+
+// TODO(CIR): In LLVM, "lower arrays of undefined struct type to arrays of
+// i8 just to have a concrete type"
+if (!builder.isSized(elemTy)) {
+  cgm.errorNYI(SourceLocation(), "arrays of undefined struct type", type);
+  resultType = cgm.UInt32Ty;
+  break;
+}
+
 resultType = cir::ArrayType::get(elemTy, arrTy->getSize().getZExtValue());
 break;
   }
@@ -432,8 +441,8 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
   }
 
   case Type::Enum: {
-const EnumDecl *ED = cast(ty)->getDecl();
-if (auto integerType = ED->getIntegerType(); !integerType.isNull())
+const EnumDecl *ed = cast(ty)->getDecl();
+if (auto integerType = ed->getIntegerType(); !integerType.isNull())
   return convertType(integerType);
 // Return a placeholder 'i32' type.  This can be changed later when the
 // type is defined (see UpdateCompletedType), but is likely to be the

diff  --git a/clang/test/CIR/CodeGen/array.cpp 
b/clang/test/CIR/CodeGen/array.cpp
index 7b90c1682ec45..26e172a006451 100644
--- a/clang/test/CIR/CodeGen/array.cpp
+++ b/clang/test/CIR/CodeGen/array.cpp
@@ -473,3 +473,26 @@ void func10(int *a) {
 // OGCG:  %[[ELE:.*]] = getelementptr inbounds i32, ptr %[[TMP_1]], i64 5
 // OGCG:  %[[TMP_2:.*]] = load i32, ptr %[[ELE]], align 4
 // OGCG:  store i32 %[[TMP_2]], ptr %[[INIT]], align 4
+
+void func11() { int _Complex a[4]; }
+
+// CIR: %[[ARR:.*]] = cir.alloca !cir.array x 4>, 
!cir.ptr x 4>>, ["a"]
+
+// LLVM: %[[ARR:.*]] = alloca [4 x { i32, i32 }], i64 1, align 16
+
+// OGCG: %[[ARR:.*]] = alloca [4 x { i32, i32 }], align 16
+
+void func12() {
+  struct Point {
+int x;
+int y;
+  };
+
+  Point a[4];
+}
+
+// CIR: %[[ARR:.*]] = cir.alloca !cir.array, 
!cir.ptr>, ["a"]
+
+// LLVM: %[[ARR:.*]] = alloca [4 x %struct.Point], i64 1, align 16
+
+// OGCG: %[[ARR:.*]] = alloca [4 x %struct.Point], align 16



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 493c161 - [SPIRV] Fix ExecutionMode_fragment.ll test (#144116)

2025-06-13 Thread via llvm-branch-commits

Author: Steven Perron
Date: 2025-06-13T13:26:26-04:00
New Revision: 493c1612d6f8f7a40d0bf0ba28fb753be83fac1c

URL: 
https://github.com/llvm/llvm-project/commit/493c1612d6f8f7a40d0bf0ba28fb753be83fac1c
DIFF: 
https://github.com/llvm/llvm-project/commit/493c1612d6f8f7a40d0bf0ba28fb753be83fac1c.diff

LOG: [SPIRV] Fix ExecutionMode_fragment.ll test (#144116)

Fix test broken by https://github.com/llvm/llvm-project/pull/143412.

Added: 


Modified: 
llvm/test/CodeGen/SPIRV/ExecutionMode_Fragment.ll

Removed: 




diff  --git a/llvm/test/CodeGen/SPIRV/ExecutionMode_Fragment.ll 
b/llvm/test/CodeGen/SPIRV/ExecutionMode_Fragment.ll
index 4fa764fe192d3..aab0ae05753fa 100644
--- a/llvm/test/CodeGen/SPIRV/ExecutionMode_Fragment.ll
+++ b/llvm/test/CodeGen/SPIRV/ExecutionMode_Fragment.ll
@@ -4,17 +4,16 @@
 ; CHECK-DAG: OpEntryPoint Fragment %[[#entry:]] "main" {{.*}}
 ; CHECK-DAG: OpExecutionMode %[[#entry]] OriginUpperLeft
 
+@.str.b0 = private unnamed_addr constant [3 x i8] c"B0\00", align 1
 
 define void @main() #0 {
 entry:
-  %0 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) 
@llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 
1, i32 1, i32 0, i1 false)
+  %0 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) 
@llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 
1, i32 1, i32 0, i1 false, ptr nonnull @.str.b0)
   %1 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) 
@llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer",
 [0 x i32], 12, 1) %0, i32 0)
   store i32 1, ptr addrspace(11) %1, align 4
 
   ret void
 }
 
-declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) 
@llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32, i32, 
i32, i32, i1) #1
-
 attributes #0 = { "hlsl.shader"="pixel" }
 attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn 
memory(none) }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits


@@ -104,7 +106,10 @@ MODULE_PASS("lower-ifunc", LowerIFuncPass())
 MODULE_PASS("simplify-type-tests", SimplifyTypeTestsPass())
 MODULE_PASS("lowertypetests", LowerTypeTestsPass())
 MODULE_PASS("fatlto-cleanup", FatLtoCleanup())
-MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? 
PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default))
+MODULE_PASS("pgo-force-function-attrs",
+PGOForceFunctionAttrsPass(PGOOpt

svkeerthy wrote:

Yeah, will do. Missed the unrelated formatting changes.

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits


@@ -259,32 +306,40 @@ Error IR2VecVocabAnalysis::readVocabulary() {
 return createFileError(VocabFile, BufOrError.getError());
 
   auto Content = BufOrError.get()->getBuffer();
-  json::Path::Root Path("");
+
   Expected ParsedVocabValue = json::parse(Content);
   if (!ParsedVocabValue)
 return ParsedVocabValue.takeError();
 
-  bool Res = json::fromJSON(*ParsedVocabValue, Vocabulary, Path);
-  if (!Res)
-return createStringError(errc::illegal_byte_sequence,
- "Unable to parse the vocabulary");
+  ir2vec::Vocab OpcodeVocab, TypeVocab, ArgVocab;
+  unsigned OpcodeDim, TypeDim, ArgDim;
+  if (auto Err = parseVocabSection("Opcodes", *ParsedVocabValue, OpcodeVocab,

svkeerthy wrote:

Correct. Will put it in the doc.

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread Mircea Trofin via llvm-branch-commits


@@ -234,6 +237,8 @@ class IR2VecVocabResult {
 class IR2VecVocabAnalysis : public AnalysisInfoMixin {
   ir2vec::Vocab Vocabulary;
   Error readVocabulary();
+  Error parseVocabSection(const char *Key, const json::Value ParsedVocabValue,

mtrofin wrote:

s/const char*/StringRef

s/const json::Value/const json::Value&

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Scale embeddings once in vocab analysis instead of repetitive scaling (PR #143986)

2025-06-13 Thread Mircea Trofin via llvm-branch-commits


@@ -104,7 +106,10 @@ MODULE_PASS("lower-ifunc", LowerIFuncPass())
 MODULE_PASS("simplify-type-tests", SimplifyTypeTestsPass())
 MODULE_PASS("lowertypetests", LowerTypeTestsPass())
 MODULE_PASS("fatlto-cleanup", FatLtoCleanup())
-MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? 
PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default))
+MODULE_PASS("pgo-force-function-attrs",
+PGOForceFunctionAttrsPass(PGOOpt

mtrofin wrote:

can you make the unrelated stylistic changes to this file in a separate patch?

https://github.com/llvm/llvm-project/pull/143986
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add symbol table scopes for `teams` and `parallel` (PR #144015)

2025-06-13 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

LGTM. Thanks for the fix

https://github.com/llvm/llvm-project/pull/144015
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits

https://github.com/momchil-velikov edited 
https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits

momchil-velikov wrote:

Commit message updated.

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)

2025-06-13 Thread Alexis Engelke via llvm-branch-commits

https://github.com/aengelke updated 
https://github.com/llvm/llvm-project/pull/142584

>From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001
From: Alexis Engelke 
Date: Wed, 4 Jun 2025 09:21:02 +
Subject: [PATCH 1/2] Move one check to beginning of function

Created using spr 1.3.5-bogner
---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e96f3f8193b09..2dbabfe345d5e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
   if (SuccChain.UnscheduledPredecessors == 0)
 return false;
 
+  // Compile-time optimization: runtime is quadratic in the number of
+  // predecessors. For such uncommon cases, exit early.
+  if (Succ->pred_size() > PredecessorLimit)
+return false;
+
   // There are two basic scenarios here:
   // -
   // Case 1: triangular shape CFG (if-then):
@@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
   BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
   bool BadCFGConflict = false;
 
-  // Compile-time optimization: runtime is quadratic in the number of
-  // predecessors. For such uncommon cases, exit early.
-  if (Succ->pred_size() > PredecessorLimit)
-return false;
-
   for (MachineBasicBlock *Pred : Succ->predecessors()) {
 BlockChain *PredChain = BlockToChain[Pred];
 if (Pred == Succ || PredChain == &SuccChain ||

>From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001
From: Alexis Engelke 
Date: Fri, 13 Jun 2025 15:43:00 +
Subject: [PATCH 2/2] Test new command line flag

Created using spr 1.3.5-bogner
---
 llvm/test/CodeGen/RISCV/branch.ll | 49 +++
 1 file changed, 49 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/branch.ll 
b/llvm/test/CodeGen/RISCV/branch.ll
index 578080cd3a240..ed86ca8ca4dd1 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs 
-block-placement-predecessor-limit=10 < %s \
+; RUN:   | FileCheck -check-prefix=RV32I-MBPLIMIT %s
 
 define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
 ; RV32I-LABEL: foo:
@@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
 ; RV32I-NEXT:lw zero, 0(a1)
 ; RV32I-NEXT:  .LBB0_14: # %end
 ; RV32I-NEXT:ret
+;
+; RV32I-MBPLIMIT-LABEL: foo:
+; RV32I-MBPLIMIT:   # %bb.0:
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2
+; RV32I-MBPLIMIT-NEXT:  .LBB0_1: # %end
+; RV32I-MBPLIMIT-NEXT:ret
+; RV32I-MBPLIMIT-NEXT:  .LBB0_2: # %test2
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.3: # %test3
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.4: # %test4
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.5: # %test5
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.6: # %test6
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.7: # %test7
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.8: # %test8
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.9: # %test9
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.10: # %test10
+; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.11: # %test11
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1
+; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.12: # %test12
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.13: # %test13
+; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.14: # %test14
+; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:ret
   %val1 = load volatile i32, ptr %b
   %tst1 = icmp eq i32 %val1, %a
   br i1 %tst1, label %end, label %test2

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/c

[llvm-branch-commits] [llvm] [MLGO][IR2Vec] Integrating IR2Vec with MLInliner (PR #143479)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143479

>From a2bec77ad03e20cd76b6870149863049a96c4f9e Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Tue, 10 Jun 2025 05:40:38 +
Subject: [PATCH] [MLInliner][IR2Vec] Integrating IR2Vec with MLInliner

---
 .../Analysis/FunctionPropertiesAnalysis.h |  26 +++-
 llvm/include/llvm/Analysis/InlineAdvisor.h|   4 +
 .../llvm/Analysis/InlineModelFeatureMaps.h|   8 +-
 llvm/include/llvm/Analysis/MLInlineAdvisor.h  |   1 +
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 115 +-
 llvm/lib/Analysis/InlineAdvisor.cpp   |  29 
 llvm/lib/Analysis/MLInlineAdvisor.cpp |  34 +++-
 .../FunctionPropertiesAnalysisTest.cpp| 145 --
 8 files changed, 338 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h 
b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index babb6d9d6cf0c..06dbfc35a5294 100644
--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -15,6 +15,7 @@
 #define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/IR2Vec.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Compiler.h"
@@ -32,17 +33,19 @@ class FunctionPropertiesInfo {
   void updateAggregateStats(const Function &F, const LoopInfo &LI);
   void reIncludeBB(const BasicBlock &BB);
 
+  ir2vec::Embedding FunctionEmbedding = ir2vec::Embedding(0.0);
+  std::optional IR2VecVocab;
+
 public:
   LLVM_ABI static FunctionPropertiesInfo
   getFunctionPropertiesInfo(const Function &F, const DominatorTree &DT,
-const LoopInfo &LI);
+const LoopInfo &LI,
+const IR2VecVocabResult *VocabResult);
 
   LLVM_ABI static FunctionPropertiesInfo
   getFunctionPropertiesInfo(Function &F, FunctionAnalysisManager &FAM);
 
-  bool operator==(const FunctionPropertiesInfo &FPI) const {
-return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
-  }
+  bool operator==(const FunctionPropertiesInfo &FPI) const;
 
   bool operator!=(const FunctionPropertiesInfo &FPI) const {
 return !(*this == FPI);
@@ -137,6 +140,19 @@ class FunctionPropertiesInfo {
   int64_t CallReturnsVectorPointerCount = 0;
   int64_t CallWithManyArgumentsCount = 0;
   int64_t CallWithPointerArgumentCount = 0;
+
+  const ir2vec::Embedding &getFunctionEmbedding() const {
+return FunctionEmbedding;
+  }
+
+  const std::optional &getIR2VecVocab() const {
+return IR2VecVocab;
+  }
+
+  // Helper intended to be useful for unittests
+  void setFunctionEmbeddingForTest(const ir2vec::Embedding &Embedding) {
+FunctionEmbedding = Embedding;
+  }
 };
 
 // Analysis pass
@@ -192,7 +208,7 @@ class FunctionPropertiesUpdater {
 
   DominatorTree &getUpdatedDominatorTree(FunctionAnalysisManager &FAM) const;
 
-  DenseSet Successors;
+  DenseSet Successors, CallUsers;
 
   // Edges we might potentially need to remove from the dominator tree.
   SmallVector DomTreeUpdates;
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h 
b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 9d15136e81d10..50ba3c13da70f 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -331,6 +331,10 @@ class InlineAdvisorAnalysis : public 
AnalysisInfoMixin {
   };
 
   Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); }
+
+private:
+  static bool initializeIR2VecVocabIfRequested(Module &M,
+   ModuleAnalysisManager &MAM);
 };
 
 /// Printer pass for the InlineAdvisorAnalysis results.
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h 
b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
index 961d5091bf9f3..a166621243cad 100644
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -142,6 +142,12 @@ enum class FeatureIndex : size_t {
   INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
 #undef POPULATE_INDICES
 
+// IR2Vec embeddings
+// Dimensions of embeddings are not known in the compile time (until vocab is 
+// read). Hence macros cannot be used here.
+  callee_embedding,
+  caller_embedding,
+
   NumberOfFeatures
 };
 // clang-format on
@@ -154,7 +160,7 @@ inlineCostFeatureToMlFeature(InlineCostFeatureIndex 
Feature) {
 constexpr size_t NumberOfFeatures =
 static_cast(FeatureIndex::NumberOfFeatures);
 
-LLVM_ABI extern const std::vector FeatureMap;
+LLVM_ABI extern std::vector FeatureMap;
 
 LLVM_ABI extern const char *const DecisionName;
 LLVM_ABI extern const TensorSpec InlineDecisionSpec;
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h 
b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 580dd5e95d760..8262dd0846ede 100644
--- a/llvm/include/llvm/Analysis/MLInlin

[llvm-branch-commits] [llvm] [IR2Vec] Simplifying creation of Embedder (PR #143999)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143999

>From d71dd503f4794abf8a396ddb8a5deeafe0d75c83 Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Thu, 12 Jun 2025 23:54:10 +
Subject: [PATCH] Simplifying creation of Embedder

---
 llvm/docs/MLGO.rst|  7 +--
 llvm/include/llvm/Analysis/IR2Vec.h   |  4 +-
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 10 ++---
 llvm/lib/Analysis/IR2Vec.cpp  | 17 +++
 .../FunctionPropertiesAnalysisTest.cpp|  7 ++-
 llvm/unittests/Analysis/IR2VecTest.cpp| 44 +++
 6 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
index 28095447f6a5a..0b849f3382f63 100644
--- a/llvm/docs/MLGO.rst
+++ b/llvm/docs/MLGO.rst
@@ -482,14 +482,9 @@ embeddings can be computed and accessed via an 
``ir2vec::Embedder`` instance.
 
   // Assuming F is an llvm::Function&
   // For example, using IR2VecKind::Symbolic:
-  Expected> EmbOrErr =
+  std::unique_ptr Emb =
   ir2vec::Embedder::create(IR2VecKind::Symbolic, F, Vocabulary);
 
-  if (auto Err = EmbOrErr.takeError()) {
-// Handle error in embedder creation
-return;
-  }
-  std::unique_ptr Emb = std::move(*EmbOrErr);
 
 3. **Compute and Access Embeddings**:
Call ``getFunctionVector()`` to get the embedding for the function. 
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h 
b/llvm/include/llvm/Analysis/IR2Vec.h
index 2a7a6edda70a8..06312562060aa 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,8 +170,8 @@ class Embedder {
   virtual ~Embedder() = default;
 
   /// Factory method to create an Embedder object.
-  static Expected>
-  create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary);
+  static std::unique_ptr create(IR2VecKind Mode, const Function &F,
+  const Vocab &Vocabulary);
 
   /// Returns a map containing instructions and the corresponding embeddings 
for
   /// the function F if it has been computed. If not, it computes the 
embeddings
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp 
b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 29d3aaf46dc06..dd4eb7f0df053 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -204,16 +204,12 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock 
&BB,
 // We instantiate the IR2Vec embedder each time, as having an unique
 // pointer to the embedder as member of the class would make it
 // non-copyable. Instantiating the embedder in itself is not costly.
-auto EmbOrErr = ir2vec::Embedder::create(IR2VecKind::Symbolic,
+auto Embedder = ir2vec::Embedder::create(IR2VecKind::Symbolic,
  *BB.getParent(), *IR2VecVocab);
-if (Error Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-BB.getContext().emitError("Error creating IR2Vec embeddings: " +
-  EI.message());
-  });
+if (!Embedder) {
+  BB.getContext().emitError("Error creating IR2Vec embeddings");
   return;
 }
-auto Embedder = std::move(*EmbOrErr);
 const auto &BBEmbedding = Embedder->getBBVector(BB);
 // Subtract BBEmbedding from Function embedding if the direction is -1,
 // and add it if the direction is +1.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 7ff7acebedf4e..27cc2a4109879 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -123,13 +123,14 @@ Embedder::Embedder(const Function &F, const Vocab 
&Vocabulary)
   Dimension(Vocabulary.begin()->second.size()), OpcWeight(::OpcWeight),
   TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {}
 
-Expected>
-Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) {
+std::unique_ptr Embedder::create(IR2VecKind Mode, const Function &F,
+   const Vocab &Vocabulary) {
   switch (Mode) {
   case IR2VecKind::Symbolic:
 return std::make_unique(F, Vocabulary);
   }
-  return make_error("Unknown IR2VecKind", errc::invalid_argument);
+  llvm_unreachable("Unknown IR2Vec kind");
+  return nullptr;
 }
 
 // FIXME: Currently lookups are string based. Use numeric Keys
@@ -384,17 +385,13 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
 
   auto Vocab = IR2VecVocabResult.getVocabulary();
   for (Function &F : M) {
-Expected> EmbOrErr =
+std::unique_ptr Emb =
 Embedder::create(IR2VecKind::Symbolic, F, Vocab);
-if (auto Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-OS << "Error creating IR2Vec embeddings: " << EI.message() << "\n";
-  });
+if (!Emb) {
+  OS << "Error creating I

[llvm-branch-commits] [llvm] [MLGO][IR2Vec] Integrating IR2Vec with MLInliner (PR #143479)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143479

>From a2bec77ad03e20cd76b6870149863049a96c4f9e Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Tue, 10 Jun 2025 05:40:38 +
Subject: [PATCH] [MLInliner][IR2Vec] Integrating IR2Vec with MLInliner

---
 .../Analysis/FunctionPropertiesAnalysis.h |  26 +++-
 llvm/include/llvm/Analysis/InlineAdvisor.h|   4 +
 .../llvm/Analysis/InlineModelFeatureMaps.h|   8 +-
 llvm/include/llvm/Analysis/MLInlineAdvisor.h  |   1 +
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 115 +-
 llvm/lib/Analysis/InlineAdvisor.cpp   |  29 
 llvm/lib/Analysis/MLInlineAdvisor.cpp |  34 +++-
 .../FunctionPropertiesAnalysisTest.cpp| 145 --
 8 files changed, 338 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h 
b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index babb6d9d6cf0c..06dbfc35a5294 100644
--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -15,6 +15,7 @@
 #define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/IR2Vec.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Compiler.h"
@@ -32,17 +33,19 @@ class FunctionPropertiesInfo {
   void updateAggregateStats(const Function &F, const LoopInfo &LI);
   void reIncludeBB(const BasicBlock &BB);
 
+  ir2vec::Embedding FunctionEmbedding = ir2vec::Embedding(0.0);
+  std::optional IR2VecVocab;
+
 public:
   LLVM_ABI static FunctionPropertiesInfo
   getFunctionPropertiesInfo(const Function &F, const DominatorTree &DT,
-const LoopInfo &LI);
+const LoopInfo &LI,
+const IR2VecVocabResult *VocabResult);
 
   LLVM_ABI static FunctionPropertiesInfo
   getFunctionPropertiesInfo(Function &F, FunctionAnalysisManager &FAM);
 
-  bool operator==(const FunctionPropertiesInfo &FPI) const {
-return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
-  }
+  bool operator==(const FunctionPropertiesInfo &FPI) const;
 
   bool operator!=(const FunctionPropertiesInfo &FPI) const {
 return !(*this == FPI);
@@ -137,6 +140,19 @@ class FunctionPropertiesInfo {
   int64_t CallReturnsVectorPointerCount = 0;
   int64_t CallWithManyArgumentsCount = 0;
   int64_t CallWithPointerArgumentCount = 0;
+
+  const ir2vec::Embedding &getFunctionEmbedding() const {
+return FunctionEmbedding;
+  }
+
+  const std::optional &getIR2VecVocab() const {
+return IR2VecVocab;
+  }
+
+  // Helper intended to be useful for unittests
+  void setFunctionEmbeddingForTest(const ir2vec::Embedding &Embedding) {
+FunctionEmbedding = Embedding;
+  }
 };
 
 // Analysis pass
@@ -192,7 +208,7 @@ class FunctionPropertiesUpdater {
 
   DominatorTree &getUpdatedDominatorTree(FunctionAnalysisManager &FAM) const;
 
-  DenseSet Successors;
+  DenseSet Successors, CallUsers;
 
   // Edges we might potentially need to remove from the dominator tree.
   SmallVector DomTreeUpdates;
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h 
b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 9d15136e81d10..50ba3c13da70f 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -331,6 +331,10 @@ class InlineAdvisorAnalysis : public 
AnalysisInfoMixin {
   };
 
   Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); }
+
+private:
+  static bool initializeIR2VecVocabIfRequested(Module &M,
+   ModuleAnalysisManager &MAM);
 };
 
 /// Printer pass for the InlineAdvisorAnalysis results.
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h 
b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
index 961d5091bf9f3..a166621243cad 100644
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -142,6 +142,12 @@ enum class FeatureIndex : size_t {
   INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
 #undef POPULATE_INDICES
 
+// IR2Vec embeddings
+// Dimensions of embeddings are not known in the compile time (until vocab is 
+// read). Hence macros cannot be used here.
+  callee_embedding,
+  caller_embedding,
+
   NumberOfFeatures
 };
 // clang-format on
@@ -154,7 +160,7 @@ inlineCostFeatureToMlFeature(InlineCostFeatureIndex 
Feature) {
 constexpr size_t NumberOfFeatures =
 static_cast(FeatureIndex::NumberOfFeatures);
 
-LLVM_ABI extern const std::vector FeatureMap;
+LLVM_ABI extern std::vector FeatureMap;
 
 LLVM_ABI extern const char *const DecisionName;
 LLVM_ABI extern const TensorSpec InlineDecisionSpec;
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h 
b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 580dd5e95d760..8262dd0846ede 100644
--- a/llvm/include/llvm/Analysis/MLInlin

[llvm-branch-commits] [llvm] [IR2Vec] Simplifying creation of Embedder (PR #143999)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143999

>From d71dd503f4794abf8a396ddb8a5deeafe0d75c83 Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Thu, 12 Jun 2025 23:54:10 +
Subject: [PATCH] Simplifying creation of Embedder

---
 llvm/docs/MLGO.rst|  7 +--
 llvm/include/llvm/Analysis/IR2Vec.h   |  4 +-
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 10 ++---
 llvm/lib/Analysis/IR2Vec.cpp  | 17 +++
 .../FunctionPropertiesAnalysisTest.cpp|  7 ++-
 llvm/unittests/Analysis/IR2VecTest.cpp| 44 +++
 6 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
index 28095447f6a5a..0b849f3382f63 100644
--- a/llvm/docs/MLGO.rst
+++ b/llvm/docs/MLGO.rst
@@ -482,14 +482,9 @@ embeddings can be computed and accessed via an 
``ir2vec::Embedder`` instance.
 
   // Assuming F is an llvm::Function&
   // For example, using IR2VecKind::Symbolic:
-  Expected> EmbOrErr =
+  std::unique_ptr Emb =
   ir2vec::Embedder::create(IR2VecKind::Symbolic, F, Vocabulary);
 
-  if (auto Err = EmbOrErr.takeError()) {
-// Handle error in embedder creation
-return;
-  }
-  std::unique_ptr Emb = std::move(*EmbOrErr);
 
 3. **Compute and Access Embeddings**:
Call ``getFunctionVector()`` to get the embedding for the function. 
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h 
b/llvm/include/llvm/Analysis/IR2Vec.h
index 2a7a6edda70a8..06312562060aa 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,8 +170,8 @@ class Embedder {
   virtual ~Embedder() = default;
 
   /// Factory method to create an Embedder object.
-  static Expected>
-  create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary);
+  static std::unique_ptr create(IR2VecKind Mode, const Function &F,
+  const Vocab &Vocabulary);
 
   /// Returns a map containing instructions and the corresponding embeddings 
for
   /// the function F if it has been computed. If not, it computes the 
embeddings
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp 
b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 29d3aaf46dc06..dd4eb7f0df053 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -204,16 +204,12 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock 
&BB,
 // We instantiate the IR2Vec embedder each time, as having an unique
 // pointer to the embedder as member of the class would make it
 // non-copyable. Instantiating the embedder in itself is not costly.
-auto EmbOrErr = ir2vec::Embedder::create(IR2VecKind::Symbolic,
+auto Embedder = ir2vec::Embedder::create(IR2VecKind::Symbolic,
  *BB.getParent(), *IR2VecVocab);
-if (Error Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-BB.getContext().emitError("Error creating IR2Vec embeddings: " +
-  EI.message());
-  });
+if (!Embedder) {
+  BB.getContext().emitError("Error creating IR2Vec embeddings");
   return;
 }
-auto Embedder = std::move(*EmbOrErr);
 const auto &BBEmbedding = Embedder->getBBVector(BB);
 // Subtract BBEmbedding from Function embedding if the direction is -1,
 // and add it if the direction is +1.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 7ff7acebedf4e..27cc2a4109879 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -123,13 +123,14 @@ Embedder::Embedder(const Function &F, const Vocab 
&Vocabulary)
   Dimension(Vocabulary.begin()->second.size()), OpcWeight(::OpcWeight),
   TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {}
 
-Expected>
-Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) {
+std::unique_ptr Embedder::create(IR2VecKind Mode, const Function &F,
+   const Vocab &Vocabulary) {
   switch (Mode) {
   case IR2VecKind::Symbolic:
 return std::make_unique(F, Vocabulary);
   }
-  return make_error("Unknown IR2VecKind", errc::invalid_argument);
+  llvm_unreachable("Unknown IR2Vec kind");
+  return nullptr;
 }
 
 // FIXME: Currently lookups are string based. Use numeric Keys
@@ -384,17 +385,13 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
 
   auto Vocab = IR2VecVocabResult.getVocabulary();
   for (Function &F : M) {
-Expected> EmbOrErr =
+std::unique_ptr Emb =
 Embedder::create(IR2VecKind::Symbolic, F, Vocab);
-if (auto Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-OS << "Error creating IR2Vec embeddings: " << EI.message() << "\n";
-  });
+if (!Emb) {
+  OS << "Error creating I

[llvm-branch-commits] [llvm] 69ba6fa - Revert "[PowerPC][NFC] Pre-commit test case for checking whether `mtvsrbmi` …"

2025-06-13 Thread via llvm-branch-commits

Author: zhijian lin
Date: 2025-06-13T09:24:56-04:00
New Revision: 69ba6fa610e19baa1d0d18f04a27cb5f45db1711

URL: 
https://github.com/llvm/llvm-project/commit/69ba6fa610e19baa1d0d18f04a27cb5f45db1711
DIFF: 
https://github.com/llvm/llvm-project/commit/69ba6fa610e19baa1d0d18f04a27cb5f45db1711.diff

LOG: Revert "[PowerPC][NFC] Pre-commit test case for checking whether  
`mtvsrbmi` …"

This reverts commit 9c2e0bd59ce0438fcad61b0468fd939c6282d048.

Added: 


Modified: 


Removed: 
llvm/test/CodeGen/PowerPC/mtvsrbmi.ll



diff  --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll 
b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
deleted file mode 100644
index 7ed57c300ec71..0
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
-; Verify whether the generated assembly for the following function includes 
the mtvsrbmi instruction.
-; vector unsigned char v00FF()
-; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
-; }
-
-; RUN: llc < %s -ppc-asm-full-reg-names  -mtriple=powerpc-ibm-aix -mcpu=pwr10  
-verify-machineinstrs \
-; RUN:   | FileCheck %s --check-prefix=CHECK
-
-define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:lwz r3, L..C0(r2) # %const.0
-; CHECK-NEXT:lxv vs34, 0(r3)
-; CHECK-NEXT:blr
-entry:
-  ret <16 x i8> 
-}
-
-; CHECK:  L..CPI0_0:
-; CHECK-NEXT:   .byte   255 # 0xff
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-; CHECK-NEXT:   .byte   0   # 0x0
-
-; CHECK:  ._Z5v00FFv:
-; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT:   lwz r3, L..C0(r2)
-; CHECK-NEXT:   lxv vs34, 0(r3)
-; CHECK-NEXT:   blr



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Legalize certain `vector.transfer_read` ops of scalable vectors (PR #143146)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/143146

>From 493955781f28b8b6caaeff1b45f7b7a06b43086c Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Wed, 14 May 2025 09:03:49 +
Subject: [PATCH 1/3] [MLIR] Legalize certain `vector.transfer_read` ops of
 scalable vectors

This patch adds a transform of the `transfer_read` operation to change the
vector type to one that can be mapped to an LLVM type. This is done by
collapsing trailing dimensions so we obtain a vector type with a single
scalable dimension in the rightmost position.
---
 .../Transforms/LegalizeVectorStorage.cpp  | 110 -
 .../ArmSVE/legalize-transfer-read.mlir| 226 ++
 .../transfer-read-scalable-not-rightmost.mlir |  72 ++
 3 files changed, 407 insertions(+), 1 deletion(-)
 create mode 100644 mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir
 create mode 100644 
mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-not-rightmost.mlir

diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp 
b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp
index d2ac850a5f70b..f16d33c004fec 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp
@@ -298,6 +298,113 @@ struct LegalizeSVEMaskLoadConversion : public 
OpRewritePattern {
   }
 };
 
+/// Transforms a `transfer_read` operation so it reads vector of a type that
+/// can be mapped to an LLVM type. This is done by collapsing trailing
+/// dimensions so we obtain a vector type with a single scalable dimension in
+/// the rightmost position.
+///
+/// Example:
+/// ```
+/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8
+///   {in_bounds = [false, true, true, true]}
+///   : memref, vector<2x[4]x2x8xi8>
+/// ```
+/// is rewriten to
+/// ```
+/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]]
+///   : memref into memref
+/// %0 = vector.transfer_read  %collapse_shape[%i, %j], %c0_i8
+///   {in_bounds = [false, true]}
+///   : memref, vector<2x[64]xi8>
+/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8>
+/// ```
+struct LegalizeTransferRead : public OpRewritePattern {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::TransferReadOp readOp,
+PatternRewriter &rewriter) const override {
+
+if (!readOp.getPermutationMap().isMinorIdentity())
+  return rewriter.notifyMatchFailure(readOp, "non-identity permutation");
+
+// We handle transfers of vectors with rank >= 2 and a single scalable
+// dimension.
+VectorType origVT = readOp.getVectorType();
+ArrayRef origScalableDims = origVT.getScalableDims();
+const int64_t origVRank = origVT.getRank();
+if (origVRank < 2 || llvm::count(origScalableDims, true) != 1)
+  return rewriter.notifyMatchFailure(readOp, "wrong dimensions");
+
+// Number of trailing dimensions to collapse, including the scalable
+// dimension.  Nothing to do if the single scalable dimension is already 
the
+// last one.
+const int64_t numCollapseDims = std::distance(
+llvm::find(origScalableDims, true), origScalableDims.end());
+if (numCollapseDims < 2)
+  return rewriter.notifyMatchFailure(readOp,
+ "scalable dimension is trailing");
+
+// We want a simple memref (not a tensor) with contiguous elements for at
+// least all the trailing dimensions up to and including the scalable one.
+auto memTy = dyn_cast(readOp.getBase().getType());
+if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims)))
+  return rewriter.notifyMatchFailure(
+  readOp, "non-contiguous memref dimensions to collapse");
+
+// The collapsed dimensions (excluding the scalable one) of the vector and
+// the memref must match and the corresponding indices must be in-bounds 
(it
+// follows these indices would be zero). This guarantees that the operation
+// transfers a contiguous block.
+if (!llvm::equal(memTy.getShape().take_back(numCollapseDims - 1),
+ origVT.getShape().take_back(numCollapseDims - 1)))
+  return rewriter.notifyMatchFailure(
+  readOp, "memref and vector dimensions do not match");
+
+SmallVector origInBounds = readOp.getInBoundsValues();
+if (!llvm::all_of(
+ArrayRef(origInBounds).take_back(numCollapseDims - 1),
+[](bool v) { return v; }))
+  return rewriter.notifyMatchFailure(readOp,
+ "out-if-bounds index to collapse");
+
+// Collapse the trailing dimensions of the memref.
+SmallVector reassoc;
+for (int64_t i = 0; i < memTy.getRank() - numCollapseDims + 1; ++i)
+  reassoc.push_back({i});
+for (int64_t i = memTy.getRank() - numCollapseDims + 1; i < 
memTy.getRank();
+ ++i)
+  reassoc.

[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread James Newling via llvm-branch-commits


@@ -203,21 +206,21 @@ func.func @transfer_read_dynamic_dim_to_flatten(
   return %res : vector<1x2x6xi32>
 }
 
-// CHECK: #[[$MAP:.*]] = affine_map<()[s0, s1] -> (s0 * 24 + s1 * 6)>
+// CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 24 + s1 * 6)>
 
 // CHECK-LABEL: func.func @transfer_read_dynamic_dim_to_flatten
 // CHECK-SAME:%[[IDX_1:arg0]]
 // CHECK-SAME:%[[IDX_2:arg1]]
 // CHECK-SAME:%[[MEM:arg2]]
-// CHECK:  %[[C0_I32:.*]] = arith.constant 0 : i32

newling wrote:

Makes sense, thanks

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][C++03] Fix a bunch of random tests (PR #144117)

2025-06-13 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 created 
https://github.com/llvm/llvm-project/pull/144117

This fixes/removes a bunch of random tests. They all failed in relatively 
simple-to-fix ways.

Specifically (all inside `libcxx/test/libcxx-03`):
- `utilities/template.bitset/includes.pass.cpp`: the header guards have 
different names now (guard names fixed)
- `utilities/meta/is_referenceable.compile.pass.cpp`: The name changed from 
`__libcpp_is_referenceable` (reverted name)
- `utilities/function.objects/refwrap/desugars_to.compile.pass.cpp`: 
Optimization has been added after the header split (test removed)
- `type_traits/is_replaceable.compile.pass.cpp`: `__is_replacable_v` has been 
added after the header split (test removed)
- `type_traits/is_constant_evaluated.pass.cpp`: Ran C++11 code accidentally 
(C++11 test parts removed)
- `type_traits/desugars_to.compile.pass.cpp`: Optimization has been added after 
the header split (test removed)
- `numerics/bit.ops.pass.cpp`: Tried to include header which doesn't exist 
(removed include and related code which wasn't executed in C++03)
- `experimental/fexperimental-library.compile.pass.cpp`: This test is 
irrelevant for C++03, since there are no C++03 experimental features (test 
removed)
- `containers/container_traits.compile.pass.cpp`: `container_traits` have been 
introduced after the header split (test removed)



>From 94255420a3a9e470973d3f3d4f7bed76bef39d23 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser 
Date: Fri, 13 Jun 2025 18:51:26 +0200
Subject: [PATCH] [libc++][C++03] Fix a bunch of random tests

---
 .../container_traits.compile.pass.cpp | 165 -
 .../fexperimental-library.compile.pass.cpp|  31 --
 .../bounded_iter/comparison.pass.cpp  |   4 +-
 .../test/libcxx-03/numerics/bit.ops.pass.cpp  |  12 +-
 .../type_traits/desugars_to.compile.pass.cpp  |  42 ---
 .../is_constant_evaluated.pass.cpp|   8 +-
 .../is_replaceable.compile.pass.cpp   | 313 --
 .../refwrap/desugars_to.compile.pass.cpp  |  36 --
 .../meta/is_referenceable.compile.pass.cpp| 230 +++--
 .../template.bitset/includes.pass.cpp |   8 +-
 10 files changed, 121 insertions(+), 728 deletions(-)
 delete mode 100644 
libcxx/test/libcxx-03/containers/container_traits.compile.pass.cpp
 delete mode 100644 
libcxx/test/libcxx-03/experimental/fexperimental-library.compile.pass.cpp
 delete mode 100644 
libcxx/test/libcxx-03/type_traits/desugars_to.compile.pass.cpp
 delete mode 100644 
libcxx/test/libcxx-03/type_traits/is_replaceable.compile.pass.cpp
 delete mode 100644 
libcxx/test/libcxx-03/utilities/function.objects/refwrap/desugars_to.compile.pass.cpp

diff --git a/libcxx/test/libcxx-03/containers/container_traits.compile.pass.cpp 
b/libcxx/test/libcxx-03/containers/container_traits.compile.pass.cpp
deleted file mode 100644
index 22be217487951..0
--- a/libcxx/test/libcxx-03/containers/container_traits.compile.pass.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-//===--===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// <__type_traits/container_traits.h>
-//
-
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
-#include <__type_traits/container_traits.h>
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "test_allocator.h"
-#include "test_macros.h"
-#include "MoveOnly.h"
-
-struct ThrowOnMove {
-  ThrowOnMove();
-  ThrowOnMove(const ThrowOnMove&) TEST_NOEXCEPT_COND(false);
-  ThrowOnMove(ThrowOnMove&&) TEST_NOEXCEPT_COND(false);
-  ThrowOnMove& operator=(ThrowOnMove&&) TEST_NOEXCEPT_COND(false);
-  ThrowOnMove& operator=(const ThrowOnMove&) TEST_NOEXCEPT_COND(false);
-
-  bool operator<(ThrowOnMove const&) const;
-  bool operator==(ThrowOnMove const&) const;
-};
-
-struct NonCopyThrowOnMove {
-  NonCopyThrowOnMove();
-  NonCopyThrowOnMove(NonCopyThrowOnMove&&) TEST_NOEXCEPT_COND(false);
-  NonCopyThrowOnMove(const NonCopyThrowOnMove&) = delete;
-  NonCopyThrowOnMove& operator=(NonCopyThrowOnMove&&) 
TEST_NOEXCEPT_COND(false);
-  NonCopyThrowOnMove& operator=(const NonCopyThrowOnMove&) = delete;
-
-  bool operator<(NonCopyThrowOnMove const&) const;
-  bool operator==(NonCopyThrowOnMove const&) const;
-};
-
-struct ThrowingHash {
-  template 
-  std::size_t operator()(const T&) const TEST_NOEXCEPT_COND(false);
-};
-
-struct NoThrowHash {
-  template 
-  std::size_t operator()(const T&) const TEST_NOEXCEPT;
-};
-
-template 
-void check() {
-  static_assert(
-  
std::__container_traits::__emplacement_has_strong_exception_safety_guarantee
 == Expected, "");
-}
-
-void test() {
-  check >();
-  check > >();
-  check >();
-  check >();
-  check >();
-
-  check >(

[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread James Newling via llvm-branch-commits


@@ -630,7 +639,10 @@ class FlattenContiguousRowMajorTransferReadPattern
 if (transferReadOp.getMask())
   return failure();
 
-int64_t firstDimToCollapse = sourceType.getRank() - vectorType.getRank();

newling wrote:

Looks good, thanks!

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread James Newling via llvm-branch-commits


@@ -630,7 +639,10 @@ class FlattenContiguousRowMajorTransferReadPattern
 if (transferReadOp.getMask())
   return failure();
 
-int64_t firstDimToCollapse = sourceType.getRank() - vectorType.getRank();

newling wrote:

> For memrefs with dynamic dimensions and no strides or maps, e.g. 
> memref<2x?x2xi8>

Makes sense yes

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][C++03] Fix a bunch of random tests (PR #144117)

2025-06-13 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- 
libcxx/test/libcxx-03/iterators/bounded_iter/comparison.pass.cpp 
libcxx/test/libcxx-03/numerics/bit.ops.pass.cpp 
libcxx/test/libcxx-03/type_traits/is_constant_evaluated.pass.cpp 
libcxx/test/libcxx-03/utilities/meta/is_referenceable.compile.pass.cpp 
libcxx/test/libcxx-03/utilities/template.bitset/includes.pass.cpp
``





View the diff from clang-format here.


``diff
diff --git a/libcxx/test/libcxx-03/utilities/template.bitset/includes.pass.cpp 
b/libcxx/test/libcxx-03/utilities/template.bitset/includes.pass.cpp
index f6e6960c9..d69a4b3b8 100644
--- a/libcxx/test/libcxx-03/utilities/template.bitset/includes.pass.cpp
+++ b/libcxx/test/libcxx-03/utilities/template.bitset/includes.pass.cpp
@@ -13,15 +13,15 @@
 #include "test_macros.h"
 
 #ifndef _LIBCPP___CXX03_STRING
-#error  has not been included
+#  error  has not been included
 #endif
 
 #ifndef _LIBCPP___CXX03_STDEXCEPT
-#error  has not been included
+#  error  has not been included
 #endif
 
 #ifndef _LIBCPP___CXX03_IOSFWD
-#error  has not been included
+#  error  has not been included
 #endif
 
 int main(int, char**)

``




https://github.com/llvm/llvm-project/pull/144117
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [IR2Vec] Simplifying creation of Embedder (PR #143999)

2025-06-13 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143999

>From 0d921416a0f81e5634705dc9dfc5363d721a55bf Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Thu, 12 Jun 2025 23:54:10 +
Subject: [PATCH] Simplifying creation of Embedder

---
 llvm/docs/MLGO.rst|  7 +--
 llvm/include/llvm/Analysis/IR2Vec.h   |  4 +-
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 10 ++---
 llvm/lib/Analysis/IR2Vec.cpp  | 17 +++
 .../FunctionPropertiesAnalysisTest.cpp|  7 ++-
 llvm/unittests/Analysis/IR2VecTest.cpp| 44 +++
 6 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
index 4f8fb3f59ca19..e7bba9995b75b 100644
--- a/llvm/docs/MLGO.rst
+++ b/llvm/docs/MLGO.rst
@@ -479,14 +479,9 @@ embeddings can be computed and accessed via an 
``ir2vec::Embedder`` instance.
 
   // Assuming F is an llvm::Function&
   // For example, using IR2VecKind::Symbolic:
-  Expected> EmbOrErr =
+  std::unique_ptr Emb =
   ir2vec::Embedder::create(IR2VecKind::Symbolic, F, Vocabulary);
 
-  if (auto Err = EmbOrErr.takeError()) {
-// Handle error in embedder creation
-return;
-  }
-  std::unique_ptr Emb = std::move(*EmbOrErr);
 
 3. **Compute and Access Embeddings**:
Call ``getFunctionVector()`` to get the embedding for the function. 
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h 
b/llvm/include/llvm/Analysis/IR2Vec.h
index f1aaf4cd2e013..6efa6eac56af9 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,8 +170,8 @@ class Embedder {
   virtual ~Embedder() = default;
 
   /// Factory method to create an Embedder object.
-  static Expected>
-  create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary);
+  static std::unique_ptr create(IR2VecKind Mode, const Function &F,
+  const Vocab &Vocabulary);
 
   /// Returns a map containing instructions and the corresponding embeddings 
for
   /// the function F if it has been computed. If not, it computes the 
embeddings
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp 
b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 29d3aaf46dc06..dd4eb7f0df053 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -204,16 +204,12 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock 
&BB,
 // We instantiate the IR2Vec embedder each time, as having an unique
 // pointer to the embedder as member of the class would make it
 // non-copyable. Instantiating the embedder in itself is not costly.
-auto EmbOrErr = ir2vec::Embedder::create(IR2VecKind::Symbolic,
+auto Embedder = ir2vec::Embedder::create(IR2VecKind::Symbolic,
  *BB.getParent(), *IR2VecVocab);
-if (Error Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-BB.getContext().emitError("Error creating IR2Vec embeddings: " +
-  EI.message());
-  });
+if (!Embedder) {
+  BB.getContext().emitError("Error creating IR2Vec embeddings");
   return;
 }
-auto Embedder = std::move(*EmbOrErr);
 const auto &BBEmbedding = Embedder->getBBVector(BB);
 // Subtract BBEmbedding from Function embedding if the direction is -1,
 // and add it if the direction is +1.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index de9c2db9531e8..308c3d86a7668 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -123,13 +123,14 @@ Embedder::Embedder(const Function &F, const Vocab 
&Vocabulary)
   Dimension(Vocabulary.begin()->second.size()), OpcWeight(::OpcWeight),
   TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {}
 
-Expected>
-Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) {
+std::unique_ptr Embedder::create(IR2VecKind Mode, const Function &F,
+   const Vocab &Vocabulary) {
   switch (Mode) {
   case IR2VecKind::Symbolic:
 return std::make_unique(F, Vocabulary);
   }
-  return make_error("Unknown IR2VecKind", errc::invalid_argument);
+  llvm_unreachable("Unknown IR2Vec kind");
+  return nullptr;
 }
 
 // FIXME: Currently lookups are string based. Use numeric Keys
@@ -388,17 +389,13 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
 
   auto Vocab = IR2VecVocabResult.getVocabulary();
   for (Function &F : M) {
-Expected> EmbOrErr =
+std::unique_ptr Emb =
 Embedder::create(IR2VecKind::Symbolic, F, Vocab);
-if (auto Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-OS << "Error creating IR2Vec embeddings: " << EI.message() << "\n";
-  });
+if (!Emb) {
+  OS << "Error creating I

[llvm-branch-commits] [llvm] WebAssembly: Stop directly using RuntimeLibcalls.def (PR #143054)

2025-06-13 Thread Thomas Lively via llvm-branch-commits


@@ -528,23 +528,20 @@ RuntimeLibcallSignatureTable 
&getRuntimeLibcallSignatures() {
 // constructor for use with a static variable
 struct StaticLibcallNameMap {
   StringMap Map;
-  StaticLibcallNameMap() {
-static const std::pair NameLibcalls[] = {
-#define HANDLE_LIBCALL(code, name) {(const char *)name, RTLIB::code},
-#include "llvm/IR/RuntimeLibcalls.def"
-#undef HANDLE_LIBCALL
-};
-for (const auto &NameLibcall : NameLibcalls) {
-  if (NameLibcall.first != nullptr &&
-  getRuntimeLibcallSignatures().Table[NameLibcall.second] !=
-  unsupported) {
-assert(!Map.contains(NameLibcall.first) &&
+  StaticLibcallNameMap(const Triple &TT) {
+// FIXME: This is broken if there are ever different triples compiled with
+// different libcalls.
+RTLIB::RuntimeLibcallsInfo RTCI(TT);
+for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
+  RTLIB::Libcall LC = static_cast(I);
+  const char *NameLibcall = RTCI.getLibcallName(LC);
+  if (NameLibcall != nullptr &&
+  getRuntimeLibcallSignatures().Table[LC] != unsupported) {
+assert(!Map.contains(NameLibcall) &&
"duplicate libcall names in name map");
-Map[NameLibcall.first] = NameLibcall.second;
+Map[NameLibcall] = LC;
   }
 }
-
-Map["emscripten_return_address"] = RTLIB::RETURN_ADDRESS;

tlively wrote:

How is this handled in the new version?

https://github.com/llvm/llvm-project/pull/143054
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CodeGen][NFC] Fix quadratic c-t for large jump tables (PR #144108)

2025-06-13 Thread Alexis Engelke via llvm-branch-commits

https://github.com/aengelke created 
https://github.com/llvm/llvm-project/pull/144108

Deleting a basic block removes all references from jump tables, which
is O(n). When freeing a MachineFunction, all basic blocks are deleted
before the jump tables, causing O(n^2) runtime. Fix this by deallocating
the jump table first.

Test case generator:

import sys

n = int(sys.argv[1])
print("define void @f(i64 %c, ptr %p) {")
print("  switch i64 %c, label %d [")
for i in range(n):
print(f"i64 {i}, label %h{i}")
print(f"  ]")
for i in range(n):
print(f'h{i}:')
print(f'  store i64 {i*i}, ptr %p')
print(f'  ret void')
print('d:')
print('  ret void')
print('}')



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][C++03] Fix tests which only fail due to incorrect includes (PR #144110)

2025-06-13 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 created 
https://github.com/llvm/llvm-project/pull/144110

Quite a few of the frozen header tests only fail because the include path is 
incorrect due to copying the headers. This patch fixes the tests where that's 
the only problem.

This is part of https://discourse.llvm.org/t/rfc-freezing-c-03-headers-in-libc.


>From 748f899d6b70933aa50f73bbe45ab198b8aacc38 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser 
Date: Fri, 13 Jun 2025 18:14:22 +0200
Subject: [PATCH] [libc++][C++03] Fix tests which only fail due to incorrect
 includes

---
 .../test/libcxx-03/algorithms/half_positive.pass.cpp   |  4 +---
 .../assertions/default_verbose_abort.pass.cpp  |  4 +---
 libcxx/test/libcxx-03/assertions/modes/none.pass.cpp   |  4 +---
 .../libcxx-03/assertions/single_expression.pass.cpp|  4 +---
 .../associative/tree_balance_after_insert.pass.cpp |  4 +---
 .../associative/tree_key_value_traits.pass.cpp |  4 +---
 .../containers/associative/tree_left_rotate.pass.cpp   |  4 +---
 .../containers/associative/tree_remove.pass.cpp|  4 +---
 .../containers/associative/tree_right_rotate.pass.cpp  |  4 +---
 .../containers/unord/key_value_traits.pass.cpp |  4 +---
 .../libcxx-03/containers/unord/next_prime.pass.cpp |  4 +---
 .../libcxx-03/depr/depr.c.headers/extern_c.pass.cpp|  4 +---
 .../libcxx-03/iterators/aliasing_iterator.pass.cpp |  4 +---
 .../iterators/bounded_iter/arithmetic.pass.cpp |  4 +---
 .../iterators/bounded_iter/pointer_traits.pass.cpp |  4 +---
 .../iterators/bounded_iter/types.compile.pass.cpp  |  4 +---
 libcxx/test/libcxx-03/memory/allocation_guard.pass.cpp |  4 +---
 libcxx/test/libcxx-03/memory/swap_allocator.pass.cpp   |  4 +---
 .../test/libcxx-03/numerics/clamp_to_integral.pass.cpp |  4 +---
 libcxx/test/libcxx-03/selftest/test_macros.pass.cpp|  4 +---
 .../strings/c.strings/constexpr_memmove.pass.cpp   |  4 +---
 .../is_trivially_comparable.compile.pass.cpp   |  8 +++-
 .../is_trivially_relocatable.compile.pass.cpp  |  4 +---
 .../libcxx-03/utilities/exception_guard.odr.sh.cpp |  4 +---
 .../libcxx-03/utilities/is_pointer_in_range.pass.cpp   |  4 +---
 .../test/libcxx-03/utilities/is_valid_range.pass.cpp   |  4 +---
 .../test/libcxx-03/utilities/meta/meta_base.pass.cpp   | 10 --
 libcxx/test/libcxx-03/utilities/no_destroy.pass.cpp|  4 +---
 .../utility/private_constructor_tag.compile.pass.cpp   |  4 +---
 29 files changed, 34 insertions(+), 92 deletions(-)

diff --git a/libcxx/test/libcxx-03/algorithms/half_positive.pass.cpp 
b/libcxx/test/libcxx-03/algorithms/half_positive.pass.cpp
index 88a18e8592921..292fcf356554b 100644
--- a/libcxx/test/libcxx-03/algorithms/half_positive.pass.cpp
+++ b/libcxx/test/libcxx-03/algorithms/half_positive.pass.cpp
@@ -11,9 +11,7 @@
 // __half_positive divides an integer number by 2 as unsigned number for known 
types.
 // It can be an important optimization for lower bound, for example.
 
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
-#include <__algorithm/half_positive.h>
+#include <__cxx03/__algorithm/half_positive.h>
 #include 
 #include 
 #include 
diff --git a/libcxx/test/libcxx-03/assertions/default_verbose_abort.pass.cpp 
b/libcxx/test/libcxx-03/assertions/default_verbose_abort.pass.cpp
index 803868b757794..27169da5e1c41 100644
--- a/libcxx/test/libcxx-03/assertions/default_verbose_abort.pass.cpp
+++ b/libcxx/test/libcxx-03/assertions/default_verbose_abort.pass.cpp
@@ -9,9 +9,7 @@
 // Test that the default verbose termination function aborts the program.
 // XFAIL: availability-verbose_abort-missing
 
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
-#include <__verbose_abort>
+#include <__cxx03/__verbose_abort>
 #include 
 #include 
 
diff --git a/libcxx/test/libcxx-03/assertions/modes/none.pass.cpp 
b/libcxx/test/libcxx-03/assertions/modes/none.pass.cpp
index b64290a31a129..e79dee906ae69 100644
--- a/libcxx/test/libcxx-03/assertions/modes/none.pass.cpp
+++ b/libcxx/test/libcxx-03/assertions/modes/none.pass.cpp
@@ -11,9 +11,7 @@
 
 // REQUIRES: libcpp-hardening-mode=none
 
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
-#include <__assert>
+#include <__cxx03/__assert>
 #include 
 
 bool executed_condition = false;
diff --git a/libcxx/test/libcxx-03/assertions/single_expression.pass.cpp 
b/libcxx/test/libcxx-03/assertions/single_expression.pass.cpp
index 474edc9dc0833..bbda6f11e4f6a 100644
--- a/libcxx/test/libcxx-03/assertions/single_expression.pass.cpp
+++ b/libcxx/test/libcxx-03/assertions/single_expression.pass.cpp
@@ -10,9 +10,7 @@
 // This is useful so we can use them  in places that require an expression, 
such as
 // in a constructor initializer list.
 
-// XFAIL: FROZEN-CXX03-HEADERS-FIXME
-
-#include <__assert>
+#include <__cxx03/__assert>
 #include 
 
 void f() {
diff --git 
a/libcxx/test/libcxx-03/containers/associative/tree_balance_after_insert.pass.cpp
 
b/libcxx/test/libcxx-03/containers/associative/tree_balance_after_insert.pa

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits


@@ -14944,6 +14945,51 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
+   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N1.getOpcode() == ISD::ADD) {
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+//y is not, and (add y, z) is used only once.
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+//z is not, and (add y, z) is used only once.
+// The goal is to move constant offsets to the outermost ptradd, to create
+// more opportunities to fold offsets into memory instructions.
+// Together with the generic combines in DAGCombiner.cpp, this also
+// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
+//
+// This transform is here instead of in the general DAGCombiner as it can
+// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
+// AArch64's CPA.
+SDValue X = N0;
+SDValue Y = N1.getOperand(0);
+SDValue Z = N1.getOperand(1);
+bool N1OneUse = N1.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+if ((ZIsConstant != YIsConstant) && N1OneUse) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N1->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;

ritter-x2a wrote:

Done (here and also for similar code in DAGCombiner.cpp).

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP][NFC] Refactor to avoid global variable (PR #144087)

2025-06-13 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah created 
https://github.com/llvm/llvm-project/pull/144087

Based on top of #144013

I was really hoping this would also work for `hostEvalInfo` but unfortunately 
that needed to be shared to a greater degree.

The same technique should work for that but it would need that class to be made 
public and then the state kept between calls to `genOpenMP*Construct`, which 
felt like more trouble than it was worth.

I'm open to abandoning this patch if solving one global variable doesn't feel 
worth this much churn.

Making these changes I was wondering if we should implement this file with one 
big class to wrap up all the state passed to every function. Any thoughts?

>From b962af9da5a74b2b5509f654299c3b9c35dca05d Mon Sep 17 00:00:00 2001
From: Tom Eccles 
Date: Fri, 13 Jun 2025 14:58:56 +
Subject: [PATCH] [flang][OpenMP][NFC] Refactor to avoid global variable

I was really hoping this would also work for `hostEvalInfo` but
unfortunately that needed to be shared to a greater degree.

The same technique should work for that but it would need that class to
be made public and then the state kept between calls to
`genOpenMP*Construct`, which felt like more trouble than it was worth.

I'm open to abandoning this patch if solving one global variable doesn't
feel worth this much churn.

Making these changes I was wondering if we should implement this file
with one big class to wrap up all the state passed to every function.
Any thoughts?
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 560 +-
 1 file changed, 310 insertions(+), 250 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 060eba1b906e3..9c0bfa95f8382 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -48,6 +48,10 @@ using namespace Fortran::common::openmp;
 
 static llvm::cl::opt DumpAtomicAnalysis("fdebug-dump-atomic-analysis");
 
+namespace {
+struct OmpLoweringContext;
+} // namespace
+
 
//===--===//
 // Code generation helper functions
 
//===--===//
@@ -55,6 +59,7 @@ static llvm::cl::opt 
DumpAtomicAnalysis("fdebug-dump-atomic-analysis");
 static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
+   OmpLoweringContext &ompCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item);
@@ -191,18 +196,28 @@ class HostEvalInfo {
   llvm::SmallVector iv;
   bool loopNestApplied = false, parallelApplied = false;
 };
-} // namespace
 
 /// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
 /// operations being created.
 ///
 /// The current implementation prevents nested 'target' regions from breaking
 /// the handling of the outer region by keeping a stack of information
-/// structures, but it will probably still require some further work to support
-/// reverse offloading.
-static llvm::SmallVector hostEvalInfo;
-static llvm::SmallVector
-sectionsStack;
+/// structures, but it will probably still require some further work to
+/// support reverse offloading.
+///
+/// This has to be a global rather than in OmpLoweringContext because different
+/// calls to  void Fortran::lower::genOpenMPConstruct and
+/// Fortran::lower::genOpenMPDeclarativeConstruct need to share the same
+/// instance. FIXME: Maybe this should be promoted into the interface for those
+/// functions.
+llvm::SmallVector hostEvalInfo;
+
+struct OmpLoweringContext {
+  /// Stack of parse tree information about the sections construct to allow 
each
+  /// section to be lowered as part of the enclosing sections construct.
+  llvm::SmallVector sectionsStack;
+};
+} // namespace
 
 /// Bind symbols to their corresponding entry block arguments.
 ///
@@ -1151,10 +1166,11 @@ struct OpWithBodyGenInfo {
 
   OpWithBodyGenInfo(lower::AbstractConverter &converter,
 lower::SymMap &symTable,
-semantics::SemanticsContext &semaCtx, mlir::Location loc,
+semantics::SemanticsContext &semaCtx,
+OmpLoweringContext &ompCtx, mlir::Location loc,
 lower::pft::Evaluation &eval, llvm::omp::Directive dir)
-  : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
-eval(eval), dir(dir) {}
+  : converter(converter), symTable(symTable), semaCtx(semaCtx),
+ompCtx(ompCtx), loc(loc), eval(eval), dir(dir) {}
 
   OpWithBodyGenInfo &setClauses(const List *value) {
 clauses = value;
@@ -1187,6 +1203,8 @@ struct OpWithBodyGenInfo {
   lower::SymMap &symTable;
   /// [in] Semantics context
   semantic

[llvm-branch-commits] [flang] [flang][OpenMP][NFC] Refactor to avoid global variable (PR #144087)

2025-06-13 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Tom Eccles (tblah)


Changes

Based on top of #144013

I was really hoping this would also work for `hostEvalInfo` but unfortunately 
that needed to be shared to a greater degree.

The same technique should work for that but it would need that class to be made 
public and then the state kept between calls to `genOpenMP*Construct`, which 
felt like more trouble than it was worth.

I'm open to abandoning this patch if solving one global variable doesn't feel 
worth this much churn.

Making these changes I was wondering if we should implement this file with one 
big class to wrap up all the state passed to every function. Any thoughts?

---

Patch is 75.76 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/144087.diff


1 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+310-250) 


``diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 060eba1b906e3..9c0bfa95f8382 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -48,6 +48,10 @@ using namespace Fortran::common::openmp;
 
 static llvm::cl::opt DumpAtomicAnalysis("fdebug-dump-atomic-analysis");
 
+namespace {
+struct OmpLoweringContext;
+} // namespace
+
 
//===--===//
 // Code generation helper functions
 
//===--===//
@@ -55,6 +59,7 @@ static llvm::cl::opt 
DumpAtomicAnalysis("fdebug-dump-atomic-analysis");
 static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
+   OmpLoweringContext &ompCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item);
@@ -191,18 +196,28 @@ class HostEvalInfo {
   llvm::SmallVector iv;
   bool loopNestApplied = false, parallelApplied = false;
 };
-} // namespace
 
 /// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
 /// operations being created.
 ///
 /// The current implementation prevents nested 'target' regions from breaking
 /// the handling of the outer region by keeping a stack of information
-/// structures, but it will probably still require some further work to support
-/// reverse offloading.
-static llvm::SmallVector hostEvalInfo;
-static llvm::SmallVector
-sectionsStack;
+/// structures, but it will probably still require some further work to
+/// support reverse offloading.
+///
+/// This has to be a global rather than in OmpLoweringContext because different
+/// calls to  void Fortran::lower::genOpenMPConstruct and
+/// Fortran::lower::genOpenMPDeclarativeConstruct need to share the same
+/// instance. FIXME: Maybe this should be promoted into the interface for those
+/// functions.
+llvm::SmallVector hostEvalInfo;
+
+struct OmpLoweringContext {
+  /// Stack of parse tree information about the sections construct to allow 
each
+  /// section to be lowered as part of the enclosing sections construct.
+  llvm::SmallVector sectionsStack;
+};
+} // namespace
 
 /// Bind symbols to their corresponding entry block arguments.
 ///
@@ -1151,10 +1166,11 @@ struct OpWithBodyGenInfo {
 
   OpWithBodyGenInfo(lower::AbstractConverter &converter,
 lower::SymMap &symTable,
-semantics::SemanticsContext &semaCtx, mlir::Location loc,
+semantics::SemanticsContext &semaCtx,
+OmpLoweringContext &ompCtx, mlir::Location loc,
 lower::pft::Evaluation &eval, llvm::omp::Directive dir)
-  : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
-eval(eval), dir(dir) {}
+  : converter(converter), symTable(symTable), semaCtx(semaCtx),
+ompCtx(ompCtx), loc(loc), eval(eval), dir(dir) {}
 
   OpWithBodyGenInfo &setClauses(const List *value) {
 clauses = value;
@@ -1187,6 +1203,8 @@ struct OpWithBodyGenInfo {
   lower::SymMap &symTable;
   /// [in] Semantics context
   semantics::SemanticsContext &semaCtx;
+  /// [in] OpenMP context
+  OmpLoweringContext &ompCtx;
   /// [in] location in source code.
   mlir::Location loc;
   /// [in] current PFT node/evaluation.
@@ -1290,8 +1308,8 @@ static void createBodyOfOp(mlir::Operation &op, const 
OpWithBodyGenInfo &info,
   if (!info.genSkeletonOnly) {
 if (ConstructQueue::const_iterator next = std::next(item);
 next != queue.end()) {
-  genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
- info.loc, queue, next);
+  genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.ompCtx,
+ info.eval, info.loc, queue, next);

[llvm-branch-commits] [llvm] 6bf398b - Revert "[llvm-cov] Add support for baseline coverage (#117910)"

2025-06-13 Thread via llvm-branch-commits

Author: Keith Smiley
Date: 2025-06-13T10:04:08-07:00
New Revision: 6bf398b89982b4a47edf48ce2c8c627e8a94ccf9

URL: 
https://github.com/llvm/llvm-project/commit/6bf398b89982b4a47edf48ce2c8c627e8a94ccf9
DIFF: 
https://github.com/llvm/llvm-project/commit/6bf398b89982b4a47edf48ce2c8c627e8a94ccf9.diff

LOG: Revert "[llvm-cov] Add support for baseline coverage (#117910)"

This reverts commit dc9e300f12f3b9c8160dbfb0bc32252ad99c3ba7.

Added: 


Modified: 
llvm/docs/CommandGuide/llvm-cov.rst
llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
llvm/tools/llvm-cov/CodeCoverage.cpp
llvm/unittests/ProfileData/CoverageMappingTest.cpp

Removed: 
llvm/test/tools/llvm-cov/showLineExecutionCounts-lcov-baseline.test



diff  --git a/llvm/docs/CommandGuide/llvm-cov.rst 
b/llvm/docs/CommandGuide/llvm-cov.rst
index f4db60cf06fa7..968f3c452f558 100644
--- a/llvm/docs/CommandGuide/llvm-cov.rst
+++ b/llvm/docs/CommandGuide/llvm-cov.rst
@@ -380,11 +380,6 @@ OPTIONS
  Fail if an object file cannot be found for a binary ID present in the profile,
  neither on the command line nor via binary ID lookup.
 
-.. option:: -empty-profile
-
- Display the baseline coverage of the binaries with all zero execution counts.
- Mutually exclusive with -instr-profile.
-
 .. program:: llvm-cov report
 
 .. _llvm-cov-report:
@@ -475,11 +470,6 @@ OPTIONS
  Fail if an object file cannot be found for a binary ID present in the profile,
  neither on the command line nor via binary ID lookup.
 
-.. option:: -empty-profile
-
- Display the baseline coverage of the binaries with all zero execution counts.
- Mutually exclusive with -instr-profile.
-
 .. program:: llvm-cov export
 
 .. _llvm-cov-export:
@@ -572,11 +562,6 @@ OPTIONS
  Fail if an object file cannot be found for a binary ID present in the profile,
  neither on the command line nor via binary ID lookup.
 
-.. option:: -empty-profile
-
- Export the baseline coverage of the binaries with all zero execution counts.
- Mutually exclusive with -instr-profile.
-
 CONVERT-FOR-TESTING COMMAND
 ---
 

diff  --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h 
b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index d1230b0ba7c58..e62ce5e3d8fa6 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -991,23 +991,18 @@ class CoverageMapping {
   // Load coverage records from readers.
   static Error loadFromReaders(
   ArrayRef> CoverageReaders,
-  std::optional>
-  &ProfileReader,
-  CoverageMapping &Coverage);
+  IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage);
 
   // Load coverage records from file.
   static Error
   loadFromFile(StringRef Filename, StringRef Arch, StringRef CompilationDir,
-   std::optional>
-   &ProfileReader,
-   CoverageMapping &Coverage, bool &DataFound,
+   IndexedInstrProfReader &ProfileReader, CoverageMapping 
&Coverage,
+   bool &DataFound,
SmallVectorImpl *FoundBinaryIDs = nullptr);
 
   /// Add a function record corresponding to \p Record.
-  Error loadFunctionRecord(
-  const CoverageMappingRecord &Record,
-  const std::optional>
-  &ProfileReader);
+  Error loadFunctionRecord(const CoverageMappingRecord &Record,
+   IndexedInstrProfReader &ProfileReader);
 
   /// Look up the indices for function records which are at least partially
   /// defined in the specified file. This is guaranteed to return a superset of
@@ -1023,16 +1018,15 @@ class CoverageMapping {
   /// Load the coverage mapping using the given readers.
   LLVM_ABI static Expected>
   load(ArrayRef> CoverageReaders,
-   std::optional>
-   &ProfileReader);
+   IndexedInstrProfReader &ProfileReader);
 
   /// Load the coverage mapping from the given object files and profile. If
   /// \p Arches is non-empty, it must specify an architecture for each object.
   /// Ignores non-instrumented object files unless all are not instrumented.
   LLVM_ABI static Expected>
-  load(ArrayRef ObjectFilenames,
-   std::optional ProfileFilename, vfs::FileSystem &FS,
-   ArrayRef Arches = {}, StringRef CompilationDir = "",
+  load(ArrayRef ObjectFilenames, StringRef ProfileFilename,
+   vfs::FileSystem &FS, ArrayRef Arches = {},
+   StringRef CompilationDir = "",
const object::BuildIDFetcher *BIDFetcher = nullptr,
bool CheckBinaryIDs = false);
 

diff  --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp 
b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 429ec5c19f1f8..dd74eb054a34c 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -823,8 +823,7 @@ class MCDCD

[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread James Newling via llvm-branch-commits

https://github.com/newling approved this pull request.

LGTM; thanks! 

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] 2238fd9 - Revert "[mlir][vector] Fix for WarpOpScfForOp failure when scf.for has result…"

2025-06-13 Thread via llvm-branch-commits

Author: Charitha Saumya
Date: 2025-06-13T10:18:24-07:00
New Revision: 2238fd9a756ae1a0b6aa2302e96cc217b08d6c3b

URL: 
https://github.com/llvm/llvm-project/commit/2238fd9a756ae1a0b6aa2302e96cc217b08d6c3b
DIFF: 
https://github.com/llvm/llvm-project/commit/2238fd9a756ae1a0b6aa2302e96cc217b08d6c3b.diff

LOG: Revert "[mlir][vector] Fix for WarpOpScfForOp failure when scf.for has 
result…"

This reverts commit 10dc8bc519130f491d70318bd8b47555307cdc3f.

Added: 


Modified: 
mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
mlir/test/Dialect/Vector/vector-warp-distribute.mlir

Removed: 




diff  --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp 
b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
index 52a9cedb43cc0..045c192787f10 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
@@ -1554,36 +1554,22 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
 llvm::SmallSetVector escapingValues;
 SmallVector inputTypes;
 SmallVector distTypes;
-auto collectEscapingValues = [&](Value value) {
-  if (!escapingValues.insert(value))
-return;
-  Type distType = value.getType();
-  if (auto vecType = dyn_cast(distType)) {
-AffineMap map = distributionMapFn(value);
-distType = getDistributedType(vecType, map, warpOp.getWarpSize());
-  }
-  inputTypes.push_back(value.getType());
-  distTypes.push_back(distType);
-};
-
 mlir::visitUsedValuesDefinedAbove(
 forOp.getBodyRegion(), [&](OpOperand *operand) {
   Operation *parent = operand->get().getParentRegion()->getParentOp();
   if (warpOp->isAncestor(parent)) {
-collectEscapingValues(operand->get());
+if (!escapingValues.insert(operand->get()))
+  return;
+Type distType = operand->get().getType();
+if (auto vecType = dyn_cast(distType)) {
+  AffineMap map = distributionMapFn(operand->get());
+  distType = getDistributedType(vecType, map, 
warpOp.getWarpSize());
+}
+inputTypes.push_back(operand->get().getType());
+distTypes.push_back(distType);
   }
 });
 
-// Any forOp result that is not already yielded by the warpOp
-// region is also considered escaping and must be returned by the
-// original warpOp.
-for (OpResult forResult : forOp.getResults()) {
-  // Check if this forResult is already yielded by the yield op.
-  if (llvm::is_contained(yield->getOperands(), forResult))
-continue;
-  collectEscapingValues(forResult);
-}
-
 if (llvm::is_contained(distTypes, Type{}))
   return failure();
 
@@ -1623,12 +1609,7 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
 forOp.getResultTypes().end());
 llvm::SmallDenseMap argIndexMapping;
 for (auto [i, retIdx] : llvm::enumerate(newRetIndices)) {
-  auto newWarpResult = newWarpOp.getResult(retIdx);
-  // Unused forOp results yielded by the warpOp region are already included
-  // in the new ForOp.
-  if (llvm::is_contained(newOperands, newWarpResult))
-continue;
-  warpInput.push_back(newWarpResult);
+  warpInput.push_back(newWarpOp.getResult(retIdx));
   argIndexMapping[escapingValues[i]] = warpInputType.size();
   warpInputType.push_back(inputTypes[i]);
 }

diff  --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir 
b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index 6c7ac7a5196a7..38771f2593449 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -584,42 +584,6 @@ func.func @warp_scf_for_multiple_yield(%arg0: index, 
%arg1: memref, %arg2
   return
 }
 
-// -
-// CHECK-PROP-LABEL: func.func @warp_scf_for_unused_yield(
-//   CHECK-PROP: %[[W0:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> 
(vector<4xf32>, vector<4xf32>) {
-//   CHECK-PROP: %[[INI0:.*]] = "some_def"() : () -> vector<128xf32>
-//   CHECK-PROP: %[[INI1:.*]] = "some_def"() : () -> vector<128xf32>
-//   CHECK-PROP: gpu.yield %[[INI0]], %[[INI1]] : vector<128xf32>, 
vector<128xf32>
-//   CHECK-PROP: }
-//   CHECK-PROP: %[[F:.*]]:2 = scf.for %{{.*}} iter_args(%{{.*}} = 
%[[W0]]#0, %{{.*}} = %[[W0]]#1) -> (vector<4xf32>, vector<4xf32>) {
-//   CHECK-PROP: %[[W1:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[32] 
args(%{{.*}} : vector<4xf32>, vector<4xf32>) -> (vector<4xf32>, vector<4xf32>) {
-//   CHECK-PROP: %[[ACC0:.*]] = "some_def"(%{{.*}}) : (vector<128xf32>, 
index) -> vector<128xf32>
-//   CHECK-PROP: %[[ACC1:.*]] = "some_def"(%{{.*}}) : (index, 
vector<128xf32>, vector<128xf32>) -> vector<128xf32>
-//   CHECK-PROP: gpu.yield %[[ACC1]], %[[ACC0]] : vector<12

[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)

2025-06-13 Thread Momchil Velikov via llvm-branch-commits


@@ -83,16 +84,48 @@ func.func @transfer_read_dims_mismatch_contiguous(
   return %res : vector<1x1x2x2xi8>
 }
 
-// CHECK-LABEL:   func.func @transfer_read_dims_mismatch_contiguous(
+// CHECK-LABEL:   func.func @transfer_read_dims_mismatch_contiguous_unit_dims(
 // CHECK-SAME:  %[[MEM:.*]]: memref<5x4x3x2xi8, strided<[24, 6, 2, 1], 
offset: ?>>) -> vector<1x1x2x2xi8> {
 // CHECK:   %[[VAL_1:.*]] = arith.constant 0 : i8
 // CHECK:   %[[VAL_2:.*]] = arith.constant 0 : index
-// CHECK:   %[[VAL_3:.*]] = memref.collapse_shape %[[MEM]] {{\[\[}}0, 
1, 2, 3]] : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> into 
memref<120xi8, strided<[1], offset: ?>>
-// CHECK:   %[[VAL_4:.*]] = vector.transfer_read 
%[[VAL_3]]{{\[}}%[[VAL_2]]], %[[VAL_1]] {in_bounds = [true]} : memref<120xi8, 
strided<[1], offset: ?>>, vector<4xi8>
+// CHECK:   %[[VAL_3:.*]] = memref.collapse_shape %[[MEM]]
+// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]]
+// CHECK-SAME:: memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> 
into memref<5x4x6xi8, strided<[24, 6, 1], offset: ?>>
+// CHECK:   %[[VAL_4:.*]] = vector.transfer_read 
%[[VAL_3]][%[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true]} 
: memref<5x4x6xi8, strided<[24, 6, 1], offset: ?>>, vector<4xi8>
 // CHECK:   %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : 
vector<4xi8> to vector<1x1x2x2xi8>
 // CHECK:   return %[[VAL_5]] : vector<1x1x2x2xi8>
 
-// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_contiguous(
+// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_contiguous_unit_dims(
+//   CHECK-128B:   memref.collapse_shape
+
+// -
+
+// The shape of the memref and the vector don't match, but the vector is a
+// contiguous subset of the memref, so "flattenable"
+
+func.func @transfer_read_dims_mismatch_contiguous_non_unit_dims(
+%mem : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> 
vector<2x3x2xi8> {
+
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0 : i8
+  %res = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst :
+memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<2x3x2xi8>
+  return %res : vector<2x3x2xi8>
+}
+
+// CHECK-LABEL: func.func 
@transfer_read_dims_mismatch_contiguous_non_unit_dims(
+// CHECK-SAME:%[[MEM:.+]]: memref<5x4x3x2xi8, {{.+}}>) -> vector<2x3x2xi8> 
{
+// CHECK: %[[C0_I8:.+]] = arith.constant 0 : i8
+// CHECK: %[[C0:.+]] = arith.constant 0 : index
+// CHECK: %[[COLLAPSED_MEM:.+]] = memref.collapse_shape %[[MEM]]
+// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]]
+// CHECK-SAME:  : memref<5x4x3x2xi8, {{.+}}> into memref<5x24xi8, 
{{.+}}>
+// CHECK: %[[VEC_1D:.+]] = vector.transfer_read 
%[[COLLAPSED_MEM]][%[[C0]], %[[C0]]], %[[C0_I8]] {in_bounds = [true]}
+// CHECK-SAME:  : memref<5x24xi8, strided<[24, 1], offset: ?>>, 
vector<12xi8>
+// CHECK: %[[VEC:.+]] = vector.shape_cast %[[VEC_1D]] : vector<12xi8> 
to vector<2x3x2xi8>
+// CHECK: return %[[VEC]] : vector<2x3x2xi8>

momchil-velikov wrote:

I don't understand the rationale behind having these in a particular order.

https://github.com/llvm/llvm-project/pull/142422
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits


@@ -2628,6 +2630,87 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0) && PtrVT == IntVT)
+return N1;

ritter-x2a wrote:

I've applied the suggested change for now.

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Teresa Johnson via llvm-branch-commits

https://github.com/teresajohnson commented:

This needs a caveat somewhere (either in printed usage message or in a comment) 
that this won't work for local linkage symbols (I suppose the user could give 
the "file:" prefix but that won't work if -funique-internal-linkage-names was 
specified etc). Can you also add a test?

https://github.com/llvm/llvm-project/pull/143992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns (PR #143881)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143881

>From 46090a8031fde937a76268ce7adbbdc6f42911ad Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 07:44:37 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns

This patch mirrors similar patterns for ISD::ADD. The main difference is
that ISD::ADD is commutative, so that a pattern definition for, e.g.,
(add (mul x, y), z), automatically also handles (add z, (mul x, y)).
ISD::PTRADD is not commutative, so we would need to handle these cases
explicitly. This patch only implements (ptradd z, (op x, y)) patterns,
where the nested operation (shift or multiply) is the offset of the
ptradd (i.e., the right operand), since base pointers that are the
result of a shift or multiply seem less likely.

For SWDEV-516125.
---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td| 36 +++-
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 41 ++
 llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll   | 42 +++
 3 files changed, 52 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a005e0245b8ff..8054e75782539 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -484,12 +484,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], 
True16Predicate = NotHasTrue
   defm: Ternary_i16_Pats_gfx9;
 } // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = 
NotHasTrue16BitInsts
 
-class ThreeOpFragSDAG : PatFrag<
+class ThreeOpFragSDAG : PatFrag<
   (ops node:$x, node:$y, node:$z),
   // When the inner operation is used multiple times, selecting 3-op
   // instructions may still be beneficial -- if the other users can be
   // combined similarly. Let's be conservative for now.
-  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z),
+  !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp node:$x, node:$y)),
+  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z)),
   [{
 // Only use VALU ops when the result is divergent.
 if (!N->isDivergent())
@@ -516,7 +517,10 @@ class ThreeOpFragSDAG : PatFrag<
   let PredicateCodeUsesOperands = 1;
 }
 
-class ThreeOpFrag : 
ThreeOpFragSDAG {
+// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and
+// matches (op2 z, (op1, x, y)) if op1IsRight = 1.
+class ThreeOpFrag : ThreeOpFragSDAG {
   // The divergence predicate is irrelevant in GlobalISel, as we have
   // proper register bank checks. We just need to verify the constant
   // bus restriction when all the sources are considered.
@@ -806,12 +810,19 @@ def : GCNPat<
  (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1),
  (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
 
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX940Plus in {
 def : GCNPat<
   (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2),
   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
 >;
 
+def : GCNPat <
+  // (ptradd z, (shl x, y)) -> ((x << y) + z)
+  (ThreeOpFrag i64:$src0, i32:$src1, 
i64:$src2),
+  (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
+>;
+} // End SubtargetPredicate = isGFX940Plus
+
 def : VOPBinOpClampPat;
 def : VOPBinOpClampPat;
 
@@ -880,19 +891,24 @@ multiclass IMAD32_Pats  {
 
 // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a 
normal mul.
 // We need to separate this because otherwise OtherPredicates would be 
overriden.
-class IMAD32_Mul24_Pat: GCNPat <
-(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
-(inst $src0, $src1, $src2, 0 /* clamp */)
->;
+class IMAD32_Mul24_Pats_Impl : GCNPat <
+!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, 
i32:$src1,
+(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), 
i64:$src2))),
+(inst $src0, $src1, $src2, 0 /* clamp */)>;
+
+multiclass IMAD32_Mul24_Pats {
+  def : IMAD32_Mul24_Pats_Impl;
+  def : IMAD32_Mul24_Pats_Impl;
+}
 
 // exclude pre-GFX9 where it was slow
 let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus 
in {
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in 
{
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 
 def VOP3_PERMLANE_Profile : VOP3_Profile, 
VOP3_OPSEL> {
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d48bfe0bb7f21..34bb98550de04 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -266,18 +266,11 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) 
%p) {
 
 ; Use non-zero shift amounts in v_lshl_add_u64.
 define ptr @select_v_lshl_add_u64(ptr %base, 

[llvm-branch-commits] [libcxx] [libc++][C++03] Remove XFAILs from the non-frozen libc++-specific tests (PR #144101)

2025-06-13 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: Python code formatter, darker found issues in your code. :warning:



You can test this locally with the following command:


``bash
darker --check --diff -r HEAD~1...HEAD 
libcxx/test/libcxx/clang_modules_include.gen.py 
libcxx/test/libcxx/clang_tidy.gen.py 
libcxx/test/libcxx/header_inclusions.gen.py 
libcxx/test/libcxx/system_reserved_names.gen.py 
libcxx/test/libcxx/transitive_includes.gen.py
``





View the diff from darker here.


``diff
--- clang_tidy.gen.py   2025-06-13 15:49:01.00 +
+++ clang_tidy.gen.py   2025-06-13 15:53:41.838897 +
@@ -17,11 +17,12 @@
 import sys
 sys.path.append(sys.argv[1])
 from libcxx.header_information import lit_header_restrictions, 
lit_header_undeprecations, public_headers
 
 for header in public_headers:
-  print(f"""\
+print(
+f"""\
 //--- {header}.sh.cpp
 
 // REQUIRES: has-clang-tidy
 
 // The GCC compiler flags are not always compatible with clang-tidy.
@@ -32,6 +33,7 @@
 
 // TODO: run clang-tidy with modules enabled once they are supported
 // RUN: %{{clang-tidy}} %s --warnings-as-errors=* -header-filter=.* 
--config-file=%{{libcxx-dir}}/.clang-tidy 
--load=%{{test-tools-dir}}/clang_tidy_checks/libcxx-tidy.plugin -- 
-Wweak-vtables %{{compile_flags}} -fno-modules
 
 #include <{header}>
-""")
+"""
+)

``




https://github.com/llvm/llvm-project/pull/144101
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Peter Collingbourne via llvm-branch-commits

https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/143992

>From f11d7d544cc61dce582de538608bfd512147f90a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne 
Date: Thu, 12 Jun 2025 16:06:14 -0700
Subject: [PATCH 1/2] Upload correct patch

Created using spr 1.3.6-beta.1
---
 llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp 
b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 5f5c954c6a57d..d35868ffafe1e 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -610,7 +610,9 @@ int main(int argc, char **argv) {
 return dumpSymtab(argc - 1, argv + 1);
   if (Subcommand == "run")
 return run(argc - 1, argv + 1);
-  if (Subcommand == "print-guid" && argc > 2)
-outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]);
+  if (Subcommand == "print-guid" && argc > 2) {
+outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]) << '\n';
+return 0;
+  }
   return usage();
 }

>From c7cb16abb3c30e54a12ec1b9ce325d49cf37d2bc Mon Sep 17 00:00:00 2001
From: Peter Collingbourne 
Date: Fri, 13 Jun 2025 14:34:28 -0700
Subject: [PATCH 2/2] Add comment

Created using spr 1.3.6-beta.1
---
 llvm/tools/llvm-lto2/llvm-lto2.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp 
b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d35868ffafe1e..fbde6a596 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -611,6 +611,8 @@ int main(int argc, char **argv) {
   if (Subcommand == "run")
 return run(argc - 1, argv + 1);
   if (Subcommand == "print-guid" && argc > 2) {
+// Note the name of the function we're calling: this won't return the right
+// answer for internal linkage symbols.
 outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]) << '\n';
 return 0;
   }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Peter Collingbourne via llvm-branch-commits

pcc wrote:

> At least a comment in the code would be good. A variety of people end up 
> using these tools for tests, and I could see someone getting confused as to 
> why the guid doesn't match what's e.g. in the ThinLTO index. For that 
> understanding you'd have to read more than just what this source file is 
> calling.

I added a comment.

https://github.com/llvm/llvm-project/pull/143992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle assorted AVX permutations (PR #143462)

2025-06-13 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer updated 
https://github.com/llvm/llvm-project/pull/143462

>From e7f58f76d921bdf3e7f4a585a25a2612d66fee33 Mon Sep 17 00:00:00 2001
From: Florian Mayer 
Date: Fri, 13 Jun 2025 15:14:20 -0700
Subject: [PATCH] assert

Created using spr 1.3.4
---
 .../Instrumentation/MemorySanitizer.cpp   |  8 ++
 .../X86/avx512vl-intrinsics.ll| 73 ---
 2 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 2ede88d0f0b37..fb55bd7bfe567 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4174,6 +4174,14 @@ struct MemorySanitizerVisitor : public 
InstVisitor {
   // Instrument AVX permutation intrinsic.
   // We apply the same permutation (argument index 1) to the shadow.
   void handleAVXPermutation(IntrinsicInst &I) {
+assert(I.arg_size() == 2);
+assert(isa(I.getArgOperand(0)->getType()));
+assert(isa(I.getArgOperand(1)->getType()));
+[[maybe_unused]] auto ArgVectorSize =
+cast(I.getArgOperand(0)->getType())->getNumElements();
+assert(cast(I.getArgOperand(1)->getType())
+   ->getNumElements() == ArgVectorSize);
+assert(I.getType() == I.getArgOperand(0)->getType());
 IRBuilder<> IRB(&I);
 Value *Shadow = getShadow(&I, 0);
 insertShadowCheck(I.getArgOperand(1), &I);
diff --git 
a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll 
b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index 1a067ec67d218..40b5e9338e45e 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -8633,18 +8633,18 @@ define <4 x 
double>@test_int_x86_avx512_permvar_df_256(<4 x double> %x0, <4 x i6
 ; CHECK-NEXT:[[TMP5:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:call void @llvm.donothing()
-; CHECK-NEXT:[[TMP3:%.*]] = bitcast <4 x i64> [[TMP5]] to i256
-; CHECK-NEXT:[[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
+; CHECK-NEXT:[[TMP3:%.*]] = bitcast <4 x i64> [[TMP5]] to <4 x double>
+; CHECK-NEXT:[[TMP6:%.*]] = call <4 x double> 
@llvm.x86.avx512.permvar.df.256(<4 x double> [[TMP3]], <4 x i64> [[X1]])
+; CHECK-NEXT:[[TMP7:%.*]] = bitcast <4 x double> [[TMP6]] to <4 x i64>
 ; CHECK-NEXT:[[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
 ; CHECK-NEXT:[[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT:[[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof 
[[PROF1]]
-; CHECK:   [[BB5]]:
+; CHECK-NEXT:br i1 [[_MSCMP1]], label %[[BB7:.*]], label %[[BB8:.*]], 
!prof [[PROF1]]
+; CHECK:   [[BB7]]:
 ; CHECK-NEXT:call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:unreachable
-; CHECK:   [[BB6]]:
+; CHECK:   [[BB8]]:
 ; CHECK-NEXT:[[TMP1:%.*]] = call <4 x double> 
@llvm.x86.avx512.permvar.df.256(<4 x double> [[X0]], <4 x i64> [[X1]])
-; CHECK-NEXT:store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, 
align 8
+; CHECK-NEXT:store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:ret <4 x double> [[TMP1]]
 ;
   %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 
x i64> %x1)
@@ -8660,26 +8660,26 @@ define <4 x 
double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4
 ; CHECK-NEXT:[[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
 ; CHECK-NEXT:[[TMP13:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 ; CHECK-NEXT:call void @llvm.donothing()
-; CHECK-NEXT:[[TMP14:%.*]] = bitcast <4 x i64> [[TMP8]] to i256
-; CHECK-NEXT:[[_MSCMP:%.*]] = icmp ne i256 [[TMP14]], 0
+; CHECK-NEXT:[[TMP14:%.*]] = bitcast <4 x i64> [[TMP8]] to <4 x double>
+; CHECK-NEXT:[[TMP16:%.*]] = call <4 x double> 
@llvm.x86.avx512.permvar.df.256(<4 x double> [[TMP14]], <4 x i64> [[X1]])
+; CHECK-NEXT:[[TMP18:%.*]] = bitcast <4 x double> [[TMP16]] to <4 x i64>
 ; CHECK-NEXT:[[TMP15:%.*]] = bitcast <4 x i64> [[TMP11]] to i256
 ; CHECK-NEXT:[[_MSCMP1:%.*]] = icmp ne i256 [[TMP15]], 0
-; CHECK-NEXT:[[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:br i1 [[_MSOR]], label %[[BB7:.*]], label %[[BB8:.*]], !prof 
[[PROF1]]
-; CHECK:   [[BB7]]:
+; CHECK-NEXT:br i1 [[_MSCMP1]], label %[[BB9:.*]], label %[[BB10:.*]], 
!prof [[PROF1]]
+; CHECK:   [[BB9]]:
 ; CHECK-NEXT:call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:unreachable
-; CHECK:  

[llvm-branch-commits] [MSAN] handle assorted AVX permutations (PR #143462)

2025-06-13 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer updated 
https://github.com/llvm/llvm-project/pull/143462


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [MSAN] handle assorted AVX permutations (PR #143462)

2025-06-13 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer updated 
https://github.com/llvm/llvm-project/pull/143462


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle assorted AVX permutations (PR #143462)

2025-06-13 Thread Florian Mayer via llvm-branch-commits

fmayer wrote:

ready for review

https://github.com/llvm/llvm-project/pull/143462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle assorted AVX permutations (PR #143462)

2025-06-13 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/143462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle AVX vpermi2var (PR #143463)

2025-06-13 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/143463
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle AVX vpermi2var (PR #143463)

2025-06-13 Thread Thurston Dang via llvm-branch-commits


@@ -4191,6 +4191,15 @@ struct MemorySanitizerVisitor : public 
InstVisitor {
   // We apply the same permutation (argument index 1) to the shadows.
   void handleAVXVpermil2var(IntrinsicInst &I) {
 assert(I.arg_size() == 3);
+assert(isa(I.getArgOperand(0)->getType()));
+assert(isa(I.getArgOperand(1)->getType()));
+assert(isa(I.getArgOperand(2)->getType()));
+[[maybe_unused]] auto ArgVectorSize =
+cast(I.getArgOperand(0)->getType())->getNumElements();
+assert(cast(I.getArgOperand(1)->getType())
+   ->getNumElements() == ArgVectorSize);
+assert(cast(I.getArgOperand(2)->getType())
+   ->getNumElements() == ArgVectorSize);

thurstond wrote:

Some of the assertions are redundant (e.g., if operand 0 is a vector, and 
operand 0's type is the same as operand 2's type, then operand 2 must be a 
vector with the same number of elements as operand 0), but that's fine.

https://github.com/llvm/llvm-project/pull/143463
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] 9a2e40b - Revert "Turn LIBC_COPT_STRING_UNSAFE_WIDE_READ on by default (#144163)"

2025-06-13 Thread via llvm-branch-commits

Author: Amy Huang
Date: 2025-06-13T15:29:51-07:00
New Revision: 9a2e40b9eea4297631c2462a345d1cbc8d01f373

URL: 
https://github.com/llvm/llvm-project/commit/9a2e40b9eea4297631c2462a345d1cbc8d01f373
DIFF: 
https://github.com/llvm/llvm-project/commit/9a2e40b9eea4297631c2462a345d1cbc8d01f373.diff

LOG: Revert "Turn LIBC_COPT_STRING_UNSAFE_WIDE_READ on by default (#144163)"

This reverts commit a591bd222b2e0356b8132b515422fe480b87322b.

Added: 


Modified: 
libc/config/config.json

Removed: 




diff  --git a/libc/config/config.json b/libc/config/config.json
index 0354b16997cdd..d53b2936edb07 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -59,7 +59,7 @@
   },
   "string": {
 "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
-  "value": true,
+  "value": false,
   "doc": "Read more than a byte at a time to perform byte-string 
operations like strlen."
 },
 "LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CodeGen][NFC] Fix quadratic c-t for large jump tables (PR #144108)

2025-06-13 Thread Arthur Eubanks via llvm-branch-commits

https://github.com/aeubanks approved this pull request.

makes sense. can you put some compile time numbers in the description before 
and after this patch?

https://github.com/llvm/llvm-project/pull/144108
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MSAN] handle AVX vpermi2var (PR #143463)

2025-06-13 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer closed 
https://github.com/llvm/llvm-project/pull/143463
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] WebAssembly: Stop directly using RuntimeLibcalls.def (PR #143054)

2025-06-13 Thread Matt Arsenault via llvm-branch-commits


@@ -528,23 +528,20 @@ RuntimeLibcallSignatureTable 
&getRuntimeLibcallSignatures() {
 // constructor for use with a static variable
 struct StaticLibcallNameMap {
   StringMap Map;
-  StaticLibcallNameMap() {
-static const std::pair NameLibcalls[] = {
-#define HANDLE_LIBCALL(code, name) {(const char *)name, RTLIB::code},
-#include "llvm/IR/RuntimeLibcalls.def"
-#undef HANDLE_LIBCALL
-};
-for (const auto &NameLibcall : NameLibcalls) {
-  if (NameLibcall.first != nullptr &&
-  getRuntimeLibcallSignatures().Table[NameLibcall.second] !=
-  unsupported) {
-assert(!Map.contains(NameLibcall.first) &&
+  StaticLibcallNameMap(const Triple &TT) {
+// FIXME: This is broken if there are ever different triples compiled with
+// different libcalls.
+RTLIB::RuntimeLibcallsInfo RTCI(TT);
+for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
+  RTLIB::Libcall LC = static_cast(I);
+  const char *NameLibcall = RTCI.getLibcallName(LC);
+  if (NameLibcall != nullptr &&
+  getRuntimeLibcallSignatures().Table[LC] != unsupported) {
+assert(!Map.contains(NameLibcall) &&
"duplicate libcall names in name map");
-Map[NameLibcall.first] = NameLibcall.second;
+Map[NameLibcall] = LC;
   }
 }
-
-Map["emscripten_return_address"] = RTLIB::RETURN_ADDRESS;

arsenm wrote:

RuntimeLibcallsInfo directly sets this, this was already moved in the parent PR 

https://github.com/llvm/llvm-project/pull/143054
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Teresa Johnson via llvm-branch-commits

teresajohnson wrote:

> > This needs a caveat somewhere (either in printed usage message or in a 
> > comment) that this won't work for local linkage symbols (I suppose the user 
> > could give the "file:" prefix but that won't work if 
> > -funique-internal-linkage-names was specified etc).
> 
> I'm not sure that is worth it. The intent is that users of these development 
> tools will refer to the source code. And if you read the source code you'll 
> see the function name `getGUIDAssumingExternalLinkage` which tells you what 
> you need to know.

At least a comment in the code would be good. A variety of people end up using 
these tools for tests, and I could see someone getting confused as to why the 
guid doesn't match what's e.g. in the ThinLTO index. For that understanding 
you'd have to read more than just what this source file is calling.

https://github.com/llvm/llvm-project/pull/143992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Teresa Johnson via llvm-branch-commits

https://github.com/teresajohnson approved this pull request.

lgtm otherwise

https://github.com/llvm/llvm-project/pull/143992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Peter Collingbourne via llvm-branch-commits

pcc wrote:

> This needs a caveat somewhere (either in printed usage message or in a 
> comment) that this won't work for local linkage symbols (I suppose the user 
> could give the "file:" prefix but that won't work if 
> -funique-internal-linkage-names was specified etc).

I'm not sure that is worth it. The intent is that users of these development 
tools will refer to the source code. And if you read the source code you'll see 
the function name `getGUIDAssumingExternalLinkage` which tells you what you 
need to know.

> Can you also add a test?

Done.

https://github.com/llvm/llvm-project/pull/143992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Peter Collingbourne via llvm-branch-commits

https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/143992

>From f11d7d544cc61dce582de538608bfd512147f90a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne 
Date: Thu, 12 Jun 2025 16:06:14 -0700
Subject: [PATCH] Upload correct patch

Created using spr 1.3.6-beta.1
---
 llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp 
b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 5f5c954c6a57d..d35868ffafe1e 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -610,7 +610,9 @@ int main(int argc, char **argv) {
 return dumpSymtab(argc - 1, argv + 1);
   if (Subcommand == "run")
 return run(argc - 1, argv + 1);
-  if (Subcommand == "print-guid" && argc > 2)
-outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]);
+  if (Subcommand == "print-guid" && argc > 2) {
+outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]) << '\n';
+return 0;
+  }
   return usage();
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-lto2: Add print-guid subcommand. (PR #143992)

2025-06-13 Thread Peter Collingbourne via llvm-branch-commits

https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/143992

>From f11d7d544cc61dce582de538608bfd512147f90a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne 
Date: Thu, 12 Jun 2025 16:06:14 -0700
Subject: [PATCH] Upload correct patch

Created using spr 1.3.6-beta.1
---
 llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp 
b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 5f5c954c6a57d..d35868ffafe1e 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -610,7 +610,9 @@ int main(int argc, char **argv) {
 return dumpSymtab(argc - 1, argv + 1);
   if (Subcommand == "run")
 return run(argc - 1, argv + 1);
-  if (Subcommand == "print-guid" && argc > 2)
-outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]);
+  if (Subcommand == "print-guid" && argc > 2) {
+outs() << GlobalValue::getGUIDAssumingExternalLinkage(argv[2]) << '\n';
+return 0;
+  }
   return usage();
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Test all projects when CI scripts change (PR #144034)

2025-06-13 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 created 
https://github.com/llvm/llvm-project/pull/144034

This patch resolves a fixme in the compute_projects script to actually
test all the projects we can when touching something in the .ci
directory. This ensures we test things like compiler-rt before landing
changes.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns (PR #143881)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143881

>From f93590bac710750f993c86005c217b843cc5a863 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 07:44:37 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns

This patch mirrors similar patterns for ISD::ADD. The main difference is
that ISD::ADD is commutative, so that a pattern definition for, e.g.,
(add (mul x, y), z), automatically also handles (add z, (mul x, y)).
ISD::PTRADD is not commutative, so we would need to handle these cases
explicitly. This patch only implements (ptradd z, (op x, y)) patterns,
where the nested operation (shift or multiply) is the offset of the
ptradd (i.e., the right operand), since base pointers that are the
result of a shift or multiply seem less likely.

For SWDEV-516125.
---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td| 36 +++-
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 41 ++
 llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll   | 42 +++
 3 files changed, 52 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a005e0245b8ff..8054e75782539 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -484,12 +484,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], 
True16Predicate = NotHasTrue
   defm: Ternary_i16_Pats_gfx9;
 } // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = 
NotHasTrue16BitInsts
 
-class ThreeOpFragSDAG : PatFrag<
+class ThreeOpFragSDAG : PatFrag<
   (ops node:$x, node:$y, node:$z),
   // When the inner operation is used multiple times, selecting 3-op
   // instructions may still be beneficial -- if the other users can be
   // combined similarly. Let's be conservative for now.
-  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z),
+  !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp node:$x, node:$y)),
+  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z)),
   [{
 // Only use VALU ops when the result is divergent.
 if (!N->isDivergent())
@@ -516,7 +517,10 @@ class ThreeOpFragSDAG : PatFrag<
   let PredicateCodeUsesOperands = 1;
 }
 
-class ThreeOpFrag : 
ThreeOpFragSDAG {
+// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and
+// matches (op2 z, (op1, x, y)) if op1IsRight = 1.
+class ThreeOpFrag : ThreeOpFragSDAG {
   // The divergence predicate is irrelevant in GlobalISel, as we have
   // proper register bank checks. We just need to verify the constant
   // bus restriction when all the sources are considered.
@@ -806,12 +810,19 @@ def : GCNPat<
  (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1),
  (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
 
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX940Plus in {
 def : GCNPat<
   (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2),
   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
 >;
 
+def : GCNPat <
+  // (ptradd z, (shl x, y)) -> ((x << y) + z)
+  (ThreeOpFrag i64:$src0, i32:$src1, 
i64:$src2),
+  (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
+>;
+} // End SubtargetPredicate = isGFX940Plus
+
 def : VOPBinOpClampPat;
 def : VOPBinOpClampPat;
 
@@ -880,19 +891,24 @@ multiclass IMAD32_Pats  {
 
 // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a 
normal mul.
 // We need to separate this because otherwise OtherPredicates would be 
overriden.
-class IMAD32_Mul24_Pat: GCNPat <
-(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
-(inst $src0, $src1, $src2, 0 /* clamp */)
->;
+class IMAD32_Mul24_Pats_Impl : GCNPat <
+!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, 
i32:$src1,
+(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), 
i64:$src2))),
+(inst $src0, $src1, $src2, 0 /* clamp */)>;
+
+multiclass IMAD32_Mul24_Pats {
+  def : IMAD32_Mul24_Pats_Impl;
+  def : IMAD32_Mul24_Pats_Impl;
+}
 
 // exclude pre-GFX9 where it was slow
 let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus 
in {
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in 
{
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 
 def VOP3_PERMLANE_Profile : VOP3_Profile, 
VOP3_OPSEL> {
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d48bfe0bb7f21..34bb98550de04 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -266,18 +266,11 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) 
%p) {
 
 ; Use non-zero shift amounts in v_lshl_add_u64.
 define ptr @select_v_lshl_add_u64(ptr %base, 

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in SelectionDAGAddressAnalysis (PR #142778)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142778

>From c959592b27205064e3b6f53c7330032bce84f857 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:48:02 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in
 SelectionDAGAddressAnalysis

This is used in a bunch of memory-related transforms.

For SWDEV-516125.
---
 .../SelectionDAGAddressAnalysis.cpp   |  6 ++--
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 ++-
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..da92aaa860b2b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -231,6 +231,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
   break;
 case ISD::ADD:
+case ISD::PTRADD:
   if (auto *C = dyn_cast(Base->getOperand(1))) {
 Offset += C->getSExtValue();
 Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
@@ -259,7 +260,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 break;
   }
 
-  if (Base->getOpcode() == ISD::ADD) {
+  if (Base->isAnyAdd()) {
 // TODO: The following code appears to be needless as it just
 //   bails on some Ptrs early, reducing the cases where we
 //   find equivalence. We should be able to remove this.
@@ -282,8 +283,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
 
 // Check if Index Offset pattern
-if (Index->getOpcode() != ISD::ADD ||
-!isa(Index->getOperand(1)))
+if (!Index->isAnyAdd() || !isa(Index->getOperand(1)))
   return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
 
 Offset += cast(Index->getOperand(1))->getSExtValue();
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d3242905ada64..2e76033a480f4 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -130,26 +130,14 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
 ; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
 ; SelectionDAGAddressAnalysis.
 define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
-; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_PTRADD:   ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_LEGACY:   ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+; GFX942-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942:   ; %bb.0: ; %entry
+; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:s_setpc_b64 s[30:31]
 entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143673

>From a3d204e9a8aae5de008a83904215d44d8d0c3380 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines

This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.

For SWDEV-516125.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 151 ++
 3 files changed, 167 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, 
EVT VT,
 return SDValue();
   int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
-  case ISD::ADD: break;
+  case ISD::ADD:
+  case ISD::PTRADD:
+break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
 #include 
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "si-lower"
 
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, 
const SDLoc &SL,
 // instead of a tree.
 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::ADD);
+  assert(N->isAnyAdd());
 
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 for (SDNode *User : LHS->users()) {
   // There is a use that does not feed into addition, so the multiply can't
   // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
-  if (User->getOpcode() != ISD::ADD)
+  if (!User->isAnyAdd())
 return SDValue();
 
   // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@ 
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
 
 SDValue Hi = getHiHalf64(LHS, DAG);
 SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+  Opcode = ISD::ADD;
 SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, 
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
 
 SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue 
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
-  SDNodeFlags Flags;
-  // If both additions in the original we

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From c9cbbce907dc77f1580019bb78ae3c175f99af37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x1
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add tests for ISD::PTRADD DAG combines (PR #142738)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142738

>From cdea7dfe63d04d4b2879d7f73408753ff70e20dc Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 3 Jun 2025 09:49:19 -0400
Subject: [PATCH 1/2] [AMDGPU][SDAG] Add tests for ISD::PTRADD DAG combines

Pre-committing tests to show improvements in a follow-up PR with the
combines.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 207 ++
 1 file changed, 207 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
new file mode 100644
index 0..0241be9197e1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 
-disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=1 < %s | 
FileCheck --check-prefixes=GFX942,GFX942_PTRADD %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 
-disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=0 < %s | 
FileCheck --check-prefixes=GFX942,GFX942_LEGACY %s
+
+; Tests for DAG combines and folds related to the ISD::PTRADD SelectionDAG
+; opcode. The RUN lines uses -disable-separate-const-offset-from-gep to disable
+; similar transformations in that pass.
+
+; Tests reassociation (ptradd N0:(ptradd p, c1), z) where N0 has only one use.
+define i64 @global_load_ZTwoUses(ptr addrspace(1) %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: global_load_ZTwoUses:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, 24
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[0:1], v[0:1], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: global_load_ZTwoUses:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_LEGACY-NEXT:global_load_dwordx2 v[0:1], v[0:1], off offset:24
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %base, i64 24
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %voffset
+  %l = load i64, ptr addrspace(1) %gep1, align 8
+  %r = add i64 %l, %voffset
+  ret i64 %r
+}
+
+define i64 @global_load_gep_add_reassoc(ptr addrspace(1) %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: global_load_gep_add_reassoc:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[2:3], 0, 24
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[0:1], v[0:1], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: global_load_gep_add_reassoc:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_load_dwordx2 v[0:1], v[0:1], off offset:24
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %add0 = add nuw nsw i64 %voffset, 24
+  %gep0 = getelementptr nuw inbounds i8, ptr addrspace(1) %base, i64 %add0
+  %l = load i64, ptr addrspace(1) %gep0, align 8
+  ret i64 %l
+}
+
+; Tests reassociation (ptradd (ptradd p, c1), c2) with two constants. These
+; would be folded away in most cases, but the index computation introduced by
+; the legalization of wide vector stores can for example introduce them.
+define amdgpu_kernel void @store_v16i32(ptr addrspace(1) %out, <16 x i32> %a) {
+; GFX942_PTRADD-LABEL: store_v16i32:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:s_load_dwordx16 s[8:23], s[4:5], 0x40
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v4, 0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_add_u32 s2, s0, 32
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v0, s20
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, s21
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v2, s22
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v3, s23
+; GFX942_PTRADD-NEXT:s_addc_u32 s3, s1, 0
+; GFX942_PTRADD-NEXT:global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
+; GFX942_PTRADD-NEXT:s_nop 1
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v0, s16
+; GFX942_PTRADD-NEXT:v_mo

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add tests for ISD::PTRADD DAG combines (PR #142738)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142738

>From cdea7dfe63d04d4b2879d7f73408753ff70e20dc Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 3 Jun 2025 09:49:19 -0400
Subject: [PATCH 1/2] [AMDGPU][SDAG] Add tests for ISD::PTRADD DAG combines

Pre-committing tests to show improvements in a follow-up PR with the
combines.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 207 ++
 1 file changed, 207 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
new file mode 100644
index 0..0241be9197e1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 
-disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=1 < %s | 
FileCheck --check-prefixes=GFX942,GFX942_PTRADD %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 
-disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=0 < %s | 
FileCheck --check-prefixes=GFX942,GFX942_LEGACY %s
+
+; Tests for DAG combines and folds related to the ISD::PTRADD SelectionDAG
+; opcode. The RUN lines uses -disable-separate-const-offset-from-gep to disable
+; similar transformations in that pass.
+
+; Tests reassociation (ptradd N0:(ptradd p, c1), z) where N0 has only one use.
+define i64 @global_load_ZTwoUses(ptr addrspace(1) %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: global_load_ZTwoUses:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, 24
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[0:1], v[0:1], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: global_load_ZTwoUses:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_LEGACY-NEXT:global_load_dwordx2 v[0:1], v[0:1], off offset:24
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %base, i64 24
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 %voffset
+  %l = load i64, ptr addrspace(1) %gep1, align 8
+  %r = add i64 %l, %voffset
+  ret i64 %r
+}
+
+define i64 @global_load_gep_add_reassoc(ptr addrspace(1) %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: global_load_gep_add_reassoc:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[2:3], 0, 24
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[0:1], v[0:1], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: global_load_gep_add_reassoc:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_load_dwordx2 v[0:1], v[0:1], off offset:24
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %add0 = add nuw nsw i64 %voffset, 24
+  %gep0 = getelementptr nuw inbounds i8, ptr addrspace(1) %base, i64 %add0
+  %l = load i64, ptr addrspace(1) %gep0, align 8
+  ret i64 %l
+}
+
+; Tests reassociation (ptradd (ptradd p, c1), c2) with two constants. These
+; would be folded away in most cases, but the index computation introduced by
+; the legalization of wide vector stores can for example introduce them.
+define amdgpu_kernel void @store_v16i32(ptr addrspace(1) %out, <16 x i32> %a) {
+; GFX942_PTRADD-LABEL: store_v16i32:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:s_load_dwordx16 s[8:23], s[4:5], 0x40
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v4, 0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_add_u32 s2, s0, 32
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v0, s20
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, s21
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v2, s22
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v3, s23
+; GFX942_PTRADD-NEXT:s_addc_u32 s3, s1, 0
+; GFX942_PTRADD-NEXT:global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
+; GFX942_PTRADD-NEXT:s_nop 1
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v0, s16
+; GFX942_PTRADD-NEXT:v_mo

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142739

>From 743ecdf0cf69d300859d6817fa4a9c48218aa9e5 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 03:32:32 -0400
Subject: [PATCH 1/5] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines

This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one
that is closely connected.

The generic DAG combine is based on a part of PR #105669 by @rgwott, which was
adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello
LLVM tree. I added some parts and removed several disjuncts from the
reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly
  normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since
  they cause regressions in AMDGPU.

For SWDEV-516125.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  92 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  49 +
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |   1 +
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 194 ++
 4 files changed, 201 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..505cb264ae948 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -419,6 +419,7 @@ namespace {
 SDValue visitADDLike(SDNode *N);
 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
 SDNode *LocReference);
+SDValue visitPTRADD(SDNode *N);
 SDValue visitSUB(SDNode *N);
 SDValue visitADDSAT(SDNode *N);
 SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool 
DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
 return false;
 
   auto *C2 = dyn_cast(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:return visitTokenFactor(N);
   case ISD::MERGE_VALUES:   return visitMERGE_VALUES(N);
   case ISD::ADD:return visitADD(N);
+  case ISD::PTRADD: return visitPTRADD(N);
   case ISD::SUB:return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:return visitADDSAT(N);
@@ -2628,6 +2630,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd undef, y) -> undef
+  if (N0.isUndef())
+return N0;
+
+  // fold (ptradd x, undef) -> undef
+  if (N1.isUndef())
+return DAG.getUNDEF(PtrVT);
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0))
+return N1;
+
+  if (N0.getOpcode() == ISD::PTRADD &&
+  !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) 
{
+SDValue X = N0.getOperand(0);
+SDValue Y = N0.getOperand(1);
+SDValue Z = N1;
+bool N0OneUse = N0.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+//   * y is a constant and (ptradd x, y) has one use; or
+//   * y and z are both constants.
+if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N0->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;
+  SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+  AddToWorklist(Add.getNode());
+  return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+}
+
+// TODO: There is another possible fold here that was proven useful.
+// It would be this:
+//
+// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+//   * (ptradd x, y) has one use; and
+//   * y is a constant; and
+//   * z is not a constant.
+//
+// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
+// opportunity to select more complex instructions such as SUBPT and
+// MSUBPT. H

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142739

>From 743ecdf0cf69d300859d6817fa4a9c48218aa9e5 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 03:32:32 -0400
Subject: [PATCH 1/5] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines

This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one
that is closely connected.

The generic DAG combine is based on a part of PR #105669 by @rgwott, which was
adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello
LLVM tree. I added some parts and removed several disjuncts from the
reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly
  normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since
  they cause regressions in AMDGPU.

For SWDEV-516125.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  92 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  49 +
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |   1 +
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 194 ++
 4 files changed, 201 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..505cb264ae948 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -419,6 +419,7 @@ namespace {
 SDValue visitADDLike(SDNode *N);
 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
 SDNode *LocReference);
+SDValue visitPTRADD(SDNode *N);
 SDValue visitSUB(SDNode *N);
 SDValue visitADDSAT(SDNode *N);
 SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool 
DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
 return false;
 
   auto *C2 = dyn_cast(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:return visitTokenFactor(N);
   case ISD::MERGE_VALUES:   return visitMERGE_VALUES(N);
   case ISD::ADD:return visitADD(N);
+  case ISD::PTRADD: return visitPTRADD(N);
   case ISD::SUB:return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:return visitADDSAT(N);
@@ -2628,6 +2630,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd undef, y) -> undef
+  if (N0.isUndef())
+return N0;
+
+  // fold (ptradd x, undef) -> undef
+  if (N1.isUndef())
+return DAG.getUNDEF(PtrVT);
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0))
+return N1;
+
+  if (N0.getOpcode() == ISD::PTRADD &&
+  !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) 
{
+SDValue X = N0.getOperand(0);
+SDValue Y = N0.getOperand(1);
+SDValue Z = N1;
+bool N0OneUse = N0.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+//   * y is a constant and (ptradd x, y) has one use; or
+//   * y and z are both constants.
+if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N0->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;
+  SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+  AddToWorklist(Add.getNode());
+  return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+}
+
+// TODO: There is another possible fold here that was proven useful.
+// It would be this:
+//
+// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+//   * (ptradd x, y) has one use; and
+//   * y is a constant; and
+//   * z is not a constant.
+//
+// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
+// opportunity to select more complex instructions such as SUBPT and
+// MSUBPT. H

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143880

>From 99d65b3e0a8627b581673b55505962665a3ffcb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 45 +++
 1 file changed, 45 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) 
{
   store float 1.0, ptr addrspace(1) %p1
   ret void
 }
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+  ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %a_masked = and i64 %a, u0xf
+  %b_masked = and i64 %b, u0xf
+  %mul = mul i64 %a_masked, %b_masked
+  %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+  ret ptr %gep
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns (PR #143881)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143881

>From f93590bac710750f993c86005c217b843cc5a863 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 07:44:37 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns

This patch mirrors similar patterns for ISD::ADD. The main difference is
that ISD::ADD is commutative, so that a pattern definition for, e.g.,
(add (mul x, y), z), automatically also handles (add z, (mul x, y)).
ISD::PTRADD is not commutative, so we would need to handle these cases
explicitly. This patch only implements (ptradd z, (op x, y)) patterns,
where the nested operation (shift or multiply) is the offset of the
ptradd (i.e., the right operand), since base pointers that are the
result of a shift or multiply seem less likely.

For SWDEV-516125.
---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td| 36 +++-
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 41 ++
 llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll   | 42 +++
 3 files changed, 52 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a005e0245b8ff..8054e75782539 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -484,12 +484,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], 
True16Predicate = NotHasTrue
   defm: Ternary_i16_Pats_gfx9;
 } // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = 
NotHasTrue16BitInsts
 
-class ThreeOpFragSDAG : PatFrag<
+class ThreeOpFragSDAG : PatFrag<
   (ops node:$x, node:$y, node:$z),
   // When the inner operation is used multiple times, selecting 3-op
   // instructions may still be beneficial -- if the other users can be
   // combined similarly. Let's be conservative for now.
-  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z),
+  !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp node:$x, node:$y)),
+  (op2 (HasOneUseBinOp node:$x, node:$y), node:$z)),
   [{
 // Only use VALU ops when the result is divergent.
 if (!N->isDivergent())
@@ -516,7 +517,10 @@ class ThreeOpFragSDAG : PatFrag<
   let PredicateCodeUsesOperands = 1;
 }
 
-class ThreeOpFrag : 
ThreeOpFragSDAG {
+// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and
+// matches (op2 z, (op1, x, y)) if op1IsRight = 1.
+class ThreeOpFrag : ThreeOpFragSDAG {
   // The divergence predicate is irrelevant in GlobalISel, as we have
   // proper register bank checks. We just need to verify the constant
   // bus restriction when all the sources are considered.
@@ -806,12 +810,19 @@ def : GCNPat<
  (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1),
  (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
 
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX940Plus in {
 def : GCNPat<
   (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2),
   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
 >;
 
+def : GCNPat <
+  // (ptradd z, (shl x, y)) -> ((x << y) + z)
+  (ThreeOpFrag i64:$src0, i32:$src1, 
i64:$src2),
+  (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
+>;
+} // End SubtargetPredicate = isGFX940Plus
+
 def : VOPBinOpClampPat;
 def : VOPBinOpClampPat;
 
@@ -880,19 +891,24 @@ multiclass IMAD32_Pats  {
 
 // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a 
normal mul.
 // We need to separate this because otherwise OtherPredicates would be 
overriden.
-class IMAD32_Mul24_Pat: GCNPat <
-(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
-(inst $src0, $src1, $src2, 0 /* clamp */)
->;
+class IMAD32_Mul24_Pats_Impl : GCNPat <
+!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, 
i32:$src1,
+(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), 
i64:$src2))),
+(inst $src0, $src1, $src2, 0 /* clamp */)>;
+
+multiclass IMAD32_Mul24_Pats {
+  def : IMAD32_Mul24_Pats_Impl;
+  def : IMAD32_Mul24_Pats_Impl;
+}
 
 // exclude pre-GFX9 where it was slow
 let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus 
in {
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in 
{
   defm : IMAD32_Pats;
-  def : IMAD32_Mul24_Pat;
+  defm : IMAD32_Mul24_Pats;
 }
 
 def VOP3_PERMLANE_Profile : VOP3_Profile, 
VOP3_OPSEL> {
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d48bfe0bb7f21..34bb98550de04 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -266,18 +266,11 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) 
%p) {
 
 ; Use non-zero shift amounts in v_lshl_add_u64.
 define ptr @select_v_lshl_add_u64(ptr %base, 

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142777

>From df620d738a35bb2d52c4254a784b66431725206f Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:30:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in
 SelectionDAGAddressAnalysis

Pre-committing test to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index b78dea1684545..d3242905ada64 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -126,3 +126,31 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
   store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
   ret void
 }
+
+; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
+; SelectionDAGAddressAnalysis.
+define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
+; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
+  ret void
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in SelectionDAGAddressAnalysis (PR #142778)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142778

>From c959592b27205064e3b6f53c7330032bce84f857 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:48:02 -0400
Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in
 SelectionDAGAddressAnalysis

This is used in a bunch of memory-related transforms.

For SWDEV-516125.
---
 .../SelectionDAGAddressAnalysis.cpp   |  6 ++--
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 ++-
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..da92aaa860b2b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -231,6 +231,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
   break;
 case ISD::ADD:
+case ISD::PTRADD:
   if (auto *C = dyn_cast(Base->getOperand(1))) {
 Offset += C->getSExtValue();
 Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
@@ -259,7 +260,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 break;
   }
 
-  if (Base->getOpcode() == ISD::ADD) {
+  if (Base->isAnyAdd()) {
 // TODO: The following code appears to be needless as it just
 //   bails on some Ptrs early, reducing the cases where we
 //   find equivalence. We should be able to remove this.
@@ -282,8 +283,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 }
 
 // Check if Index Offset pattern
-if (Index->getOpcode() != ISD::ADD ||
-!isa(Index->getOperand(1)))
+if (!Index->isAnyAdd() || !isa(Index->getOperand(1)))
   return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
 
 Offset += cast(Index->getOperand(1))->getSExtValue();
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index d3242905ada64..2e76033a480f4 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -130,26 +130,14 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
 ; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
 ; SelectionDAGAddressAnalysis.
 define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
-; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_PTRADD:   ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
-; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
-; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
-; GFX942_LEGACY:   ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
-; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+; GFX942-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942:   ; %bb.0: ; %entry
+; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942-NEXT:s_waitcnt vmcnt(0)
+; GFX942-NEXT:s_setpc_b64 s[30:31]
 entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142777

>From df620d738a35bb2d52c4254a784b66431725206f Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:30:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in
 SelectionDAGAddressAnalysis

Pre-committing test to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index b78dea1684545..d3242905ada64 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -126,3 +126,31 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
   store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
   ret void
 }
+
+; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
+; SelectionDAGAddressAnalysis.
+define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
+; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
+  ret void
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From c9cbbce907dc77f1580019bb78ae3c175f99af37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x1
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143673

>From a3d204e9a8aae5de008a83904215d44d8d0c3380 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines

This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.

For SWDEV-516125.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 151 ++
 3 files changed, 167 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, 
EVT VT,
 return SDValue();
   int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
-  case ISD::ADD: break;
+  case ISD::ADD:
+  case ISD::PTRADD:
+break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
 #include 
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "si-lower"
 
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, 
const SDLoc &SL,
 // instead of a tree.
 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::ADD);
+  assert(N->isAnyAdd());
 
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 for (SDNode *User : LHS->users()) {
   // There is a use that does not feed into addition, so the multiply can't
   // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
-  if (User->getOpcode() != ISD::ADD)
+  if (!User->isAnyAdd())
 return SDValue();
 
   // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@ 
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
 
 SDValue Hi = getHiHalf64(LHS, DAG);
 SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+  Opcode = ISD::ADD;
 SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, 
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
 
 SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue 
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
-  SDNodeFlags Flags;
-  // If both additions in the original were NUW, the new ones are as well.

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143880

>From 99d65b3e0a8627b581673b55505962665a3ffcb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 45 +++
 1 file changed, 45 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) 
{
   store float 1.0, ptr addrspace(1) %p1
   ret void
 }
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+  ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %a_masked = and i64 %a, u0xf
+  %b_masked = and i64 %b, u0xf
+  %mul = mul i64 %a_masked, %b_masked
+  %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+  ret ptr %gep
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Support non-power-of-2 types when expanding memcmp (PR #114971)

2025-06-13 Thread Luke Lau via llvm-branch-commits


@@ -2954,20 +2954,13 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool 
IsZeroCmp) const {
   }
 
   if (IsZeroCmp && ST->hasVInstructions()) {
-unsigned RealMinVLen = ST->getRealMinVLen();
-// Support Fractional LMULs if the lengths are larger than XLen.
-// TODO: Support non-power-of-2 types.
-for (unsigned FLMUL = 8; FLMUL >= 2; FLMUL /= 2) {
-  unsigned Len = RealMinVLen / FLMUL;
-  if (Len > ST->getXLen())
-Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
-}
-for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
- LMUL *= 2) {
-  unsigned Len = RealMinVLen * LMUL;
-  if (Len > ST->getXLen())
-Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
-}
+unsigned VLenB = ST->getRealMinVLen() / 8;
+// The minimum size should be the maximum bytes between `VLen * LMUL_MF8`
+// and `XLen * 2`.
+unsigned MinSize = std::max(VLenB / 8, ST->getXLen() * 2 / 8);

lukel97 wrote:

If that's the case, do we even need the LMUL check? I.e. can we just do 

```
unsigned MinSize = ST->getXLen() + 1;
```

And presumably for sizes < MF8, lowering will use the correct container anyway?

https://github.com/llvm/llvm-project/pull/114971
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Matt Arsenault via llvm-branch-commits


@@ -2628,6 +2630,87 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0) && PtrVT == IntVT)
+return N1;

arsenm wrote:

```suggestion
  // fold (ptradd 0, x) -> x
  if (PtrVT == IntVT && isNullConstant(N0))
return N1;
```

But PtrVT == IntVT was already asserted above? 

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Matt Arsenault via llvm-branch-commits


@@ -14944,6 +14945,51 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
+   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N1.getOpcode() == ISD::ADD) {
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+//y is not, and (add y, z) is used only once.
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+//z is not, and (add y, z) is used only once.
+// The goal is to move constant offsets to the outermost ptradd, to create
+// more opportunities to fold offsets into memory instructions.
+// Together with the generic combines in DAGCombiner.cpp, this also
+// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
+//
+// This transform is here instead of in the general DAGCombiner as it can
+// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
+// AArch64's CPA.
+SDValue X = N0;
+SDValue Y = N1.getOperand(0);
+SDValue Z = N1.getOperand(1);
+bool N1OneUse = N1.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+if ((ZIsConstant != YIsConstant) && N1OneUse) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N1->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;

arsenm wrote:

Can you do SDNodeFlags = (N->getFlags() & N1->getFlags()) & 
SDNodeFlags::NoUnsignedWrap? 

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Matt Arsenault via llvm-branch-commits


@@ -14944,6 +14945,51 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
+   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N1.getOpcode() == ISD::ADD) {
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+//y is not, and (add y, z) is used only once.
+// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+//z is not, and (add y, z) is used only once.
+// The goal is to move constant offsets to the outermost ptradd, to create
+// more opportunities to fold offsets into memory instructions.
+// Together with the generic combines in DAGCombiner.cpp, this also
+// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
+//
+// This transform is here instead of in the general DAGCombiner as it can
+// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
+// AArch64's CPA.
+SDValue X = N0;
+SDValue Y = N1.getOperand(0);
+SDValue Z = N1.getOperand(1);
+bool N1OneUse = N1.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+if ((ZIsConstant != YIsConstant) && N1OneUse) {

arsenm wrote:

Avoid the DAG.isConstantIntBuildVectorOrConstantInt in the !N1OneUse case? 

https://github.com/llvm/llvm-project/pull/142739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)

2025-06-13 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/142777
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143673

>From 50de6e085242ce975af812088f4ef48896444fb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines

This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.

For SWDEV-516125.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 151 ++
 3 files changed, 167 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, 
EVT VT,
 return SDValue();
   int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
-  case ISD::ADD: break;
+  case ISD::ADD:
+  case ISD::PTRADD:
+break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
 #include 
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "si-lower"
 
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, 
const SDLoc &SL,
 // instead of a tree.
 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::ADD);
+  assert(N->isAnyAdd());
 
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 for (SDNode *User : LHS->users()) {
   // There is a use that does not feed into addition, so the multiply can't
   // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
-  if (User->getOpcode() != ISD::ADD)
+  if (!User->isAnyAdd())
 return SDValue();
 
   // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@ 
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
 
 SDValue Hi = getHiHalf64(LHS, DAG);
 SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+  Opcode = ISD::ADD;
 SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, 
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
 
 SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue 
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
-  SDNodeFlags Flags;
-  // If both additions in the original were NUW, the new ones are as well.

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142739

>From 3002da1befde734af1904d3424abd72b65f1377b Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 03:32:32 -0400
Subject: [PATCH 1/5] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines

This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one
that is closely connected.

The generic DAG combine is based on a part of PR #105669 by @rgwott, which was
adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello
LLVM tree. I added some parts and removed several disjuncts from the
reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly
  normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since
  they cause regressions in AMDGPU.

For SWDEV-516125.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  92 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  49 +
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |   1 +
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 194 ++
 4 files changed, 201 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..505cb264ae948 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -419,6 +419,7 @@ namespace {
 SDValue visitADDLike(SDNode *N);
 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
 SDNode *LocReference);
+SDValue visitPTRADD(SDNode *N);
 SDValue visitSUB(SDNode *N);
 SDValue visitADDSAT(SDNode *N);
 SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool 
DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
 return false;
 
   auto *C2 = dyn_cast(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:return visitTokenFactor(N);
   case ISD::MERGE_VALUES:   return visitMERGE_VALUES(N);
   case ISD::ADD:return visitADD(N);
+  case ISD::PTRADD: return visitPTRADD(N);
   case ISD::SUB:return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:return visitADDSAT(N);
@@ -2628,6 +2630,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd undef, y) -> undef
+  if (N0.isUndef())
+return N0;
+
+  // fold (ptradd x, undef) -> undef
+  if (N1.isUndef())
+return DAG.getUNDEF(PtrVT);
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0))
+return N1;
+
+  if (N0.getOpcode() == ISD::PTRADD &&
+  !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) 
{
+SDValue X = N0.getOperand(0);
+SDValue Y = N0.getOperand(1);
+SDValue Z = N1;
+bool N0OneUse = N0.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+//   * y is a constant and (ptradd x, y) has one use; or
+//   * y and z are both constants.
+if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N0->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;
+  SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+  AddToWorklist(Add.getNode());
+  return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+}
+
+// TODO: There is another possible fold here that was proven useful.
+// It would be this:
+//
+// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+//   * (ptradd x, y) has one use; and
+//   * y is a constant; and
+//   * z is not a constant.
+//
+// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
+// opportunity to select more complex instructions such as SUBPT and
+// MSUBPT. H

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines (PR #142739)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142739

>From 3002da1befde734af1904d3424abd72b65f1377b Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 03:32:32 -0400
Subject: [PATCH 1/5] [AMDGPU][SDAG] Add ISD::PTRADD DAG combines

This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one
that is closely connected.

The generic DAG combine is based on a part of PR #105669 by @rgwott, which was
adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello
LLVM tree. I added some parts and removed several disjuncts from the
reassociation condition:
- `isNullConstant(X)`, since there are address spaces where 0 is a perfectly
  normal value that shouldn't be treated specially,
- `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since
  they cause regressions in AMDGPU.

For SWDEV-516125.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  92 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  49 +
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |   1 +
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 194 ++
 4 files changed, 201 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..505cb264ae948 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -419,6 +419,7 @@ namespace {
 SDValue visitADDLike(SDNode *N);
 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
 SDNode *LocReference);
+SDValue visitPTRADD(SDNode *N);
 SDValue visitSUB(SDNode *N);
 SDValue visitADDSAT(SDNode *N);
 SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool 
DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
 return false;
 
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:return visitTokenFactor(N);
   case ISD::MERGE_VALUES:   return visitMERGE_VALUES(N);
   case ISD::ADD:return visitADD(N);
+  case ISD::PTRADD: return visitPTRADD(N);
   case ISD::SUB:return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:return visitADDSAT(N);
@@ -2628,6 +2630,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc 
&DL) {
   return SDValue();
 }
 
+/// Try to fold a pointer arithmetic node.
+/// This needs to be done separately from normal addition, because pointer
+/// addition is not commutative.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT PtrVT = N0.getValueType();
+  EVT IntVT = N1.getValueType();
+  SDLoc DL(N);
+
+  // This is already ensured by an assert in SelectionDAG::getNode(). Several
+  // combines here depend on this assumption.
+  assert(PtrVT == IntVT &&
+ "PTRADD with different operand types is not supported");
+
+  // fold (ptradd undef, y) -> undef
+  if (N0.isUndef())
+return N0;
+
+  // fold (ptradd x, undef) -> undef
+  if (N1.isUndef())
+return DAG.getUNDEF(PtrVT);
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(N1))
+return N0;
+
+  // fold (ptradd 0, x) -> x
+  if (isNullConstant(N0))
+return N1;
+
+  if (N0.getOpcode() == ISD::PTRADD &&
+  !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) 
{
+SDValue X = N0.getOperand(0);
+SDValue Y = N0.getOperand(1);
+SDValue Z = N1;
+bool N0OneUse = N0.hasOneUse();
+bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+//   * y is a constant and (ptradd x, y) has one use; or
+//   * y and z are both constants.
+if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
+  SDNodeFlags Flags;
+  // If both additions in the original were NUW, the new ones are as well.
+  if (N->getFlags().hasNoUnsignedWrap() &&
+  N0->getFlags().hasNoUnsignedWrap())
+Flags |= SDNodeFlags::NoUnsignedWrap;
+  SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+  AddToWorklist(Add.getNode());
+  return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
+}
+
+// TODO: There is another possible fold here that was proven useful.
+// It would be this:
+//
+// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+//   * (ptradd x, y) has one use; and
+//   * y is a constant; and
+//   * z is not a constant.
+//
+// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
+// opportunity to select more complex instructions such as SUBPT and
+// MSUBPT. H

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/142777

>From c0eab936e1cab87636ae7c676d7232948cc35aef Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 4 Jun 2025 09:30:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in
 SelectionDAGAddressAnalysis

Pre-committing test to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index b78dea1684545..d3242905ada64 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -126,3 +126,31 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr 
addrspace(1) %ptr)  #0 {
   store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
   ret void
 }
+
+; Taken from memcpy-param-combinations.ll, tests PTRADD handling in
+; SelectionDAGAddressAnalysis.
+define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr 
addrspace(4) align 1 readonly %src) {
+; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
+  ret void
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From 37747657c81cc49feb345810b792f01e35d28511 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x1
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143673

>From 50de6e085242ce975af812088f4ef48896444fb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines

This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.

For SWDEV-516125.
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 151 ++
 3 files changed, 167 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, 
EVT VT,
 return SDValue();
   int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
-  case ISD::ADD: break;
+  case ISD::ADD:
+  case ISD::PTRADD:
+break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
 #include 
 
 using namespace llvm;
+using namespace llvm::SDPatternMatch;
 
 #define DEBUG_TYPE "si-lower"
 
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, 
const SDLoc &SL,
 // instead of a tree.
 SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::ADD);
+  assert(N->isAnyAdd());
 
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
 for (SDNode *User : LHS->users()) {
   // There is a use that does not feed into addition, so the multiply can't
   // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
-  if (User->getOpcode() != ISD::ADD)
+  if (!User->isAnyAdd())
 return SDValue();
 
   // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@ 
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
 
 SDValue Hi = getHiHalf64(LHS, DAG);
 SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+  Opcode = ISD::ADD;
 SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, 
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
 
 SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue 
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
+  EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic 
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
-  SDNodeFlags Flags;
-  // If both additions in the original we

  1   2   >