[llvm-branch-commits] [llvm] [AddressSanitizer] Remove memory effects from functions (PR #130495)

2025-03-12 Thread Guy David via llvm-branch-commits

https://github.com/guy-david edited 
https://github.com/llvm/llvm-project/pull/130495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace ptr addrspace(8) undef uses with poison (PR #130904)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130904

>From b2b21aa4a3ef95a9b23db41f77f378abf88cddc0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:24:50 +0700
Subject: [PATCH] AMDGPU: Replace ptr addrspace(8) undef uses with poison

---
 llvm/test/CodeGen/AMDGPU/amdpal.ll|   2 +-
 .../CodeGen/AMDGPU/combine-add-zext-xor.ll|  12 +-
 llvm/test/CodeGen/AMDGPU/else.ll  |   2 +-
 .../AMDGPU/extract_subvector_vec4_vec3.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll   |   2 +-
 .../llvm.amdgcn.raw.ptr.buffer.atomic.ll  |   2 +-
 .../llvm.amdgcn.struct.ptr.buffer.atomic.ll   |   2 +-
 .../test/CodeGen/AMDGPU/loop_exit_with_xor.ll |   6 +-
 .../lower-work-group-id-intrinsics-hsa.ll |   2 +-
 .../lower-work-group-id-intrinsics-pal.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/merge-store-crash.ll |   2 +-
 .../test/CodeGen/AMDGPU/merge-store-usedef.ll |   2 +-
 .../AMDGPU/required-export-priority.ll|   2 +-
 .../AMDGPU/si-triv-disjoint-mem-access.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll|   8 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll   | 110 +-
 16 files changed, 83 insertions(+), 83 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll 
b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 171df029615ed..fd9227d2f4319 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -72,7 +72,7 @@ entry:
   %e = getelementptr [2 x i32], ptr addrspace(5) %v1, i32 0, i32 %idx
   %x = load i32, ptr addrspace(5) %e
   %xf = bitcast i32 %x to float
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) 
undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) 
poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll 
b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index b42542db6dbd8..f8227f0039af7 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -66,7 +66,7 @@ define i32 @combine_add_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -146,7 +146,7 @@ define i32 @combine_sub_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -229,7 +229,7 @@ define i32 @combine_add_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -313,7 +313,7 @@ define i32 @combine_sub_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -392,7 +392,7 @@ define i32 @combine_add_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -471,7 +471,7 @@ define i32 @combine_sub_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 4a3018e67b17d..884f5305407a1 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -47,7 +47,7 @@ else:
 
 end:
   %r = phi float [ %v.if, %if ], [ %v.else, %else ]
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(f

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130903

>From 4cb9b21de44ed84d1b79c68f9e06abbdda7327db Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:23:32 +0700
Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison

---
 .../AMDGPU/adjust-writemask-invalid-copy.ll| 10 +-
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++---
 llvm/test/CodeGen/AMDGPU/else.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll  |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |  4 ++--
 .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll  |  2 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll   |  4 ++--
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll   |  2 +-
 .../AMDGPU/undefined-subreg-liverange.ll   |  2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll   | 12 ++--
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll|  6 +++---
 llvm/test/CodeGen/AMDGPU/wave32.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll| 18 +-
 16 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index b913b5c3ab746..dd85edf59b18f 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll 
b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..1e40b4c9f04cf 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130903

>From 4cb9b21de44ed84d1b79c68f9e06abbdda7327db Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:23:32 +0700
Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison

---
 .../AMDGPU/adjust-writemask-invalid-copy.ll| 10 +-
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++---
 llvm/test/CodeGen/AMDGPU/else.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll  |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |  4 ++--
 .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll  |  2 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll   |  4 ++--
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll   |  2 +-
 .../AMDGPU/undefined-subreg-liverange.ll   |  2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll   | 12 ++--
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll|  6 +++---
 llvm/test/CodeGen/AMDGPU/wave32.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll| 18 +-
 16 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index b913b5c3ab746..dd85edf59b18f 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll 
b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..1e40b4c9f04cf 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 

[llvm-branch-commits] [llvm] AMDGPU: Replace insertelement undef with poison in cases with manual updates (PR #130898)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130898

>From 63cb58638a29b6e757f67c7cde616d481903a819 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 12:32:31 +0700
Subject: [PATCH] AMDGPU: Replace insertelement undef with poison in cases with
 manual updates

I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly
sensitive to undef vs. poison.
---
 llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 16 +++---
 .../AMDGPU/promote-alloca-array-aggregate.ll  |  6 ++---
 .../AMDGPU/promote-alloca-loadstores.ll   | 22 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll 
b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index e72f3d3ce993a..d48b75a666db7 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -238,7 +238,7 @@ define <2 x half> 
@v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %ins0 = insertelement <2 x half> undef, half %lo, i32 0
+  %ins0 = insertelement <2 x half> poison, half %lo, i32 0
   %ins1 = insertelement <2 x half> %ins0, half %hi, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
   ret <2 x half> %canonicalized
@@ -2581,7 +2581,7 @@ define <2 x half> 
@v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 0
+  %vec = insertelement <2 x half> poison, half %val, i32 0
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2622,7 +2622,7 @@ define <2 x half> 
@v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 1
+  %vec = insertelement <2 x half> poison, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2785,7 +2785,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 2.0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half %val, i32 0
+  %vec0 = insertelement <2 x half> poison, half %val, i32 0
   %vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2829,7 +2829,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, 2.0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half 2.0, i32 0
+  %vec0 = insertelement <2 x half> poison, half 2.0, i32 0
   %vec1 = insertelement <2 x half> %vec0, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2925,7 +2925,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <4 x half> undef, half %val, i32 0
+  %vec = insertelement <4 x half> poison, half %val, i32 0
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec)
   ret <4 x half> %canonicalized
 }
@@ -2977,7 +2977,7 @@ define <4 x half> 
@v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 1
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1)
   ret <4 x half> %canonicalized
@@ -3035,7 +3035,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v1, v1, v1
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 2
   %vec2 = in

[llvm-branch-commits] [llvm] AMDGPU: Replace insertelement undef with poison in cases with manual updates (PR #130898)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130898

>From 63cb58638a29b6e757f67c7cde616d481903a819 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 12:32:31 +0700
Subject: [PATCH] AMDGPU: Replace insertelement undef with poison in cases with
 manual updates

I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly
sensitive to undef vs. poison.
---
 llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 16 +++---
 .../AMDGPU/promote-alloca-array-aggregate.ll  |  6 ++---
 .../AMDGPU/promote-alloca-loadstores.ll   | 22 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll 
b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index e72f3d3ce993a..d48b75a666db7 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -238,7 +238,7 @@ define <2 x half> 
@v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %ins0 = insertelement <2 x half> undef, half %lo, i32 0
+  %ins0 = insertelement <2 x half> poison, half %lo, i32 0
   %ins1 = insertelement <2 x half> %ins0, half %hi, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
   ret <2 x half> %canonicalized
@@ -2581,7 +2581,7 @@ define <2 x half> 
@v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 0
+  %vec = insertelement <2 x half> poison, half %val, i32 0
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2622,7 +2622,7 @@ define <2 x half> 
@v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 1
+  %vec = insertelement <2 x half> poison, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2785,7 +2785,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 2.0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half %val, i32 0
+  %vec0 = insertelement <2 x half> poison, half %val, i32 0
   %vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2829,7 +2829,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, 2.0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half 2.0, i32 0
+  %vec0 = insertelement <2 x half> poison, half 2.0, i32 0
   %vec1 = insertelement <2 x half> %vec0, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2925,7 +2925,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <4 x half> undef, half %val, i32 0
+  %vec = insertelement <4 x half> poison, half %val, i32 0
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec)
   ret <4 x half> %canonicalized
 }
@@ -2977,7 +2977,7 @@ define <4 x half> 
@v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 1
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1)
   ret <4 x half> %canonicalized
@@ -3035,7 +3035,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v1, v1, v1
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 2
   %vec2 = in

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130903

>From 602e8abb425c3f36441939bce1f096bf0677372e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:23:32 +0700
Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison

---
 .../AMDGPU/adjust-writemask-invalid-copy.ll| 10 +-
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++---
 llvm/test/CodeGen/AMDGPU/else.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll  |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |  4 ++--
 .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll  |  2 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll   |  4 ++--
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll   |  2 +-
 .../AMDGPU/undefined-subreg-liverange.ll   |  2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll   | 12 ++--
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll|  6 +++---
 llvm/test/CodeGen/AMDGPU/wave32.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll| 18 +-
 16 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index b913b5c3ab746..dd85edf59b18f 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll 
b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..1e40b4c9f04cf 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 

[llvm-branch-commits] [llvm] [AddressSanitizer] Remove memory effects from functions (PR #130495)

2025-03-12 Thread Guy David via llvm-branch-commits

https://github.com/guy-david edited 
https://github.com/llvm/llvm-project/pull/130495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AddressSanitizer] Remove memory effects from functions (PR #130495)

2025-03-12 Thread Guy David via llvm-branch-commits


@@ -612,6 +612,48 @@ void getAddressSanitizerParams(const Triple &TargetTriple, 
int LongSize,
   *OrShadowOffset = Mapping.OrShadowOffset;
 }
 
+void removeASanIncompatibleFnAttributes(Function &F, bool RemoveWriteOnly) {
+  // Remove memory attributes that are invalid with ASan and HWSan.
+  // ASan checks read from shadow, which invalidates memory(argmem: *)
+  // Short granule checks on function arguments read from the argument memory
+  // (last byte of the granule), which invalidates writeonly.
+  //
+  // This is not only true for sanitized functions, because AttrInfer can
+  // infer those attributes on libc functions, which is not true if those
+  // are instrumented (Android) or intercepted.
+  //
+  // We might want to model ASan shadow memory more opaquely to get rid of
+  // this problem altogether, by hiding the shadow memory write in an
+  // intrinsic, essentially like in the AArch64StackTagging pass. But that's
+  // for another day.
+
+  // The API is weird. `onlyReadsMemory` actually means "does not write", and
+  // `onlyWritesMemory` actually means "does not read". So we reconstruct
+  // "accesses memory" && "does not read" <=> "writes".
+  bool Changed = false;
+  if (!F.doesNotAccessMemory()) {
+bool WritesMemory = !F.onlyReadsMemory();
+bool ReadsMemory = !F.onlyWritesMemory();
+if ((WritesMemory && !ReadsMemory) || F.onlyAccessesArgMemory()) {
+  F.removeFnAttr(Attribute::Memory);
+  Changed = true;
+}
+  }
+  if (RemoveWriteOnly) {
+for (Argument &A : F.args()) {
+  if (A.hasAttribute(Attribute::WriteOnly)) {
+A.removeAttr(Attribute::WriteOnly);
+Changed = true;
+  }
+}
+  }
+  if (Changed) {
+// nobuiltin makes sure later passes don't restore assumptions about
+// the function.
+F.addFnAttr(Attribute::NoBuiltin);
+  }
+}

guy-david wrote:

I'm not completely following, can you explain what it means to "operate in 
terms of Memory"?

https://github.com/llvm/llvm-project/pull/130495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Do not restrict the computed gotos (#114990) (PR #130585)

2025-03-12 Thread Chris Eibl via llvm-branch-commits

chris-eibl wrote:

Does this mean this won't be backported to 19?

https://github.com/llvm/llvm-project/pull/130585
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130059
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/130078

>From 2b6b9c1fae4ea33cb4d3056dc016cdcac2cfc304 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Thu, 6 Mar 2025 03:16:59 -0600
Subject: [PATCH 1/3] [flang][OpenMP] Translate OpenMP scopes when compiling
 for target device

If a `target` directive is nested in a host OpenMP directive (e.g.
parallel, task, or a worksharing loop), flang currently crashes if the
target directive-related MLIR ops (e.g. `omp.map.bounds` and
`omp.map.info`) depend on SSA values defined inside the parent host
OpenMP directives/ops.

This PR tries to solve this problem by treating these parent OpenMP ops
as "SSA scopes". Whenever we are translating for the device, instead of
completely translating host ops, we just translate their MLIR ops as pure
SSA values.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  |  59 ++--
 .../openmp-target-nesting-in-host-ops.mlir| 136 ++
 2 files changed, 186 insertions(+), 9 deletions(-)
 create mode 100644 
mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b9893716980fe..f277f35fa51eb 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -537,6 +537,19 @@ static llvm::omp::ProcBindKind 
getProcBindKind(omp::ClauseProcBindKind kind) {
   llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
+/// Maps elements of \p blockArgs (which are MLIR values) to the corresponding
+/// LLVM values of \p operands' elements. This is useful when an OpenMP region
+/// with entry block arguments is converted to LLVM. In this case \p blockArgs
+/// are (part of) the OpenMP region's entry arguments and \p operands are
+/// (part of) the operands to the OpenMP op containing the region.
+static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
+omp::BlockArgOpenMPOpInterface blockArgIface) {
+  llvm::SmallVector> blockArgsPairs;
+  blockArgIface.getBlockArgsPairs(blockArgsPairs);
+  for (auto [var, arg] : blockArgsPairs)
+moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
+}
+
 /// Helper function to map block arguments defined by ignored loop wrappers to
 /// LLVM values and prevent any uses of those from triggering null pointer
 /// dereferences.
@@ -549,17 +562,10 @@ convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
   // Map block arguments directly to the LLVM value associated to the
   // corresponding operand. This is semantically equivalent to this wrapper not
   // being present.
-  auto forwardArgs =
-  [&moduleTranslation](omp::BlockArgOpenMPOpInterface blockArgIface) {
-llvm::SmallVector> blockArgsPairs;
-blockArgIface.getBlockArgsPairs(blockArgsPairs);
-for (auto [var, arg] : blockArgsPairs)
-  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
-  };
-
   return llvm::TypeSwitch(opInst)
   .Case([&](omp::SimdOp op) {
-forwardArgs(cast(*op));
+forwardArgs(moduleTranslation,
+cast(*op));
 op.emitWarning() << "simd information on composite construct 
discarded";
 return success();
   })
@@ -5294,6 +5300,7 @@ convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase 
&builder,
   return convertHostOrTargetOperation(op, builder, moduleTranslation);
 }
 
+
 static LogicalResult
 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -5313,6 +5320,40 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might use LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+if (auto blockArgsIface =
+dyn_cast(oper))
+  forwardArgs(moduleTranslation, blockArgsIface);
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;
+  }
+}
+
+for (Region ®ion : oper->getRegions()) {
+  auto result = convertOmpOpRegions(
+  region, oper->getNam

[llvm-branch-commits] [llvm] [NFC][Cloning] Move DebugInfoFinder decl closer to its place of usage (PR #129154)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129154

>From cc3662c955e640f6975daffc367b775e179df18c Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:09:23 -0800
Subject: [PATCH] [NFC][Cloning] Move DebugInfoFinder decl closer to its place
 of usage

Summary:
This makes it clear that DIFinder is only really necessary for llvm.dbg.cu 
update.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129154, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/12
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 979cbad0d82c0..3af07594c848b 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,8 +266,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  DebugInfoFinder DIFinder;
-
   if (Changes < CloneFunctionChangeType::DifferentModule) {
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
@@ -320,7 +318,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 Visited.insert(Operand);
 
   // Collect and clone all the compile units referenced from the instructions 
in
-  // the function (e.g. as a scope).
+  // the function (e.g. as instructions' scope).
+  DebugInfoFinder DIFinder;
   collectDebugInfoFromInstructions(*OldFunc, DIFinder);
   for (auto *Unit : DIFinder.compile_units()) {
 MDNode *MappedUnit =

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Clean up comments in CloneFunctionInto (PR #129153)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129153

>From a9b5f3844e670403fd4e5af9f5d6a497c3ad0653 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:07:40 -0800
Subject: [PATCH] [NFC][Cloning] Clean up comments in CloneFunctionInto

Summary:
Some comments no longer make sense or no longer refer to an existing code path.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129153, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/11
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f32d9454eb076..979cbad0d82c0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,24 +266,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   if (OldFunc->isDeclaration())
 return;
 
-  // When we remap instructions within the same module, we want to avoid
-  // duplicating inlined DISubprograms, so record all subprograms we find as we
-  // duplicate instructions and then freeze them in the MD map. We also record
-  // information about dbg.value and dbg.declare to avoid duplicating the
-  // types.
   DebugInfoFinder DIFinder;
 
-  // Track the subprogram attachment that needs to be cloned to fine-tune the
-  // mapping within the same module.
   if (Changes < CloneFunctionChangeType::DifferentModule) {
-// Need to find subprograms, types, and compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() == OldFunc->getParent()) &&
"Expected NewFunc to have the same parent, or no parent");
   } else {
-// Need to find all the compile units.
-
 assert((NewFunc->getParent() == nullptr ||
 NewFunc->getParent() != OldFunc->getParent()) &&
"Expected NewFunc to have different parents, or no parent");

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering (non i1) (PR #124298)

2025-03-12 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/124298

>From 9a5bfd368f40d1c313dc1783df4f43f12b88258b Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Fri, 28 Feb 2025 15:56:04 +0100
Subject: [PATCH] AMDGPU/GlobalISel: Temporal divergence lowering (non i1)

Record all uses outside cycle with divergent exit during
propagateTemporalDivergence in Uniformity analysis.
With this list of candidates for temporal divergence lowering,
excluding known lane masks from control flow intrinsics,
find sources from inside the cycle that are not i1 and uniform.
Temporal divergence lowering (non i1):
create copy(v_mov) to vgpr, with implicit exec (to stop other
passes from moving this copy outside of the cycle) and use this
vgpr outside of the cycle instead of original uniform source.
---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 44 +++-
 llvm/include/llvm/ADT/GenericUniformityInfo.h |  5 ++
 llvm/lib/Analysis/UniformityAnalysis.cpp  |  3 +-
 .../lib/CodeGen/MachineUniformityAnalysis.cpp |  6 +--
 .../AMDGPUGlobalISelDivergenceLowering.cpp| 52 ++-
 .../lib/Target/AMDGPU/AMDGPURegBankSelect.cpp | 25 +++--
 llvm/lib/Target/AMDGPU/SILowerI1Copies.h  |  6 +++
 ...divergent-i1-phis-no-lane-mask-merging.mir |  7 +--
 ...ergence-divergent-i1-used-outside-loop.mir | 19 +++
 .../divergence-temporal-divergent-reg.ll  | 38 +++---
 .../divergence-temporal-divergent-reg.mir |  8 +--
 .../AMDGPU/GlobalISel/regbankselect-mui.ll| 17 +++---
 12 files changed, 176 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h 
b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index bd09f4fe43e08..51e9ac30391fe 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -51,6 +51,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "uniformity"
@@ -342,6 +343,9 @@ template  class 
GenericUniformityAnalysisImpl {
   typename SyncDependenceAnalysisT::DivergenceDescriptor;
   using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
 
+  using TemporalDivergenceTuple =
+  std::tuple;
+
   GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
 const TargetTransformInfo *TTI)
   : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
@@ -396,6 +400,11 @@ template  class 
GenericUniformityAnalysisImpl {
 
   void print(raw_ostream &out) const;
 
+  SmallVector TemporalDivergenceList;
+
+  void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
+const CycleT *);
+
 protected:
   /// \brief Value/block pair representing a single phi input.
   struct PhiInput {
@@ -1129,6 +1138,13 @@ void GenericUniformityAnalysisImpl::compute() {
   }
 }
 
+template 
+void GenericUniformityAnalysisImpl::recordTemporalDivergence(
+ConstValueRefT Val, const InstructionT *User, const CycleT *Cycle) {
+  TemporalDivergenceList.emplace_back(Val, const_cast(User),
+  Cycle);
+}
+
 template 
 bool GenericUniformityAnalysisImpl::isAlwaysUniform(
 const InstructionT &Instr) const {
@@ -1146,6 +1162,12 @@ template 
 void GenericUniformityAnalysisImpl::print(raw_ostream &OS) const {
   bool haveDivergentArgs = false;
 
+  // When printing a Value (an LLVM IR instruction) we must add a newline
+  // ourselves: the LLVM IR print function for Value does not end with one,
+  // whereas the MIR print for MachineInstr already does.
+  constexpr bool IsMIR = std::is_same::value;
+  std::string NewLine = IsMIR ? "" : "\n";
+
   // Control flow instructions may be divergent even if their inputs are
   // uniform. Thus, although exceedingly rare, it is possible to have a program
   // with no divergent values but with divergent control structures.
@@ -1180,6 +1202,16 @@ void 
GenericUniformityAnalysisImpl::print(raw_ostream &OS) const {
 }
   }
 
+  if (!TemporalDivergenceList.empty()) {
+OS << "\nTEMPORAL DIVERGENCE LIST:\n";
+
+for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
+  OS << "Value :" << Context.print(Val) << NewLine
+ << "Used by   :" << Context.print(UseInst) << NewLine
+ << "Outside cycle :" << Cycle->print(Context) << "\n\n";
+}
+  }
+
   for (auto &block : F) {
 OS << "\nBLOCK " << Context.print(&block) << '\n';
 
@@ -1191,7 +1223,7 @@ void 
GenericUniformityAnalysisImpl::print(raw_ostream &OS) const {
 OS << "  DIVERGENT: ";
   else
 OS << " ";
-  OS << Context.print(value) << '\n';
+  OS << Context.print(value) << NewLine;
 }
 
 OS << "TERMINATORS\n";
@@ -1203,13 +1235,21 @@ void 
GenericUniformityAnalysisImpl::prin

[llvm-branch-commits] [llvm] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking (PR #130617)

2025-03-12 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/130617

>From 936a6aabb39df4eb58fb60facd826685746906c4 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH 1/2] [SeparateConstOffsetFromGEP] Preserve inbounds flag based
 on ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++
 .../AMDGPU/preserve-inbounds.ll   | 23 +++
 .../NVPTX/split-gep-and-gvn.ll| 16 ++---
 .../NVPTX/split-gep.ll|  8 +++
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp 
b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ab8e979e7b40a..7f93115499bc9 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 // and the old index if they are not used.
 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+MayRecoverInbounds =
+MayRecoverInbounds && computeKnownBits(NewIdx, 
*DL).isNonNegative();
   }
 }
   }
@@ -1100,11 +1104,15 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias 
analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  GEPNoWrapFlags NewGEPFlags =
+  MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 422e5d8215502..01619aa481ddd 100644
--- 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -16,3 +16,26 @@ entry:
   %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
   ret ptr %arrayidx
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr 
[[TMP0]], i64 4
+; CHECK-NEXT:[[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr 
[[TMP2]], i64 4
+; CHECK-NEXT:store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/

[llvm-branch-commits] [clang] [Driver] Change linker job in Baremetal toolchain object accomodate GCCInstallation.(2/3) (PR #121830)

2025-03-12 Thread Garvit Gupta via llvm-branch-commits

quic-garvgupt wrote:

Hi @MaskRay, if this PR looks good, could you please give it an LGTM? You have
already approved the first PR in this series of three patches:
https://github.com/llvm/llvm-project/pull/121829

https://github.com/llvm/llvm-project/pull/121830
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap (PR #129151)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129151

>From 1327301de16f7bcbd01b34d2b66c49c8dc433117 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:00:47 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused FindDebugInfoToIdentityMap

Summary:
This function is no longer needed.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129151, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/9
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 19 ---
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 34 
 2 files changed, 53 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2252dda0b9aad..ae00c16e7eada 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -244,25 +244,6 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
  CloneFunctionChangeType Changes,
  DebugInfoFinder &DIFinder);
 
-/// Based on \p Changes and \p DIFinder return debug info that needs to be
-/// identity mapped during Metadata cloning.
-///
-/// NOTE: Such \a MetadataSetTy can be used by \a CloneFunction* to directly
-/// specify metadata that should be identity mapped (and hence not cloned). The
-/// metadata will be identity mapped in \a ValueToValueMapTy on first use. 
There
-/// are several reasons for doing it this way rather than eagerly identity
-/// mapping metadata nodes in a \a ValueMap:
-/// 1. Mapping metadata is not cheap, particularly because of tracking.
-/// 2. When cloning a Function we identity map lots of global module-level
-///metadata to avoid cloning it, while only a fraction of it is actually
-///used by the function. Mapping on first use is a lot faster for modules
-///with meaningful amount of debug info.
-/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
-///data (e.g. a set of metadata nodes in a \a DICompileUnit).
-MetadataSetTy FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8080dca09be00..11033aeec7dda 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -189,40 +189,6 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const 
Function &F,
   return SPClonedWithinModule;
 }
 
-MetadataSetTy
-llvm::FindDebugInfoToIdentityMap(CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule) {
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return {};
-
-  if (DIFinder.subprogram_count() == 0)
-assert(!SPClonedWithinModule &&
-   "Subprogram should be in DIFinder->subprogram_count()...");
-
-  MetadataSetTy MD;
-
-  // Avoid cloning types, compile units, and (other) subprograms.
-  for (DISubprogram *ISP : DIFinder.subprograms())
-if (ISP != SPClonedWithinModule)
-  MD.insert(ISP);
-
-  // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-  for (DIScope *S : DIFinder.scopes()) {
-auto *LScope = dyn_cast(S);
-if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-  MD.insert(S);
-  }
-
-for (DICompileUnit *CU : DIFinder.compile_units())
-  MD.insert(CU);
-
-for (DIType *Type : DIFinder.types())
-  MD.insert(Type);
-
-  return MD;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit (PR #129150)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129150

>From d0c5ddcf3defff02566f5e41fb93b1689436977d Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:47:10 -0800
Subject: [PATCH] [NFC][Coro] Remove now unused CommonDebugInfo in CoroSplit

Summary:
This cleans up the now unnecessary debug info collection in CoroSplit.

This makes CoroSplit pass almost as fast with -g2 as it is with -g1 on
the sample cpp file used with other parts of this stack:

|                 | Baseline | IdentityMD set | Prebuilt CommonDI | MetadataPred (cur) |
|-----------------|----------|----------------|-------------------|--------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 3.8ms              |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms              |
| CollectCommonDI | -        | -              | 63ms              | -                  |
| Speed up        | 1x       | 1.4x           | 4.5x              | 80x                |






Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129150, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/8
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 31 ++--
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 37 +++-
 2 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h 
b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index b817e55cad9fc..d1887980fb3bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,9 +48,6 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
-  // Common module-level metadata that's shared between all coroutine clones 
and
-  // doesn't need to be cloned itself.
-  const MetadataSetTy &CommonDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -63,12 +60,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
  Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI, const MetadataSetTy &CommonDebugInfo)
+ TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
 FKind(Shape.ABI == ABI::Async ? CloneKind::Async
   : CloneKind::Continuation),
-Builder(OrigF.getContext()), TTI(TTI), 
CommonDebugInfo(CommonDebugInfo),
-NewF(NewF), ActiveSuspend(ActiveSuspend) {
+Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
+ActiveSuspend(ActiveSuspend) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 assert(NewF && "need existing function for continuation");
@@ -77,11 +74,9 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI,
- const MetadataSetTy &CommonDebugInfo)
+ CloneKind FKind, TargetTransformInfo &TTI)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-Builder(OrigF.getContext()), TTI(TTI),
-CommonDebugInfo(CommonDebugInfo) {}
+Builder(OrigF.getContext()), TTI(TTI) {}
 
   virtual ~BaseCloner() {}
 
@@ -89,14 +84,12 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
-   TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo) {
+   TargetTransformInfo &TTI) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 TimeTraceScope FunctionScope("BaseCloner");
 
-BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
-  CommonDebugInfo);
+BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
 Cloner.create();
 return Cloner.getFunction();
   }
@@ -136,9 +129,8 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-   CloneKind FKind, TargetTransformInfo &TTI,
-   const MetadataSetTy &CommonDebugInfo)
-  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, CommonDebugInfo) {}
+   CloneKind FKind, TargetTransformInfo &TTI)
+  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
 
   void create() override;
 
@@ -146,12 +138,11 @@ class SwitchCloner : public BaseCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,

[llvm-branch-commits] [llvm] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto with a MetadataPredicate (PR #129148)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129148

>From df8853d497c41d020baa50078036509910d94f55 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:07:03 -0800
Subject: [PATCH] [NFC][Cloning] Replace DIFinder usage in CloneFunctionInto
 with a MetadataPredicate

Summary:
The new code should be functionally identical to the old one (but
faster). The reasoning is as follows.

In the old code when cloning within the module:
1. DIFinder traverses and collects *all* debug info reachable from a
   function, its instructions, and its owning compile unit.
2. Then "compile units, types, other subprograms, and lexical blocks of
   other subprograms" are saved in a set.
3. Then when we MapMetadata, we traverse the function's debug info
   _again_ and those nodes that are in the set from p.2 are identity
   mapped.

This looks equivalent to just doing step 3 with identity mapping based
on a predicate that says to identity map "compile units, types, other
subprograms, and lexical blocks of other subprograms" (same as in step
2). This is what the new code does.

Test Plan:
ninja check-all
There's a bunch of tests around cloning and all of them pass.

stack-info: PR: https://github.com/llvm/llvm-project/pull/129148, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/6
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 32 -
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 502c4898c5940..8080dca09be00 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -50,6 +50,30 @@ void collectDebugInfoFromInstructions(const Function &F,
   DIFinder.processInstruction(*M, I);
   }
 }
+
+// Create a predicate that matches the metadata that should be identity mapped
+// during function cloning.
+MetadataPredicate createIdentityMDPredicate(const Function &F,
+CloneFunctionChangeType Changes) {
+  if (Changes >= CloneFunctionChangeType::DifferentModule)
+return [](const Metadata *MD) { return false; };
+
+  DISubprogram *SPClonedWithinModule = F.getSubprogram();
+  return [=](const Metadata *MD) {
+// Avoid cloning types, compile units, and (other) subprograms.
+if (isa(MD) || isa(MD))
+  return true;
+
+if (auto *SP = dyn_cast(MD); SP)
+  return SP != SPClonedWithinModule;
+
+// If a subprogram isn't going to be cloned skip its lexical blocks as 
well.
+if (auto *LScope = dyn_cast(MD); LScope)
+  return LScope->getSubprogram() != SPClonedWithinModule;
+
+return false;
+  };
+}
 } // namespace
 
 /// See comments in Cloning.h.
@@ -325,13 +349,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
 }
   }
 
-  DISubprogram *SPClonedWithinModule =
-  CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
-
-  MetadataPredicate IdentityMD =
-  [MDSet =
-   FindDebugInfoToIdentityMap(Changes, DIFinder, 
SPClonedWithinModule)](
-  const Metadata *MD) { return MDSet.contains(MD); };
+  MetadataPredicate IdentityMD = createIdentityMDPredicate(*OldFunc, Changes);
 
   // Cloning is always a Module level operation, since Metadata needs to be
   // cloned.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning (PR #129152)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129152

>From fbb31608bf17317b97a3f0d09711de8f82851219 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 13:02:37 -0800
Subject: [PATCH] [NFC][Cloning] Remove now unused CollectDebugInfoForCloning

Summary:
This function is no longer used, let's remove it from the header and
impl.

Test Plan:
ninja check-llvm-unit

stack-info: PR: https://github.com/llvm/llvm-project/pull/129152, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/10
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 14 -
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 21 
 2 files changed, 35 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index ae00c16e7eada..ec1a1d5faa7e9 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -230,20 +230,6 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
 
-/// Collect debug information such as types, compile units, and other
-/// subprograms that are reachable from \p F and can be considered global for
-/// the purposes of cloning (and hence not needing to be cloned).
-///
-/// What debug information should be processed depends on \p Changes: when
-/// cloning into the same module we process \p F's subprogram and instructions;
-/// when into a cloned module, neither of those.
-///
-/// Returns DISubprogram of the cloned function when cloning into the same
-/// module or nullptr otherwise.
-DISubprogram *CollectDebugInfoForCloning(const Function &F,
- CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder);
-
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 11033aeec7dda..f32d9454eb076 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -168,27 +168,6 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
  OldAttrs.getRetAttrs(), NewArgAttrs));
 }
 
-DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
-   CloneFunctionChangeType Changes,
-   DebugInfoFinder &DIFinder) {
-  // CloneModule takes care of cloning debug info for ClonedModule. Cloning 
into
-  // DifferentModule is taken care of separately in ClonedFunctionInto as part
-  // of llvm.dbg.cu update.
-  if (Changes >= CloneFunctionChangeType::DifferentModule)
-return nullptr;
-
-  DISubprogram *SPClonedWithinModule = nullptr;
-  if (Changes < CloneFunctionChangeType::DifferentModule) {
-SPClonedWithinModule = F.getSubprogram();
-  }
-  if (SPClonedWithinModule)
-DIFinder.processSubprogram(SPClonedWithinModule);
-
-  collectDebugInfoFromInstructions(F, DIFinder);
-
-  return SPClonedWithinModule;
-}
-
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function 
&OldFunc,
  ValueToValueMapTy &VMap,
  RemapFlags RemapFlag,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits


@@ -5255,6 +5283,51 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+
+// TODO Handle other ops with entry block args.
+llvm::TypeSwitch(*oper)
+.Case([&](omp::WsloopOp wsloopOp) {
+  forwardPrivateArgs(wsloopOp, moduleTranslation);
+  forwardReductionArgs(wsloopOp, moduleTranslation);
+})
+.Case([&](omp::ParallelOp parallelOp) {
+  forwardPrivateArgs(parallelOp, moduleTranslation);
+  forwardReductionArgs(parallelOp, moduleTranslation);
+})
+.Case([&](omp::TaskOp taskOp) {
+  forwardPrivateArgs(taskOp, moduleTranslation);
+});
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;

skatrak wrote:

Yes, I meant that in other places we need to call `findAllocaInsertPoint` 
before adding new allocas. That will either return the entry block of the 
function or an alternative spot, if an `OpenMPAllocaStackFrame` was introduced 
by a parent operation.

So, the current insertion point there will always be valid, as you say, but 
maybe it wouldn't be the right place for allocas specifically. I'm not that 
familiar with this, so I may be wrong. At the end of the day operations created 
here are later removed from the LLVM module, so maybe this doesn't even matter.

https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Erich Keane via llvm-branch-commits

https://github.com/erichkeane approved this pull request.

This is an unfortunate and pretty nasty regression that I think we ought to 
fix.  I am pretty confident that this is low risk as well, so I'm in favor of 
backporting this.

https://github.com/llvm/llvm-project/pull/130950
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Clang-tidy operator& hijacker. (PR #128366)

2025-03-12 Thread Denis Mikhailov via llvm-branch-commits

denzor200 wrote:

This check should be in regular Clang-Tidy: using `std::addressof` is relevant 
not only for libc++; any user-written code might follow the guideline to use 
`std::addressof` instead of `operator&` for a generic type. For example, the 
Boost library: 
https://github.com/boostorg/pfr/blob/f09e6aeae9d050897fff72b93d5f5e866cc5e11a/include/boost/pfr/detail/core_name20_static.hpp#L196

Please, look at the issue about it: 
https://github.com/llvm/llvm-project/issues/121172

https://github.com/llvm/llvm-project/pull/128366
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SPARC][MC] Add tests for VIS family instructions (PR #130967)

2025-03-12 Thread via llvm-branch-commits

https://github.com/koachan updated 
https://github.com/llvm/llvm-project/pull/130967

>From e2e0d44800b65a8fbddd6234c2ee9f83af92d7da Mon Sep 17 00:00:00 2001
From: Koakuma 
Date: Wed, 12 Mar 2025 21:14:42 +0700
Subject: [PATCH 1/2] Add missing NO-VIS lines

Created using spr 1.3.5
---
 llvm/test/MC/Sparc/sparc-vis.s | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/MC/Sparc/sparc-vis.s b/llvm/test/MC/Sparc/sparc-vis.s
index bf01da19293d0..bed901b6a7272 100644
--- a/llvm/test/MC/Sparc/sparc-vis.s
+++ b/llvm/test/MC/Sparc/sparc-vis.s
@@ -199,28 +199,39 @@ fcmpeq16 %f0, %f2, %o0
 ! VIS: fcmpeq32 %f0, %f2, %o0  ! encoding: 
[0x91,0xb0,0x05,0xc2]
 fcmpeq32 %f0, %f2, %o0
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge8 %o0, %o1, %o2 ! encoding: 
[0x95,0xb2,0x00,0x09]
 edge8 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge8l %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x00,0x49]
 edge8l %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge16 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x00,0x89]
 edge16 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge16l %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x00,0xc9]
 edge16l %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge32 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x01,0x09]
 edge32 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge32l %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x01,0x49]
 edge32l %o0, %o1, %o2
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: pdist %f0, %f2, %f4 ! encoding: 
[0x89,0xb0,0x07,0xc2]
 pdist %f0, %f2, %f4
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array8 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x02,0x09]
 array8 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array16 %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x02,0x49]
 array16 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array32 %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x02,0x89]
 array32 %o0, %o1, %o2
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: shutdown! encoding: 
[0x81,0xb0,0x10,0x00]
 shutdown

>From b98295fcdaa9fc1d6a839956c304dd5a7d31cc24 Mon Sep 17 00:00:00 2001
From: Koakuma 
Date: Wed, 12 Mar 2025 22:27:36 +0700
Subject: [PATCH 2/2] Fix typo in comment

Created using spr 1.3.5
---
 llvm/lib/Target/Sparc/SparcInstrVIS.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td 
b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 6d0f12da3afcf..fbf56ae22cd30 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -7,7 +7,7 @@
 
//===--===//
 //
 // This file contains instruction formats, definitions and patterns needed for
-// VIS, VIS II, VIS II instructions on SPARC.
+// VIS, VIS II, VIS III instructions on SPARC.
 
//===--===//
 
 // VIS Instruction Format.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MemCpyOpt] Fix clobber check in fca2memcpy optimization (PR #130964)

2025-03-12 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic approved this pull request.


https://github.com/llvm/llvm-project/pull/130964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak edited 
https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits


@@ -24,7 +25,67 @@ namespace flangomp {
 
 namespace {
 namespace looputils {
-using LoopNest = llvm::SetVector;
+/// Stores info needed about the induction/iteration variable for each `do
+/// concurrent` in a loop nest.
+struct InductionVariableInfo {
+  /// The operation allocating memory for iteration variable.
+  mlir::Operation *iterVarMemDef;
+};
+
+using LoopNestToIndVarMap =
+llvm::MapVector;
+
+/// For the \p doLoop parameter, find the operation that declares its iteration
+/// variable or allocates memory for it.
+///
+/// For example, give the following loop:

skatrak wrote:

```suggestion
/// For example, given the following loop:
```

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking (PR #130617)

2025-03-12 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/130617

>From 936a6aabb39df4eb58fb60facd826685746906c4 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH 1/2] [SeparateConstOffsetFromGEP] Preserve inbounds flag based
 on ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++
 .../AMDGPU/preserve-inbounds.ll   | 23 +++
 .../NVPTX/split-gep-and-gvn.ll| 16 ++---
 .../NVPTX/split-gep.ll|  8 +++
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp 
b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ab8e979e7b40a..7f93115499bc9 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 // and the old index if they are not used.
 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+MayRecoverInbounds =
+MayRecoverInbounds && computeKnownBits(NewIdx, 
*DL).isNonNegative();
   }
 }
   }
@@ -1100,11 +1104,15 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias 
analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  GEPNoWrapFlags NewGEPFlags =
+  MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 422e5d8215502..01619aa481ddd 100644
--- 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -16,3 +16,26 @@ entry:
   %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx
   ret ptr %arrayidx
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr 
[[TMP0]], i64 4
+; CHECK-NEXT:[[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr 
[[TMP2]], i64 4
+; CHECK-NEXT:store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..4474585bf9b06 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/

[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits


@@ -5315,6 +5320,46 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might need LLVM values 
defined
+  // in their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {

skatrak wrote:

I'm wondering what should happen to OpenMP ops that don't have regions. If they 
return a value, it seems like that value could end up impacting what's passed 
into an `omp.map.info` as an argument. Maybe we should map their results to 
something as well.

https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits


@@ -5255,6 +5283,51 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+
+// TODO Handle other ops with entry block args.
+llvm::TypeSwitch(*oper)
+.Case([&](omp::WsloopOp wsloopOp) {
+  forwardPrivateArgs(wsloopOp, moduleTranslation);
+  forwardReductionArgs(wsloopOp, moduleTranslation);
+})
+.Case([&](omp::ParallelOp parallelOp) {
+  forwardPrivateArgs(parallelOp, moduleTranslation);
+  forwardReductionArgs(parallelOp, moduleTranslation);
+})
+.Case([&](omp::TaskOp taskOp) {
+  forwardPrivateArgs(taskOp, moduleTranslation);
+});
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;

skatrak wrote:

One thing that I'm thinking for both this case and OpenMP ops that return 
values is whether it makes sense to use `undef` to represent them temporarily 
until they are removed or something else, rather than `alloca`.

https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Kareem Ergawy via llvm-branch-commits


@@ -5315,6 +5320,46 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might need LLVM values 
defined
+  // in their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {

ergawy wrote:

Can you provide an example where this might happen?

https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/130915

None

>From cd50fad71b6cd07d029fa055f52580d1903ece27 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 14:11:51 +0700
Subject: [PATCH] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate
 poison

---
 .../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp |  3 +++
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll  | 16 
 2 files changed, 19 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5314738b2b8ac..bf53018439e9f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -636,6 +636,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
 }
 
+if (isa(Src))
+  return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
+
 if (isa(Src)) {
   return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
 }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index fca3860240294..78606b1c869d1 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -274,6 +274,14 @@ declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind 
readnone
 declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
 
 
+define float @test_constant_fold_frexp_mant_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_poison(
+; CHECK-NEXT:ret float poison
+;
+  %val = call float @llvm.amdgcn.frexp.mant.f32(float poison)
+  ret float %val
+}
+
 define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
 ; CHECK-NEXT:ret float undef
@@ -442,6 +450,14 @@ define double @test_constant_fold_frexp_mant_f64_min_num() 
nounwind {
 declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
 declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
 
+define i32 @test_constant_fold_frexp_exp_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_poison(
+; CHECK-NEXT:ret i32 poison
+;
+  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float poison)
+  ret i32 %val
+}
+
 define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
 ; CHECK-NEXT:ret i32 undef

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/130078

>From 9de8c664bad3a851e3b9644711b24c6449db9e49 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Thu, 6 Mar 2025 03:16:59 -0600
Subject: [PATCH 1/4] [flang][OpenMP] Translate OpenMP scopes when compiling
 for target device

If a `target` directive is nested in a host OpenMP directive (e.g.
parallel, task, or a worksharing loop), flang currently crashes if the
target directive-related MLIR ops (e.g. `omp.map.bounds` and
`omp.map.info`) depend on SSA values defined inside the parent host
OpenMP directives/ops.

This PR tries to solve this problem by treating these parent OpenMP ops
as "SSA scopes". Whenever we are translating for the device, instead of
completely translating host ops, we just translate their MLIR ops as pure
SSA values.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  |  59 ++--
 .../openmp-target-nesting-in-host-ops.mlir| 136 ++
 2 files changed, 186 insertions(+), 9 deletions(-)
 create mode 100644 
mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b9893716980fe..f277f35fa51eb 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -537,6 +537,19 @@ static llvm::omp::ProcBindKind 
getProcBindKind(omp::ClauseProcBindKind kind) {
   llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
+/// Maps elements of \p blockArgs (which are MLIR values) to the corresponding
+/// LLVM values of \p operands' elements. This is useful when an OpenMP region
+/// with entry block arguments is converted to LLVM. In this case \p blockArgs
+/// are (part of) of the OpenMP region's entry arguments and \p operands are
+/// (part of) of the operands to the OpenMP op containing the region.
+static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
+omp::BlockArgOpenMPOpInterface blockArgIface) {
+  llvm::SmallVector> blockArgsPairs;
+  blockArgIface.getBlockArgsPairs(blockArgsPairs);
+  for (auto [var, arg] : blockArgsPairs)
+moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
+}
+
 /// Helper function to map block arguments defined by ignored loop wrappers to
 /// LLVM values and prevent any uses of those from triggering null pointer
 /// dereferences.
@@ -549,17 +562,10 @@ convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
   // Map block arguments directly to the LLVM value associated to the
   // corresponding operand. This is semantically equivalent to this wrapper not
   // being present.
-  auto forwardArgs =
-  [&moduleTranslation](omp::BlockArgOpenMPOpInterface blockArgIface) {
-llvm::SmallVector> blockArgsPairs;
-blockArgIface.getBlockArgsPairs(blockArgsPairs);
-for (auto [var, arg] : blockArgsPairs)
-  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
-  };
-
   return llvm::TypeSwitch(opInst)
   .Case([&](omp::SimdOp op) {
-forwardArgs(cast(*op));
+forwardArgs(moduleTranslation,
+cast(*op));
 op.emitWarning() << "simd information on composite construct 
discarded";
 return success();
   })
@@ -5294,6 +5300,7 @@ convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase 
&builder,
   return convertHostOrTargetOperation(op, builder, moduleTranslation);
 }
 
+
 static LogicalResult
 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -5313,6 +5320,40 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+if (auto blockArgsIface =
+dyn_cast(oper))
+  forwardArgs(moduleTranslation, blockArgsIface);
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;
+  }
+}
+
+for (Region ®ion : oper->getRegions()) {
+  auto result = convertOmpOpRegions(
+  region, oper->getNam

[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Kareem Ergawy via llvm-branch-commits


@@ -5255,6 +5283,51 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+
+// TODO Handle other ops with entry block args.
+llvm::TypeSwitch(*oper)
+.Case([&](omp::WsloopOp wsloopOp) {
+  forwardPrivateArgs(wsloopOp, moduleTranslation);
+  forwardReductionArgs(wsloopOp, moduleTranslation);
+})
+.Case([&](omp::ParallelOp parallelOp) {
+  forwardPrivateArgs(parallelOp, moduleTranslation);
+  forwardReductionArgs(parallelOp, moduleTranslation);
+})
+.Case([&](omp::TaskOp taskOp) {
+  forwardPrivateArgs(taskOp, moduleTranslation);
+});
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;

ergawy wrote:

I think `undef` is a better idea than using a fake `alloca`, less confusing. 
Did that.

https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CUDA][HIP] fix virtual dtor host/device attr (PR #130126)

2025-03-12 Thread Yaxun Liu via llvm-branch-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/130126

>From 64ecdf75962cb0e849ee2d39eca900329d3cc745 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Fri, 28 Feb 2025 09:58:19 -0500
Subject: [PATCH] [CUDA][HIP] fix virtual dtor host/device attr (#128926)

When inferring the host/device attrs of a virtual dtor of an explicit
template class instantiation, clang should be conservative.
This guarantees that dtors that may call host functions do not
get an implicit device attr and therefore are not emitted
on the device side.

Backports: 0f0665db067f d37a39207bc1

Fixes: #108548
---
 clang/docs/HIPSupport.rst   |  20 ++
 clang/include/clang/Sema/Sema.h |   2 +-
 clang/lib/Sema/Sema.cpp |  43 +
 clang/lib/Sema/SemaCUDA.cpp |  23 ++-
 clang/lib/Sema/SemaDecl.cpp |  15 +
 clang/test/SemaCUDA/dtor.cu | 104 
 6 files changed, 204 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/SemaCUDA/dtor.cu

diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index 481ed39230813..8f473c21e1918 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -286,6 +286,26 @@ Example Usage
   basePtr->virtualFunction(); // Allowed since obj is constructed in 
device code
}
 
+Host and Device Attributes of Default Destructors
+=================================================
+
+If a default destructor does not have explicit host or device attributes,
+clang infers these attributes based on the destructors of its data members
+and base classes. If any conflicts are detected among these destructors,
+clang diagnoses the issue. Otherwise, clang adds an implicit host or device
+attribute according to whether the data members' and base classes'
+destructors can execute on the host or device side.
+
+For explicitly instantiated template classes with virtual destructors, which must be emitted,
+the inference adopts a conservative approach. In this case, implicit host or
+device attributes from member and base class destructors are ignored. This
+precaution is necessary because, although a constexpr destructor carries
+implicit host or device attributes, a constexpr function may call a
+non-constexpr function, which is by default a host function.
+
+Users can override the inferred host and device attributes of default
+destructors by adding explicit host and device attributes to them.
+
 C++ Standard Parallelism Offload Support: Compiler And Runtime
 ==============================================================
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a30a7076ea5d4..af648d7f9c63f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4336,11 +4336,11 @@ class Sema final : public SemaBase {
   // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check.
   bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee);
 
-private:
   /// Function or variable declarations to be checked for whether the deferred
   /// diagnostics should be emitted.
   llvm::SmallSetVector DeclsToCheckForDeferredDiags;
 
+private:
   /// Map of current shadowing declarations to shadowed declarations. Warn if
   /// it looks like the user is trying to modify the shadowing declaration.
   llvm::DenseMap ShadowingDecls;
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 9507d7602aa40..e0eac690e6e65 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1789,6 +1789,47 @@ class DeferredDiagnosticsEmitter
   Inherited::visitUsedDecl(Loc, D);
   }
 
+  // Visit member and base class dtors called by this dtor.
+  void VisitCalledDestructors(CXXDestructorDecl *DD) {
+const CXXRecordDecl *RD = DD->getParent();
+
+// Visit the dtors of all members
+for (const FieldDecl *FD : RD->fields()) {
+  QualType FT = FD->getType();
+  if (const auto *RT = FT->getAs())
+if (const auto *ClassDecl = dyn_cast(RT->getDecl()))
+  if (ClassDecl->hasDefinition())
+if (CXXDestructorDecl *MemberDtor = ClassDecl->getDestructor())
+  asImpl().visitUsedDecl(MemberDtor->getLocation(), MemberDtor);
+}
+
+// Also visit base class dtors
+for (const auto &Base : RD->bases()) {
+  QualType BaseType = Base.getType();
+  if (const auto *RT = BaseType->getAs())
+if (const auto *BaseDecl = dyn_cast(RT->getDecl()))
+  if (BaseDecl->hasDefinition())
+if (CXXDestructorDecl *BaseDtor = BaseDecl->getDestructor())
+  asImpl().visitUsedDecl(BaseDtor->getLocation(), BaseDtor);
+}
+  }
+
+  void VisitDeclStmt(DeclStmt *DS) {
+// Visit dtors called by variables that need destruction
+for (auto *D : DS->decls())
+  if (auto *VD = dyn_cast(D))
+if (VD->isThisDeclarationADefinition() &&
+VD->needsDestruction(S.Context)) {
+  QualType VT = VD->getType();
+  if (const auto *RT = VT->getAs())

[llvm-branch-commits] [llvm] AMDGPU: Make sqrt and rsq intrinsics propagate poison (PR #130914)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits


@@ -548,6 +548,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   case Intrinsic::amdgcn_sqrt:
   case Intrinsic::amdgcn_rsq: {
 Value *Src = II.getArgOperand(0);
+if (isa(Src))
+  return IC.replaceInstUsesWith(II, Src);

arsenm wrote:

We've done this for a while for FP ops. I think the reasoning is that if the 
original value could have been a signaling nan or denormal, that would go 
through canonicalization. We're still guaranteeing a canonical value by 
returning a qnan (although technically we don't guarantee this for generic math 
ops, but I guess we can maintain it for target intrinsics)

https://github.com/llvm/llvm-project/pull/130914
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MemCpyOpt] Fix clobber check in fca2memcpy optimization (PR #130964)

2025-03-12 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MemCpyOpt] Fix clobber check in fca2memcpy optimization (PR #130964)

2025-03-12 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130964

Backport 5da9044c40840187330526ca888290a95927a629

Requested by: @nikic

>From d630c925b0ef0f3a60b6f2173f1859e56bc6928d Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Wed, 12 Mar 2025 14:52:01 +0100
Subject: [PATCH] [MemCpyOpt] Fix clobber check in fca2memcpy optimization

This effectively reverts #108535. The old AA code was looking for
the *first* clobber between the load and store and then trying to
move all the way up there. The new MSSA based code instead found
the *last* clobber. There might still be an earlier clobber that
has not been accounted for.

Fixes #130632.

(cherry picked from commit 5da9044c40840187330526ca888290a95927a629)
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 24 ++-
 llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll  | 40 +++
 2 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp 
b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a80a85f38e74d..971d6012f6129 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,17 +638,19 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, 
LoadInst *LI,
   (EnableMemCpyOptWithoutLibcalls ||
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove {
 MemoryLocation LoadLoc = MemoryLocation::get(LI);
-MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
-   *StoreAccess = MSSA->getMemoryAccess(SI);
-
-// We use MSSA to check if an instruction may store to the memory we load
-// from in between the load and the store. If such an instruction is found,
-// we try to promote there instead of at the store position.
-auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-StoreAccess->getDefiningAccess(), LoadLoc, BAA);
-Instruction *P = MSSA->dominates(LoadAccess, Clobber)
- ? cast(Clobber)->getMemoryInst()
- : SI;
+
+// We use alias analysis to check if an instruction may store to
+// the memory we load from in between the load and the store. If
+// such an instruction is found, we try to promote there instead
+// of at the store position.
+// TODO: Can use MSSA for this.
+Instruction *P = SI;
+for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+  if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
+P = &I;
+break;
+  }
+}
 
 // If we found an instruction that may write to the loaded memory,
 // we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll 
b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 61e349e01ed91..7d4557aa331c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
 
 define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr 
align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, 
i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 
16, i1 false)
 ; CHECK-NEXT:ret void
 ;
   %1 = load %S, ptr %src
@@ -79,9 +79,9 @@ define void @copyalias(ptr %src, ptr %dst) {
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @addrproducer(
-; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
+; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1
 ; CHECK-NEXT:call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 
undef, i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 undef, 
i64 16, i1 false)
 ; CHECK-NEXT:ret void
 ;
   %1 = load %S, ptr %src
@@ -113,8 +113,8 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias 
%dst, ptr noalias %dstidp
 ; CHECK-NEXT:[[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
 ; CHECK-NEXT:[[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
 ; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 
[[DSTINDEX]]
-; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 
undef, i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
+; 

[llvm-branch-commits] [SPARC][MC] Add tests for VIS family instructions (PR #130967)

2025-03-12 Thread via llvm-branch-commits

https://github.com/koachan created 
https://github.com/llvm/llvm-project/pull/130967

Also fix up any mistakes/typos in instruction definitions.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [SPARC][MC] Add tests for VIS family instructions (PR #130967)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-sparc

Author: Koakuma (koachan)


Changes

Also fix up any mistakes/typos in instruction definitions.


---

Patch is 41.81 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130967.diff


7 Files Affected:

- (modified) llvm/lib/Target/Sparc/SparcInstrFormats.td (+15) 
- (modified) llvm/lib/Target/Sparc/SparcInstrInfo.td (+2) 
- (modified) llvm/lib/Target/Sparc/SparcInstrVIS.td (+64-42) 
- (added) llvm/test/MC/Disassembler/Sparc/sparc-vis.txt (+291) 
- (modified) llvm/test/MC/Sparc/sparc-vis.s (+225-3) 
- (added) llvm/test/MC/Sparc/sparc-vis2.s (+55) 
- (added) llvm/test/MC/Sparc/sparc-vis3.s (+133) 


``diff
diff --git a/llvm/lib/Target/Sparc/SparcInstrFormats.td 
b/llvm/lib/Target/Sparc/SparcInstrFormats.td
index 3939f4ed94276..7d32cd8e5671b 100644
--- a/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -201,6 +201,21 @@ class F3_3c opVal, bits<6> op3val, bits<9> opfval, 
dag outs, dag ins,
   let Inst{4-0}  = rs2;
 }
 
+// SIAM instruction
+class F3_3_siam opVal, bits<6> op3val, bits<9> opfval, dag outs, dag 
ins,
+   string asmstr, list pattern, InstrItinClass itin = NoItinerary>
+   : F3 {
+  bits<3> siam_mode;
+
+  let op = opVal;
+  let op3= op3val;
+  let rd = 0;
+  let rs1= 0;
+  let Inst{13-5} = opfval;   // fp opcode
+  let Inst{4-3}  = 0;
+  let Inst{2-0}  = siam_mode;
+}
+
 // Shift by register rs2.
 class F3_Sr opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
 string asmstr, list pattern, InstrItinClass itin = 
IIC_iu_instr>
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td 
b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index c3b1fdf14d73e..0e8f743a83d5b 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -82,6 +82,8 @@ def UseDeprecatedInsts : 
Predicate<"Subtarget->useV8DeprecatedInsts()">;
 // Instruction Pattern Stuff
 
//===--===//
 
+def siam_mode : PatLeaf<(imm), [{ return isUInt<3>(N->getZExtValue()); }]>;
+
 def simm10  : PatLeaf<(imm), [{ return isInt<10>(N->getSExtValue()); }]>;
 
 def simm11  : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td 
b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index bdefc70869d74..6d0f12da3afcf 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -12,71 +12,91 @@
 
 // VIS Instruction Format.
 class VISInstFormat opfval, dag outs, dag ins, string asmstr,
-  list pattern>
+list pattern = []>
   : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, pattern>;
 
-class VISInst opfval, string OpcStr, RegisterClass RC = DFPRegs>
+class VISInst opfval, string OpcStr, RegisterClass RC = DFPRegs,
+list pattern = []>
: VISInstFormat;
+!strconcat(OpcStr, " $rs1, $rs2, $rd"), pattern>;
 
 // VIS Instruction with integer destination register.
-class VISInstID opfval, string OpcStr>
+class VISInstID opfval, string OpcStr, list pattern = []>
: VISInstFormat;
+!strconcat(OpcStr, " $rs1, $rs2, $rd"), pattern>;
 
 // For VIS Instructions with no operand.
 let rd = 0, rs1 = 0, rs2 = 0 in
-class VISInst0 opfval, string asmstr>
-   : VISInstFormat;
+class VISInst0 opfval, string asmstr, list pattern = []>
+   : VISInstFormat;
 
 // For VIS Instructions with only rs1, rd operands.
 let rs2 = 0 in
-class VISInst1 opfval, string OpcStr, RegisterClass RC = DFPRegs>
+class VISInst1 opfval, string OpcStr, RegisterClass RC = DFPRegs,
+list pattern = []>
: VISInstFormat;
+!strconcat(OpcStr, " $rs1, $rd"), pattern>;
 
 // For VIS Instructions with only rs2, rd operands.
 let rs1 = 0 in
-class VISInst2 opfval, string OpcStr, RegisterClass RC = DFPRegs>
+class VISInst2 opfval, string OpcStr, RegisterClass RC = DFPRegs,
+list pattern = []>
: VISInstFormat;
+!strconcat(OpcStr, " $rs2, $rd"), pattern>;
 
 // For VIS Instructions with only rd operand.
 let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
-class VISInstD opfval, string OpcStr, RegisterClass RC = DFPRegs>
+class VISInstD opfval, string OpcStr, RegisterClass RC = DFPRegs,
+list pattern = []>
: VISInstFormat;
+!strconcat(OpcStr, " $rd"), pattern>;
 
 // VIS 1 Instructions
 let Predicates = [HasVIS] in {
 
 def FPADD16 : VISInst<0b00101, "fpadd16">;
-def FPADD16S: VISInst<0b001010001, "fpadd16s">;
+def FPADD16S: VISInst<0b001010001, "fpadd16s", FPRegs>;
 def FPADD32 : VISInst<0b001010010, "fpadd32">;
-def FPADD32S: VISInst<0b001010011, "fpadd32s">;
+def FPADD32S: VISInst<0b001010011, "fpadd32s", FPRegs>;
 def FPSUB16 : VISInst<0b001010100, "fpsub16">;
-def FPSUB16S: VISInst<0b001010101, "fpsub16S">;
+def FPSUB16S: VISInst<0b001010101, "fpsub16s",

[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-12 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak approved this pull request.

Thank you, LGTM!

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: undef deprecator found issues in your code. :warning:



You can test this locally with the following command:


```bash
git diff -U0 --pickaxe-regex -S 
'([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 
cc7cb76e135918ffb86ddfc193b1b66c0948e42c 
59d8dfc9eabae6b16cb28b1a05b9be087fec40b5 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
```




The following files introduce new uses of undef:
 - mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated 
and should only be used in the rare cases where no replacement is possible. For 
example, a load of uninitialized memory yields `undef`. You should use `poison` 
values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. 
If you need an operand with some unimportant value, you can add a new argument 
to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {
  ...
  br i1 undef, ...
}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
```

Please refer to the [Undefined Behavior 
Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.



https://github.com/llvm/llvm-project/pull/130078
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)

2025-03-12 Thread via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117037

>From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 12 Mar 2025 23:30:01 +
Subject: [PATCH] Fix EOF newlines.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 108446729d857..5832aa6754137 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -2,4 +2,4 @@
 // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
-// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"
\ No newline at end of file
+// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)

2025-03-12 Thread via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117037

>From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 12 Mar 2025 23:30:01 +
Subject: [PATCH] Fix EOF newlines.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 108446729d857..5832aa6754137 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -2,4 +2,4 @@
 // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
-// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"
\ No newline at end of file
+// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][CallGraphSection] Type id metadata for indirect calls (PR #117036)

2025-03-12 Thread via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117036


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace some undef uses in test metadata with poison (PR #131052)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/131052
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][CallGraphSection] Type id metadata for indirect calls (PR #117036)

2025-03-12 Thread via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117036


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make sqrt and rsq intrinsics propagate poison (PR #130914)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130914
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace some undef uses in test metadata with poison (PR #131052)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/131052
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make sqrt and rsq intrinsics propagate poison (PR #130914)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Mar 12, 10:54 PM EDT**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/130914).


https://github.com/llvm/llvm-project/pull/130914
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Mike Lothian via llvm-branch-commits

FireBurn wrote:

Will this make 20.1.1?

https://github.com/llvm/llvm-project/pull/130950
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Documenting Static Samplers binary representation (PR #131011)

2025-03-12 Thread via llvm-branch-commits

github-actions[bot] wrote:

⚠️ We detected that you are using a GitHub private e-mail address to contribute 
to the repo. Please turn off [Keep my email addresses 
private](https://github.com/settings/emails) setting in your account. See 
[LLVM 
Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it)
 for more information.

https://github.com/llvm/llvm-project/pull/131011
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Clang-tidy operator& hijacker. (PR #128366)

2025-03-12 Thread Mark de Wever via llvm-branch-commits

mordante wrote:

> This check should be in regular Clang-Tidy; use of `std::addressof` is relevant 
> not only for libc++: any user-written code might follow the guideline to use 
> `std::addressof` instead of `operator&` for a generic type. For example, the 
> Boost library: 
> https://github.com/boostorg/pfr/blob/f09e6aeae9d050897fff72b93d5f5e866cc5e11a/include/boost/pfr/detail/core_name20_static.hpp#L196
> 
> Please, look at the issue about it: #121172

As @philnik777 mentioned we like to add checks to our own plugin. Another 
benefit of our plugin is that the check is available in clang-tidy 19, 20, and 
HEAD and not just in HEAD.

Still I'm open to adding a similar check to the regular Clang Tidy. Then we can 
remove our own check once all libc++ supported clang-tidy versions have this 
check.





https://github.com/llvm/llvm-project/pull/128366
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Documenting Static Samplers binary representation (PR #131011)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: None (joaosaffran)


Changes

Closes: #131009 

---
Full diff: https://github.com/llvm/llvm-project/pull/131011.diff


1 Files Affected:

- (modified) llvm/docs/DirectX/DXContainer.rst (+45) 


``diff
diff --git a/llvm/docs/DirectX/DXContainer.rst 
b/llvm/docs/DirectX/DXContainer.rst
index 40c088462a452..8d32ad02d5e6a 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -612,3 +612,48 @@ RootDescriptorTable provides basic table structure:
 #. **NumDescriptorRanges**: Number of descriptor ranges
 #. **DescriptorRangesOffset**: Offset to descriptor range array
 
+Static Samplers
+~~~~~~~~~~~~~~~
+
+Static samplers provide a way to define fixed sampler states within the root 
signature itself.
+
+.. code-block:: cpp
+
+   struct StaticSamplerDesc {
+  FilterMode Filter;
+  TextureAddressMode AddressU;
+  TextureAddressMode AddressV;
+  TextureAddressMode AddressW;
+  float MipLODBias;
+  uint32_t MaxAnisotropy;
+  ComparisonFunc ComparisonFunc;
+  StaticBorderColor BorderColor;
+  float MinLOD;
+  float MaxLOD;
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  ShaderVisibility ShaderVisibility;
+   };
+
+
+The StaticSamplerDesc structure defines all properties of a static sampler:
+
+#. Filter: The filtering mode (e.g., point, linear, anisotropic) used for 
texture sampling. 
+   For details, check `Direct X documentation 
`_.
 
+#. AddressU: The addressing mode for the U texture coordinate.
+   For details, check `Direct X documentation 
`_.
 
+#. AddressV: The addressing mode for the V texture coordinate.
+#. AddressW: The addressing mode for the W texture coordinate.
+#. MipLODBias: Bias value applied to mipmap level of detail calculations.
+#. MaxAnisotropy: Maximum anisotropy level when using anisotropic filtering.
+#. ComparisonFunc: Comparison function used for comparison samplers.
+   For details, check `Direct X documentation 
`_.
 
+#. BorderColor: Predefined border color used when address mode is set to 
border.
+   For details, check `Direct X documentation 
`_.
 
+#. MinLOD: Minimum level of detail to use for sampling.
+#. MaxLOD: Maximum level of detail to use for sampling.
+#. ShaderRegister: The shader sampler register (s#) where this sampler is 
bound.
+#. RegisterSpace: The register space used for the binding.
+#. ShaderVisibility: Specifies which shader stages can access this sampler.
+   For details, check `Direct X documentation 
`_.
 
+

``




https://github.com/llvm/llvm-project/pull/131011
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Documenting Static Samplers binary representation (PR #131011)

2025-03-12 Thread via llvm-branch-commits

https://github.com/joaosaffran created 
https://github.com/llvm/llvm-project/pull/131011

Closes: #131009 

>From b13609de4e66c1b4574264b553594fab47a1b08f Mon Sep 17 00:00:00 2001
From: joaosaffran <126493771+joaosaff...@users.noreply.github.com>
Date: Wed, 12 Mar 2025 11:36:06 -0700
Subject: [PATCH] Adding Static Sampler Documentation

---
 llvm/docs/DirectX/DXContainer.rst | 45 +++
 1 file changed, 45 insertions(+)

diff --git a/llvm/docs/DirectX/DXContainer.rst 
b/llvm/docs/DirectX/DXContainer.rst
index 40c088462a452..8d32ad02d5e6a 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -612,3 +612,48 @@ RootDescriptorTable provides basic table structure:
 #. **NumDescriptorRanges**: Number of descriptor ranges
 #. **DescriptorRangesOffset**: Offset to descriptor range array
 
+Static Samplers
+~~~~~~~~~~~~~~~
+
+Static samplers provide a way to define fixed sampler states within the root 
signature itself.
+
+.. code-block:: cpp
+
+   struct StaticSamplerDesc {
+  FilterMode Filter;
+  TextureAddressMode AddressU;
+  TextureAddressMode AddressV;
+  TextureAddressMode AddressW;
+  float MipLODBias;
+  uint32_t MaxAnisotropy;
+  ComparisonFunc ComparisonFunc;
+  StaticBorderColor BorderColor;
+  float MinLOD;
+  float MaxLOD;
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  ShaderVisibility ShaderVisibility;
+   };
+
+
+The StaticSamplerDesc structure defines all properties of a static sampler:
+
+#. Filter: The filtering mode (e.g., point, linear, anisotropic) used for 
texture sampling. 
+   For details, check `Direct X documentation 
`_.
 
+#. AddressU: The addressing mode for the U texture coordinate.
+   For details, check `Direct X documentation 
`_.
 
+#. AddressV: The addressing mode for the V texture coordinate.
+#. AddressW: The addressing mode for the W texture coordinate.
+#. MipLODBias: Bias value applied to mipmap level of detail calculations.
+#. MaxAnisotropy: Maximum anisotropy level when using anisotropic filtering.
+#. ComparisonFunc: Comparison function used for comparison samplers.
+   For details, check `Direct X documentation 
`_.
 
+#. BorderColor: Predefined border color used when address mode is set to 
border.
+   For details, check `Direct X documentation 
`_.
 
+#. MinLOD: Minimum level of detail to use for sampling.
+#. MaxLOD: Maximum level of detail to use for sampling.
+#. ShaderRegister: The shader sampler register (s#) where this sampler is 
bound.
+#. RegisterSpace: The register space used for the binding.
+#. ShaderVisibility: Specifies which shader stages can access this sampler.
+   For details, check `Direct X documentation 
`_.
 
+

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC][Coro] Use CloneFunctionInto for coroutine cloning instead of CloneFunction (PR #129149)

2025-03-12 Thread Artem Pianykh via llvm-branch-commits

https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/129149

>From 2706a7d6132377d1f4f4d480a0a153394ab3394f Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 25 Feb 2025 12:42:14 -0800
Subject: [PATCH] [NFC][Coro] Use CloneFunctionInto for coroutine cloning
 instead of CloneFunction

Summary:
CloneFunctionInto now is fast on its own and we don't need to use
CloneFunctionAttributes/Metadata/Body separately.

CommonDebugInfo in CoroClone is now unused and is cleaned up separately
in the next diff in the stack.

Test Plan:
ninja check-all

stack-info: PR: https://github.com/llvm/llvm-project/pull/129149, branch: 
users/artempyanykh/fast-coro-upstream-part2-take2/7
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b2c4e64319725..fabbf5f020a74 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -921,14 +921,8 @@ void coro::BaseCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  MetadataPredicate IdentityMD = [&](const Metadata *MD) {
-return CommonDebugInfo.contains(MD);
-  };
-  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
-  CloneFunctionMetadataInto(*NewF, OrigF, VMap, RF_None, nullptr, nullptr,
-&IdentityMD);
-  CloneFunctionBodyInto(*NewF, OrigF, VMap, RF_None, Returns, "", nullptr,
-nullptr, nullptr, &IdentityMD);
+  CloneFunctionInto(NewF, &OrigF, VMap,
+CloneFunctionChangeType::LocalChangesOnly, Returns);
 
   auto &Context = NewF->getContext();
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/130950

>From 88143ce2f76b2022b39612ee508da5cd8fa879eb Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Sat, 8 Mar 2025 20:32:14 -0300
Subject: [PATCH] [clang] fix matching of nested template template parameters

When checking the template template parameters of template template
parameters, the PartialOrdering context was not correctly propagated.

This also has a few drive-by fixes, such as checking the template parameter
lists of template template parameters, which was previously missing and
would have been its own bug, but we need to fix it in order to
prevent crashes in error recovery in a simple way.

Fixes #130362

Backport of: https://github.com/llvm/llvm-project/pull/130447
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 clang/include/clang/Sema/Sema.h   |  8 +++--
 clang/lib/Sema/SemaDecl.cpp   |  2 +-
 clang/lib/Sema/SemaDeclCXX.cpp|  2 +-
 clang/lib/Sema/SemaTemplate.cpp   | 36 ---
 clang/lib/Sema/SemaTemplateDeduction.cpp  | 16 +
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  2 +-
 clang/test/SemaTemplate/cwg2398.cpp   | 22 ++--
 .../SemaTemplate/temp_arg_template_p0522.cpp  |  3 +-
 clang/unittests/AST/DeclPrinterTest.cpp   | 16 -
 10 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 57a567509a068..18f792c1e1c9e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1058,6 +1058,9 @@ Bug Fixes to C++ Support
 - Fixed a substitution bug in transforming CTAD aliases when the type alias 
contains a non-pack template argument
   corresponding to a pack parameter (#GH124715)
 - Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
+- Fixes matching of nested template template parameters. (#GH130362)
+- Correctly diagnoses template template parameters which have a pack parameter
+  not in the last position.
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a30a7076ea5d4..06fc280734e91 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11280,14 +11280,16 @@ class Sema final : public SemaBase {
 
   /// The context in which we are checking a template parameter list.
   enum TemplateParamListContext {
-TPC_ClassTemplate,
-TPC_VarTemplate,
+// For this context, Class, Variable, TypeAlias, and non-pack Template
+// Template Parameters are treated uniformly.
+TPC_Other,
+
 TPC_FunctionTemplate,
 TPC_ClassTemplateMember,
 TPC_FriendClassTemplate,
 TPC_FriendFunctionTemplate,
 TPC_FriendFunctionTemplateDefinition,
-TPC_TypeAliasTemplate
+TPC_TemplateTemplateParameterPack,
   };
 
   /// Checks the validity of a template parameter list, possibly
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 01f09aba8c2ad..2b34bde63fdd8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8145,7 +8145,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
   (D.getCXXScopeSpec().isSet() && DC && DC->isRecord() &&
DC->isDependentContext())
   ? TPC_ClassTemplateMember
-  : TPC_VarTemplate))
+  : TPC_Other))
 NewVD->setInvalidDecl();
 
   // If we are providing an explicit specialization of a static variable
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index e4e3bbad1f520..85de46c9adab4 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13533,7 +13533,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, 
AccessSpecifier AS,
 // Merge any previous default template arguments into our parameters,
 // and check the parameter list.
 if (CheckTemplateParameterList(TemplateParams, OldTemplateParams,
-   TPC_TypeAliasTemplate))
+   TPC_Other))
   return nullptr;
 
 TypeAliasTemplateDecl *NewDecl =
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 938671055333c..1c555b38277b0 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1591,8 +1591,16 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter(
   assert(S->isTemplateParamScope() &&
  "Template template parameter not in template parameter scope!");
 
-  // Construct the parameter object.
   bool IsParameterPack = EllipsisLoc.isValid();
+
+  bool Invalid = false;
+  if (CheckTemplateParameterList(
+  Params,
+  /*OldParams=*/nullptr,
+  IsParameterPack ? TPC_TemplateTemplateParameterPack : TPC_Other))
+Invalid = true;
+
+  // Construct the parameter object.
   TemplateTemplateParmDecl *

[llvm-branch-commits] [llvm] AMDGPU: Replace <4 x i32> undef uses in tests with poison (PR #130902)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130902

>From 8535bb8a383b08ddaeb6f8220d25d1722b3a48cf Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:21:20 +0700
Subject: [PATCH] AMDGPU: Replace <4 x i32> undef uses in tests with poison

Most of these are from resource descriptors.
---
 .../AMDGPU/adjust-writemask-invalid-copy.ll   |  10 +-
 llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll|   2 +-
 llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll  |   4 +-
 .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll |  34 ++---
 llvm/test/CodeGen/AMDGPU/else.ll  |   2 +-
 .../AMDGPU/hsa-metadata-from-llvm-ir-full.ll  |   2 +-
 .../ipra-return-address-save-restore.ll   |   2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |   4 +-
 .../AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll   |   2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll |  44 +++---
 .../llvm.amdgcn.struct.buffer.atomic.ll   |   2 +-
 .../CodeGen/AMDGPU/mixed-wave32-wave64.ll |   2 +-
 .../AMDGPU/scheduler-subrange-crash.ll|  24 ++--
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/si-spill-cf.ll   | 134 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll  |   4 +-
 llvm/test/CodeGen/AMDGPU/smrd.ll  |   2 +-
 llvm/test/CodeGen/AMDGPU/split-smrd.ll|   4 +-
 .../AMDGPU/splitkit-getsubrangeformask.ll |  62 
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll  |   2 +-
 .../CodeGen/AMDGPU/subreg-eliminate-dead.ll   |   2 +-
 .../AMDGPU/undefined-subreg-liverange.ll  |   2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll  |  12 +-
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll   |   6 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll|   2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll   |  30 ++--
 26 files changed, 199 insertions(+), 199 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index 7e5a5302ac2e1..b913b5c3ab746 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %t

[llvm-branch-commits] [llvm] AMDGPU: Replace ptr addrspace(8) undef uses with poison (PR #130904)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130904

>From 1fe990ca191a47a9f111a1897369729031164ff1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:24:50 +0700
Subject: [PATCH] AMDGPU: Replace ptr addrspace(8) undef uses with poison

---
 llvm/test/CodeGen/AMDGPU/amdpal.ll|   2 +-
 .../CodeGen/AMDGPU/combine-add-zext-xor.ll|  12 +-
 llvm/test/CodeGen/AMDGPU/else.ll  |   2 +-
 .../AMDGPU/extract_subvector_vec4_vec3.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll   |   2 +-
 .../llvm.amdgcn.raw.ptr.buffer.atomic.ll  |   2 +-
 .../llvm.amdgcn.struct.ptr.buffer.atomic.ll   |   2 +-
 .../test/CodeGen/AMDGPU/loop_exit_with_xor.ll |   6 +-
 .../lower-work-group-id-intrinsics-hsa.ll |   2 +-
 .../lower-work-group-id-intrinsics-pal.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/merge-store-crash.ll |   2 +-
 .../test/CodeGen/AMDGPU/merge-store-usedef.ll |   2 +-
 .../AMDGPU/required-export-priority.ll|   2 +-
 .../AMDGPU/si-triv-disjoint-mem-access.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll|   8 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll   | 110 +-
 16 files changed, 83 insertions(+), 83 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll 
b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 171df029615ed..fd9227d2f4319 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -72,7 +72,7 @@ entry:
   %e = getelementptr [2 x i32], ptr addrspace(5) %v1, i32 0, i32 %idx
   %x = load i32, ptr addrspace(5) %e
   %xf = bitcast i32 %x to float
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) 
undef, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %xf, ptr addrspace(8) 
poison, i32 0, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll 
b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index b42542db6dbd8..f8227f0039af7 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -66,7 +66,7 @@ define i32 @combine_add_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -146,7 +146,7 @@ define i32 @combine_sub_zext_xor() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -229,7 +229,7 @@ define i32 @combine_add_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -313,7 +313,7 @@ define i32 @combine_sub_zext_or() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -392,7 +392,7 @@ define i32 @combine_add_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
@@ -471,7 +471,7 @@ define i32 @combine_sub_zext_and() {
   br i1 undef, label %bb9, label %bb
 
 bb:   ; preds = %.a
-  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) undef, 
i32 %.2, i32 64, i32 1)
+  %.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) 
poison, i32 %.2, i32 64, i32 1)
   %i5 = icmp eq i32 %.i3, 0
   br label %bb9
 
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 4a3018e67b17d..884f5305407a1 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -47,7 +47,7 @@ else:
 
 end:
   %r = phi float [ %v.if, %if ], [ %v.else, %else ]
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(f

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130903

>From aeb2f61c061c99871f66e2b1173ea7c25a23c0d5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:23:32 +0700
Subject: [PATCH] AMDGPU: Replace <8 x i32> undef uses in tests with poison

---
 .../AMDGPU/adjust-writemask-invalid-copy.ll| 10 +-
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 14 +++---
 llvm/test/CodeGen/AMDGPU/else.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll  |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |  4 ++--
 .../test/CodeGen/AMDGPU/mixed-wave32-wave64.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll  |  2 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll   |  4 ++--
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll   |  2 +-
 .../AMDGPU/undefined-subreg-liverange.ll   |  2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll   | 12 ++--
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll|  6 +++---
 llvm/test/CodeGen/AMDGPU/wave32.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll| 18 +-
 16 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index b913b5c3ab746..dd85edf59b18f 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll 
b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..1e40b4c9f04cf 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 
undef, <8 

[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130903
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov created 
https://github.com/llvm/llvm-project/pull/130950

When checking the template template parameters of template template parameters, 
the PartialOrdering context was not correctly propagated.

This also has a few drive-by fixes, such as checking the template parameter 
lists of template template parameters, which was previously missing and would 
have been its own bug, but we need to fix it in order to prevent crashes in 
error recovery in a simple way.

Fixes #130362

Backport of: https://github.com/llvm/llvm-project/pull/130447

>From ffed7fe255d7fa612367d9c128d71f204c5cd9e6 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Sat, 8 Mar 2025 20:32:14 -0300
Subject: [PATCH] [clang] fix matching of nested template template parameters

When checking the template template parameters of template template
parameters, the PartialOrdering context was not correctly propagated.

This also has a few drive-by fixes, such as checking the template parameter
lists of template template parameters, which was previously missing and
would have been its own bug, but we need to fix it in order to
prevent crashes in error recovery in a simple way.

Fixes #130362

Backport of: https://github.com/llvm/llvm-project/pull/130447
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 clang/include/clang/Sema/Sema.h   |  8 +++--
 clang/lib/Sema/SemaDecl.cpp   |  2 +-
 clang/lib/Sema/SemaDeclCXX.cpp|  2 +-
 clang/lib/Sema/SemaTemplate.cpp   | 36 ---
 clang/lib/Sema/SemaTemplateDeduction.cpp  | 16 +
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  2 +-
 clang/test/SemaTemplate/cwg2398.cpp   | 19 --
 .../SemaTemplate/temp_arg_template_p0522.cpp  |  3 +-
 clang/unittests/AST/DeclPrinterTest.cpp   | 16 -
 10 files changed, 61 insertions(+), 46 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 57a567509a068..18f792c1e1c9e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1058,6 +1058,9 @@ Bug Fixes to C++ Support
 - Fixed a substitution bug in transforming CTAD aliases when the type alias 
contains a non-pack template argument
   corresponding to a pack parameter (#GH124715)
 - Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
+- Fixes matching of nested template template parameters. (#GH130362)
+- Correctly diagnoses template template parameters which have a pack parameter
+  not in the last position.
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a30a7076ea5d4..06fc280734e91 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11280,14 +11280,16 @@ class Sema final : public SemaBase {
 
   /// The context in which we are checking a template parameter list.
   enum TemplateParamListContext {
-TPC_ClassTemplate,
-TPC_VarTemplate,
+// For this context, Class, Variable, TypeAlias, and non-pack Template
+// Template Parameters are treated uniformly.
+TPC_Other,
+
 TPC_FunctionTemplate,
 TPC_ClassTemplateMember,
 TPC_FriendClassTemplate,
 TPC_FriendFunctionTemplate,
 TPC_FriendFunctionTemplateDefinition,
-TPC_TypeAliasTemplate
+TPC_TemplateTemplateParameterPack,
   };
 
   /// Checks the validity of a template parameter list, possibly
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 01f09aba8c2ad..2b34bde63fdd8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8145,7 +8145,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
   (D.getCXXScopeSpec().isSet() && DC && DC->isRecord() &&
DC->isDependentContext())
   ? TPC_ClassTemplateMember
-  : TPC_VarTemplate))
+  : TPC_Other))
 NewVD->setInvalidDecl();
 
   // If we are providing an explicit specialization of a static variable
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index e4e3bbad1f520..85de46c9adab4 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13533,7 +13533,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, 
AccessSpecifier AS,
 // Merge any previous default template arguments into our parameters,
 // and check the parameter list.
 if (CheckTemplateParameterList(TemplateParams, OldTemplateParams,
-   TPC_TypeAliasTemplate))
+   TPC_Other))
   return nullptr;
 
 TypeAliasTemplateDecl *NewDecl =
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 938671055333c..1c555b38277b0 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1591,8 +1591,16 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter(
  

[llvm-branch-commits] [clang] Backport: [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/130950
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov milestoned 
https://github.com/llvm/llvm-project/pull/130950
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace insertelement undef with poison in cases with manual updates (PR #130898)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130898
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make sqrt and rsq intrinsics propagate poison (PR #130914)

2025-03-12 Thread Shilei Tian via llvm-branch-commits


@@ -548,6 +548,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   case Intrinsic::amdgcn_sqrt:
   case Intrinsic::amdgcn_rsq: {
 Value *Src = II.getArgOperand(0);
+if (isa<PoisonValue>(Src))
+  return IC.replaceInstUsesWith(II, Src);

shiltian wrote:

Why does `undef` give `QNaN` while `poison` gives `poison`?

https://github.com/llvm/llvm-project/pull/130914
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace tests using undef in shufflevector with poison (PR #130899)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Mar 12, 9:29 AM EDT**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/130899).


https://github.com/llvm/llvm-project/pull/130899
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace insertelement undef with poison in cases with manual updates (PR #130898)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Mar 12, 9:29 AM EDT**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/130898).


https://github.com/llvm/llvm-project/pull/130898
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace <4 x i32> undef uses in tests with poison (PR #130902)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130902

>From bee7d9e4e6758fae67207f52917cb4a07e188400 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 13:21:20 +0700
Subject: [PATCH] AMDGPU: Replace <4 x i32> undef uses in tests with poison

Most of these are from resource descriptors.
---
 .../AMDGPU/adjust-writemask-invalid-copy.ll   |  10 +-
 llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll|   2 +-
 llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll  |   4 +-
 .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll |  34 ++---
 llvm/test/CodeGen/AMDGPU/else.ll  |   2 +-
 .../AMDGPU/hsa-metadata-from-llvm-ir-full.ll  |   2 +-
 .../ipra-return-address-save-restore.ll   |   2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll |   4 +-
 .../AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll   |   2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll |  44 +++---
 .../llvm.amdgcn.struct.buffer.atomic.ll   |   2 +-
 .../CodeGen/AMDGPU/mixed-wave32-wave64.ll |   2 +-
 .../AMDGPU/scheduler-subrange-crash.ll|  24 ++--
 llvm/test/CodeGen/AMDGPU/sgpr-copy.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/si-spill-cf.ll   | 134 +-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll  |   4 +-
 llvm/test/CodeGen/AMDGPU/smrd.ll  |   2 +-
 llvm/test/CodeGen/AMDGPU/split-smrd.ll|   4 +-
 .../AMDGPU/splitkit-getsubrangeformask.ll |  62 
 .../CodeGen/AMDGPU/subreg-coalescer-crash.ll  |   2 +-
 .../CodeGen/AMDGPU/subreg-eliminate-dead.ll   |   2 +-
 .../AMDGPU/undefined-subreg-liverange.ll  |   2 +-
 .../CodeGen/AMDGPU/unigine-liveness-crash.ll  |  12 +-
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll   |   6 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll|   2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll   |  30 ++--
 26 files changed, 199 insertions(+), 199 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 
b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index 7e5a5302ac2e1..b913b5c3ab746 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
 ; GCN: buffer_store_dword v0
 define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
-  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> 
   %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
 
 define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
-  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
+  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float 
undef, <8 x i32> undef, <4 x i32> poison, i1 0, i32 0, i32 0)
   %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
   %tmp2 = shufflevector <4 x i32> %t

[llvm-branch-commits] [llvm] AMDGPU: Replace insertelement undef with poison in cases with manual updates (PR #130898)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/130898

>From bcc5ce87aef8461ae508223544291187b5cb1fbd Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 12 Mar 2025 12:32:31 +0700
Subject: [PATCH] AMDGPU: Replace insertelement undef with poison in cases with
 manual updates

I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly 
sensitive
to undef vs. poison.
---
 llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 16 +++---
 .../AMDGPU/promote-alloca-array-aggregate.ll  |  6 ++---
 .../AMDGPU/promote-alloca-loadstores.ll   | 22 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll 
b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index e72f3d3ce993a..d48b75a666db7 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -238,7 +238,7 @@ define <2 x half> 
@v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %ins0 = insertelement <2 x half> undef, half %lo, i32 0
+  %ins0 = insertelement <2 x half> poison, half %lo, i32 0
   %ins1 = insertelement <2 x half> %ins0, half %hi, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
   ret <2 x half> %canonicalized
@@ -2581,7 +2581,7 @@ define <2 x half> 
@v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 0
+  %vec = insertelement <2 x half> poison, half %val, i32 0
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2622,7 +2622,7 @@ define <2 x half> 
@v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <2 x half> undef, half %val, i32 1
+  %vec = insertelement <2 x half> poison, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
   ret <2 x half> %canonicalized
 }
@@ -2785,7 +2785,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 2.0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half %val, i32 0
+  %vec0 = insertelement <2 x half> poison, half %val, i32 0
   %vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2829,7 +2829,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half 
%val) #1 {
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, 2.0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <2 x half> undef, half 2.0, i32 0
+  %vec0 = insertelement <2 x half> poison, half 2.0, i32 0
   %vec1 = insertelement <2 x half> %vec0, half %val, i32 1
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
   ret <2 x half> %canonicalized
@@ -2925,7 +2925,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec = insertelement <4 x half> undef, half %val, i32 0
+  %vec = insertelement <4 x half> poison, half %val, i32 0
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec)
   ret <4 x half> %canonicalized
 }
@@ -2977,7 +2977,7 @@ define <4 x half> 
@v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
 ; GFX11-FAKE16-NEXT:s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 1
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1)
   ret <4 x half> %canonicalized
@@ -3035,7 +3035,7 @@ define <4 x half> 
@v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
 ; GFX11-FAKE16-NEXT:v_pack_b32_f16 v0, v0, 0
 ; GFX11-FAKE16-NEXT:v_pk_max_f16 v1, v1, v1
 ; GFX11-FAKE16-NEXT:s_setpc_b64 s[30:31]
-  %vec0 = insertelement <4 x half> undef, half %val0, i32 0
+  %vec0 = insertelement <4 x half> poison, half %val0, i32 0
   %vec1 = insertelement <4 x half> %vec0, half %val1, i32 2
   %vec2 = in

[llvm-branch-commits] [clang] Backport: [clang] fix matching of nested template template parameters (PR #130950)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Matheus Izvekov (mizvekov)


Changes

When checking the template template parameters of template template parameters, 
the PartialOrdering context was not correctly propagated.

This also has a few drive-by fixes, such as checking the template parameter 
lists of template template parameters, which was previously missing and would 
have been its own bug, but we need to fix it in order to prevent crashes in 
error recovery in a simple way.

Fixes #130362

Backport of: https://github.com/llvm/llvm-project/pull/130447

---
Full diff: https://github.com/llvm/llvm-project/pull/130950.diff


10 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+3) 
- (modified) clang/include/clang/Sema/Sema.h (+5-3) 
- (modified) clang/lib/Sema/SemaDecl.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaDeclCXX.cpp (+1-1) 
- (modified) clang/lib/Sema/SemaTemplate.cpp (+24-12) 
- (modified) clang/lib/Sema/SemaTemplateDeduction.cpp (+9-7) 
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+1-1) 
- (modified) clang/test/SemaTemplate/cwg2398.cpp (+7-12) 
- (modified) clang/test/SemaTemplate/temp_arg_template_p0522.cpp (+2-1) 
- (modified) clang/unittests/AST/DeclPrinterTest.cpp (+8-8) 


``diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 57a567509a068..18f792c1e1c9e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1058,6 +1058,9 @@ Bug Fixes to C++ Support
 - Fixed a substitution bug in transforming CTAD aliases when the type alias 
contains a non-pack template argument
   corresponding to a pack parameter (#GH124715)
 - Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
+- Fixes matching of nested template template parameters. (#GH130362)
+- Correctly diagnoses template template parameters which have a pack parameter
+  not in the last position.
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a30a7076ea5d4..06fc280734e91 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11280,14 +11280,16 @@ class Sema final : public SemaBase {
 
   /// The context in which we are checking a template parameter list.
   enum TemplateParamListContext {
-TPC_ClassTemplate,
-TPC_VarTemplate,
+// For this context, Class, Variable, TypeAlias, and non-pack Template
+// Template Parameters are treated uniformly.
+TPC_Other,
+
 TPC_FunctionTemplate,
 TPC_ClassTemplateMember,
 TPC_FriendClassTemplate,
 TPC_FriendFunctionTemplate,
 TPC_FriendFunctionTemplateDefinition,
-TPC_TypeAliasTemplate
+TPC_TemplateTemplateParameterPack,
   };
 
   /// Checks the validity of a template parameter list, possibly
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 01f09aba8c2ad..2b34bde63fdd8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8145,7 +8145,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
   (D.getCXXScopeSpec().isSet() && DC && DC->isRecord() &&
DC->isDependentContext())
   ? TPC_ClassTemplateMember
-  : TPC_VarTemplate))
+  : TPC_Other))
 NewVD->setInvalidDecl();
 
   // If we are providing an explicit specialization of a static variable
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index e4e3bbad1f520..85de46c9adab4 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13533,7 +13533,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, 
AccessSpecifier AS,
 // Merge any previous default template arguments into our parameters,
 // and check the parameter list.
 if (CheckTemplateParameterList(TemplateParams, OldTemplateParams,
-   TPC_TypeAliasTemplate))
+   TPC_Other))
   return nullptr;
 
 TypeAliasTemplateDecl *NewDecl =
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 938671055333c..1c555b38277b0 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1591,8 +1591,16 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter(
   assert(S->isTemplateParamScope() &&
  "Template template parameter not in template parameter scope!");
 
-  // Construct the parameter object.
   bool IsParameterPack = EllipsisLoc.isValid();
+
+  bool Invalid = false;
+  if (CheckTemplateParameterList(
+  Params,
+  /*OldParams=*/nullptr,
+  IsParameterPack ? TPC_TemplateTemplateParameterPack : TPC_Other))
+Invalid = true;
+
+  // Construct the parameter object.
   TemplateTemplateParmDecl *Param = TemplateTemplateParmDecl::Create(
   Context, Context.getTranslationUnitDecl(),
   NameLoc.isInvalid() ? TmpLoc : NameLoc, Depth, Position, IsParamet

[llvm-branch-commits] [llvm] AMDGPU: Replace <4 x i32> undef uses in tests with poison (PR #130902)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130902
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace <4 x i32> undef uses in tests with poison (PR #130902)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130902
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace ptr addrspace(8) undef uses with poison (PR #130904)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130904
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace ptr addrspace(8) undef uses with poison (PR #130904)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130904
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace ptr addrspace(1) undefs with poison (PR #130900)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130900
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace tests using undef in shufflevector with poison (PR #130899)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130899
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace tests using undef in shufflevector with poison (PR #130899)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130899
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace <4 x i32> undef uses in tests with poison (PR #130902)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130902
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Replace <8 x i32> undef uses in tests with poison (PR #130903)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/130903
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [MemCpyOpt] Fix clobber check in fca2memcpy optimization (PR #130964)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport 5da9044c40840187330526ca888290a95927a629

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/130964.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+13-11) 
- (modified) llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll (+33-7) 


``diff
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp 
b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a80a85f38e74d..971d6012f6129 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,17 +638,19 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, 
LoadInst *LI,
   (EnableMemCpyOptWithoutLibcalls ||
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove {
 MemoryLocation LoadLoc = MemoryLocation::get(LI);
-MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
-   *StoreAccess = MSSA->getMemoryAccess(SI);
-
-// We use MSSA to check if an instruction may store to the memory we load
-// from in between the load and the store. If such an instruction is found,
-// we try to promote there instead of at the store position.
-auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-StoreAccess->getDefiningAccess(), LoadLoc, BAA);
-Instruction *P = MSSA->dominates(LoadAccess, Clobber)
- ? cast(Clobber)->getMemoryInst()
- : SI;
+
+// We use alias analysis to check if an instruction may store to
+// the memory we load from in between the load and the store. If
+// such an instruction is found, we try to promote there instead
+// of at the store position.
+// TODO: Can use MSSA for this.
+Instruction *P = SI;
+for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+  if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
+P = &I;
+break;
+  }
+}
 
 // If we found an instruction that may write to the loaded memory,
 // we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll 
b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 61e349e01ed91..7d4557aa331c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
 
 define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr 
align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, 
i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 
16, i1 false)
 ; CHECK-NEXT:ret void
 ;
   %1 = load %S, ptr %src
@@ -79,9 +79,9 @@ define void @copyalias(ptr %src, ptr %dst) {
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @addrproducer(
-; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
+; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1
 ; CHECK-NEXT:call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 
undef, i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 undef, 
i64 16, i1 false)
 ; CHECK-NEXT:ret void
 ;
   %1 = load %S, ptr %src
@@ -113,8 +113,8 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias 
%dst, ptr noalias %dstidp
 ; CHECK-NEXT:[[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
 ; CHECK-NEXT:[[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
 ; CHECK-NEXT:[[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 
[[DSTINDEX]]
-; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 
undef, i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr 
align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, 
i64 16, i1 false)
 ; CHECK-NEXT:ret void
 ;
   %1 = load %S, ptr %src
@@ -130,7 +130,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @throwing_call(
 ; CHECK-NEXT:[[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 
16, i1 false)
-; CHECK-NEXT:call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT:call void @call() #[[ATTR2:[0

[llvm-branch-commits] [llvm] [SPARC][MC] Add tests for VIS family instructions (PR #130967)

2025-03-12 Thread via llvm-branch-commits

https://github.com/koachan updated 
https://github.com/llvm/llvm-project/pull/130967

>From e2e0d44800b65a8fbddd6234c2ee9f83af92d7da Mon Sep 17 00:00:00 2001
From: Koakuma 
Date: Wed, 12 Mar 2025 21:14:42 +0700
Subject: [PATCH] Add missing NO-VIS lines

Created using spr 1.3.5
---
 llvm/test/MC/Sparc/sparc-vis.s | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/MC/Sparc/sparc-vis.s b/llvm/test/MC/Sparc/sparc-vis.s
index bf01da19293d0..bed901b6a7272 100644
--- a/llvm/test/MC/Sparc/sparc-vis.s
+++ b/llvm/test/MC/Sparc/sparc-vis.s
@@ -199,28 +199,39 @@ fcmpeq16 %f0, %f2, %o0
 ! VIS: fcmpeq32 %f0, %f2, %o0  ! encoding: 
[0x91,0xb0,0x05,0xc2]
 fcmpeq32 %f0, %f2, %o0
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge8 %o0, %o1, %o2 ! encoding: 
[0x95,0xb2,0x00,0x09]
 edge8 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge8l %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x00,0x49]
 edge8l %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge16 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x00,0x89]
 edge16 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge16l %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x00,0xc9]
 edge16l %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge32 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x01,0x09]
 edge32 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: edge32l %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x01,0x49]
 edge32l %o0, %o1, %o2
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: pdist %f0, %f2, %f4 ! encoding: 
[0x89,0xb0,0x07,0xc2]
 pdist %f0, %f2, %f4
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array8 %o0, %o1, %o2! encoding: 
[0x95,0xb2,0x02,0x09]
 array8 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array16 %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x02,0x49]
 array16 %o0, %o1, %o2
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: array32 %o0, %o1, %o2   ! encoding: 
[0x95,0xb2,0x02,0x89]
 array32 %o0, %o1, %o2
 
+! NO-VIS: error: instruction requires a CPU feature not currently enabled
 ! VIS: shutdown! encoding: 
[0x81,0xb0,0x10,0x00]
 shutdown

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Backport: [BPF] Fix BitCast Assertion with NonZero AddrSpace (PR #130995)

2025-03-12 Thread via llvm-branch-commits

https://github.com/yonghong-song milestoned 
https://github.com/llvm/llvm-project/pull/130995
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Backport: [BPF] Fix BitCast Assertion with NonZero AddrSpace (PR #130995)

2025-03-12 Thread via llvm-branch-commits

https://github.com/yonghong-song created 
https://github.com/llvm/llvm-project/pull/130995

Alexei reported a bpf selftest failure with recent llvm for bpf prog
file progs/arena_spin_lock.c. The failure only happens when clang is
built with cmake option LLVM_ENABLE_ASSERTIONS=ON.

The error message looks like:
```
 clang: /home/yhs/work/yhs/llvm-project/llvm/lib/IR/Instructions.cpp:3460:
   llvm::BitCastInst::BitCastInst(Value *, Type *, const Twine &, 
InsertPosition):
   Assertion `castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"' failed.
```
Further investigation shows that the problem is triggered in
  BPF/BPFAbstractMemberAccess.cpp
for code
```
  auto *BCInst =
  new BitCastInst(Base, PointerType::getUnqual(BB->getContext()));
```
For the above BitCastInst, since 'Base' has non-zero AddrSpace, the
compiler expects the type also has the same AddrSpace. But the above
PointerType::getUnqual(...) does not have AddrSpace and hence causes the
assertion failure.

Providing the proper AddrSpace for the BitCast type fixed the issue.

Backport of: https://github.com/llvm/llvm-project/pull/130722

>From 9d7369b1c4f1b40d5e3a2e69616f79aad3a5 Mon Sep 17 00:00:00 2001
From: yonghong-song 
Date: Tue, 11 Mar 2025 11:23:53 -0700
Subject: [PATCH] [BPF] Fix BitCast Assertion with NonZero AddrSpace

Alexei reported a bpf selftest failure with recent llvm for bpf prog
file progs/arena_spin_lock.c. The failure only happens when clang is
built with cmake option LLVM_ENABLE_ASSERTIONS=ON.

The error message looks like:
```
 clang: /home/yhs/work/yhs/llvm-project/llvm/lib/IR/Instructions.cpp:3460:
   llvm::BitCastInst::BitCastInst(Value *, Type *, const Twine &, 
InsertPosition):
   Assertion `castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"' failed.
```
Further investigation shows that the problem is triggered in
  BPF/BPFAbstractMemberAccess.cpp
for code
```
  auto *BCInst =
  new BitCastInst(Base, PointerType::getUnqual(BB->getContext()));
```
For the above BitCastInst, since 'Base' has non-zero AddrSpace, the
compiler expects the type also has the same AddrSpace. But the above
PointerType::getUnqual(...) does not have AddrSpace and hence causes the
assertion failure.

Providing the proper AddrSpace for the BitCast type fixed the issue.

Co-authored-by: Yonghong Song 
(cherry picked from commit 5686786c550c6da6d1169b9bffc31cece1161902)
---
 .../Target/BPF/BPFAbstractMemberAccess.cpp|  5 +-
 llvm/test/CodeGen/BPF/CORE/arena_bitcast.ll   | 80 +++
 2 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/CORE/arena_bitcast.ll

diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp 
b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 646d57770164a..77ed246edbadf 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -1113,8 +1113,9 @@ bool BPFAbstractMemberAccess::transformGEPChain(CallInst 
*Call,
   Call->getIterator());
 
   // Generate a BitCast
-  auto *BCInst =
-  new BitCastInst(Base, PointerType::getUnqual(BB->getContext()));
+  auto *BCInst = new BitCastInst(
+  Base, PointerType::get(BB->getContext(),
+ Base->getType()->getPointerAddressSpace()));
   BCInst->insertBefore(Call->getIterator());
 
   // Generate a GetElementPtr
diff --git a/llvm/test/CodeGen/BPF/CORE/arena_bitcast.ll 
b/llvm/test/CodeGen/BPF/CORE/arena_bitcast.ll
new file mode 100644
index 0..bcd71c04d264d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/arena_bitcast.ll
@@ -0,0 +1,80 @@
+; RUN: opt -O2 %s | llvm-dis > %t1
+; RUN: llc -mcpu=v3 -filetype=asm -o - %t1 | FileCheck %s
+; Source code:
+;   struct lock_t {
+; int counter;
+;   } __attribute__((preserve_access_index));
+;
+;   #define __arena __attribute__((address_space(1)))
+;   int test(struct lock_t __arena *lock, unsigned val)
+;   {
+; return __sync_val_compare_and_swap((&lock->counter), val, 1);
+;   }
+; Compilation flag:
+;   clang -target bpf -O2 -g -S -emit-llvm -Xclang -disable-llvm-passes 
arena_bitcast.c
+
+target triple = "bpf"
+
+%struct.lock_t = type { i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @test(ptr addrspace(1) noundef %lock, i32 noundef %val) 
#0 !dbg !7 {
+entry:
+  %lock.addr = alloca ptr addrspace(1), align 8
+  %val.addr = alloca i32, align 4
+  store ptr addrspace(1) %lock, ptr %lock.addr, align 8, !tbaa !19
+#dbg_declare(ptr %lock.addr, !17, !DIExpression(), !24)
+  store i32 %val, ptr %val.addr, align 4, !tbaa !25
+#dbg_declare(ptr %val.addr, !18, !DIExpression(), !27)
+  %0 = load ptr addrspace(1), ptr %lock.addr, align 8, !dbg !28, !tbaa !19
+  %1 = call ptr addrspace(1) @llvm.preserve.struct.access.index.p1.p1(ptr 
addrspace(1) elementtype(%struct.lock_t) %0, i32 0, i32 0), !dbg !29, 
!llvm.preserve.access.index !12
+  %2 = load i32, ptr %val.addr, align 4, !dbg !

[llvm-branch-commits] [libcxx] [libc++] Clang-tidy operator& hijacker. (PR #128366)

2025-03-12 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 requested changes to this pull request.

@denzor200 It's a lot easier for us to add a libc++-specific clang-tidy check 
than a general one. libc++ checks have significantly lower quality 
requirements, since they only have to work for libc++ (making "seems to work 
fine" good enough). If anybody wants to make this a general check they're 
welcome, but it's certainly more effort.

https://github.com/llvm/llvm-project/pull/128366
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Clang-tidy operator& hijacker. (PR #128366)

2025-03-12 Thread Nikolas Klauser via llvm-branch-commits


@@ -0,0 +1,47 @@
+//===--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "clang-tidy/ClangTidyCheck.h"
+#include "clang-tidy/ClangTidyModuleRegistry.h"
+#include "clang/Tooling/FixIt.h"
+
+#include "robust_against_operator_ampersand.hpp"
+
+// This clang-tidy check ensures that we don't use operator& on dependant
+// types. If the type is user supplied it may call the type's operator&.
+// Instead use std::addressof.

philnik777 wrote:

This should go into the coding guidelines instead.

https://github.com/llvm/llvm-project/pull/128366
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Clang-tidy operator& hijacker. (PR #128366)

2025-03-12 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 edited 
https://github.com/llvm/llvm-project/pull/128366
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Move disabling of arg verification to before isFullyInternal(). (#130693) (PR #130998)

2025-03-12 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130998
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Move disabling of arg verification to before isFullyInternal(). (#130693) (PR #130998)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:

@uweigand What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/130998
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Move disabling of arg verification to before isFullyInternal(). (#130693) (PR #130998)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-systemz

Author: None (llvmbot)


Changes

Backport 378739f18208165f9831571a57f34d82f6663bc6

Requested by: @uweigand

---
Full diff: https://github.com/llvm/llvm-project/pull/130998.diff


1 Files Affected:

- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+10-5) 


``diff
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 1fb31c26e20d3..2b8269e440e90 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -10231,6 +10231,11 @@ static void printFunctionArgExts(const Function *F, 
raw_fd_ostream &OS) {
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Call(const SmallVectorImpl &Outs,
  const Function *F, SDValue Callee) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+return;
+
   bool IsInternal = false;
   const Function *CalleeFn = nullptr;
   if (auto *G = dyn_cast(Callee))
@@ -10252,6 +10257,11 @@ verifyNarrowIntegerArgs_Call(const 
SmallVectorImpl &Outs,
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Ret(const SmallVectorImpl &Outs,
 const Function *F) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+return;
+
   if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
 errs() << "ERROR: Missing extension attribute of returned "
<< "value from function:\n";
@@ -10268,11 +10278,6 @@ verifyNarrowIntegerArgs(const 
SmallVectorImpl &Outs,
   if (IsInternal || !Subtarget.isTargetELF())
 return true;
 
-  // Temporarily only do the check when explicitly requested, until it can be
-  // enabled by default.
-  if (!EnableIntArgExtCheck)
-return true;
-
   if (EnableIntArgExtCheck.getNumOccurrences()) {
 if (!EnableIntArgExtCheck)
   return true;

``




https://github.com/llvm/llvm-project/pull/130998
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/130915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [flang][OpenMP] Translate OpenMP scopes when compiling for target device (PR #130078)

2025-03-12 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/130078

>From 9de8c664bad3a851e3b9644711b24c6449db9e49 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Thu, 6 Mar 2025 03:16:59 -0600
Subject: [PATCH 1/4] [flang][OpenMP] Translate OpenMP scopes when compiling
 for target device

If a `target` directive is nested in a host OpenMP directive (e.g.
parallel, task, or a worksharing loop), flang currently crashes if the
target directive-related MLIR ops (e.g. `omp.map.bounds` and
`omp.map.info`) depend on SSA values defined inside the parent host
OpenMP directives/ops.

This PR tries to solve this problem by treating these parent OpenMP ops
as "SSA scopes". Whenever we are translating for the device, instead of
completely translating host ops, we just translate their MLIR ops as pure
SSA values.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  |  59 ++--
 .../openmp-target-nesting-in-host-ops.mlir| 136 ++
 2 files changed, 186 insertions(+), 9 deletions(-)
 create mode 100644 
mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b9893716980fe..f277f35fa51eb 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -537,6 +537,19 @@ static llvm::omp::ProcBindKind 
getProcBindKind(omp::ClauseProcBindKind kind) {
   llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
+/// Maps elements of \p blockArgs (which are MLIR values) to the corresponding
+/// LLVM values of \p operands' elements. This is useful when an OpenMP region
+/// with entry block arguments is converted to LLVM. In this case \p blockArgs
+/// are (part of) of the OpenMP region's entry arguments and \p operands are
+/// (part of) of the operands to the OpenMP op containing the region.
+static void forwardArgs(LLVM::ModuleTranslation &moduleTranslation,
+omp::BlockArgOpenMPOpInterface blockArgIface) {
+  llvm::SmallVector> blockArgsPairs;
+  blockArgIface.getBlockArgsPairs(blockArgsPairs);
+  for (auto [var, arg] : blockArgsPairs)
+moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
+}
+
 /// Helper function to map block arguments defined by ignored loop wrappers to
 /// LLVM values and prevent any uses of those from triggering null pointer
 /// dereferences.
@@ -549,17 +562,10 @@ convertIgnoredWrapper(omp::LoopWrapperInterface opInst,
   // Map block arguments directly to the LLVM value associated to the
   // corresponding operand. This is semantically equivalent to this wrapper not
   // being present.
-  auto forwardArgs =
-  [&moduleTranslation](omp::BlockArgOpenMPOpInterface blockArgIface) {
-llvm::SmallVector> blockArgsPairs;
-blockArgIface.getBlockArgsPairs(blockArgsPairs);
-for (auto [var, arg] : blockArgsPairs)
-  moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
-  };
-
   return llvm::TypeSwitch(opInst)
   .Case([&](omp::SimdOp op) {
-forwardArgs(cast(*op));
+forwardArgs(moduleTranslation,
+cast(*op));
 op.emitWarning() << "simd information on composite construct 
discarded";
 return success();
   })
@@ -5294,6 +5300,7 @@ convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase 
&builder,
   return convertHostOrTargetOperation(op, builder, moduleTranslation);
 }
 
+
 static LogicalResult
 convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -5313,6 +5320,40 @@ convertTargetOpsInNest(Operation *op, 
llvm::IRBuilderBase &builder,
   return WalkResult::interrupt();
 return WalkResult::skip();
   }
+
+  // Non-target ops might nest target-related ops, therefore, we
+  // translate them as non-OpenMP scopes. Translating them is needed by
+  // nested target-related ops since they might LLVM values defined in
+  // their parent non-target ops.
+  if (isa(oper->getDialect()) &&
+  oper->getParentOfType() &&
+  !oper->getRegions().empty()) {
+if (auto blockArgsIface =
+dyn_cast(oper))
+  forwardArgs(moduleTranslation, blockArgsIface);
+
+if (auto loopNest = dyn_cast(oper)) {
+  for (auto iv : loopNest.getIVs()) {
+// Create fake allocas just to maintain IR validity.
+moduleTranslation.mapValue(
+iv, builder.CreateAlloca(
+moduleTranslation.convertType(iv.getType(;
+  }
+}
+
+for (Region ®ion : oper->getRegions()) {
+  auto result = convertOmpOpRegions(
+  region, oper->getNam

[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Move disabling of arg verification to before isFullyInternal(). (#130693) (PR #130998)

2025-03-12 Thread Ulrich Weigand via llvm-branch-commits

uweigand wrote:

Fixes a significant compile time regression in LLVM 20, see 
https://github.com/llvm/llvm-project/issues/130541

https://github.com/llvm/llvm-project/pull/130998
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [SystemZ] Move disabling of arg verification to before isFullyInternal(). (#130693) (PR #130998)

2025-03-12 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130998

Backport 378739f18208165f9831571a57f34d82f6663bc6

Requested by: @uweigand

>From 64ae6413559e2f0fa9218b2f83919ec757404f3b Mon Sep 17 00:00:00 2001
From: Jonas Paulsson 
Date: Wed, 12 Mar 2025 11:33:12 -0600
Subject: [PATCH] [SystemZ]  Move disabling of arg verification to before
 isFullyInternal(). (#130693)

It has been found to be quite a slowdown to traverse the users of a
function from each call site when it is called many (~70k)
times. This patch fixes this for now as long as this verification
is disabled by default, but there is still a need to eventually
cache the results to avoid recomputation.

Fixes #130541

(cherry picked from commit 378739f18208165f9831571a57f34d82f6663bc6)
---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 1fb31c26e20d3..2b8269e440e90 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -10231,6 +10231,11 @@ static void printFunctionArgExts(const Function *F, 
raw_fd_ostream &OS) {
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Call(const SmallVectorImpl &Outs,
  const Function *F, SDValue Callee) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+return;
+
   bool IsInternal = false;
   const Function *CalleeFn = nullptr;
   if (auto *G = dyn_cast(Callee))
@@ -10252,6 +10257,11 @@ verifyNarrowIntegerArgs_Call(const 
SmallVectorImpl &Outs,
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Ret(const SmallVectorImpl &Outs,
 const Function *F) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+return;
+
   if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
 errs() << "ERROR: Missing extension attribute of returned "
<< "value from function:\n";
@@ -10268,11 +10278,6 @@ verifyNarrowIntegerArgs(const 
SmallVectorImpl &Outs,
   if (IsInternal || !Subtarget.isTargetELF())
 return true;
 
-  // Temporarily only do the check when explicitly requested, until it can be
-  // enabled by default.
-  if (!EnableIntArgExtCheck)
-return true;
-
   if (EnableIntArgExtCheck.getNumOccurrences()) {
 if (!EnableIntArgExtCheck)
   return true;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130915.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+3) 
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+16) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5314738b2b8ac..bf53018439e9f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -636,6 +636,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
 }
 
+if (isa(Src))
+  return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
+
 if (isa(Src)) {
   return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
 }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index fca3860240294..78606b1c869d1 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -274,6 +274,14 @@ declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind 
readnone
 declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
 
 
+define float @test_constant_fold_frexp_mant_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_poison(
+; CHECK-NEXT:ret float poison
+;
+  %val = call float @llvm.amdgcn.frexp.mant.f32(float poison)
+  ret float %val
+}
+
 define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
 ; CHECK-NEXT:ret float undef
@@ -442,6 +450,14 @@ define double @test_constant_fold_frexp_mant_f64_min_num() 
nounwind {
 declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
 declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
 
+define i32 @test_constant_fold_frexp_exp_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_poison(
+; CHECK-NEXT:ret i32 poison
+;
+  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float poison)
+  ret i32 %val
+}
+
 define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
 ; CHECK-NEXT:ret i32 undef

``




https://github.com/llvm/llvm-project/pull/130915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make sqrt and rsq intrinsics propagate poison (PR #130914)

2025-03-12 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/130914.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+2) 
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+24) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 6f6556365ebf6..5314738b2b8ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -548,6 +548,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   case Intrinsic::amdgcn_sqrt:
   case Intrinsic::amdgcn_rsq: {
 Value *Src = II.getArgOperand(0);
+if (isa(Src))
+  return IC.replaceInstUsesWith(II, Src);
 
 // TODO: Move to ConstantFolding/InstSimplify?
 if (isa(Src)) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 42ddc71dab848..fca3860240294 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -89,6 +89,14 @@ declare half @llvm.amdgcn.sqrt.f16(half) nounwind readnone
 declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone
 declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone
 
+define half @test_constant_fold_sqrt_f16_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_sqrt_f16_poison(
+; CHECK-NEXT:ret half poison
+;
+  %val = call half @llvm.amdgcn.sqrt.f16(half poison) nounwind readnone
+  ret half %val
+}
+
 define half @test_constant_fold_sqrt_f16_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f16_undef(
 ; CHECK-NEXT:ret half 0xH7E00
@@ -97,6 +105,14 @@ define half @test_constant_fold_sqrt_f16_undef() nounwind {
   ret half %val
 }
 
+define float @test_constant_fold_sqrt_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_sqrt_f32_poison(
+; CHECK-NEXT:ret float poison
+;
+  %val = call float @llvm.amdgcn.sqrt.f32(float poison) nounwind readnone
+  ret float %val
+}
+
 define float @test_constant_fold_sqrt_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_undef(
 ; CHECK-NEXT:ret float 0x7FF8
@@ -234,6 +250,14 @@ define double @test_amdgcn_sqrt_f64(double %arg) {
 
 declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
 
+define float @test_constant_fold_rsq_f32_poison() nounwind {
+; CHECK-LABEL: @test_constant_fold_rsq_f32_poison(
+; CHECK-NEXT:ret float poison
+;
+  %val = call float @llvm.amdgcn.rsq.f32(float poison) nounwind readnone
+  ret float %val
+}
+
 define float @test_constant_fold_rsq_f32_undef() nounwind {
 ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
 ; CHECK-NEXT:ret float 0x7FF8

``




https://github.com/llvm/llvm-project/pull/130914
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130915?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#130915** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130915?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130915?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#130914** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130914?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130913** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130913?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/130915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make frexp_exp and frexp_mant intrinsics propagate poison (PR #130915)

2025-03-12 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/130915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >