[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From d327d3edddf4a9a770cb8cfb4bbb35d0c3cc3de7 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic ` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +-
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a3e9700fa3089..e84d25afe620d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's  vector type
+  if (I->getType()->getScalarType()->isPointerTy() &&
+  I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
+unsigned AS =
+
cast(I->getType()->getScalarType())->getAddressSpace();
+ElementCount EC = cast(I->getType())->getElementCount();
+Value *BC = Builder.CreateBitCast(
+Result,
+VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)),
+EC));
+Value *IntToPtr = Builder.CreateIntToPtr(
+BC, VectorType::get(PointerType::get(Ctx, AS), EC));
+V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..36c1305a7c5df 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 08d0405345f57..4293df8c13571 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LAB

[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From bca16c0850eb48e8eedaf04ac744e1d00798438e Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120386

>From 02dd78717b392f9fbb3b9d436e58183898dd70e9 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:38:23 -0500
Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG

When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be casted to
an integer type of the same size.

commit-id:f9d761c5
---
 llvm/lib/Target/X86/X86ISelLowering.cpp|  4 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0fc50dc1a87b6..d604db17cb8cf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2650,6 +2650,10 @@ X86TargetLowering::X86TargetLowering(const 
X86TargetMachine &TM,
 setOperationAction(Op, MVT::f32, Promote);
   }
 
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d23cfb89f9fc8..6efcbb80c0ce6 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   %ret = load atomic <1 x i64>, ptr %x acquire, align 8
   ret <1 x i64> %ret
 }
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; CHECK0-NEXT:## implicit-def: $xmm0
+; CHECK0-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x half>, ptr %x acquire, align 2
+  ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x float>, ptr %x acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_double_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 8
+  ret <1 x double> %ret
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From 4b9e4d34d274373d09fbea29e466e603409c864f Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54da9fe3c6a40..c6cb334e0aa9a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -459,6 +462,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomicLoad(
+  ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(),
+  N->getBasePtr(), N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_vec

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From 7565845a3dd33fc1df7a771b165fbf095145f9d6 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG][X86] Split via Concat  vector types for
 atomic load

Vector types that aren't widened are 'split' via CONCAT_VECTORS
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  33 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 205 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..7905f5a94c579 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 328859738cae0..983958703be51 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast(N), Lo, Hi);
 break;
@@ -1423,6 +1426,36 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  SDLoc dl(LD);
+
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned NumElts = MemoryVT.getVectorMinNumElements();
+
+  EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
+  EVT ElemVT =
+  EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1);
+
+  // Create a single atomic to load all the elements at once.
+  SDValue Atomic =
+  DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, IntMemoryVT, IntMemoryVT,
+LD->getChain(), LD->getBasePtr(),
+LD->getMemOperand());
+
+  // Instead of splitting, put all the elements back into a vector.
+  SmallVector Ops;
+  for (unsigned i = 0; i < NumElts; ++i) {
+SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic,
+  DAG.getVectorIdxConstant(i, dl));
+Elt = DAG.getBitcast(ElemVT, Elt);
+Ops.push_back(Elt);
+  }
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops);
+
+  ReplaceValueWith(SDValue(LD, 0), Concat);
+  ReplaceValueWith(SDValue(LD, 1), LD->getChain());
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 935d058a52f8f..42b0955824293 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl $16, %ecx
+; CHECK0-NEXT:movw %cx, %dx
+; CHECK0-NEXT:## implicit-def: $ecx
+; CHECK0-NEXT:movw %dx, %cx
+; CHECK0-NEXT:## implicit-def: $xmm1
+; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1
+; CHECK0-NEXT:movw %ax, %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-N

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From 4b9e4d34d274373d09fbea29e466e603409c864f Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54da9fe3c6a40..c6cb334e0aa9a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -459,6 +462,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomicLoad(
+  ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(),
+  N->getBasePtr(), N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_vec

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From e262387ca4870f7bae5de152b954461fa0311eb2 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic
 load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 108 ++
 llvm/lib/Target/X86/X86InstrCompiler.td   |   7 ++
 llvm/test/CodeGen/X86/atomic-load-store.ll|  81 +
 llvm/test/CodeGen/X86/atomic-unordered.ll |   3 +-
 5 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c6cb334e0aa9a..328859738cae0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5982,6 +5985,89 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+   TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl,
+   SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  } else if (FirstVT == WidenVT)
+return LdOp;
+  else {
+// TODO: We don't currently have any tests that exercise this code path.
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+unsigned NumConcat =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+SmallVector ConcatOps(NumConcat);
+SDValue UndefVal = DAG.getUNDEF(FirstVT);
+ConcatOps[0] = LdOp;
+for (unsigned i = 1; i != NumConcat; ++i)
+  ConcatOps[i] = UndefVal;
+return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+  }
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+  (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : 
LD->getAlign().value();
+
+  // Find the vector type that can load from.
+  st

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From e262387ca4870f7bae5de152b954461fa0311eb2 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic
 load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 108 ++
 llvm/lib/Target/X86/X86InstrCompiler.td   |   7 ++
 llvm/test/CodeGen/X86/atomic-load-store.ll|  81 +
 llvm/test/CodeGen/X86/atomic-unordered.ll |   3 +-
 5 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c6cb334e0aa9a..328859738cae0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5982,6 +5985,89 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+   TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl,
+   SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  } else if (FirstVT == WidenVT)
+return LdOp;
+  else {
+// TODO: We don't currently have any tests that exercise this code path.
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+unsigned NumConcat =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+SmallVector ConcatOps(NumConcat);
+SDValue UndefVal = DAG.getUNDEF(FirstVT);
+ConcatOps[0] = LdOp;
+for (unsigned i = 1; i != NumConcat; ++i)
+  ConcatOps[i] = UndefVal;
+return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+  }
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+  (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : 
LD->getAlign().value();
+
+  // Find the vector type that can load from.
+  st

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From de6a81035509ce371f750cce4a74118a04c1e17d Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The elements are
placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that its concat_vectors can be
mapped to a BUILD_VECTOR and so unused elements are no longer
referenced.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 .../SelectionDAG/SelectionDAGBuilder.cpp  |   6 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  29 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++
 6 files changed, 69 insertions(+), 187 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c183149b0863a..6ae1d019cad28 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1840,7 +1840,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2278,7 +2278,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9da2ba04f77cb..545da0a1fbfab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12264,7 +12264,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12957,17 +12957,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->get

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Handle scalarized reductions in getArithmeticReductionCost (PR #136688)

2025-04-25 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/136688

>From 425d1aad294f1132ed90d79ff51320ac2dfcb72d Mon Sep 17 00:00:00 2001
From: Luke Lau 
Date: Mon, 21 Apr 2025 16:04:25 +0800
Subject: [PATCH] [RISCV] Handle scalarized reductions in
 getArithmeticReductionCost

This fixes a crash reported at
https://github.com/llvm/llvm-project/pull/114250#issuecomment-2813686061

If the vector type isn't legal at all, e.g. bfloat with +zvfbfmin,
then the legalized type will be scalarized. So use getScalarType()
instead of getVectorElement() when checking for f16/bf16.

(cherry picked from commit 053451cb3502144564b4d0b30a9046045d1820d4)
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   5 +-
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 167 ++
 2 files changed, 137 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index add82dc80c429..8f1094413a756 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1658,9 +1658,8 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, 
VectorType *Ty,
 break;
   case ISD::FADD:
 // We can't promote f16/bf16 fadd reductions.
-if ((LT.second.getVectorElementType() == MVT::f16 &&
- !ST->hasVInstructionsF16()) ||
-LT.second.getVectorElementType() == MVT::bf16)
+if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) 
||
+LT.second.getScalarType() == MVT::bf16)
   return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 if (TTI::requiresOrderedReduction(FMF)) {
   Opcodes.push_back(RISCV::VFMV_S_F);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll 
b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 1762f701a9b2d..71685b4acc822 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,25 +1,60 @@
 ; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=throughput 2>&1 -disable-output | 
FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=throughput 2>&1 -disable-output | 
FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print" 
-cost-kind=throughput 2>&1 -disable-output | FileCheck %s 
--check-prefixes=FP-REDUCE,FP-REDUCE-NO-ZFHMIN-NO-ZFBFMIN
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=code-size 2>&1 -disable-output | 
FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fadd_bfloat() {
-; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR, <1 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR, <2 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: 
%V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR, <4 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: 
%V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR, <8 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: 
%V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR, <16 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: 
%v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR, <32 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: 
%V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR, <64 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: 
%V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR, <128 
x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = 

[llvm-branch-commits] [llvm] [AMDGPU] Make `AllocaInst` return AS5 in `getAssumedAddrSpace` (PR #136798)

2025-04-25 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian closed 
https://github.com/llvm/llvm-project/pull/136798
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-reduce: Support exotic terminators in instructions-to-return (PR #134794)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/134794

>From 18d8c3083affbeb9d54ac5e558f427dcfd9da300 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 8 Apr 2025 11:16:01 +0700
Subject: [PATCH] llvm-reduce: Support exotic terminators in
 instructions-to-return

Use splitBasicBlock and avoid directly dealing with the specific of
how to trim the existing terminators. We just need to deal with
unconditional branch to return.
---
 .../reduce-values-to-return-callbr.ll | 54 ++
 .../reduce-values-to-return-invoke.ll | 56 +++
 .../llvm-reduce/remove-bb-switch-default.ll   |  6 +-
 .../deltas/ReduceValuesToReturn.cpp   | 50 -
 llvm/tools/llvm-reduce/deltas/Utils.cpp   |  2 +-
 5 files changed, 126 insertions(+), 42 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
new file mode 100644
index 0..da2f225f0405b
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
@@ -0,0 +1,54 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+@gv = global i32 0, align 4
+
+; INTERESTING-LABEL: @callbr0(
+; INTERESTING: %load0 = load i32, ptr %arg0
+; INTERESTING: store i32 %load0, ptr @gv
+
+; RESULT-LABEL: define void @callbr0(ptr %arg0) {
+; RESULT: %load0 = load i32, ptr %arg0, align 4
+; RESULT-NEXT: %callbr = callbr i32 asm
+define void @callbr0(ptr %arg0) {
+entry:
+  %load0 = load i32, ptr %arg0
+  %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0)
+  to label %one [label %two, label %three]
+one:
+  store i32 %load0, ptr @gv
+  ret void
+
+two:
+  store i32 %load0, ptr @gv
+  ret void
+
+three:
+  store i32 %load0, ptr @gv
+  ret void
+}
+
+; INTERESTING-LABEL: @callbr1(
+; INTERESTING: %load0 = load i32, ptr %arg0
+
+; RESULT-LABEL: define i32 @callbr1(ptr %arg0) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: %load0 = load i32, ptr %arg0
+; RESULT-NEXT: ret i32 %load0
+define void @callbr1(ptr %arg0) {
+entry:
+  %load0 = load i32, ptr %arg0
+  %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0)
+  to label %one [label %two, label %three]
+one:
+  store i32 %load0, ptr @gv
+  ret void
+
+two:
+  store i32 %load0, ptr @gv
+  ret void
+
+three:
+  store i32 %load0, ptr @gv
+  ret void
+}
diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll
new file mode 100644
index 0..efa1e5377160e
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll
@@ -0,0 +1,56 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+@gv = global i32 0, align 4
+
+
+define i32 @has_invoke_user(ptr %arg) {
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i32 9
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; INTERESTING-LABEL: @invoker_keep_invoke(
+; INTERESTING: %invoke
+; RESULT:   %invoke = invoke i32 @has_invoke_user(ptr %arg)
+define void @invoker_keep_invoke(ptr %arg) personality ptr 
@__gxx_personality_v0 {
+bb:
+  %invoke = invoke i32 @has_invoke_user(ptr %arg)
+to label %bb3 unwind label %bb1
+
+bb1:
+  landingpad { ptr, i32 }
+  catch ptr null
+  ret void
+
+bb3:
+  store i32 %invoke, ptr null
+  ret void
+}
+
+; INTERESTING-LABEL: @invoker_drop_invoke(
+; INTERESTING: %add = add i32
+
+; RESULT-LABEL: define i32 @invoker_drop_invoke(i32 %arg0, ptr %arg1) 
personality ptr @__gxx_personality_v0 {
+; RESULT-NEXT: bb:
+; RESULT-NEXT: %add = add i32 %arg0, 9
+; RESULT-NEXT: ret i32 %add
+; RESULT-NEXT: }
+define void @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr 
@__gxx_personality_v0 {
+bb:
+  %add = add i32 %arg0, 9
+  %invoke = invoke i32 @has_invoke_user(ptr %arg1)
+to label %bb3 unwind label %bb1
+
+bb1:
+  landingpad { ptr, i32 }
+  catch ptr null
+  br label %bb3
+
+bb3:
+  %phi = phi i32 [ %invoke, %bb ], [ %add, %bb1 ]
+  store i32 %phi, ptr null
+  ret void
+}
diff --git a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll 
b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
index b509d1181f74d..27e599e45e9a3 100644
--- a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
+++ b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
@@ -16,13 +16,14 @@
 ; RESULT0-NEXT: br i1 %arg0, label %bb1, label %bb2
 
 ; RESULT0: bb1:
-; RESULT0: %bb1.phi = 

[llvm-branch-commits] [llvm] llvm-reduce: Change function return types if function is not called (PR #134035)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/134035

>From d65f9ca48d04cf36380bc43840527065195d9e4b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 2 Apr 2025 11:45:24 +0700
Subject: [PATCH 1/4] llvm-reduce: Change function return types if function is
 not called

Extend the early return on value reduction to mutate the function return
type if the function has no call uses. This could be generalized to rewrite
cases where all callsites are used, but it turns out that complicates the
visitation order given we try to compute all opportunities up front.

This is enough to cleanup the common case where we end up with one
function with a return of an uninteresting constant.
---
 .../reduce-instructions-to-return.ll  | 20 +++-
 ...reduce-values-to-return-new-return-type.ll | 95 +++
 .../deltas/ReduceValuesToReturn.cpp   |  7 +-
 3 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll 
b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
index 2af87aad05169..77501418f5283 100644
--- a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
+++ b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
@@ -196,13 +196,25 @@ define i32 @callsite_already_new_return_type(ptr %arg) {
 ; INTERESTING: ret
 
 ; RESULT-LABEL: define ptr @non_void_no_op(
-; RESULT: ret ptr null
+; RESULT-NEXT: %load = load i32, ptr %arg
+; RESULT-NEXT: store i32 %load, ptr @gv
+; RESULT-NEXT: ret ptr null
 define ptr @non_void_no_op(ptr %arg) {
   %load = load i32, ptr %arg
   store i32 %load, ptr @gv
   ret ptr null
 }
 
+; INTERESTING-LABEL: @non_void_no_op_caller(
+
+; RESULT-LABEL: define ptr @non_void_no_op_caller(ptr %arg) {
+; RESULT-NEXT: %call = call ptr @non_void_no_op(ptr %arg)
+; RESULT-NEXT: ret ptr %call
+define ptr @non_void_no_op_caller(ptr %arg) {
+  %call = call ptr @non_void_no_op(ptr %arg)
+  ret ptr %call
+}
+
 ; INTERESTING-LABEL: @non_void_same_type_use(
 ; INTERESTING: = load
 ; INTERESTING: ret
@@ -230,6 +242,12 @@ define i32 @non_void_bitcastable_type_use(ptr %arg) {
   ret i32 0
 }
 
+; INTERESTING-LABEL: @non_void_bitcastable_type_use_caller(
+define i32 @non_void_bitcastable_type_use_caller(ptr %arg) {
+  %ret = call i32 @non_void_bitcastable_type_use(ptr %arg)
+  ret i32 %ret
+}
+
 ; INTERESTING-LABEL: @form_return_struct(
 ; INTERESTING: = load { i32, float }
 
diff --git 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll
new file mode 100644
index 0..9ddbbe3def44f
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll
@@ -0,0 +1,95 @@
+; Test that llvm-reduce can move intermediate values by inserting
+; early returns when the function already has a different return type
+;
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+
+@gv = global i32 0, align 4
+@ptr_array = global [2 x ptr] [ptr 
@inst_to_return_has_different_type_but_no_func_call_use,
+   ptr @multiple_callsites_wrong_return_type]
+
+; Should rewrite this return from i64 to i32 since the function has no
+; uses.
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_no_func_call_use(
+; RESULT-LABEL: define i32 
@inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) {
+; RESULT-NEXT: %load = load i32, ptr %arg, align 4
+; RESULT-NEXT: ret i32 %load
+define i64 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) {
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i64 0
+}
+
+; INTERESTING-LABEL: @callsite_different_type_unused_0(
+; RESULT-LABEL: define i64 
@inst_to_return_has_different_type_but_call_result_unused(
+; RESULT-NEXT: %load = load i32, ptr %arg
+; RESULT-NEXT: store i32 %load, ptr @gv
+; RESULT-NEXT: ret i64 0
+define void @callsite_different_type_unused_0(ptr %arg) {
+  %unused0 = call i64 
@inst_to_return_has_different_type_but_call_result_unused(ptr %arg)
+  %unused1 = call i64 
@inst_to_return_has_different_type_but_call_result_unused(ptr null)
+  ret void
+}
+
+; TODO: Could rewrite this return from i64 to i32 since the callsite is unused.
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_call_result_unused(
+; RESULT-LABEL: define i64 
@inst_to_return_has_different_type_but_call_result_unused(
+; RESULT: ret i64 0
+define i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) 
{
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i64 0
+}
+
+; INTERESTING-LABEL: @multiple_callsites_wrong_return_type(
+; RESULT-LABEL: define i64 @multipl

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/133627

>From 2ee2c34b9da4fc887d33e0f6eb5402d60fdd30c3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 24 Mar 2025 14:33:36 +0700
Subject: [PATCH 1/2] llvm-reduce: Reduce with early return of arguments

Extend the instruction -> return reduction with one that inserts
return of function arguments. Not sure how useful this really is. This
has more freedom since we could insert the return anywhere in the function,
but this just inserts the return in the entry block.
---
 .../reduce-values-to-return-args.ll   | 77 +++
 llvm/tools/llvm-reduce/DeltaPasses.def|  6 +-
 .../deltas/ReduceValuesToReturn.cpp   | 42 +-
 .../llvm-reduce/deltas/ReduceValuesToReturn.h |  1 +
 4 files changed, 122 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
new file mode 100644
index 0..abbc643822033
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
@@ -0,0 +1,77 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=arguments-to-return --test FileCheck --test-arg 
--check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefixes=RESULT %s < %t
+
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr 
%ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) {
+  store i32 %arg, ptr %ptr
+  ret void
+}
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 
%arg, ptr %ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 
@move_entry_block_use_argument_to_return_existing_ret(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr 
%ptr) {
+  store i32 %arg, ptr %ptr
+  ret i32 0
+}
+
+; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr 
%ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return(
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg
+define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr 
%ptr1, i1 %cond0, i1 %cond1) {
+entry:
+  br i1 %cond0, label %bb0, label %bb1
+
+bb0:
+  %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ]
+  store i32 %arg, ptr %ptr0
+  store i32 %phi, ptr %ptr1
+  br label %bb1
+
+bb1:
+  br i1 %cond1, label %bb0, label %bb2
+
+bb2:
+  ret void
+}
+
+; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) {
+; INTERESTING: %arg1
+
+; RESULT-LABEL: define ptr @keep_second_arg(
+; RESULT-NEXT: ret ptr %arg1
+; RESULT-NEXT: }
+define void @keep_second_arg(i32 %arg0, ptr %arg1) {
+  store i32 %arg0, ptr %arg1
+  ret void
+}
+
+; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+; INTERESTING: i32 %arg2
+
+; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 
%arg2) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg2
+define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+entry:
+  br i1 %arg0, label %bb0, label %bb1
+
+bb0:
+  store i32 %arg2, ptr %arg1
+  ret void
+
+bb1:
+  ret void
+}
diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def 
b/llvm/tools/llvm-reduce/DeltaPasses.def
index 421e4472006b6..3aed0ccd74b84 100644
--- a/llvm/tools/llvm-reduce/DeltaPasses.def
+++ b/llvm/tools/llvm-reduce/DeltaPasses.def
@@ -49,7 +49,11 @@ DELTA_PASS_IR("attributes", reduceAttributesDeltaPass, 
"Reducing Attributes")
 DELTA_PASS_IR("target-features-attr", reduceTargetFeaturesAttrDeltaPass, 
"Reducing target-features")
 DELTA_PASS_IR("module-data", reduceModuleDataDeltaPass, "Reducing Module Data")
 DELTA_PASS_IR("opcodes", reduceOpcodesDeltaPass, "Reducing Opcodes")
-DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, 
"Early return of instructions")
+
+DELTA_PASS_IR("arguments-to-return", reduceArgumentsToReturnDeltaPass,
+  "Converting arguments to function return value")
+DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass,
+  "Early return of instructions")
 DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing 
Volatile Instructions")
 DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing 
Atomic Ordering")
 DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic 
Sync Scopes")
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp
index 9ee0af3e1a69b..3e400ffc89482 100644
--- a/llvm/tools/llvm-reduce/de

[llvm-branch-commits] [clang] [llvm] [llvm] Introduce callee_type metadata (PR #87573)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits


@@ -1,23 +1,23 @@
 ;; Test if the callee_type metadata is dropped when an indirect function call 
through a function ptr is promoted
 ;; to a direct function call during instcombine.
 
-; RUN: opt < %s -O2 | llvm-dis | FileCheck %s
+; RUN: opt < %s -passes="cgscc(inline),instcombine" -S | FileCheck %s

arsenm wrote:

instcombine test should not be running the inliner, only instcombine 

https://github.com/llvm/llvm-project/pull/87573
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove uselist for constantdata (PR #134692)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm closed 
https://github.com/llvm/llvm-project/pull/134692
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/137314

This is a follow up change to eliminating uselists for ConstantData.
In the previous revision, ConstantData had a replacement reference count
instead of a uselist. This reference count was misleading, and not useful
in the same way as it would be for another value. The references may not
have even been in the current module, since these are shared throughout
the LLVMContext.

This doesn't space leak any more than we previously did; nothing was
attempting to garbage collect unused constants.

Previously the use_empty, and hasNUses type of APIs were supported through
the reference count. These now behave as if the uses are always empty.
Ideally it would be illegal to inspect these, but this forces API complexity
into quite a few places. It may be doable to make it illegal to check these
counts, but I would like there to be a targeted fuzzing effort to make sure
every transform properly deals with a constant in every operand position.

All tests pass if I turn the hasNUses* and getNumUses queries into assertions,
only hasOneUse in particular appears to hit in some set of contexts. I've
added unit tests to ensure logical consistency between these cases

>From 96635fca09fa1835e372f175eff83013b03da28e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 19 Apr 2025 21:11:23 +0200
Subject: [PATCH] IR: Remove reference counts from ConstantData

This is a follow up change to eliminating uselists for ConstantData.
In the previous revision, ConstantData had a replacement reference count
instead of a uselist. This reference count was misleading, and not useful
in the same way as it would be for another value. The references may not
have even been in the current module, since these are shared throughout
the LLVMContext.

This doesn't space leak any more than we previously did; nothing was
attempting to garbage collect unused constants.

Previously the use_empty, and hasNUses type of APIs were supported through
the reference count. These now behave as if the uses are always empty.
Ideally it would be illegal to inspect these, but this forces API complexity
into quite a few places. It may be doable to make it illegal to check these
counts, but I would like there to be a targeted fuzzing effort to make sure
every transform properly deals with a constant in every operand position.

All tests pass if I turn the hasNUses* and getNumUses queries into assertions,
only hasOneUse in particular appears to hit in some set of contexts. I've
added unit tests to ensure logical consistency between these cases
---
 llvm/docs/ReleaseNotes.md   |   4 +-
 llvm/include/llvm/IR/Constants.h|   3 +-
 llvm/include/llvm/IR/Use.h  |   9 +--
 llvm/include/llvm/IR/Value.h| 118 ++--
 llvm/lib/IR/Instruction.cpp |   4 +-
 llvm/lib/IR/Value.cpp   |  28 +++
 llvm/unittests/IR/ConstantsTest.cpp |  36 +
 7 files changed, 98 insertions(+), 104 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 411cefe004e16..4665302a4144c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing.
 Changes to the LLVM IR
 --
 
-* It is no longer permitted to inspect the uses of ConstantData
+* It is no longer permitted to inspect the uses of ConstantData. Use
+  count APIs will behave as if they have no uses (i.e. use_empty() is
+  always true).
 
 * The `nocapture` attribute has been replaced by `captures(none)`.
 * The constant expression variants of the following instructions have been
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 7b1dbdece43f7..07d71cf7108d2 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -51,7 +51,8 @@ template  struct ConstantAggrKeyType;
 /// Since they can be in use by unrelated modules (and are never based on
 /// GlobalValues), it never makes sense to RAUW them.
 ///
-/// These do not have use lists. It is illegal to inspect the uses.
+/// These do not have use lists. It is illegal to inspect the uses. These 
behave
+/// as if they have no uses (i.e. use_empty() is always true).
 class ConstantData : public Constant {
   constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
 
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index bcd1fd6677497..dc22d69ba561d 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -23,7 +23,6 @@
 namespace llvm {
 
 template  struct simplify_type;
-class ConstantData;
 class User;
 class Value;
 
@@ -43,7 +42,7 @@ class Use {
 
 private:
   /// Destructor - Only for zap()
-  ~Use();
+  ~Use() { removeFromList(); }
 
   /// Constructor
   Use(User *Parent) : Parent(Parent) {}
@@ -85,10 +84,8 @@ class Use {
   Use **Prev = nullptr;
   User *Parent = nullptr;
 
-  inline void addT

[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#137314** https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#137313** https://app.graphite.dev/github/pr/llvm/llvm-project/137313?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/137314
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Matt Arsenault (arsenm)


Changes

This is a follow up change to eliminating uselists for ConstantData.
In the previous revision, ConstantData had a replacement reference count
instead of a uselist. This reference count was misleading, and not useful
in the same way as it would be for another value. The references may not
have even been in the current module, since these are shared throughout
the LLVMContext.

This doesn't space leak any more than we previously did; nothing was
attempting to garbage collect unused constants.

Previously the use_empty, and hasNUses type of APIs were supported through
the reference count. These now behave as if the uses are always empty.
Ideally it would be illegal to inspect these, but this forces API complexity
into quite a few places. It may be doable to make it illegal to check these
counts, but I would like there to be a targeted fuzzing effort to make sure
every transform properly deals with a constant in every operand position.

All tests pass if I turn the hasNUses* and getNumUses queries into assertions,
only hasOneUse in particular appears to hit in some set of contexts. I've
added unit tests to ensure logical consistency between these cases

---
Full diff: https://github.com/llvm/llvm-project/pull/137314.diff


7 Files Affected:

- (modified) llvm/docs/ReleaseNotes.md (+3-1) 
- (modified) llvm/include/llvm/IR/Constants.h (+2-1) 
- (modified) llvm/include/llvm/IR/Use.h (+3-6) 
- (modified) llvm/include/llvm/IR/Value.h (+40-78) 
- (modified) llvm/lib/IR/Instruction.cpp (+1-3) 
- (modified) llvm/lib/IR/Value.cpp (+13-15) 
- (modified) llvm/unittests/IR/ConstantsTest.cpp (+36) 


``diff
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 411cefe004e16..4665302a4144c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing.
 Changes to the LLVM IR
 --
 
-* It is no longer permitted to inspect the uses of ConstantData
+* It is no longer permitted to inspect the uses of ConstantData. Use
+  count APIs will behave as if they have no uses (i.e. use_empty() is
+  always true).
 
 * The `nocapture` attribute has been replaced by `captures(none)`.
 * The constant expression variants of the following instructions have been
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 7b1dbdece43f7..07d71cf7108d2 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -51,7 +51,8 @@ template  struct ConstantAggrKeyType;
 /// Since they can be in use by unrelated modules (and are never based on
 /// GlobalValues), it never makes sense to RAUW them.
 ///
-/// These do not have use lists. It is illegal to inspect the uses.
+/// These do not have use lists. It is illegal to inspect the uses. These 
behave
+/// as if they have no uses (i.e. use_empty() is always true).
 class ConstantData : public Constant {
   constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
 
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index bcd1fd6677497..dc22d69ba561d 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -23,7 +23,6 @@
 namespace llvm {
 
 template  struct simplify_type;
-class ConstantData;
 class User;
 class Value;
 
@@ -43,7 +42,7 @@ class Use {
 
 private:
   /// Destructor - Only for zap()
-  ~Use();
+  ~Use() { removeFromList(); }
 
   /// Constructor
   Use(User *Parent) : Parent(Parent) {}
@@ -85,10 +84,8 @@ class Use {
   Use **Prev = nullptr;
   User *Parent = nullptr;
 
-  inline void addToList(unsigned &Count);
-  inline void addToList(Use *&List);
-  inline void removeFromList(unsigned &Count);
-  inline void removeFromList(Use *&List);
+  inline void addToList(Use **List);
+  inline void removeFromList();
 };
 
 /// Allow clients to treat uses just like values when using
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 180b6238eda6c..ae874304c4316 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -116,10 +116,7 @@ class Value {
 
 private:
   Type *VTy;
-  union {
-Use *List = nullptr;
-unsigned Count;
-  } Uses;
+  Use *UseList = nullptr;
 
   friend class ValueAsMetadata; // Allow access to IsUsedByMD.
   friend class ValueHandleBase; // Allow access to HasValueHandle.
@@ -347,23 +344,21 @@ class Value {
 
   bool use_empty() const {
 assertModuleIsMaterialized();
-return hasUseList() ? Uses.List == nullptr : Uses.Count == 0;
+return UseList == nullptr;
   }
 
-  bool materialized_use_empty() const {
-return hasUseList() ? Uses.List == nullptr : !Uses.Count;
-  }
+  bool materialized_use_empty() const { return UseList == nullptr; }
 
   using use_iterator = use_iterator_impl;
   using const_use_iterator = use_iterator_impl;
 
   use_iterator materialized_use_begin() {
 assert(hasUseList())

[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/137314
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove uselist for constantdata (PR #134692)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

Reposted as essentially the same form in 
https://github.com/llvm/llvm-project/pull/137313.   
https://github.com/llvm/llvm-project/pull/137314 takes the next step and 
eliminates the reference counts 

https://github.com/llvm/llvm-project/pull/134692
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -0,0 +1,113 @@
+//===- MCGOFFAttributes.h - Attributes of GOFF symbols 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Defines the various attribute collections defining GOFF symbols.
+//
+//===--===//
+
+#ifndef LLVM_MC_MCGOFFATTRIBUTES_H
+#define LLVM_MC_MCGOFFATTRIBUTES_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/GOFF.h"
+
+namespace llvm {
+namespace GOFF {
+// An "External Symbol Definition" in the GOFF file has a type, and depending 
on
+// the type a different subset of the fields is used.
+//
+// Unlike other formats, a 2 dimensional structure is used to define the
+// location of data. For example, the equivalent of the ELF .text section is
+// made up of a Section Definition (SD) and a class (Element Definition; ED).
+// The name of the SD symbol depends on the application, while the class has 
the
+// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively.
+//
+// Data can be placed into this structure in 2 ways. First, the data (in a text
+// record) can be associated with an ED symbol. To refer to data, a Label
+// Definition (LD) is used to give an offset into the data a name. When 
binding,
+// the whole data is pulled into the resulting executable, and the addresses
+// given by the LD symbols are resolved.
+//
+// The alternative is to use a Part Definition (PR). In this case, the data (in
+// a text record) is associated with the part. When binding, only the data of
+// referenced PRs is pulled into the resulting binary.
+//
+// Both approaches are used, which means that the equivalent of a section in 
ELF
+// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain
+// sections are fine with just defining SD/ED symbols. The SymbolMapper takes
+// care of all those details.
+
+// Attributes for SD symbols.
+struct SDAttr {
+  GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for ED symbols.
+struct EDAttr {
+  bool IsReadOnly = false;
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDAmode Amode;

uweigand wrote:

Just to double-check: your current code does *not* set Amode on the PR symbol.  
 Is is necessary to do this or not?

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -0,0 +1,113 @@
+//===- MCGOFFAttributes.h - Attributes of GOFF symbols 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Defines the various attribute collections defining GOFF symbols.
+//
+//===--===//
+
+#ifndef LLVM_MC_MCGOFFATTRIBUTES_H
+#define LLVM_MC_MCGOFFATTRIBUTES_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/GOFF.h"
+
+namespace llvm {
+namespace GOFF {
+// An "External Symbol Definition" in the GOFF file has a type, and depending 
on
+// the type a different subset of the fields is used.
+//
+// Unlike other formats, a 2 dimensional structure is used to define the
+// location of data. For example, the equivalent of the ELF .text section is
+// made up of a Section Definition (SD) and a class (Element Definition; ED).
+// The name of the SD symbol depends on the application, while the class has 
the
+// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively.
+//
+// Data can be placed into this structure in 2 ways. First, the data (in a text
+// record) can be associated with an ED symbol. To refer to data, a Label
+// Definition (LD) is used to give an offset into the data a name. When 
binding,
+// the whole data is pulled into the resulting executable, and the addresses
+// given by the LD symbols are resolved.
+//
+// The alternative is to use a Part Definition (PR). In this case, the data (in
+// a text record) is associated with the part. When binding, only the data of
+// referenced PRs is pulled into the resulting binary.
+//
+// Both approaches are used, which means that the equivalent of a section in 
ELF
+// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain
+// sections are fine with just defining SD/ED symbols. The SymbolMapper takes
+// care of all those details.
+
+// Attributes for SD symbols.
+struct SDAttr {
+  GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for ED symbols.
+struct EDAttr {
+  bool IsReadOnly = false;
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDAmode Amode;
+  GOFF::ESDRmode Rmode;
+  GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+  GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented;
+  GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate;
+  GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial;
+  GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0;
+  GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword;
+};
+
+// Attributes for LD symbols.
+struct LDAttr {
+  bool IsRenamable = false;
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+  GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong;

uweigand wrote:

I see.  This is not currently reflected in the HLASM output, however.  How 
would one do this?

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -0,0 +1,145 @@
+//===- MCSectionGOFF.cpp - GOFF Code Section Representation 
---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+void emitRMode(raw_ostream &OS, GOFF::ESDRmode Rmode, bool UseParenthesis) {
+  if (Rmode != GOFF::ESD_RMODE_None) {
+OS << "RMODE" << (UseParenthesis ? '(' : ' ');
+switch (Rmode) {
+case GOFF::ESD_RMODE_24:
+  OS << "24";
+  break;
+case GOFF::ESD_RMODE_31:
+  OS << "31";
+  break;
+case GOFF::ESD_RMODE_64:
+  OS << "64";
+  break;
+case GOFF::ESD_RMODE_None:
+  break;
+}
+if (UseParenthesis)
+  OS << ')';
+  }
+}
+
+void emitCATTR(raw_ostream &OS, StringRef Name, StringRef ParentName,
+   bool EmitAmodeAndRmode, GOFF::ESDAmode Amode,
+   GOFF::ESDRmode Rmode, GOFF::ESDAlignment Alignment,
+   GOFF::ESDLoadingBehavior LoadBehavior,
+   GOFF::ESDExecutable Executable, bool IsReadOnly,
+   StringRef PartName) {
+  if (EmitAmodeAndRmode && Amode != GOFF::ESD_AMODE_None) {
+OS << ParentName << " AMODE ";
+switch (Amode) {
+case GOFF::ESD_AMODE_24:
+  OS << "24";
+  break;
+case GOFF::ESD_AMODE_31:
+  OS << "31";
+  break;
+case GOFF::ESD_AMODE_ANY:
+  OS << "ANY";
+  break;
+case GOFF::ESD_AMODE_64:
+  OS << "64";
+  break;
+case GOFF::ESD_AMODE_MIN:
+  OS << "ANY64";
+  break;
+case GOFF::ESD_AMODE_None:
+  break;
+}
+OS << "\n";
+  }
+  if (EmitAmodeAndRmode && Rmode != GOFF::ESD_RMODE_None) {
+OS << ParentName << ' ';
+emitRMode(OS, Rmode, /*UseParenthesis=*/false);
+OS << "\n";
+  }
+  OS << Name << " CATTR ";
+  OS << "ALIGN(" << static_cast(Alignment) << ")";
+  switch (LoadBehavior) {
+  case GOFF::ESD_LB_Deferred:
+OS << ",DEFLOAD";
+break;
+  case GOFF::ESD_LB_NoLoad:
+OS << ",NOLOAD";
+break;
+  default:
+break;
+  }
+  switch (Executable) {
+  case GOFF::ESD_EXE_CODE:
+OS << ",EXECUTABLE";
+break;
+  case GOFF::ESD_EXE_DATA:
+OS << ",NOTEXECUTABLE";
+break;
+  default:
+break;
+  }
+  if (IsReadOnly)
+OS << ",READONLY";
+  if (Rmode != GOFF::ESD_RMODE_None) {
+OS << ',';
+emitRMode(OS, Rmode, /*UseParenthesis=*/true);
+  }
+  if (!PartName.empty())
+OS << ",PART(" << PartName << ")";
+  OS << '\n';
+}
+} // namespace
+
+void MCSectionGOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ raw_ostream &OS,
+ uint32_t Subsection) const {
+  switch (SymbolType) {
+  case GOFF::ESD_ST_SectionDefinition: {
+OS << Name << " CSECT\n";
+Emitted = true;
+break;
+  }
+  case GOFF::ESD_ST_ElementDefinition: {
+bool ParentEmitted = getParent()->Emitted;
+getParent()->printSwitchToSection(MAI, T, OS, Subsection);
+if (!Emitted) {
+  emitCATTR(OS, Name, getParent()->getName(), !ParentEmitted,
+EDAttributes.Amode, EDAttributes.Rmode, EDAttributes.Alignment,
+EDAttributes.LoadBehavior, EDAttributes.Executable,
+EDAttributes.IsReadOnly, StringRef());
+  Emitted = true;
+} else
+  OS << Name << " CATTR ,\n";
+break;
+  }
+  case GOFF::ESD_ST_PartReference: {
+MCSectionGOFF *ED = getParent();
+bool SDEmitted = ED->getParent()->Emitted;
+ED->getParent()->printSwitchToSection(MAI, T, OS, Subsection);
+if (!Emitted) {
+  emitCATTR(OS, ED->getName(), ED->getParent()->getName(), !SDEmitted,
+PRAttributes.Amode, getParent()->EDAttributes.Rmode,
+PRAttributes.Alignment, getParent()->EDAttributes.LoadBehavior,

uweigand wrote:

The HLASM docs contain the somewhat cryptic statement: "Binding attributes 
assigned to the class are also assigned to the part." where is not really 
defined anywhere (I can see) what exactly "binding attributes" mean.

Do we need to replicate the HLASM behavior of setting Amode and LoadBehavior or 
not?

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -0,0 +1,106 @@
+//===- MCGOFFAttributes.h - Attributes of GOFF symbols 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Defines the various attribute collections defining GOFF symbols.
+//
+//===--===//
+
+#ifndef LLVM_MC_MCGOFFATTRIBUTES_H
+#define LLVM_MC_MCGOFFATTRIBUTES_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/GOFF.h"
+
+namespace llvm {
+namespace GOFF {
+// An "External Symbol Definition" in the GOFF file has a type, and depending 
on
+// the type a different subset of the fields is used.
+//
+// Unlike other formats, a 2 dimensional structure is used to define the
+// location of data. For example, the equivalent of the ELF .text section is
+// made up of a Section Definition (SD) and a class (Element Definition; ED).
+// The name of the SD symbol depends on the application, while the class has 
the
+// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively.
+//
+// Data can be placed into this structure in 2 ways. First, the data (in a text
+// record) can be associated with an ED symbol. To refer to data, a Label
+// Definition (LD) is used to give an offset into the data a name. When 
binding,
+// the whole data is pulled into the resulting executable, and the addresses
+// given by the LD symbols are resolved.
+//
+// The alternative is to use a Part Definition (PR). In this case, the data (in
+// a text record) is associated with the part. When binding, only the data of
+// referenced PRs is pulled into the resulting binary.
+//
+// Both approaches are used, which means that the equivalent of a section in 
ELF
+// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain
+// sections are fine with just defining SD/ED symbols. The SymbolMapper takes
+// care of all those details.
+
+// Attributes for SD symbols.
+struct SDAttr {
+  GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for ED symbols.
+struct EDAttr {
+  bool IsReadOnly = false;
+  GOFF::ESDRmode Rmode;
+  GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+  GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented;
+  GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate;
+  GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial;
+  GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0;
+  GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword;
+};
+
+// Attributes for LD symbols.
+struct LDAttr {
+  bool IsRenamable = false;
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong;
+  GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink;
+  GOFF::ESDAmode Amode;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for PR symbols.
+struct PRAttr {
+  bool IsRenamable = false;
+  bool IsReadOnly = false; //  Not documented.

uweigand wrote:

Does it ever make sense for this value to differ from the ED value?  Or is this 
one of those attributes that should be copied from the element to the part?   
It doesn't seem possible to specific differing values in HLASM ...

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -0,0 +1,106 @@
+//===- MCGOFFAttributes.h - Attributes of GOFF symbols 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Defines the various attribute collections defining GOFF symbols.
+//
+//===--===//
+
+#ifndef LLVM_MC_MCGOFFATTRIBUTES_H
+#define LLVM_MC_MCGOFFATTRIBUTES_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/GOFF.h"
+
+namespace llvm {
+namespace GOFF {
+// An "External Symbol Definition" in the GOFF file has a type, and depending 
on
+// the type a different subset of the fields is used.
+//
+// Unlike other formats, a 2 dimensional structure is used to define the
+// location of data. For example, the equivalent of the ELF .text section is
+// made up of a Section Definition (SD) and a class (Element Definition; ED).
+// The name of the SD symbol depends on the application, while the class has 
the
+// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively.
+//
+// Data can be placed into this structure in 2 ways. First, the data (in a text
+// record) can be associated with an ED symbol. To refer to data, a Label
+// Definition (LD) is used to give an offset into the data a name. When 
binding,
+// the whole data is pulled into the resulting executable, and the addresses
+// given by the LD symbols are resolved.
+//
+// The alternative is to use a Part Definition (PR). In this case, the data (in
+// a text record) is associated with the part. When binding, only the data of
+// referenced PRs is pulled into the resulting binary.
+//
+// Both approaches are used, which means that the equivalent of a section in 
ELF
+// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain
+// sections are fine with just defining SD/ED symbols. The SymbolMapper takes
+// care of all those details.
+
+// Attributes for SD symbols.
+struct SDAttr {
+  GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for ED symbols.
+struct EDAttr {
+  bool IsReadOnly = false;
+  GOFF::ESDRmode Rmode;
+  GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName;
+  GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented;
+  GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate;
+  GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial;
+  GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0;
+  GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword;
+};
+
+// Attributes for LD symbols.
+struct LDAttr {
+  bool IsRenamable = false;
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong;
+  GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink;
+  GOFF::ESDAmode Amode;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+};
+
+// Attributes for PR symbols.
+struct PRAttr {
+  bool IsRenamable = false;
+  bool IsReadOnly = false; //  Not documented.
+  GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified;
+  GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink;
+  GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified;
+  GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Byte;

uweigand wrote:

Same question for the alignment, can it ever differ from the ED alignment?   
How would you specify this in HLASM?

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits


@@ -223,13 +197,95 @@ void GOFFOstream::finalizeRecord() {
 }
 
 namespace {
+// A GOFFSymbol holds all the data required for writing an ESD record.
+class GOFFSymbol {
+public:
+  std::string Name;
+  uint32_t EsdId;
+  uint32_t ParentEsdId;
+  uint64_t Offset = 0; // Offset of the symbol into the section. LD only.
+   // Offset is only 32 bit, the larger type is used to
+   // enable error checking.
+  GOFF::ESDSymbolType SymbolType;
+  GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_ProgramManagementBinder;
+
+  GOFF::BehavioralAttributes BehavAttrs;
+  GOFF::SymbolFlags SymbolFlags;
+  uint32_t SortKey = 0;
+  uint32_t SectionLength = 0;
+  uint32_t ADAEsdId = 0;
+  uint32_t EASectionEDEsdId = 0;
+  uint32_t EASectionOffset = 0;
+  uint8_t FillByteValue = 0;
+
+  GOFFSymbol() : EsdId(0), ParentEsdId(0) {}
+
+  GOFFSymbol(StringRef Name, uint32_t EsdID, const GOFF::SDAttr &Attr)
+  : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(0),
+SymbolType(GOFF::ESD_ST_SectionDefinition) {
+BehavAttrs.setTaskingBehavior(Attr.TaskingBehavior);
+BehavAttrs.setBindingScope(Attr.BindingScope);
+  }
+
+  GOFFSymbol(StringRef Name, uint32_t EsdID, uint32_t ParentEsdID,
+ const GOFF::EDAttr &Attr)
+  : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(ParentEsdID),
+SymbolType(GOFF::ESD_ST_ElementDefinition) {
+this->NameSpace = Attr.NameSpace;
+// TODO Do we need/should set the "mangled" flag?
+SymbolFlags.setFillBytePresence(1);

uweigand wrote:

Oh, and one more thing here: by hard-coding this here, we're not emitting any 
HLASM output.   Should we emit a CATTR FILL(0) then as well?   (But that's only 
supported on the PART apparently, while the fill byte is supposed to go on the 
ED?)

https://github.com/llvm/llvm-project/pull/133799
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][test] Fix callcont-fallthru.s after #129481 (PR #135867)

2025-04-25 Thread Paschalis Mpeis via llvm-branch-commits

paschalis-mpeis wrote:

Hey folks, any updates on this?

I spent some time experimenting with @MaskRay's suggestion. I used a mock libc 
shared object that had a `puts` symbol.
Indeed there won't be unresolved symbols now, however, still GNU `nm` doesn't 
show a PLT entry when using `--synthetic` .


https://github.com/llvm/llvm-project/pull/135867
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancel sections to LLVMIR (PR #137193)

2025-04-25 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah updated 
https://github.com/llvm/llvm-project/pull/137193

>From 4d28bd8f0106321af6679e9b155fd36ef2f919fc Mon Sep 17 00:00:00 2001
From: Tom Eccles 
Date: Thu, 10 Apr 2025 11:43:18 +
Subject: [PATCH] [mlir][OpenMP] Convert omp.cancel sections to LLVMIR

This is quite ugly but it is the best I could think of. The old
FiniCBWrapper was way too brittle depending upon the exact block
structure inside of the section, and could be confused by any control
flow in the section (e.g. an if clause on cancel). The wording in the
comment and variable names didn't seem to match where it was actually
branching too as well.

Clang's (non-OpenMPIRBuilder) lowering for cancel inside of sections
branches to a block containing __kmpc_for_static_fini.

This was hard to achieve here because sometimes the FiniCBWrapper has to
run before the worksharing loop finalization has been crated.

To get around this ordering issue I created a dummy branch to a dummy
block, which is then fixed later once all of the information is
available.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 27 ---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  |  6 +-
 mlir/test/Target/LLVMIR/openmp-cancel.mlir| 76 +++
 mlir/test/Target/LLVMIR/openmp-todo.mlir  | 16 
 4 files changed, 97 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index be05f01c94603..3f19088e6c73d 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2172,6 +2172,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
   if (!updateToLocation(Loc))
 return Loc.IP;
 
+  // FiniCBWrapper needs to create a branch to the loop finalization block, but
+  // this has not been created yet at some times when this callback runs.
+  SmallVector CancellationBranches;
   auto FiniCBWrapper = [&](InsertPointTy IP) {
 if (IP.getBlock()->end() != IP.getPoint())
   return FiniCB(IP);
@@ -2179,16 +2182,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
 // will fail because that function requires the Finalization Basic Block to
 // have a terminator, which is already removed by EmitOMPRegionBody.
 // IP is currently at cancelation block.
-// We need to backtrack to the condition block to fetch
-// the exit block and create a branch from cancelation
-// to exit block.
-IRBuilder<>::InsertPointGuard IPG(Builder);
-Builder.restoreIP(IP);
-auto *CaseBB = IP.getBlock()->getSinglePredecessor();
-auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
-auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
-Instruction *I = Builder.CreateBr(ExitBB);
-IP = InsertPointTy(I->getParent(), I->getIterator());
+BranchInst *DummyBranch = Builder.CreateBr(IP.getBlock());
+IP = InsertPointTy(DummyBranch->getParent(), DummyBranch->getIterator());
+CancellationBranches.push_back(DummyBranch);
 return FiniCB(IP);
   };
 
@@ -2251,6 +2247,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
 return WsloopIP.takeError();
   InsertPointTy AfterIP = *WsloopIP;
 
+  BasicBlock *LoopFini = AfterIP.getBlock()->getSinglePredecessor();
+  assert(LoopFini && "Bad structure of static workshare loop finalization");
+
   // Apply the finalization callback in LoopAfterBB
   auto FiniInfo = FinalizationStack.pop_back_val();
   assert(FiniInfo.DK == OMPD_sections &&
@@ -2264,6 +2263,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
 AfterIP = {FiniBB, FiniBB->begin()};
   }
 
+  // Now we can fix the dummy branch to point to the right place
+  if (!CancellationBranches.empty()) {
+for (BranchInst *DummyBranch : CancellationBranches) {
+  assert(DummyBranch->getNumSuccessors() == 1);
+  DummyBranch->setSuccessor(0, LoopFini);
+}
+  }
+
   return AfterIP;
 }
 
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a29f9b7e2f96f..473688acdcd69 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -161,7 +161,8 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
   auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
 omp::ClauseCancellationConstructType cancelledDirective =
 op.getCancelDirective();
-if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel)
+if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel &&
+cancelledDirective != omp::ClauseCancellationConstructType::Sections)
   result = todo("cancel directive construct type not yet supported");
   };
   auto checkDepend = [&todo](auto op, LogicalResult &result) {
@@ 

[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/137314

>From ee561eefea8e236d9c5d7e40f4b82d662df5f873 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sat, 19 Apr 2025 21:11:23 +0200
Subject: [PATCH 1/2] IR: Remove reference counts from ConstantData

This is a follow up change to eliminating uselists for ConstantData.
In the previous revision, ConstantData had a replacement reference count
instead of a uselist. This reference count was misleading, and not useful
in the same way as it would be for another value. The references may not
have even been in the current module, since these are shared throughout
the LLVMContext.

This doesn't space leak any more than we previously did; nothing was
attempting to garbage collect unused constants.

Previously the use_empty, and hasNUses type of APIs were supported through
the reference count. These now behave as if the uses are always empty.
Ideally it would be illegal to inspect these, but this forces API complexity
into quite a few places. It may be doable to make it illegal to check these
counts, but I would like there to be a targeted fuzzing effort to make sure
every transform properly deals with a constant in every operand position.

All tests pass if I turn the hasNUses* and getNumUses queries into assertions,
only hasOneUse in particular appears to hit in some set of contexts. I've
added unit tests to ensure logical consistency between these cases
---
 llvm/docs/ReleaseNotes.md  |   4 +-
 llvm/include/llvm/IR/Constants.h   |   3 +-
 llvm/include/llvm/IR/Use.h |   9 +-
 llvm/include/llvm/IR/Value.h   | 118 +++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp |   2 +-
 llvm/lib/IR/AsmWriter.cpp  |   3 +-
 llvm/lib/IR/Instruction.cpp|   4 +-
 llvm/lib/IR/Value.cpp  |  28 +++--
 llvm/unittests/IR/ConstantsTest.cpp|  36 +++
 9 files changed, 100 insertions(+), 107 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 411cefe004e16..4665302a4144c 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing.
 Changes to the LLVM IR
 --
 
-* It is no longer permitted to inspect the uses of ConstantData
+* It is no longer permitted to inspect the uses of ConstantData. Use
+  count APIs will behave as if they have no uses (i.e. use_empty() is
+  always true).
 
 * The `nocapture` attribute has been replaced by `captures(none)`.
 * The constant expression variants of the following instructions have been
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 7b1dbdece43f7..07d71cf7108d2 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -51,7 +51,8 @@ template  struct ConstantAggrKeyType;
 /// Since they can be in use by unrelated modules (and are never based on
 /// GlobalValues), it never makes sense to RAUW them.
 ///
-/// These do not have use lists. It is illegal to inspect the uses.
+/// These do not have use lists. It is illegal to inspect the uses. These 
behave
+/// as if they have no uses (i.e. use_empty() is always true).
 class ConstantData : public Constant {
   constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
 
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index bcd1fd6677497..dc22d69ba561d 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -23,7 +23,6 @@
 namespace llvm {
 
 template  struct simplify_type;
-class ConstantData;
 class User;
 class Value;
 
@@ -43,7 +42,7 @@ class Use {
 
 private:
   /// Destructor - Only for zap()
-  ~Use();
+  ~Use() { removeFromList(); }
 
   /// Constructor
   Use(User *Parent) : Parent(Parent) {}
@@ -85,10 +84,8 @@ class Use {
   Use **Prev = nullptr;
   User *Parent = nullptr;
 
-  inline void addToList(unsigned &Count);
-  inline void addToList(Use *&List);
-  inline void removeFromList(unsigned &Count);
-  inline void removeFromList(Use *&List);
+  inline void addToList(Use **List);
+  inline void removeFromList();
 };
 
 /// Allow clients to treat uses just like values when using
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 180b6238eda6c..ae874304c4316 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -116,10 +116,7 @@ class Value {
 
 private:
   Type *VTy;
-  union {
-Use *List = nullptr;
-unsigned Count;
-  } Uses;
+  Use *UseList = nullptr;
 
   friend class ValueAsMetadata; // Allow access to IsUsedByMD.
   friend class ValueHandleBase; // Allow access to HasValueHandle.
@@ -347,23 +344,21 @@ class Value {
 
   bool use_empty() const {
 assertModuleIsMaterialized();
-return hasUseList() ? Uses.List == nullptr : Uses.Count == 0;
+return UseList == nullptr;
   }
 
-  bool materialized_use_empty() const {
-

[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/137338

The previous statement removed all uses.

>From c62d35ffa390f7bcceb6d81d76e6dd178e68b847 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 25 Apr 2025 16:20:32 +0200
Subject: [PATCH] ARM: Remove unnecessary use_empty check

The previous statement removed all uses.
---
 llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp 
b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index e998359bd3496..ce59ae0c95dcf 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -1099,8 +1099,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value 
*Offsets, BasicBlock *BB,
 
   // The instruction has now been "absorbed" into the phi value
   Offs->replaceAllUsesWith(NewPhi);
-  if (Offs->use_empty())
-Offs->eraseFromParent();
+  Offs->eraseFromParent();
   // Clean up the old increment in case it's unused because we built a new
   // one
   if (IncInstruction->use_empty())

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/137338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#137338** https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#137337** https://app.graphite.dev/github/pr/llvm/llvm-project/137337?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/137338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-arm

Author: Matt Arsenault (arsenm)


Changes

The previous statement removed all uses.

---
Full diff: https://github.com/llvm/llvm-project/pull/137338.diff


1 Files Affected:

- (modified) llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp (+1-2) 


``diff
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp 
b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index e998359bd3496..ce59ae0c95dcf 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -1099,8 +1099,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value 
*Offsets, BasicBlock *BB,
 
   // The instruction has now been "absorbed" into the phi value
   Offs->replaceAllUsesWith(NewPhi);
-  if (Offs->use_empty())
-Offs->eraseFromParent();
+  Offs->eraseFromParent();
   // Clean up the old increment in case it's unused because we built a new
   // one
   if (IncInstruction->use_empty())

``




https://github.com/llvm/llvm-project/pull/137338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][OpenMP] convert wsloop cancellation to LLVMIR (PR #137194)

2025-04-25 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah updated 
https://github.com/llvm/llvm-project/pull/137194

>From bb1af0154c62a622775f0f05cbdc486de69266c9 Mon Sep 17 00:00:00 2001
From: Tom Eccles 
Date: Tue, 15 Apr 2025 15:05:50 +
Subject: [PATCH] [mlir][OpenMP] convert wsloop cancellation to LLVMIR

Taskloop support will follow in a later patch.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  | 40 -
 mlir/test/Target/LLVMIR/openmp-cancel.mlir| 87 +++
 mlir/test/Target/LLVMIR/openmp-todo.mlir  | 16 
 3 files changed, 125 insertions(+), 18 deletions(-)

diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 473688acdcd69..c05bc98ec7be8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -161,8 +161,7 @@ static LogicalResult checkImplementationStatus(Operation 
&op) {
   auto checkCancelDirective = [&todo](auto op, LogicalResult &result) {
 omp::ClauseCancellationConstructType cancelledDirective =
 op.getCancelDirective();
-if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel &&
-cancelledDirective != omp::ClauseCancellationConstructType::Sections)
+if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup)
   result = todo("cancel directive construct type not yet supported");
   };
   auto checkDepend = [&todo](auto op, LogicalResult &result) {
@@ -2356,6 +2355,30 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop
   : llvm::omp::WorksharingLoopType::ForStaticLoop;
 
+  SmallVector cancelTerminators;
+  // This callback is invoked only if there is cancellation inside of the 
wsloop
+  // body.
+  auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error {
+llvm::IRBuilderBase &llvmBuilder = ompBuilder->Builder;
+llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder);
+
+// ip is currently in the block branched to if cancellation occured.
+// We need to create a branch to terminate that block.
+llvmBuilder.restoreIP(ip);
+
+// We must still clean up the wsloop after cancelling it, so we need to
+// branch to the block that finalizes the wsloop.
+// That block has not been created yet so use this block as a dummy for now
+// and fix this after creating the wsloop.
+cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock()));
+return llvm::Error::success();
+  };
+  // We have to add the cleanup to the OpenMPIRBuilder before the body gets
+  // created in case the body contains omp.cancel (which will then expect to be
+  // able to find this cleanup callback).
+  ompBuilder->pushFinalizationCB({finiCB, llvm::omp::Directive::OMPD_for,
+  constructIsCancellable(wsloopOp)});
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::Expected regionBlock = convertOmpOpRegions(
   wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
@@ -2377,6 +2400,19 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase 
&builder,
   if (failed(handleError(wsloopIP, opInst)))
 return failure();
 
+  ompBuilder->popFinalizationCB();
+  if (!cancelTerminators.empty()) {
+// If we cancelled the loop, we should branch to the finalization block of
+// the wsloop (which is always immediately before the loop continuation
+// block). Now the finalization has been created, we can fix the branch.
+llvm::BasicBlock *wsloopFini = 
wsloopIP->getBlock()->getSinglePredecessor();
+for (llvm::BranchInst *cancelBranch : cancelTerminators) {
+  assert(cancelBranch->getNumSuccessors() == 1 &&
+ "cancel branch should have one target");
+  cancelBranch->setSuccessor(0, wsloopFini);
+}
+  }
+
   // Process the reductions if required.
   if (failed(createReductionsAndCleanup(
   wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
diff --git a/mlir/test/Target/LLVMIR/openmp-cancel.mlir 
b/mlir/test/Target/LLVMIR/openmp-cancel.mlir
index fca16b936fc85..3c195a98d1000 100644
--- a/mlir/test/Target/LLVMIR/openmp-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-cancel.mlir
@@ -156,3 +156,90 @@ llvm.func @cancel_sections_if(%cond : i1) {
 // CHECK: ret void
 // CHECK:   .cncl:; preds = 
%[[VAL_27]]
 // CHECK: br label %[[VAL_19]]
+
+llvm.func @cancel_wsloop_if(%lb : i32, %ub : i32, %step : i32, %cond : i1) {
+  omp.wsloop {
+omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.cancel cancellation_construct_type(loop) if(%cond)
+  omp.yield
+}
+  }
+  llvm.return
+}
+// CHECK-LABEL: define void @cancel_wsloop_if
+// CHECK: %[[VAL_0:.*]] = alloca i32, align 4
+// CHECK: 

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/133627

>From 2ee2c34b9da4fc887d33e0f6eb5402d60fdd30c3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 24 Mar 2025 14:33:36 +0700
Subject: [PATCH 1/2] llvm-reduce: Reduce with early return of arguments

Extend the instruction -> return reduction with one that inserts
return of function arguments. Not sure how useful this really is. This
has more freedom since we could insert the return anywhere in the function,
but this just inserts the return in the entry block.
---
 .../reduce-values-to-return-args.ll   | 77 +++
 llvm/tools/llvm-reduce/DeltaPasses.def|  6 +-
 .../deltas/ReduceValuesToReturn.cpp   | 42 +-
 .../llvm-reduce/deltas/ReduceValuesToReturn.h |  1 +
 4 files changed, 122 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
new file mode 100644
index 0..abbc643822033
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
@@ -0,0 +1,77 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=arguments-to-return --test FileCheck --test-arg 
--check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefixes=RESULT %s < %t
+
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr 
%ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) {
+  store i32 %arg, ptr %ptr
+  ret void
+}
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 
%arg, ptr %ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 
@move_entry_block_use_argument_to_return_existing_ret(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr 
%ptr) {
+  store i32 %arg, ptr %ptr
+  ret i32 0
+}
+
+; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr 
%ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return(
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg
+define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr 
%ptr1, i1 %cond0, i1 %cond1) {
+entry:
+  br i1 %cond0, label %bb0, label %bb1
+
+bb0:
+  %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ]
+  store i32 %arg, ptr %ptr0
+  store i32 %phi, ptr %ptr1
+  br label %bb1
+
+bb1:
+  br i1 %cond1, label %bb0, label %bb2
+
+bb2:
+  ret void
+}
+
+; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) {
+; INTERESTING: %arg1
+
+; RESULT-LABEL: define ptr @keep_second_arg(
+; RESULT-NEXT: ret ptr %arg1
+; RESULT-NEXT: }
+define void @keep_second_arg(i32 %arg0, ptr %arg1) {
+  store i32 %arg0, ptr %arg1
+  ret void
+}
+
+; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+; INTERESTING: i32 %arg2
+
+; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 
%arg2) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg2
+define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+entry:
+  br i1 %arg0, label %bb0, label %bb1
+
+bb0:
+  store i32 %arg2, ptr %arg1
+  ret void
+
+bb1:
+  ret void
+}
diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def 
b/llvm/tools/llvm-reduce/DeltaPasses.def
index 421e4472006b6..3aed0ccd74b84 100644
--- a/llvm/tools/llvm-reduce/DeltaPasses.def
+++ b/llvm/tools/llvm-reduce/DeltaPasses.def
@@ -49,7 +49,11 @@ DELTA_PASS_IR("attributes", reduceAttributesDeltaPass, 
"Reducing Attributes")
 DELTA_PASS_IR("target-features-attr", reduceTargetFeaturesAttrDeltaPass, 
"Reducing target-features")
 DELTA_PASS_IR("module-data", reduceModuleDataDeltaPass, "Reducing Module Data")
 DELTA_PASS_IR("opcodes", reduceOpcodesDeltaPass, "Reducing Opcodes")
-DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, 
"Early return of instructions")
+
+DELTA_PASS_IR("arguments-to-return", reduceArgumentsToReturnDeltaPass,
+  "Converting arguments to function return value")
+DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass,
+  "Early return of instructions")
 DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing 
Volatile Instructions")
 DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing 
Atomic Ordering")
 DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic 
Sync Scopes")
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp
index 9ee0af3e1a69b..3e400ffc89482 100644
--- a/llvm/tools/llvm-reduce/de

[llvm-branch-commits] [llvm] [Attributor] Use `getAssumedAddrSpace` to get address space for `AllocaInst` (PR #136865)

2025-04-25 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian closed 
https://github.com/llvm/llvm-project/pull/136865
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-reduce: Change function return types if function is not called (PR #134035)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/134035

>From d65f9ca48d04cf36380bc43840527065195d9e4b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 2 Apr 2025 11:45:24 +0700
Subject: [PATCH 1/4] llvm-reduce: Change function return types if function is
 not called

Extend the early return on value reduction to mutate the function return
type if the function has no call uses. This could be generalized to rewrite
cases where all callsites are used, but it turns out that complicates the
visitation order given we try to compute all opportunities up front.

This is enough to cleanup the common case where we end up with one
function with a return of an uninteresting constant.
---
 .../reduce-instructions-to-return.ll  | 20 +++-
 ...reduce-values-to-return-new-return-type.ll | 95 +++
 .../deltas/ReduceValuesToReturn.cpp   |  7 +-
 3 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll 
b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
index 2af87aad05169..77501418f5283 100644
--- a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
+++ b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll
@@ -196,13 +196,25 @@ define i32 @callsite_already_new_return_type(ptr %arg) {
 ; INTERESTING: ret
 
 ; RESULT-LABEL: define ptr @non_void_no_op(
-; RESULT: ret ptr null
+; RESULT-NEXT: %load = load i32, ptr %arg
+; RESULT-NEXT: store i32 %load, ptr @gv
+; RESULT-NEXT: ret ptr null
 define ptr @non_void_no_op(ptr %arg) {
   %load = load i32, ptr %arg
   store i32 %load, ptr @gv
   ret ptr null
 }
 
+; INTERESTING-LABEL: @non_void_no_op_caller(
+
+; RESULT-LABEL: define ptr @non_void_no_op_caller(ptr %arg) {
+; RESULT-NEXT: %call = call ptr @non_void_no_op(ptr %arg)
+; RESULT-NEXT: ret ptr %call
+define ptr @non_void_no_op_caller(ptr %arg) {
+  %call = call ptr @non_void_no_op(ptr %arg)
+  ret ptr %call
+}
+
 ; INTERESTING-LABEL: @non_void_same_type_use(
 ; INTERESTING: = load
 ; INTERESTING: ret
@@ -230,6 +242,12 @@ define i32 @non_void_bitcastable_type_use(ptr %arg) {
   ret i32 0
 }
 
+; INTERESTING-LABEL: @non_void_bitcastable_type_use_caller(
+define i32 @non_void_bitcastable_type_use_caller(ptr %arg) {
+  %ret = call i32 @non_void_bitcastable_type_use(ptr %arg)
+  ret i32 %ret
+}
+
 ; INTERESTING-LABEL: @form_return_struct(
 ; INTERESTING: = load { i32, float }
 
diff --git 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll
new file mode 100644
index 0..9ddbbe3def44f
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll
@@ -0,0 +1,95 @@
+; Test that llvm-reduce can move intermediate values by inserting
+; early returns when the function already has a different return type
+;
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+
+@gv = global i32 0, align 4
+@ptr_array = global [2 x ptr] [ptr 
@inst_to_return_has_different_type_but_no_func_call_use,
+   ptr @multiple_callsites_wrong_return_type]
+
+; Should rewrite this return from i64 to i32 since the function has no
+; uses.
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_no_func_call_use(
+; RESULT-LABEL: define i32 
@inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) {
+; RESULT-NEXT: %load = load i32, ptr %arg, align 4
+; RESULT-NEXT: ret i32 %load
+define i64 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) {
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i64 0
+}
+
+; INTERESTING-LABEL: @callsite_different_type_unused_0(
+; RESULT-LABEL: define i64 
@inst_to_return_has_different_type_but_call_result_unused(
+; RESULT-NEXT: %load = load i32, ptr %arg
+; RESULT-NEXT: store i32 %load, ptr @gv
+; RESULT-NEXT: ret i64 0
+define void @callsite_different_type_unused_0(ptr %arg) {
+  %unused0 = call i64 
@inst_to_return_has_different_type_but_call_result_unused(ptr %arg)
+  %unused1 = call i64 
@inst_to_return_has_different_type_but_call_result_unused(ptr null)
+  ret void
+}
+
+; TODO: Could rewrite this return from i64 to i32 since the callsite is unused.
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_call_result_unused(
+; RESULT-LABEL: define i64 
@inst_to_return_has_different_type_but_call_result_unused(
+; RESULT: ret i64 0
+define i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) 
{
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i64 0
+}
+
+; INTERESTING-LABEL: @multiple_callsites_wrong_return_type(
+; RESULT-LABEL: define i64 @multipl

[llvm-branch-commits] [llvm] llvm-reduce: Support exotic terminators in instructions-to-return (PR #134794)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/134794

>From 18d8c3083affbeb9d54ac5e558f427dcfd9da300 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 8 Apr 2025 11:16:01 +0700
Subject: [PATCH] llvm-reduce: Support exotic terminators in
 instructions-to-return

Use splitBasicBlock and avoid directly dealing with the specific of
how to trim the existing terminators. We just need to deal with
unconditional branch to return.
---
 .../reduce-values-to-return-callbr.ll | 54 ++
 .../reduce-values-to-return-invoke.ll | 56 +++
 .../llvm-reduce/remove-bb-switch-default.ll   |  6 +-
 .../deltas/ReduceValuesToReturn.cpp   | 50 -
 llvm/tools/llvm-reduce/deltas/Utils.cpp   |  2 +-
 5 files changed, 126 insertions(+), 42 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
 create mode 100644 
llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
new file mode 100644
index 0..da2f225f0405b
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll
@@ -0,0 +1,54 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+@gv = global i32 0, align 4
+
+; INTERESTING-LABEL: @callbr0(
+; INTERESTING: %load0 = load i32, ptr %arg0
+; INTERESTING: store i32 %load0, ptr @gv
+
+; RESULT-LABEL: define void @callbr0(ptr %arg0) {
+; RESULT: %load0 = load i32, ptr %arg0, align 4
+; RESULT-NEXT: %callbr = callbr i32 asm
+define void @callbr0(ptr %arg0) {
+entry:
+  %load0 = load i32, ptr %arg0
+  %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0)
+  to label %one [label %two, label %three]
+one:
+  store i32 %load0, ptr @gv
+  ret void
+
+two:
+  store i32 %load0, ptr @gv
+  ret void
+
+three:
+  store i32 %load0, ptr @gv
+  ret void
+}
+
+; INTERESTING-LABEL: @callbr1(
+; INTERESTING: %load0 = load i32, ptr %arg0
+
+; RESULT-LABEL: define i32 @callbr1(ptr %arg0) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: %load0 = load i32, ptr %arg0
+; RESULT-NEXT: ret i32 %load0
+define void @callbr1(ptr %arg0) {
+entry:
+  %load0 = load i32, ptr %arg0
+  %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0)
+  to label %one [label %two, label %three]
+one:
+  store i32 %load0, ptr @gv
+  ret void
+
+two:
+  store i32 %load0, ptr @gv
+  ret void
+
+three:
+  store i32 %load0, ptr @gv
+  ret void
+}
diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll
new file mode 100644
index 0..efa1e5377160e
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll
@@ -0,0 +1,56 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefix=RESULT %s < %t
+
+@gv = global i32 0, align 4
+
+
+define i32 @has_invoke_user(ptr %arg) {
+  %load = load i32, ptr %arg
+  store i32 %load, ptr @gv
+  ret i32 9
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; INTERESTING-LABEL: @invoker_keep_invoke(
+; INTERESTING: %invoke
+; RESULT:   %invoke = invoke i32 @has_invoke_user(ptr %arg)
+define void @invoker_keep_invoke(ptr %arg) personality ptr 
@__gxx_personality_v0 {
+bb:
+  %invoke = invoke i32 @has_invoke_user(ptr %arg)
+to label %bb3 unwind label %bb1
+
+bb1:
+  landingpad { ptr, i32 }
+  catch ptr null
+  ret void
+
+bb3:
+  store i32 %invoke, ptr null
+  ret void
+}
+
+; INTERESTING-LABEL: @invoker_drop_invoke(
+; INTERESTING: %add = add i32
+
+; RESULT-LABEL: define i32 @invoker_drop_invoke(i32 %arg0, ptr %arg1) 
personality ptr @__gxx_personality_v0 {
+; RESULT-NEXT: bb:
+; RESULT-NEXT: %add = add i32 %arg0, 9
+; RESULT-NEXT: ret i32 %add
+; RESULT-NEXT: }
+define void @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr 
@__gxx_personality_v0 {
+bb:
+  %add = add i32 %arg0, 9
+  %invoke = invoke i32 @has_invoke_user(ptr %arg1)
+to label %bb3 unwind label %bb1
+
+bb1:
+  landingpad { ptr, i32 }
+  catch ptr null
+  br label %bb3
+
+bb3:
+  %phi = phi i32 [ %invoke, %bb ], [ %add, %bb1 ]
+  store i32 %phi, ptr null
+  ret void
+}
diff --git a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll 
b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
index b509d1181f74d..27e599e45e9a3 100644
--- a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
+++ b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll
@@ -16,13 +16,14 @@
 ; RESULT0-NEXT: br i1 %arg0, label %bb1, label %bb2
 
 ; RESULT0: bb1:
-; RESULT0: %bb1.phi = 

[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Apr 25, 11:24 AM EDT**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/137338).


https://github.com/llvm/llvm-project/pull/137338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)

2025-04-25 Thread Tim Gymnich via llvm-branch-commits

https://github.com/tgymnich approved this pull request.


https://github.com/llvm/llvm-project/pull/137338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)

2025-04-25 Thread via llvm-branch-commits

https://github.com/thetruestblue edited 
https://github.com/llvm/llvm-project/pull/137353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The elements are
placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that its concat_vectors can be
mapped to a BUILD_VECTOR and so unused elements are no longer
referenced.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 .../SelectionDAG/SelectionDAGBuilder.cpp  |   6 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  29 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++
 6 files changed, 69 insertions(+), 187 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c183149b0863a..6ae1d019cad28 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1840,7 +1840,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2278,7 +2278,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9da2ba04f77cb..545da0a1fbfab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12264,7 +12264,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12957,17 +12957,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->get

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From 51488b849e778d9cf7e56491a289d2d908bbf727 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic
 load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 108 ++
 llvm/lib/Target/X86/X86InstrCompiler.td   |   7 ++
 llvm/test/CodeGen/X86/atomic-load-store.ll|  81 +
 llvm/test/CodeGen/X86/atomic-unordered.ll |   3 +-
 5 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 05387afaf840d..6733d4c7d31e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5982,6 +5985,89 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+   TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl,
+   SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  } else if (FirstVT == WidenVT)
+return LdOp;
+  else {
+// TODO: We don't currently have any tests that exercise this code path.
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+unsigned NumConcat =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+SmallVector ConcatOps(NumConcat);
+SDValue UndefVal = DAG.getUNDEF(FirstVT);
+ConcatOps[0] = LdOp;
+for (unsigned i = 1; i != NumConcat; ++i)
+  ConcatOps[i] = UndefVal;
+return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+  }
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+  (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : 
LD->getAlign().value();
+
+  // Find the vector type that can load from.
+  st

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From bf3f6b0a6246e9e4890b320517b5f8333e638236 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG][X86] Split via Concat  vector types for
 atomic load

Vector types that aren't widened are 'split' via CONCAT_VECTORS
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  32 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 204 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..7905f5a94c579 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6733d4c7d31e8..9bbf583691106 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast(N), Lo, Hi);
 break;
@@ -1423,6 +1426,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  SDLoc dl(LD);
+
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned NumElts = MemoryVT.getVectorMinNumElements();
+
+  EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
+  EVT ElemVT =
+  EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1);
+
+  // Create a single atomic to load all the elements at once.
+  SDValue Atomic =
+  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT,
+LD->getChain(), LD->getBasePtr(), LD->getMemOperand());
+
+  // Instead of splitting, put all the elements back into a vector.
+  SmallVector Ops;
+  for (unsigned i = 0; i < NumElts; ++i) {
+SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic,
+  DAG.getVectorIdxConstant(i, dl));
+Elt = DAG.getBitcast(ElemVT, Elt);
+Ops.push_back(Elt);
+  }
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops);
+
+  ReplaceValueWith(SDValue(LD, 0), Concat);
+  ReplaceValueWith(SDValue(LD, 1), LD->getChain());
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 935d058a52f8f..42b0955824293 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl $16, %ecx
+; CHECK0-NEXT:movw %cx, %dx
+; CHECK0-NEXT:## implicit-def: $ecx
+; CHECK0-NEXT:movw %dx, %cx
+; CHECK0-NEXT:## implicit-def: $xmm1
+; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1
+; CHECK0-NEXT:movw %ax, %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; 

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The elements are
placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that its concat_vectors can be
mapped to a BUILD_VECTOR and so unused elements are no longer
referenced.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 .../SelectionDAG/SelectionDAGBuilder.cpp  |   6 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  29 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++
 6 files changed, 69 insertions(+), 187 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c183149b0863a..6ae1d019cad28 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1840,7 +1840,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2278,7 +2278,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9da2ba04f77cb..545da0a1fbfab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12264,7 +12264,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12957,17 +12957,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->get

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54da9fe3c6a40..05387afaf840d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -459,6 +462,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomic(
+  ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(), 
N->getBasePtr(),
+  N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54da9fe3c6a40..05387afaf840d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -459,6 +462,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomic(
+  ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(), 
N->getBasePtr(),
+  N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic ` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +-
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a3e9700fa3089..e84d25afe620d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's  vector type
+  if (I->getType()->getScalarType()->isPointerTy() &&
+  I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
+unsigned AS =
+
cast(I->getType()->getScalarType())->getAddressSpace();
+ElementCount EC = cast(I->getType())->getElementCount();
+Value *BC = Builder.CreateBitCast(
+Result,
+VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)),
+EC));
+Value *IntToPtr = Builder.CreateIntToPtr(
+BC, VectorType::get(PointerType::get(Ctx, AS), EC));
+V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..36c1305a7c5df 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 08d0405345f57..4293df8c13571 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LAB

[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From 40f307cde75fdb15b97a38c89e5ee0d41dc09d8c Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic ` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +-
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a3e9700fa3089..e84d25afe620d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's  vector type
+  if (I->getType()->getScalarType()->isPointerTy() &&
+  I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
+unsigned AS =
+
cast(I->getType()->getScalarType())->getAddressSpace();
+ElementCount EC = cast(I->getType())->getElementCount();
+Value *BC = Builder.CreateBitCast(
+Result,
+VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)),
+EC));
+Value *IntToPtr = Builder.CreateIntToPtr(
+BC, VectorType::get(PointerType::get(Ctx, AS), EC));
+V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..36c1305a7c5df 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 08d0405345f57..4293df8c13571 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LAB

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From bf3f6b0a6246e9e4890b320517b5f8333e638236 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG][X86] Split via Concat  vector types for
 atomic load

Vector types that aren't widened are 'split' via CONCAT_VECTORS
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  32 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 204 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..7905f5a94c579 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6733d4c7d31e8..9bbf583691106 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast(N), Lo, Hi);
 break;
@@ -1423,6 +1426,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  SDLoc dl(LD);
+
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned NumElts = MemoryVT.getVectorMinNumElements();
+
+  EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
+  EVT ElemVT =
+  EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1);
+
+  // Create a single atomic to load all the elements at once.
+  SDValue Atomic =
+  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT,
+LD->getChain(), LD->getBasePtr(), LD->getMemOperand());
+
+  // Instead of splitting, put all the elements back into a vector.
+  SmallVector Ops;
+  for (unsigned i = 0; i < NumElts; ++i) {
+SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic,
+  DAG.getVectorIdxConstant(i, dl));
+Elt = DAG.getBitcast(ElemVT, Elt);
+Ops.push_back(Elt);
+  }
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops);
+
+  ReplaceValueWith(SDValue(LD, 0), Concat);
+  ReplaceValueWith(SDValue(LD, 1), LD->getChain());
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 935d058a52f8f..42b0955824293 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl $16, %ecx
+; CHECK0-NEXT:movw %cx, %dx
+; CHECK0-NEXT:## implicit-def: $ecx
+; CHECK0-NEXT:movw %dx, %cx
+; CHECK0-NEXT:## implicit-def: $xmm1
+; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1
+; CHECK0-NEXT:movw %ax, %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; 

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54da9fe3c6a40..05387afaf840d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -459,6 +462,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomic(
+  ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(), 
N->getBasePtr(),
+  N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic ` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +-
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a3e9700fa3089..e84d25afe620d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's  vector type
+  if (I->getType()->getScalarType()->isPointerTy() &&
+  I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
+unsigned AS =
+
cast(I->getType()->getScalarType())->getAddressSpace();
+ElementCount EC = cast(I->getType())->getElementCount();
+Value *BC = Builder.CreateBitCast(
+Result,
+VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)),
+EC));
+Value *IntToPtr = Builder.CreateIntToPtr(
+BC, VectorType::get(PointerType::get(Ctx, AS), EC));
+V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..36c1305a7c5df 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 08d0405345f57..4293df8c13571 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LAB

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From 51488b849e778d9cf7e56491a289d2d908bbf727 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic
 load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 108 ++
 llvm/lib/Target/X86/X86InstrCompiler.td   |   7 ++
 llvm/test/CodeGen/X86/atomic-load-store.ll|  81 +
 llvm/test/CodeGen/X86/atomic-unordered.ll |   3 +-
 5 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 05387afaf840d..6733d4c7d31e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5982,6 +5985,89 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+   TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl,
+   SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  } else if (FirstVT == WidenVT)
+return LdOp;
+  else {
+// TODO: We don't currently have any tests that exercise this code path.
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
+unsigned NumConcat =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+SmallVector ConcatOps(NumConcat);
+SDValue UndefVal = DAG.getUNDEF(FirstVT);
+ConcatOps[0] = LdOp;
+for (unsigned i = 1; i != NumConcat; ++i)
+  ConcatOps[i] = UndefVal;
+return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+  }
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+  // Allow wider loads if they are sufficiently aligned to avoid memory faults
+  // and if the original load is simple.
+  unsigned LdAlign =
+  (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : 
LD->getAlign().value();
+
+  // Find the vector type that can load from.
+  st

[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: refactor issue reporting (PR #135662)

2025-04-25 Thread Anatoly Trosinenko via llvm-branch-commits


@@ -263,8 +242,75 @@ struct GenericReport : public Report {
   const BinaryContext &BC) const override;
 };
 
+/// An information about an issue collected on the slower, detailed,
+/// run of an analysis.
+class ExtraInfo {
+public:
+  virtual void print(raw_ostream &OS, const MCInstReference Location) const = 
0;
+
+  virtual ~ExtraInfo() {}
+};
+
+class ClobberingInfo : public ExtraInfo {
+  SmallVector ClobberingInstrs;
+
+public:
+  ClobberingInfo(const ArrayRef Instrs)
+  : ClobberingInstrs(Instrs) {}
+
+  void print(raw_ostream &OS, const MCInstReference Location) const override;
+};
+
+/// A brief version of a report that can be further augmented with the details.
+///
+/// It is common for a particular type of gadget detector to be tied to some
+/// specific kind of analysis. If an issue is returned by that detector, it may
+/// be further augmented with the detailed info in an analysis-specific way,
+/// or just be left as-is (f.e. if a free-form warning was reported).
+template  struct BriefReport {
+  BriefReport(std::shared_ptr Issue,
+  const std::optional RequestedDetails)
+  : Issue(Issue), RequestedDetails(RequestedDetails) {}

atrosinenko wrote:

> If I understand the code correctly, `Report`, `GadgetReport` and 
> `GenericReport` are similar, but `BriefReport` and `DetailedReport` really 
> are something somewhat different.

Yes, these are two groups of classes with misleading common "report" suffix. I 
like the idea of replacing `Report` with `Diagnostic` in `Report`, 
`GadgetReport` and `GenericReport`, thanks! This way, the "report" suffix is 
left for the last two classes which are closer to the issues _reported_ to the 
user.

I'm not sure `DiagnosticUnderConstruction` is better than `BriefReport`, but 
`DetailedReport` can definitely be renamed to `FinalReport` for clarity.

> Assuming all of my guesses above are correct, I'm also not sure if there is a 
> need for a `DetailedReport` class? Couldn't `ExtraInfo` just be added to the 
> `BaseDiagnostic` or `GadgetDiagnostic` classes, which would eliminate the 
> need of one class, and as a result make the diagnostic reporting engine a 
> little bit less complex?

After thinking a bit more, I think the updated structure still makes sense, 
added a detailed explanation as a comment. Thank you for pointing this out!

https://github.com/llvm/llvm-project/pull/135662
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From 40f307cde75fdb15b97a38c89e5ee0d41dc09d8c Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/120385
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits


@@ -462,7 +563,22 @@ class DataflowSrcSafetyAnalysis
 return DFParent::getStateBefore(Inst);
   }
 
-  void run() override { DFParent::run(); }
+  void run() override {
+for (BinaryBasicBlock &BB : Func) {
+  if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) {
+MCInst *LastInstOfChecker = BB.getLastNonPseudoInstr();
+LLVM_DEBUG({
+  dbgs() << "Found pointer checking sequence in " << BB.getName()
+ << ":\n";
+  traceReg(BC, "Checked register", CheckerInfo->first);
+  traceInst(BC, "First instruction", *CheckerInfo->second);
+  traceInst(BC, "Last instruction", *LastInstOfChecker);
+});
+CheckerSequenceInfo[LastInstOfChecker] = *CheckerInfo;
+  }
+}

kbeyls wrote:

Another nit-pick.
This to me looks like it's initializing the `CheckerSequenceInfo` variable of 
the `SrcSafetyAnalysis` parent class.
Wouldn't it be cleaner to do this initializing in the constructor for 
`SrcSafetyAnalysis`, rather than in this `run` method?

https://github.com/llvm/llvm-project/pull/134146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits


@@ -591,7 +591,9 @@ obscure_indirect_call_arg_nocfg:
 .globl  safe_lr_at_function_entry_nocfg
 .type   safe_lr_at_function_entry_nocfg,@function
 safe_lr_at_function_entry_nocfg:
-// CHECK-NOT: safe_lr_at_function_entry_nocfg
+// Due to state being reset after a label, paciasp is reported as
+// a signing oracle - this is a known false positive, ignore it.
+// CHECK-NOT: non-protected call{{.*}}safe_lr_at_function_entry_nocfg
 cbz x0, 1f
 ret// LR is safe at the start of the 
function
 1:

kbeyls wrote:

[Re: lines 
+594 to +600]
I'm wondering if this false positive pattern could end up appearing quite a few 
times in real code, specifically in code that has been shrink-wrap optimized?
Did you run this scanner on a larger code base? How many and what kind of false 
positives did you see?
See this comment 
inline on https://app.graphite.dev/github/pr/llvm/llvm-project/134146?utm_source=unchanged-line-comment";>Graphite.

https://github.com/llvm/llvm-project/pull/134146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits


@@ -355,6 +389,46 @@ class SrcSafetyAnalysis {
 return Regs;
   }
 
+  // Returns all registers made trusted by this instruction.
+  SmallVector getRegsMadeTrusted(const MCInst &Point,
+const SrcState &Cur) const {
+SmallVector Regs;
+const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+// An authenticated pointer can be checked, or
+MCPhysReg CheckedReg =
+BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false);
+if (CheckedReg != NoReg && Cur.SafeToDerefRegs[CheckedReg])
+  Regs.push_back(CheckedReg);
+
+if (CheckerSequenceInfo.contains(&Point)) {
+  MCPhysReg CheckedReg;
+  const MCInst *FirstCheckerInst;
+  std::tie(CheckedReg, FirstCheckerInst) = CheckerSequenceInfo.at(&Point);
+
+  // FirstCheckerInst should belong to the same basic block, meaning
+  // it was deterministically processed a few steps before this 
instruction.
+  const SrcState &StateBeforeChecker =
+  getStateBefore(*FirstCheckerInst).get();

kbeyls wrote:

This is a nitpick.
I was trying to get my head around whether it's guaranteed to get to a fixed 
point in a dataflow analysis, where next to using just the previous state on 
the current instruction, also a state on another instruction is used as input 
to compute the next state for the current instruction.
I'm assuming this is probably fine (probably, because as you write in the 
comment, this instruction should be in the same basic block).
I thought I'd just ask if you ended up checking this somewhere else and, if so, 
you'd happen to have a pointer to something stating that you can also take 
state from another instruction in the same basic block, and still be guaranteed 
to reach a fixed point in a dataflow analysis?

Orthogonal to that: would it be hard to add an assert here that 
`*FirstCheckerInst` is indeed in the same basic block?

https://github.com/llvm/llvm-project/pull/134146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits

https://github.com/kbeyls commented:

I finally managed to read through this patch end-to-end.
I only have 3 very nit-picky questions left.
This looks almost ready to merge.

https://github.com/llvm/llvm-project/pull/134146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits

https://github.com/kbeyls edited 
https://github.com/llvm/llvm-project/pull/134146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: use more appropriate types (NFC) (PR #135661)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits

https://github.com/kbeyls approved this pull request.


https://github.com/llvm/llvm-project/pull/135661
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: use more appropriate types (NFC) (PR #135661)

2025-04-25 Thread Kristof Beyls via llvm-branch-commits

https://github.com/kbeyls edited 
https://github.com/llvm/llvm-project/pull/135661
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic commented:

Looks like there are some polly assertion failures.

https://github.com/llvm/llvm-project/pull/137314
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/137314
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)

2025-04-25 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic commented:

The general approach here makes sense to me.

For reference, this is the diff for both PRs together, which is a bit clearer 
as the second undoes half of the first: 
https://github.com/llvm/llvm-project/compare/main...users/arsenm/ir/remove-constantdata-reference-counts

https://github.com/llvm/llvm-project/pull/137314
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of text records (PR #137235)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits

https://github.com/uweigand commented:

Why do we need all that "virtual section" stuff?   Wouldn't it suffice to exit 
early from ::writeText if the section length is zero?

https://github.com/llvm/llvm-project/pull/137235
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Add writing of text records (PR #137235)

2025-04-25 Thread Ulrich Weigand via llvm-branch-commits

uweigand wrote:

Also, the new tests seem to be failing in CI.

https://github.com/llvm/llvm-project/pull/137235
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)

2025-04-25 Thread Chris Apple via llvm-branch-commits

https://github.com/cjappl approved this pull request.


https://github.com/llvm/llvm-project/pull/137353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock (PR #137353)

2025-04-25 Thread via llvm-branch-commits

https://github.com/thetruestblue created 
https://github.com/llvm/llvm-project/pull/137353

This cherry pick updates the OS Spin Lock logic to fix to intercept both OS 
Spin Lock and os_nospin_lock which needs to be intercepted due to macro 
replacement of OSSpinLock within atomic_load compiler_rt call. 

This is causing a test to fail in release bot: 
https://green.lab.llvm.org/job/llvm.org/job/clang-stage1-RA-release-branch/205/testReport/

 rdar://145488759 

>From 2453cd24fd5fb9778b55155f77bf9d05a49539f9 Mon Sep 17 00:00:00 2001
From: davidtrevelyan 
Date: Thu, 13 Mar 2025 10:18:25 +
Subject: [PATCH 1/2] [rtsan][Apple] Add interceptor for _os_nospin_lock_lock
 (#131034)

Follows the discussion here:
https://github.com/llvm/llvm-project/pull/129309

Recently, the test
`TestRtsan.AccessingALargeAtomicVariableDiesWhenRealtime` has been
failing on newer MacOS versions, because the internal locking mechanism
in `std::atomic::load` (for types `T` that are larger than the
hardware lock-free limit), has changed to a function that wasn't being
intercepted by rtsan.

This PR introduces an interceptor for `_os_nospin_lock_lock`, which is
the new internal locking mechanism.

_Note: we'd probably do well to introduce interceptors for
`_os_nospin_lock_unlock` (and `os_unfair_lock_unlock`) too, which also
appear to have blocking implementations. This can follow in a separate
PR._

(cherry picked from commit 481a55a3d9645a6bc1540d326319b78ad8ed8db1)
---
 .../lib/rtsan/rtsan_interceptors_posix.cpp| 11 +++
 .../tests/rtsan_test_interceptors_posix.cpp   | 19 +++
 2 files changed, 30 insertions(+)

diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp 
b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
index 6816119065263..4d602a88ba9ae 100644
--- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
@@ -30,6 +30,12 @@
 extern "C" {
 typedef int32_t OSSpinLock;
 void OSSpinLockLock(volatile OSSpinLock *__lock);
+// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but
+// it's an internal implementation detail of `os/lock.c` on Darwin, and
+// therefore not available in any headers. As a workaround, we forward declare
+// it here, which is enough to facilitate interception of _os_nospin_lock_lock.
+struct _os_nospin_lock_s;
+using _os_nospin_lock_t = _os_nospin_lock_s *;
 }
 #endif // TARGET_OS_MAC
 
@@ -642,6 +648,11 @@ INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t 
lock) {
   __rtsan_notify_intercepted_call("os_unfair_lock_lock");
   return REAL(os_unfair_lock_lock)(lock);
 }
+
+INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) {
+  __rtsan_notify_intercepted_call("_os_nospin_lock_lock");
+  return REAL(_os_nospin_lock_lock)(lock);
+}
 #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK  
\
   INTERCEPT_FUNCTION(os_unfair_lock_lock)
 #else
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp 
b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
index 59663776366bb..75f723081c4b6 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
@@ -1058,6 +1058,25 @@ TEST(TestRtsanInterceptors, 
OsUnfairLockLockDiesWhenRealtime) {
   ExpectRealtimeDeath(Func, "os_unfair_lock_lock");
   ExpectNonRealtimeSurvival(Func);
 }
+
+// We intercept _os_nospin_lock_lock because it's the internal
+// locking mechanism for MacOS's atomic implementation for data
+// types that are larger than the hardware's maximum lock-free size.
+// However, it's a private implementation detail and not visible in any 
headers,
+// so we must duplicate the required type definitions to forward declaration
+// what we need here.
+extern "C" {
+struct _os_nospin_lock_s {
+  unsigned int oul_value;
+};
+void _os_nospin_lock_lock(_os_nospin_lock_s *);
+}
+TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) {
+  _os_nospin_lock_s lock{};
+  auto Func = [&]() { _os_nospin_lock_lock(&lock); };
+  ExpectRealtimeDeath(Func, "_os_nospin_lock_lock");
+  ExpectNonRealtimeSurvival(Func);
+}
 #endif
 
 #if SANITIZER_LINUX

>From e2a5aa2526b06497869514287d26ea4732a14b95 Mon Sep 17 00:00:00 2001
From: thetruestblue 
Date: Fri, 18 Apr 2025 11:25:31 -0700
Subject: [PATCH 2/2] [RTSan][Darwin] Adjust OSSpinLock/_os_nospin_lock
 interceptor and tests (#132867)

These changes align with these lock types and allows builds and tests to
pass with various SDKS.

rdar://147067322
(cherry picked from commit 7cc4472037b43971bd3ee373fe75b5043f5abca9)
---
 .../lib/rtsan/rtsan_interceptors_posix.cpp| 37 +++--
 .../tests/rtsan_test_interceptors_posix.cpp   | 40 +--
 2 files changed, 32 insertions(+), 45 deletions(-)

diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp 
b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
index 4d602a88ba9ae..040f501ee52

[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock to release/20.x (PR #137353)

2025-04-25 Thread via llvm-branch-commits

https://github.com/thetruestblue edited 
https://github.com/llvm/llvm-project/pull/137353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)

2025-04-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (thetruestblue)


Changes

This cherry pick updates the OS Spin Lock logic to fix to intercept both OS 
Spin Lock and os_nospin_lock which needs to be intercepted due to macro 
replacement of OSSpinLock within atomic_load compiler_rt call. 

This is causing a test to fail in release bot: 
https://green.lab.llvm.org/job/llvm.org/job/clang-stage1-RA-release-branch/205/testReport/

 rdar://145488759 

---
Full diff: https://github.com/llvm/llvm-project/pull/137353.diff


2 Files Affected:

- (modified) compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp (+15-13) 
- (modified) compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp 
(+18-3) 


``diff
diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp 
b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
index 6816119065263..040f501ee52e9 100644
--- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
@@ -21,18 +21,6 @@
 #include "rtsan/rtsan.h"
 
 #if SANITIZER_APPLE
-
-#if TARGET_OS_MAC
-// On MacOS OSSpinLockLock is deprecated and no longer present in the headers,
-// but the symbol still exists on the system. Forward declare here so we
-// don't get compilation errors.
-#include 
-extern "C" {
-typedef int32_t OSSpinLock;
-void OSSpinLockLock(volatile OSSpinLock *__lock);
-}
-#endif // TARGET_OS_MAC
-
 #include 
 #include 
 #endif // SANITIZER_APPLE
@@ -627,21 +615,35 @@ INTERCEPTOR(mode_t, umask, mode_t cmask) {
 #pragma clang diagnostic push
 // OSSpinLockLock is deprecated, but still in use in libc++
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#undef OSSpinLockLock
+
 INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) {
   __rtsan_notify_intercepted_call("OSSpinLockLock");
   return REAL(OSSpinLockLock)(lock);
 }
-#pragma clang diagnostic pop
+
 #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK INTERCEPT_FUNCTION(OSSpinLockLock)
 #else
 #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK
 #endif // SANITIZER_APPLE
 
+#if SANITIZER_APPLE
+// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro.
+typedef volatile OSSpinLock *_os_nospin_lock_t;
+
+INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) {
+  __rtsan_notify_intercepted_call("_os_nospin_lock_lock");
+  return REAL(_os_nospin_lock_lock)(lock);
+}
+#pragma clang diagnostic pop // "-Wdeprecated-declarations"
+#endif   // SANITIZER_APPLE
+
 #if SANITIZER_APPLE
 INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) {
   __rtsan_notify_intercepted_call("os_unfair_lock_lock");
   return REAL(os_unfair_lock_lock)(lock);
 }
+
 #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK  
\
   INTERCEPT_FUNCTION(os_unfair_lock_lock)
 #else
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp 
b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
index 59663776366bb..7eda884951c83 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
@@ -1036,10 +1036,18 @@ TEST(TestRtsanInterceptors, 
PthreadJoinDiesWhenRealtime) {
 }
 
 #if SANITIZER_APPLE
-
 #pragma clang diagnostic push
 // OSSpinLockLock is deprecated, but still in use in libc++
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#undef OSSpinLockLock
+extern "C" {
+typedef int32_t OSSpinLock;
+void OSSpinLockLock(volatile OSSpinLock *__lock);
+// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro.
+typedef volatile OSSpinLock *_os_nospin_lock_t;
+void _os_nospin_lock_lock(_os_nospin_lock_t lock);
+}
+
 TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) {
   auto Func = []() {
 OSSpinLock spin_lock{};
@@ -1048,7 +1056,14 @@ TEST(TestRtsanInterceptors, 
OsSpinLockLockDiesWhenRealtime) {
   ExpectRealtimeDeath(Func, "OSSpinLockLock");
   ExpectNonRealtimeSurvival(Func);
 }
-#pragma clang diagnostic pop
+
+TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) {
+  OSSpinLock lock{};
+  auto Func = [&]() { _os_nospin_lock_lock(&lock); };
+  ExpectRealtimeDeath(Func, "_os_nospin_lock_lock");
+  ExpectNonRealtimeSurvival(Func);
+}
+#pragma clang diagnostic pop //"-Wdeprecated-declarations"
 
 TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) {
   auto Func = []() {
@@ -1058,7 +1073,7 @@ TEST(TestRtsanInterceptors, 
OsUnfairLockLockDiesWhenRealtime) {
   ExpectRealtimeDeath(Func, "os_unfair_lock_lock");
   ExpectNonRealtimeSurvival(Func);
 }
-#endif
+#endif // SANITIZER_APPLE
 
 #if SANITIZER_LINUX
 TEST(TestRtsanInterceptors, SpinLockLockDiesWhenRealtime) {

``




https://github.com/llvm/llvm-project/pull/137353
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)

2025-04-25 Thread via llvm-branch-commits

https://github.com/joaosaffran edited 
https://github.com/llvm/llvm-project/pull/137284
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Bundle operands to specify denormal modes (PR #136501)

2025-04-25 Thread Serge Pavlov via llvm-branch-commits

https://github.com/spavloff updated 
https://github.com/llvm/llvm-project/pull/136501

>From 22742e24c1eef3ecc0fb4294dac9f42c9d160019 Mon Sep 17 00:00:00 2001
From: Serge Pavlov 
Date: Thu, 17 Apr 2025 18:42:15 +0700
Subject: [PATCH 1/3] Bundle operands to specify denormal modes

Two new operands are now supported in the "fp.control" operand bundle:

* "denorm.in=xxx"   - specifies the inpot denormal mode.
* "denorm.out=xxx"  - specifies the output denormal mode.

Here xxx must be one of the following values:

* "ieee"  - preserve denormals.
* "zero"  - flush to zero preserving sign.
* "pzero" - flush to positive zero.
* "dyn"   - mode is dynamically read from a control register.

These values align those permitted in the "denormal-fp-math" function
attribute.
---
 llvm/docs/LangRef.rst |  18 +-
 llvm/include/llvm/ADT/FloatingPointMode.h |  33 
 llvm/include/llvm/IR/InstrTypes.h |  21 +++
 llvm/lib/Analysis/ConstantFolding.cpp |  24 ++-
 llvm/lib/IR/Instructions.cpp  | 168 +-
 llvm/lib/IR/Verifier.cpp  |  14 ++
 .../constant-fold-fp-denormal-strict.ll   |  91 ++
 llvm/test/Verifier/fp-intrinsics.ll   |  36 
 8 files changed, 394 insertions(+), 11 deletions(-)
 create mode 100644 
llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal-strict.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8252971aa8a58..954f0e96b46f6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3084,7 +3084,10 @@ floating-point control modes and the treatment of status 
bits respectively.
 
 An operand bundle tagged with "fp.control" contains information about the
 control modes used for the operation execution. Operands specified in this
-bundle represent particular options. Currently, only rounding mode is 
supported.
+bundle represent particular options. The following modes are supported:
+
+* rounding mode,
+* denormal behavior.
 
 Rounding mode is represented by a metadata string value, which specifies the
 the mode used for the operation evaluation. Possible values are:
@@ -3103,6 +3106,19 @@ rounding rounding mode is taken from the control 
register (dynamic rounding).
 In the particular case of :ref:`default floating-point environment `,
 the operation uses rounding to nearest, ties to even.
 
+Denormal behavior defines whether denormal values are flushed to zero during
+the call's execution. This behavior is specified separately for input and
+output values. Such specification is a string, which starts with
+"denorm.in=" or "denorm.out=" respectively. The remainder of the string should
+be one of the values:
+
+::
+
+``"ieee"`` - preserve denormals,
+``"zero"`` - flush to +0.0 or -0.0 depending on value sign,
+``"pzero"`` - flush to +0.0,
+``"dyn"`` - concrete mode is read from some register.
+
 An operand bundle tagged with "fp.except" may be associated with operations
 that can read or write floating-point exception flags. It contains a single
 metadata string value, which can have one of the following values:
diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h 
b/llvm/include/llvm/ADT/FloatingPointMode.h
index 639d931ef88fe..5fceccfd1d0bf 100644
--- a/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -234,6 +234,39 @@ void DenormalMode::print(raw_ostream &OS) const {
   OS << denormalModeKindName(Output) << ',' << denormalModeKindName(Input);
 }
 
+/// If the specified string represents denormal mode as used in operand 
bundles,
+/// returns the corresponding mode.
+inline std::optional
+parseDenormalKindFromOperandBundle(StringRef Str) {
+  if (Str == "ieee")
+return DenormalMode::IEEE;
+  if (Str == "zero")
+return DenormalMode::PreserveSign;
+  if (Str == "pzero")
+return DenormalMode::PositiveZero;
+  if (Str == "dyn")
+return DenormalMode::Dynamic;
+  return std::nullopt;
+}
+
+/// Converts the specified denormal mode into string suitable for use in an
+/// operand bundle.
+inline std::optional
+printDenormalForOperandBundle(DenormalMode::DenormalModeKind Mode) {
+  switch (Mode) {
+  case DenormalMode::IEEE:
+return "ieee";
+  case DenormalMode::PreserveSign:
+return "zero";
+  case DenormalMode::PositiveZero:
+return "pzero";
+  case DenormalMode::Dynamic:
+return "dyn";
+  default:
+return std::nullopt;
+  }
+}
+
 /// Floating-point class tests, supported by 'is_fpclass' intrinsic. Actual
 /// test may be an OR combination of basic tests.
 enum FPClassTest : unsigned {
diff --git a/llvm/include/llvm/IR/InstrTypes.h 
b/llvm/include/llvm/IR/InstrTypes.h
index 8425243e5efe9..8492c911ffc6a 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1092,12 +1092,24 @@ template  class OperandBundleDefT {
 using OperandBundleDef = OperandBundleDefT;
 using ConstOperandBundleDef = OperandBundleDefT;
 
+s

[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)

2025-04-25 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic created 
https://github.com/llvm/llvm-project/pull/137381

Noting:
- Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would 
naturally seem an appropriate place to place.

- Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't 
introduce a dependency of `clangSema` onto `clangParse`.

- Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing 
from source and `clangFrontend` when we are parsing as a command line argument.

- Therefore, we are required to move this out of `clangParse` so that 
`clangSema` can reference it.

This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be 
linked to all its dependencies (`clangFrontend` already depends on `clangSema`)

>From 92d1d83b3fbb43523819b5be338922ab614d2054 Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 25 Apr 2025 18:21:36 +
Subject: [PATCH] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser`
 into clangSema

Noting:
- Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it
would naturally seem an appropriate place to place.

- Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't
introduce a dependency of `clangSema` onto `clangParse`.

- Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when
parsing from source and `clangFrontend` when we are parsing as a
command line argument.

- Therefore, we are required to move this out of `clangParse` so that
`clangSema` can reference it.

This commit moves `HLSLRootSignatureParser` into `clangSema` so it can
be linked to all its dependencies (`clangFrontend` already depends on
`clangSema`)
---
 .../{Parse => Sema}/ParseHLSLRootSignature.h  |  0
 clang/lib/Parse/CMakeLists.txt|  1 -
 clang/lib/Sema/CMakeLists.txt |  1 +
 .../ParseHLSLRootSignature.cpp|  2 +-
 clang/unittests/CMakeLists.txt|  1 -
 clang/unittests/Parse/CMakeLists.txt  | 20 ---
 clang/unittests/Sema/CMakeLists.txt   |  2 ++
 .../ParseHLSLRootSignatureTest.cpp|  2 +-
 8 files changed, 5 insertions(+), 24 deletions(-)
 rename clang/include/clang/{Parse => Sema}/ParseHLSLRootSignature.h (100%)
 rename clang/lib/{Parse => Sema}/ParseHLSLRootSignature.cpp (99%)
 delete mode 100644 clang/unittests/Parse/CMakeLists.txt
 rename clang/unittests/{Parse => Sema}/ParseHLSLRootSignatureTest.cpp (99%)

diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Sema/ParseHLSLRootSignature.h
similarity index 100%
rename from clang/include/clang/Parse/ParseHLSLRootSignature.h
rename to clang/include/clang/Sema/ParseHLSLRootSignature.h
diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt
index 00fde537bb9c6..22e902f7e1bc5 100644
--- a/clang/lib/Parse/CMakeLists.txt
+++ b/clang/lib/Parse/CMakeLists.txt
@@ -14,7 +14,6 @@ add_clang_library(clangParse
   ParseExpr.cpp
   ParseExprCXX.cpp
   ParseHLSL.cpp
-  ParseHLSLRootSignature.cpp
   ParseInit.cpp
   ParseObjc.cpp
   ParseOpenMP.cpp
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 4b87004e4b8ea..9ca4b8e6ab96b 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -26,6 +26,7 @@ add_clang_library(clangSema
   JumpDiagnostics.cpp
   MultiplexExternalSemaSource.cpp
   ParsedAttr.cpp
+  ParseHLSLRootSignature.cpp
   Scope.cpp
   ScopeInfo.cpp
   Sema.cpp
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Sema/ParseHLSLRootSignature.cpp
similarity index 99%
rename from clang/lib/Parse/ParseHLSLRootSignature.cpp
rename to clang/lib/Sema/ParseHLSLRootSignature.cpp
index 042aedbf1af52..87b5022cb0abe 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp
@@ -6,7 +6,7 @@
 //
 
//===--===//
 
-#include "clang/Parse/ParseHLSLRootSignature.h"
+#include "clang/Sema/ParseHLSLRootSignature.h"
 
 #include "clang/Lex/LiteralSupport.h"
 
diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt
index f3823ba309420..580533a97d700 100644
--- a/clang/unittests/CMakeLists.txt
+++ b/clang/unittests/CMakeLists.txt
@@ -49,7 +49,6 @@ endfunction()
 
 add_subdirectory(Basic)
 add_subdirectory(Lex)
-add_subdirectory(Parse)
 add_subdirectory(Driver)
 if(CLANG_ENABLE_STATIC_ANALYZER)
   add_subdirectory(Analysis)
diff --git a/clang/unittests/Parse/CMakeLists.txt 
b/clang/unittests/Parse/CMakeLists.txt
deleted file mode 100644
index 2a31be625042e..0
--- a/clang/unittests/Parse/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  Support
-  )
-add_clang_unittest(ParseTests
-  ParseHLSLRootSignatureTest.cpp
-  )
-clang_target_link_libraries(ParseTests
-  PRIVATE
-  clangAST
-  clangBasic
-  clangLex
-  clangParse
-  clangSema
-  )
-target_link_libraries(ParseTests
-  PRIVATE
-  LLVMTest

[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into `clangSema` (PR #137381)

2025-04-25 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/137381
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)

2025-04-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Finn Plummer (inbelic)


Changes

Noting:
- Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would 
naturally seem an appropriate place to place.

- Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't 
introduce a dependency of `clangSema` onto `clangParse`.

- Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing 
from source and `clangFrontend` when we are parsing as a command line argument.

- Therefore, we are required to move this out of `clangParse` so that 
`clangSema` can reference it.

This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be 
linked to all its dependencies (`clangFrontend` already depends on `clangSema`)

---
Full diff: https://github.com/llvm/llvm-project/pull/137381.diff


8 Files Affected:

- (renamed) clang/include/clang/Sema/ParseHLSLRootSignature.h () 
- (modified) clang/lib/Parse/CMakeLists.txt (-1) 
- (modified) clang/lib/Sema/CMakeLists.txt (+1) 
- (renamed) clang/lib/Sema/ParseHLSLRootSignature.cpp (+1-1) 
- (modified) clang/unittests/CMakeLists.txt (-1) 
- (removed) clang/unittests/Parse/CMakeLists.txt (-20) 
- (modified) clang/unittests/Sema/CMakeLists.txt (+2) 
- (renamed) clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp (+1-1) 


``diff
diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Sema/ParseHLSLRootSignature.h
similarity index 100%
rename from clang/include/clang/Parse/ParseHLSLRootSignature.h
rename to clang/include/clang/Sema/ParseHLSLRootSignature.h
diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt
index 00fde537bb9c6..22e902f7e1bc5 100644
--- a/clang/lib/Parse/CMakeLists.txt
+++ b/clang/lib/Parse/CMakeLists.txt
@@ -14,7 +14,6 @@ add_clang_library(clangParse
   ParseExpr.cpp
   ParseExprCXX.cpp
   ParseHLSL.cpp
-  ParseHLSLRootSignature.cpp
   ParseInit.cpp
   ParseObjc.cpp
   ParseOpenMP.cpp
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 4b87004e4b8ea..9ca4b8e6ab96b 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -26,6 +26,7 @@ add_clang_library(clangSema
   JumpDiagnostics.cpp
   MultiplexExternalSemaSource.cpp
   ParsedAttr.cpp
+  ParseHLSLRootSignature.cpp
   Scope.cpp
   ScopeInfo.cpp
   Sema.cpp
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Sema/ParseHLSLRootSignature.cpp
similarity index 99%
rename from clang/lib/Parse/ParseHLSLRootSignature.cpp
rename to clang/lib/Sema/ParseHLSLRootSignature.cpp
index 042aedbf1af52..87b5022cb0abe 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp
@@ -6,7 +6,7 @@
 //
 
//===--===//
 
-#include "clang/Parse/ParseHLSLRootSignature.h"
+#include "clang/Sema/ParseHLSLRootSignature.h"
 
 #include "clang/Lex/LiteralSupport.h"
 
diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt
index f3823ba309420..580533a97d700 100644
--- a/clang/unittests/CMakeLists.txt
+++ b/clang/unittests/CMakeLists.txt
@@ -49,7 +49,6 @@ endfunction()
 
 add_subdirectory(Basic)
 add_subdirectory(Lex)
-add_subdirectory(Parse)
 add_subdirectory(Driver)
 if(CLANG_ENABLE_STATIC_ANALYZER)
   add_subdirectory(Analysis)
diff --git a/clang/unittests/Parse/CMakeLists.txt 
b/clang/unittests/Parse/CMakeLists.txt
deleted file mode 100644
index 2a31be625042e..0
--- a/clang/unittests/Parse/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  Support
-  )
-add_clang_unittest(ParseTests
-  ParseHLSLRootSignatureTest.cpp
-  )
-clang_target_link_libraries(ParseTests
-  PRIVATE
-  clangAST
-  clangBasic
-  clangLex
-  clangParse
-  clangSema
-  )
-target_link_libraries(ParseTests
-  PRIVATE
-  LLVMTestingAnnotations
-  LLVMTestingSupport
-  clangTesting
-  )
diff --git a/clang/unittests/Sema/CMakeLists.txt 
b/clang/unittests/Sema/CMakeLists.txt
index acc76c932afeb..a5c6b44926460 100644
--- a/clang/unittests/Sema/CMakeLists.txt
+++ b/clang/unittests/Sema/CMakeLists.txt
@@ -3,6 +3,7 @@ add_clang_unittest(SemaTests
   CodeCompleteTest.cpp
   HeuristicResolverTest.cpp
   GslOwnerPointerInference.cpp
+  ParseHLSLRootSignatureTest.cpp
   SemaLookupTest.cpp
   SemaNoloadLookupTest.cpp
   CLANG_LIBS
@@ -10,6 +11,7 @@ add_clang_unittest(SemaTests
   clangASTMatchers
   clangBasic
   clangFrontend
+  clangLex
   clangParse
   clangSema
   clangSerialization
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp 
b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
similarity index 99%
rename from clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
rename to clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
index 2d4e37463bef3..76c437689c4d3 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
@@ -21,7 +21,7 @@
 #include "c

[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)

2025-04-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-hlsl

Author: Finn Plummer (inbelic)


Changes

Noting:
- Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would 
naturally seem an appropriate place to place.

- Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't 
introduce a dependency of `clangSema` onto `clangParse`.

- Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing 
from source and `clangFrontend` when we are parsing as a command line argument.

- Therefore, we are required to move this out of `clangParse` so that 
`clangSema` can reference it.

This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be 
linked to all its dependencies (`clangFrontend` already depends on `clangSema`)

---
Full diff: https://github.com/llvm/llvm-project/pull/137381.diff


8 Files Affected:

- (renamed) clang/include/clang/Sema/ParseHLSLRootSignature.h () 
- (modified) clang/lib/Parse/CMakeLists.txt (-1) 
- (modified) clang/lib/Sema/CMakeLists.txt (+1) 
- (renamed) clang/lib/Sema/ParseHLSLRootSignature.cpp (+1-1) 
- (modified) clang/unittests/CMakeLists.txt (-1) 
- (removed) clang/unittests/Parse/CMakeLists.txt (-20) 
- (modified) clang/unittests/Sema/CMakeLists.txt (+2) 
- (renamed) clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp (+1-1) 


``diff
diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h 
b/clang/include/clang/Sema/ParseHLSLRootSignature.h
similarity index 100%
rename from clang/include/clang/Parse/ParseHLSLRootSignature.h
rename to clang/include/clang/Sema/ParseHLSLRootSignature.h
diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt
index 00fde537bb9c6..22e902f7e1bc5 100644
--- a/clang/lib/Parse/CMakeLists.txt
+++ b/clang/lib/Parse/CMakeLists.txt
@@ -14,7 +14,6 @@ add_clang_library(clangParse
   ParseExpr.cpp
   ParseExprCXX.cpp
   ParseHLSL.cpp
-  ParseHLSLRootSignature.cpp
   ParseInit.cpp
   ParseObjc.cpp
   ParseOpenMP.cpp
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index 4b87004e4b8ea..9ca4b8e6ab96b 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -26,6 +26,7 @@ add_clang_library(clangSema
   JumpDiagnostics.cpp
   MultiplexExternalSemaSource.cpp
   ParsedAttr.cpp
+  ParseHLSLRootSignature.cpp
   Scope.cpp
   ScopeInfo.cpp
   Sema.cpp
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp 
b/clang/lib/Sema/ParseHLSLRootSignature.cpp
similarity index 99%
rename from clang/lib/Parse/ParseHLSLRootSignature.cpp
rename to clang/lib/Sema/ParseHLSLRootSignature.cpp
index 042aedbf1af52..87b5022cb0abe 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp
@@ -6,7 +6,7 @@
 //
 
//===--===//
 
-#include "clang/Parse/ParseHLSLRootSignature.h"
+#include "clang/Sema/ParseHLSLRootSignature.h"
 
 #include "clang/Lex/LiteralSupport.h"
 
diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt
index f3823ba309420..580533a97d700 100644
--- a/clang/unittests/CMakeLists.txt
+++ b/clang/unittests/CMakeLists.txt
@@ -49,7 +49,6 @@ endfunction()
 
 add_subdirectory(Basic)
 add_subdirectory(Lex)
-add_subdirectory(Parse)
 add_subdirectory(Driver)
 if(CLANG_ENABLE_STATIC_ANALYZER)
   add_subdirectory(Analysis)
diff --git a/clang/unittests/Parse/CMakeLists.txt 
b/clang/unittests/Parse/CMakeLists.txt
deleted file mode 100644
index 2a31be625042e..0
--- a/clang/unittests/Parse/CMakeLists.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  Support
-  )
-add_clang_unittest(ParseTests
-  ParseHLSLRootSignatureTest.cpp
-  )
-clang_target_link_libraries(ParseTests
-  PRIVATE
-  clangAST
-  clangBasic
-  clangLex
-  clangParse
-  clangSema
-  )
-target_link_libraries(ParseTests
-  PRIVATE
-  LLVMTestingAnnotations
-  LLVMTestingSupport
-  clangTesting
-  )
diff --git a/clang/unittests/Sema/CMakeLists.txt 
b/clang/unittests/Sema/CMakeLists.txt
index acc76c932afeb..a5c6b44926460 100644
--- a/clang/unittests/Sema/CMakeLists.txt
+++ b/clang/unittests/Sema/CMakeLists.txt
@@ -3,6 +3,7 @@ add_clang_unittest(SemaTests
   CodeCompleteTest.cpp
   HeuristicResolverTest.cpp
   GslOwnerPointerInference.cpp
+  ParseHLSLRootSignatureTest.cpp
   SemaLookupTest.cpp
   SemaNoloadLookupTest.cpp
   CLANG_LIBS
@@ -10,6 +11,7 @@ add_clang_unittest(SemaTests
   clangASTMatchers
   clangBasic
   clangFrontend
+  clangLex
   clangParse
   clangSema
   clangSerialization
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp 
b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
similarity index 99%
rename from clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
rename to clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
index 2d4e37463bef3..76c437689c4d3 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp
@@ -21,7 +21,7 @@
 #include "cl

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of remaining Descriptor Table params (PR #137038)

2025-04-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner approved this pull request.


https://github.com/llvm/llvm-project/pull/137038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into `clangSema` (PR #137381)

2025-04-25 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic closed 
https://github.com/llvm/llvm-project/pull/137381
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The elements are
placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that its concat_vectors can be
mapped to a BUILD_VECTOR and so unused elements are no longer
referenced.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 .../SelectionDAG/SelectionDAGBuilder.cpp  |   6 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  29 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++
 6 files changed, 69 insertions(+), 187 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c183149b0863a..6ae1d019cad28 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1840,7 +1840,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2278,7 +2278,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9da2ba04f77cb..545da0a1fbfab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12264,7 +12264,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12957,17 +12957,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->get

[llvm-branch-commits] [llvm] release/20.x: [LoongArch] Don't crash on instruction prefetch intrinsics (#135760) (PR #135923)

2025-04-25 Thread via llvm-branch-commits

github-actions[bot] wrote:

@leecheechen (or anyone else). If you would like to add a note about this fix 
in the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/135923
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AutoUpgrade][AMDGPU] Adjust AS7 address width to 48 bits (PR #137418)

2025-04-25 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff c91038c28ff91461b8b8e9e56a56e771ce39fb7a 
bafdd8b6606c79e63a25e8c80f28abd761a32f85 --extensions c,cpp -- 
clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGen/target-data.c 
llvm/lib/IR/AutoUpgrade.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
``





View the diff from clang-format here.


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 6ce66e6465..4f5b48547d 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,7 +33,8 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-
"-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
+"-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
+"v32:"
 "32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
 "-ni:7:8:9";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b16a9ef0cd..63c76b3be3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -677,7 +677,8 @@ static StringRef computeDataLayout(const Triple &TT) {
   // space 8) which cannot be non-trivilally accessed by LLVM memory operations
   // like getelementptr.
   return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
- 
"-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
+ 
"-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:"
+ "32-"
  "v32:32-v48:64-v96:"
  "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
  "G1-ni:7:8:9";
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp 
b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index a50e523379..cbe82e3f48 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -41,12 +41,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   // Check that AMDGPU targets add -G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1");
   // and that ANDGCN adds p7 and p8 as well.
-  EXPECT_EQ(
-  UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
-  
"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-  UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
-  
"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
+"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:"
+"256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
+"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:"
+"256:32");
   // but that r600 does not.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1");
 
@@ -60,7 +60,8 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   "amdgcn"),
   "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-"
   "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:"
-  
"1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-"
+  "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:"
+  "128-"
   "p9:192:256:256:32");
 
   // Check that RISCV64 upgrades -n64 to -n32:64.
@@ -144,15 +145,15 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
   // Check that AMDGPU targets don't add -G1 if there is already a -G flag.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2");
   EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2");
-  EXPECT_EQ(
-  UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
-  
"e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-  UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
-  
"G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-  UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"),
-  
"e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
+"e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:"
+"256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
+"G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:"
+"256:32");
+  EXPECT_EQ(Up

[llvm-branch-commits] [clang] 62072e7 - [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc()

2025-04-25 Thread Tom Stellard via llvm-branch-commits

Author: Nathan Ridge
Date: 2025-04-25T16:29:08-07:00
New Revision: 62072e7f877e444f386ea78dce3581904bdb727d

URL: 
https://github.com/llvm/llvm-project/commit/62072e7f877e444f386ea78dce3581904bdb727d
DIFF: 
https://github.com/llvm/llvm-project/commit/62072e7f877e444f386ea78dce3581904bdb727d.diff

LOG: [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc()

Added: 


Modified: 
clang/lib/AST/Expr.cpp
clang/test/SemaCXX/cxx2b-deducing-this.cpp

Removed: 




diff  --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 8571b617c70eb..a5b7ef8c4271b 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1665,8 +1665,11 @@ SourceLocation CallExpr::getBeginLoc() const {
 Method && Method->isExplicitObjectMemberFunction()) {
   bool HasFirstArg = getNumArgs() > 0 && getArg(0);
   assert(HasFirstArg);
-  if (HasFirstArg)
-return getArg(0)->getBeginLoc();
+  if (HasFirstArg) {
+if (auto FirstArgLoc = getArg(0)->getBeginLoc(); 
FirstArgLoc.isValid()) {
+  return FirstArgLoc;
+}
+  }
 }
   }
 

diff  --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp 
b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
index 6f17ce7275456..7e392213710a4 100644
--- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp
+++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
@@ -1134,3 +1134,10 @@ struct S {
 static_assert((S{} << 11) == a);
 // expected-error@-1 {{use of undeclared identifier 'a'}}
 }
+
+namespace GH135522 {
+struct S {
+  auto f(this auto) -> S;
+  bool g() { return f(); } // expected-error {{no viable conversion from 
returned value of type 'S' to function return type 'bool'}}
+};
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc() (PR #135927)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/135927

>From 62072e7f877e444f386ea78dce3581904bdb727d Mon Sep 17 00:00:00 2001
From: Nathan Ridge 
Date: Tue, 15 Apr 2025 03:40:37 -0400
Subject: [PATCH] [clang][AST] Handle implicit first argument in
 CallExpr::getBeginLoc()

---
 clang/lib/AST/Expr.cpp | 7 +--
 clang/test/SemaCXX/cxx2b-deducing-this.cpp | 7 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 8571b617c70eb..a5b7ef8c4271b 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1665,8 +1665,11 @@ SourceLocation CallExpr::getBeginLoc() const {
 Method && Method->isExplicitObjectMemberFunction()) {
   bool HasFirstArg = getNumArgs() > 0 && getArg(0);
   assert(HasFirstArg);
-  if (HasFirstArg)
-return getArg(0)->getBeginLoc();
+  if (HasFirstArg) {
+if (auto FirstArgLoc = getArg(0)->getBeginLoc(); 
FirstArgLoc.isValid()) {
+  return FirstArgLoc;
+}
+  }
 }
   }
 
diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp 
b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
index 6f17ce7275456..7e392213710a4 100644
--- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp
+++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
@@ -1134,3 +1134,10 @@ struct S {
 static_assert((S{} << 11) == a);
 // expected-error@-1 {{use of undeclared identifier 'a'}}
 }
+
+namespace GH135522 {
+struct S {
+  auto f(this auto) -> S;
+  bool g() { return f(); } // expected-error {{no viable conversion from 
returned value of type 'S' to function return type 'bool'}}
+};
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [RISCV] Handle scalarized reductions in getArithmeticReductionCost (PR #136688)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/136688
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)

2025-04-25 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/136808

>From 182e8b7f8a710f5a08bd329d1ab299ad1709cafb Mon Sep 17 00:00:00 2001
From: Owen Pan 
Date: Tue, 22 Apr 2025 21:08:56 -0700
Subject: [PATCH] [clang-format] Correctly annotate kw_operator in using decls
 (#136545)

Fix #136541

(cherry picked from commit 037657de7e5ccd4a37054829874a209b82fb8be7)
---
 clang/lib/Format/TokenAnnotator.cpp   | 6 --
 clang/unittests/Format/TokenAnnotatorTest.cpp | 5 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index 44580d8624684..11b941c5a0411 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3961,8 +3961,10 @@ void 
TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
   FormatToken *AfterLastAttribute = nullptr;
   FormatToken *ClosingParen = nullptr;
 
-  for (auto *Tok = FirstNonComment ? FirstNonComment->Next : nullptr; Tok;
-   Tok = Tok->Next) {
+  for (auto *Tok = FirstNonComment && FirstNonComment->isNot(tok::kw_using)
+   ? FirstNonComment->Next
+   : nullptr;
+   Tok; Tok = Tok->Next) {
 if (Tok->is(TT_StartOfName))
   SeenName = true;
 if (Tok->Previous->EndsCppAttributeGroup)
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp 
b/clang/unittests/Format/TokenAnnotatorTest.cpp
index b7b8a21b726b6..757db66c3e298 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1073,6 +1073,11 @@ TEST_F(TokenAnnotatorTest, 
UnderstandsOverloadedOperators) {
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::identifier, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen);
+
+  Tokens = annotate("using std::operator==;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  // Not TT_FunctionDeclarationName.
+  EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 182e8b7 - [clang-format] Correctly annotate kw_operator in using decls (#136545)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

Author: Owen Pan
Date: 2025-04-25T16:49:42-07:00
New Revision: 182e8b7f8a710f5a08bd329d1ab299ad1709cafb

URL: 
https://github.com/llvm/llvm-project/commit/182e8b7f8a710f5a08bd329d1ab299ad1709cafb
DIFF: 
https://github.com/llvm/llvm-project/commit/182e8b7f8a710f5a08bd329d1ab299ad1709cafb.diff

LOG: [clang-format] Correctly annotate kw_operator in using decls (#136545)

Fix #136541

(cherry picked from commit 037657de7e5ccd4a37054829874a209b82fb8be7)

Added: 


Modified: 
clang/lib/Format/TokenAnnotator.cpp
clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 




diff  --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index 44580d8624684..11b941c5a0411 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3961,8 +3961,10 @@ void 
TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
   FormatToken *AfterLastAttribute = nullptr;
   FormatToken *ClosingParen = nullptr;
 
-  for (auto *Tok = FirstNonComment ? FirstNonComment->Next : nullptr; Tok;
-   Tok = Tok->Next) {
+  for (auto *Tok = FirstNonComment && FirstNonComment->isNot(tok::kw_using)
+   ? FirstNonComment->Next
+   : nullptr;
+   Tok; Tok = Tok->Next) {
 if (Tok->is(TT_StartOfName))
   SeenName = true;
 if (Tok->Previous->EndsCppAttributeGroup)

diff  --git a/clang/unittests/Format/TokenAnnotatorTest.cpp 
b/clang/unittests/Format/TokenAnnotatorTest.cpp
index b7b8a21b726b6..757db66c3e298 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1073,6 +1073,11 @@ TEST_F(TokenAnnotatorTest, 
UnderstandsOverloadedOperators) {
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::identifier, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen);
+
+  Tokens = annotate("using std::operator==;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  // Not TT_FunctionDeclarationName.
+  EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Extend mingw workarounds for armv7/aarch64 too (#136419) (PR #136752)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

tstellar wrote:

Are these legitimate test failures?

https://github.com/llvm/llvm-project/pull/136752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/136808
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)

2025-04-25 Thread via llvm-branch-commits

github-actions[bot] wrote:

@owenca (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/136808
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AutoUpgrade][AMDGPU] Adjust AS7 address width to 48 bits (PR #137418)

2025-04-25 Thread Krzysztof Drewniak via llvm-branch-commits

krzysz00 wrote:

You'll want to get p9 as well, and _maybe_ p8 - though p8 is a very weird kind 
of "pointer" that's just the resource and can't - for example - be GEP'd

https://github.com/llvm/llvm-project/pull/137418
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 425d1aa - [RISCV] Handle scalarized reductions in getArithmeticReductionCost

2025-04-25 Thread Tom Stellard via llvm-branch-commits

Author: Luke Lau
Date: 2025-04-25T16:46:30-07:00
New Revision: 425d1aad294f1132ed90d79ff51320ac2dfcb72d

URL: 
https://github.com/llvm/llvm-project/commit/425d1aad294f1132ed90d79ff51320ac2dfcb72d
DIFF: 
https://github.com/llvm/llvm-project/commit/425d1aad294f1132ed90d79ff51320ac2dfcb72d.diff

LOG: [RISCV] Handle scalarized reductions in getArithmeticReductionCost

This fixes a crash reported at
https://github.com/llvm/llvm-project/pull/114250#issuecomment-2813686061

If the vector type isn't legal at all, e.g. bfloat with +zvfbfmin,
then the legalized type will be scalarized. So use getScalarType()
instead of getVectorElement() when checking for f16/bf16.

(cherry picked from commit 053451cb3502144564b4d0b30a9046045d1820d4)

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index add82dc80c429..8f1094413a756 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1658,9 +1658,8 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, 
VectorType *Ty,
 break;
   case ISD::FADD:
 // We can't promote f16/bf16 fadd reductions.
-if ((LT.second.getVectorElementType() == MVT::f16 &&
- !ST->hasVInstructionsF16()) ||
-LT.second.getVectorElementType() == MVT::bf16)
+if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) 
||
+LT.second.getScalarType() == MVT::bf16)
   return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 if (TTI::requiresOrderedReduction(FMF)) {
   Opcodes.push_back(RISCV::VFMV_S_F);

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll 
b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 1762f701a9b2d..71685b4acc822 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,25 +1,60 @@
 ; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=throughput 2>&1 -disable-output | 
FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=throughput 2>&1 -disable-output | 
FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print" 
-cost-kind=throughput 2>&1 -disable-output | FileCheck %s 
--check-prefixes=FP-REDUCE,FP-REDUCE-NO-ZFHMIN-NO-ZFBFMIN
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin 
-passes="print" -cost-kind=code-size 2>&1 -disable-output | 
FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fadd_bfloat() {
-; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR, <1 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR, <2 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: 
%V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR, <4 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: 
%V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR, <8 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: 
%V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR, <16 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: 
%v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR, <32 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: 
%V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR, <64 x 
bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: 
%V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR, <128 
x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast 
bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR,  
undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast 
bfloat @llvm.vect

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)

2025-04-25 Thread Matt Arsenault via llvm-branch-commits


@@ -1421,6 +1424,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {

arsenm wrote:

Bump 

https://github.com/llvm/llvm-project/pull/120640
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: refactor issue reporting (PR #135662)

2025-04-25 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko edited 
https://github.com/llvm/llvm-project/pull/135662
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)

2025-04-25 Thread via llvm-branch-commits

https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120386

>From 47d8c3acda357d0ac2d8cb067f78442fac5e3fda Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:38:23 -0500
Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG

When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be casted to
an integer type of the same size.

commit-id:f9d761c5
---
 llvm/lib/Target/X86/X86ISelLowering.cpp|  4 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0fc50dc1a87b6..d604db17cb8cf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2650,6 +2650,10 @@ X86TargetLowering::X86TargetLowering(const 
X86TargetMachine &TM,
 setOperationAction(Op, MVT::f32, Promote);
   }
 
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d23cfb89f9fc8..6efcbb80c0ce6 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   %ret = load atomic <1 x i64>, ptr %x acquire, align 8
   ret <1 x i64> %ret
 }
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; CHECK0-NEXT:## implicit-def: $xmm0
+; CHECK0-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x half>, ptr %x acquire, align 2
+  ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x float>, ptr %x acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_double_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 8
+  ret <1 x double> %ret
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 78f6719 - [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801)

2025-04-25 Thread Tom Stellard via llvm-branch-commits

Author: Nikita Popov
Date: 2025-04-25T16:19:10-07:00
New Revision: 78f6719ca9b515d416ec67430466d896305d7b81

URL: 
https://github.com/llvm/llvm-project/commit/78f6719ca9b515d416ec67430466d896305d7b81
DIFF: 
https://github.com/llvm/llvm-project/commit/78f6719ca9b515d416ec67430466d896305d7b81.diff

LOG: [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801)

The llvm.metadata section is not emitted and has special semantics. We
should not merge globals in it, similarly to how we already skip merging
of `llvm.xyz` globals.

Fixes https://github.com/llvm/llvm-project/issues/131394.

(cherry picked from commit 9356091a98c24718572f99b51553838ed664b67a)

Added: 
llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll

Modified: 
llvm/lib/CodeGen/GlobalMerge.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/GlobalMerge.cpp 
b/llvm/lib/CodeGen/GlobalMerge.cpp
index 5993fc939a08a..b4650a4851c3c 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -711,7 +711,8 @@ bool GlobalMergeImpl::run(Module &M) {
   continue;
 
 // Ignore all 'special' globals.
-if (GV.getName().starts_with("llvm.") || 
GV.getName().starts_with(".llvm."))
+if (GV.getName().starts_with("llvm.") ||
+GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
   continue;
 
 // Ignore all "required" globals:

diff  --git a/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll 
b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll
new file mode 100644
index 0..7db092e13afeb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+@index = global i32 0, align 4
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, section 
"llvm.metadata"
+@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section 
"llvm.metadata" 
+@llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] 
[{ ptr, ptr, ptr, i32, ptr } { ptr @index, ptr @.str, ptr @.str.1, i32 1, ptr 
null }], section "llvm.metadata"
+
+; CHECK-NOT: .set
+; CHECK-NOT: _MergedGlobals



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801) (PR #134052)

2025-04-25 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/134052

>From 78f6719ca9b515d416ec67430466d896305d7b81 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Wed, 2 Apr 2025 16:40:53 +0800
Subject: [PATCH] [GlobalMerge][PPC] Don't merge globals in llvm.metadata
 section (#131801)

The llvm.metadata section is not emitted and has special semantics. We
should not merge globals in it, similarly to how we already skip merging
of `llvm.xyz` globals.

Fixes https://github.com/llvm/llvm-project/issues/131394.

(cherry picked from commit 9356091a98c24718572f99b51553838ed664b67a)
---
 llvm/lib/CodeGen/GlobalMerge.cpp| 3 ++-
 llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll | 9 +
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll

diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 5993fc939a08a..b4650a4851c3c 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -711,7 +711,8 @@ bool GlobalMergeImpl::run(Module &M) {
   continue;
 
 // Ignore all 'special' globals.
-if (GV.getName().starts_with("llvm.") || 
GV.getName().starts_with(".llvm."))
+if (GV.getName().starts_with("llvm.") ||
+GV.getName().starts_with(".llvm.") || Section == "llvm.metadata")
   continue;
 
 // Ignore all "required" globals:
diff --git a/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll 
b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll
new file mode 100644
index 0..7db092e13afeb
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+@index = global i32 0, align 4
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, section 
"llvm.metadata"
+@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section 
"llvm.metadata" 
+@llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] 
[{ ptr, ptr, ptr, i32, ptr } { ptr @index, ptr @.str, ptr @.str.1, i32 1, ptr 
null }], section "llvm.metadata"
+
+; CHECK-NOT: .set
+; CHECK-NOT: _MergedGlobals

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >