[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From d327d3edddf4a9a770cb8cfb4bbb35d0c3cc3de7 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +- llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a3e9700fa3089..e84d25afe620d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + if (I->getType()->getScalarType()->isPointerTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +unsigned AS = + cast(I->getType()->getScalarType())->getAddressSpace(); +ElementCount EC = cast(I->getType())->getElementCount(); +Value *BC = Builder.CreateBitCast( +Result, +VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), +EC)); +Value *IntToPtr = Builder.CreateIntToPtr( +BC, VectorType::get(PointerType::get(Ctx, AS), EC)); +V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git 
a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> 
@atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LAB
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From bca16c0850eb48e8eedaf04ac744e1d00798438e Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 02dd78717b392f9fbb3b9d436e58183898dd70e9 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0fc50dc1a87b6..d604db17cb8cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2650,6 +2650,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 4b9e4d34d274373d09fbea29e466e603409c864f Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54da9fe3c6a40..c6cb334e0aa9a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -459,6 +462,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), + N->getBasePtr(), N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_vec
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 7565845a3dd33fc1df7a771b165fbf095145f9d6 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG][X86] Split via Concat vector types for atomic load Vector types that aren't widened are 'split' via CONCAT_VECTORS so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 33 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 205 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..7905f5a94c579 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 328859738cae0..983958703be51 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, 
Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1423,6 +1426,36 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + SDLoc dl(LD); + + EVT MemoryVT = LD->getMemoryVT(); + unsigned NumElts = MemoryVT.getVectorMinNumElements(); + + EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); + EVT ElemVT = + EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1); + + // Create a single atomic to load all the elements at once. + SDValue Atomic = + DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, IntMemoryVT, IntMemoryVT, +LD->getChain(), LD->getBasePtr(), +LD->getMemOperand()); + + // Instead of splitting, put all the elements back into a vector. + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) { +SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic, + DAG.getVectorIdxConstant(i, dl)); +Elt = DAG.getBitcast(ElemVT, Elt); +Ops.push_back(Elt); + } + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops); + + ReplaceValueWith(SDValue(LD, 0), Concat); + ReplaceValueWith(SDValue(LD, 1), LD->getChain()); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; 
CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHECK0-NEXT:## implicit-def: $ecx +; CHECK0-NEXT:movw %dx, %cx +; CHECK0-NEXT:## implicit-def: $xmm1 +; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1 +; CHECK0-NEXT:movw %ax, %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-N
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 4b9e4d34d274373d09fbea29e466e603409c864f Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54da9fe3c6a40..c6cb334e0aa9a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -459,6 +462,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), + N->getBasePtr(), N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_vec
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From e262387ca4870f7bae5de152b954461fa0311eb2 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 108 ++ llvm/lib/Target/X86/X86InstrCompiler.td | 7 ++ llvm/test/CodeGen/X86/atomic-load-store.ll| 81 + llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 5 files changed, 177 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c6cb334e0aa9a..328859738cae0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = 
WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5982,6 +5985,89 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. 
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); +unsigned NumConcat = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +SmallVector ConcatOps(NumConcat); +SDValue UndefVal = DAG.getUNDEF(FirstVT); +ConcatOps[0] = LdOp; +for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; +return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); + + // Find the vector type that can load from. + st
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From e262387ca4870f7bae5de152b954461fa0311eb2 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 108 ++ llvm/lib/Target/X86/X86InstrCompiler.td | 7 ++ llvm/test/CodeGen/X86/atomic-load-store.ll| 81 + llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 5 files changed, 177 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c6cb334e0aa9a..328859738cae0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = 
WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5982,6 +5985,89 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. 
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); +unsigned NumConcat = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +SmallVector ConcatOps(NumConcat); +SDValue UndefVal = DAG.getUNDEF(FirstVT); +ConcatOps[0] = LdOp; +for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; +return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); + + // Find the vector type that can load from. + st
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From de6a81035509ce371f750cce4a74118a04c1e17d Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The elements are placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that its concat_vectors can be mapped to a BUILD_VECTOR and so unused elements are no longer referenced. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++ 6 files changed, 69 insertions(+), 187 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c183149b0863a..6ae1d019cad28 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1840,7 +1840,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2278,7 +2278,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9da2ba04f77cb..545da0a1fbfab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12264,7 +12264,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12957,17 +12957,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->get
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Handle scalarized reductions in getArithmeticReductionCost (PR #136688)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/136688 >From 425d1aad294f1132ed90d79ff51320ac2dfcb72d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 21 Apr 2025 16:04:25 +0800 Subject: [PATCH] [RISCV] Handle scalarized reductions in getArithmeticReductionCost This fixes a crash reported at https://github.com/llvm/llvm-project/pull/114250#issuecomment-2813686061 If the vector type isn't legal at all, e.g. bfloat with +zvfbfmin, then the legalized type will be scalarized. So use getScalarType() instead of getVectorElement() when checking for f16/bf16. (cherry picked from commit 053451cb3502144564b4d0b30a9046045d1820d4) --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +- .../Analysis/CostModel/RISCV/reduce-fadd.ll | 167 ++ 2 files changed, 137 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index add82dc80c429..8f1094413a756 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1658,9 +1658,8 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, break; case ISD::FADD: // We can't promote f16/bf16 fadd reductions. 
-if ((LT.second.getVectorElementType() == MVT::f16 && - !ST->hasVInstructionsF16()) || -LT.second.getVectorElementType() == MVT::bf16) +if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) || +LT.second.getScalarType() == MVT::bf16) return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); if (TTI::requiresOrderedReduction(FMF)) { Opcodes.push_back(RISCV::VFMV_S_F); diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 1762f701a9b2d..71685b4acc822 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -1,25 +1,60 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-NO-ZFHMIN-NO-ZFBFMIN ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE define void @reduce_fadd_bfloat() { -; FP-REDUCE-LABEL: 'reduce_fadd_bfloat' -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR, <1 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR, <2 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast 
bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR, <4 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR, <8 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR, <16 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR, <32 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR, <64 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR, <128 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 =
[llvm-branch-commits] [llvm] [AMDGPU] Make `AllocaInst` return AS5 in `getAssumedAddrSpace` (PR #136798)
https://github.com/shiltian closed https://github.com/llvm/llvm-project/pull/136798 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Support exotic terminators in instructions-to-return (PR #134794)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/134794 >From 18d8c3083affbeb9d54ac5e558f427dcfd9da300 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 8 Apr 2025 11:16:01 +0700 Subject: [PATCH] llvm-reduce: Support exotic terminators in instructions-to-return Use splitBasicBlock and avoid directly dealing with the specific of how to trim the existing terminators. We just need to deal with unconditional branch to return. --- .../reduce-values-to-return-callbr.ll | 54 ++ .../reduce-values-to-return-invoke.ll | 56 +++ .../llvm-reduce/remove-bb-switch-default.ll | 6 +- .../deltas/ReduceValuesToReturn.cpp | 50 - llvm/tools/llvm-reduce/deltas/Utils.cpp | 2 +- 5 files changed, 126 insertions(+), 42 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll new file mode 100644 index 0..da2f225f0405b --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll @@ -0,0 +1,54 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + +@gv = global i32 0, align 4 + +; INTERESTING-LABEL: @callbr0( +; INTERESTING: %load0 = load i32, ptr %arg0 +; INTERESTING: store i32 %load0, ptr @gv + +; RESULT-LABEL: define void @callbr0(ptr %arg0) { +; RESULT: %load0 = load i32, ptr %arg0, align 4 +; RESULT-NEXT: %callbr = callbr i32 asm +define void @callbr0(ptr %arg0) { +entry: + %load0 = load i32, ptr %arg0 + %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0) + to label %one [label %two, label %three] +one: + store i32 %load0, ptr @gv + ret void + +two: + store i32 %load0, ptr @gv + ret void + 
+three: + store i32 %load0, ptr @gv + ret void +} + +; INTERESTING-LABEL: @callbr1( +; INTERESTING: %load0 = load i32, ptr %arg0 + +; RESULT-LABEL: define i32 @callbr1(ptr %arg0) { +; RESULT-NEXT: entry: +; RESULT-NEXT: %load0 = load i32, ptr %arg0 +; RESULT-NEXT: ret i32 %load0 +define void @callbr1(ptr %arg0) { +entry: + %load0 = load i32, ptr %arg0 + %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0) + to label %one [label %two, label %three] +one: + store i32 %load0, ptr @gv + ret void + +two: + store i32 %load0, ptr @gv + ret void + +three: + store i32 %load0, ptr @gv + ret void +} diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll new file mode 100644 index 0..efa1e5377160e --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll @@ -0,0 +1,56 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + +@gv = global i32 0, align 4 + + +define i32 @has_invoke_user(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i32 9 +} + +declare i32 @__gxx_personality_v0(...) 
+ +; INTERESTING-LABEL: @invoker_keep_invoke( +; INTERESTING: %invoke +; RESULT: %invoke = invoke i32 @has_invoke_user(ptr %arg) +define void @invoker_keep_invoke(ptr %arg) personality ptr @__gxx_personality_v0 { +bb: + %invoke = invoke i32 @has_invoke_user(ptr %arg) +to label %bb3 unwind label %bb1 + +bb1: + landingpad { ptr, i32 } + catch ptr null + ret void + +bb3: + store i32 %invoke, ptr null + ret void +} + +; INTERESTING-LABEL: @invoker_drop_invoke( +; INTERESTING: %add = add i32 + +; RESULT-LABEL: define i32 @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: bb: +; RESULT-NEXT: %add = add i32 %arg0, 9 +; RESULT-NEXT: ret i32 %add +; RESULT-NEXT: } +define void @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr @__gxx_personality_v0 { +bb: + %add = add i32 %arg0, 9 + %invoke = invoke i32 @has_invoke_user(ptr %arg1) +to label %bb3 unwind label %bb1 + +bb1: + landingpad { ptr, i32 } + catch ptr null + br label %bb3 + +bb3: + %phi = phi i32 [ %invoke, %bb ], [ %add, %bb1 ] + store i32 %phi, ptr null + ret void +} diff --git a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll index b509d1181f74d..27e599e45e9a3 100644 --- a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll +++ b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll @@ -16,13 +16,14 @@ ; RESULT0-NEXT: br i1 %arg0, label %bb1, label %bb2 ; RESULT0: bb1: -; RESULT0: %bb1.phi =
[llvm-branch-commits] [llvm] llvm-reduce: Change function return types if function is not called (PR #134035)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/134035 >From d65f9ca48d04cf36380bc43840527065195d9e4b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Apr 2025 11:45:24 +0700 Subject: [PATCH 1/4] llvm-reduce: Change function return types if function is not called Extend the early return on value reduction to mutate the function return type if the function has no call uses. This could be generalized to rewrite cases where all callsites are used, but it turns out that complicates the visitation order given we try to compute all opportunities up front. This is enough to cleanup the common case where we end up with one function with a return of an uninteresting constant. --- .../reduce-instructions-to-return.ll | 20 +++- ...reduce-values-to-return-new-return-type.ll | 95 +++ .../deltas/ReduceValuesToReturn.cpp | 7 +- 3 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll index 2af87aad05169..77501418f5283 100644 --- a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll +++ b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll @@ -196,13 +196,25 @@ define i32 @callsite_already_new_return_type(ptr %arg) { ; INTERESTING: ret ; RESULT-LABEL: define ptr @non_void_no_op( -; RESULT: ret ptr null +; RESULT-NEXT: %load = load i32, ptr %arg +; RESULT-NEXT: store i32 %load, ptr @gv +; RESULT-NEXT: ret ptr null define ptr @non_void_no_op(ptr %arg) { %load = load i32, ptr %arg store i32 %load, ptr @gv ret ptr null } +; INTERESTING-LABEL: @non_void_no_op_caller( + +; RESULT-LABEL: define ptr @non_void_no_op_caller(ptr %arg) { +; RESULT-NEXT: %call = call ptr @non_void_no_op(ptr %arg) +; RESULT-NEXT: ret ptr %call +define ptr @non_void_no_op_caller(ptr %arg) { + %call = call ptr @non_void_no_op(ptr %arg) + ret 
ptr %call +} + ; INTERESTING-LABEL: @non_void_same_type_use( ; INTERESTING: = load ; INTERESTING: ret @@ -230,6 +242,12 @@ define i32 @non_void_bitcastable_type_use(ptr %arg) { ret i32 0 } +; INTERESTING-LABEL: @non_void_bitcastable_type_use_caller( +define i32 @non_void_bitcastable_type_use_caller(ptr %arg) { + %ret = call i32 @non_void_bitcastable_type_use(ptr %arg) + ret i32 %ret +} + ; INTERESTING-LABEL: @form_return_struct( ; INTERESTING: = load { i32, float } diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll new file mode 100644 index 0..9ddbbe3def44f --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll @@ -0,0 +1,95 @@ +; Test that llvm-reduce can move intermediate values by inserting +; early returns when the function already has a different return type +; +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + + +@gv = global i32 0, align 4 +@ptr_array = global [2 x ptr] [ptr @inst_to_return_has_different_type_but_no_func_call_use, + ptr @multiple_callsites_wrong_return_type] + +; Should rewrite this return from i64 to i32 since the function has no +; uses. 
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_no_func_call_use( +; RESULT-LABEL: define i32 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) { +; RESULT-NEXT: %load = load i32, ptr %arg, align 4 +; RESULT-NEXT: ret i32 %load +define i64 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i64 0 +} + +; INTERESTING-LABEL: @callsite_different_type_unused_0( +; RESULT-LABEL: define i64 @inst_to_return_has_different_type_but_call_result_unused( +; RESULT-NEXT: %load = load i32, ptr %arg +; RESULT-NEXT: store i32 %load, ptr @gv +; RESULT-NEXT: ret i64 0 +define void @callsite_different_type_unused_0(ptr %arg) { + %unused0 = call i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) + %unused1 = call i64 @inst_to_return_has_different_type_but_call_result_unused(ptr null) + ret void +} + +; TODO: Could rewrite this return from i64 to i32 since the callsite is unused. +; INTERESTING-LABEL: @inst_to_return_has_different_type_but_call_result_unused( +; RESULT-LABEL: define i64 @inst_to_return_has_different_type_but_call_result_unused( +; RESULT: ret i64 0 +define i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i64 0 +} + +; INTERESTING-LABEL: @multiple_callsites_wrong_return_type( +; RESULT-LABEL: define i64 @multipl
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133627 >From 2ee2c34b9da4fc887d33e0f6eb5402d60fdd30c3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Mar 2025 14:33:36 +0700 Subject: [PATCH 1/2] llvm-reduce: Reduce with early return of arguments Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. --- .../reduce-values-to-return-args.ll | 77 +++ llvm/tools/llvm-reduce/DeltaPasses.def| 6 +- .../deltas/ReduceValuesToReturn.cpp | 42 +- .../llvm-reduce/deltas/ReduceValuesToReturn.h | 1 + 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll new file mode 100644 index 0..abbc643822033 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll @@ -0,0 +1,77 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=arguments-to-return --test FileCheck --test-arg --check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefixes=RESULT %s < %t + + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret void +} + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return_existing_ret( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define i32 
@move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret i32 0 +} + +; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return( +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg +define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +entry: + br i1 %cond0, label %bb0, label %bb1 + +bb0: + %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ] + store i32 %arg, ptr %ptr0 + store i32 %phi, ptr %ptr1 + br label %bb1 + +bb1: + br i1 %cond1, label %bb0, label %bb2 + +bb2: + ret void +} + +; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) { +; INTERESTING: %arg1 + +; RESULT-LABEL: define ptr @keep_second_arg( +; RESULT-NEXT: ret ptr %arg1 +; RESULT-NEXT: } +define void @keep_second_arg(i32 %arg0, ptr %arg1) { + store i32 %arg0, ptr %arg1 + ret void +} + +; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; INTERESTING: i32 %arg2 + +; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg2 +define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +entry: + br i1 %arg0, label %bb0, label %bb1 + +bb0: + store i32 %arg2, ptr %arg1 + ret void + +bb1: + ret void +} diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def b/llvm/tools/llvm-reduce/DeltaPasses.def index 421e4472006b6..3aed0ccd74b84 100644 --- a/llvm/tools/llvm-reduce/DeltaPasses.def +++ b/llvm/tools/llvm-reduce/DeltaPasses.def @@ -49,7 +49,11 @@ DELTA_PASS_IR("attributes", reduceAttributesDeltaPass, "Reducing Attributes") DELTA_PASS_IR("target-features-attr", reduceTargetFeaturesAttrDeltaPass, "Reducing target-features") DELTA_PASS_IR("module-data", reduceModuleDataDeltaPass, "Reducing Module Data") DELTA_PASS_IR("opcodes", 
reduceOpcodesDeltaPass, "Reducing Opcodes") -DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, "Early return of instructions") + +DELTA_PASS_IR("arguments-to-return", reduceArgumentsToReturnDeltaPass, + "Converting arguments to function return value") +DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, + "Early return of instructions") DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing Volatile Instructions") DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing Atomic Ordering") DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic Sync Scopes") diff --git a/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp b/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp index 9ee0af3e1a69b..3e400ffc89482 100644 --- a/llvm/tools/llvm-reduce/de
[llvm-branch-commits] [clang] [llvm] [llvm] Introduce callee_type metadata (PR #87573)
@@ -1,23 +1,23 @@ ;; Test if the callee_type metadata is dropped when an indirect function call through a function ptr is promoted ;; to a direct function call during instcombine. -; RUN: opt < %s -O2 | llvm-dis | FileCheck %s +; RUN: opt < %s -passes="cgscc(inline),instcombine" -S | FileCheck %s arsenm wrote: instcombine test should not be running the inliner, only instcombine https://github.com/llvm/llvm-project/pull/87573 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove uselist for constantdata (PR #134692)
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/134692 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/137314 This is a follow up change to eliminating uselists for ConstantData. In the previous revision, ConstantData had a replacement reference count instead of a uselist. This reference count was misleading, and not useful in the same way as it would be for another value. The references may not have even been in the current module, since these are shared throughout the LLVMContext. This doesn't space leak any more than we previously did; nothing was attempting to garbage collect unused constants. Previously the use_empty, and hasNUses type of APIs were supported through the reference count. These now behave as if the uses are always empty. Ideally it would be illegal to inspect these, but this forces API complexity into quite a few places. It may be doable to make it illegal to check these counts, but I would like there to be a targeted fuzzing effort to make sure every transform properly deals with a constant in every operand position. All tests pass if I turn the hasNUses* and getNumUses queries into assertions, only hasOneUse in particular appears to hit in some set of contexts. I've added unit tests to ensure logical consistency between these cases >From 96635fca09fa1835e372f175eff83013b03da28e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 19 Apr 2025 21:11:23 +0200 Subject: [PATCH] IR: Remove reference counts from ConstantData This is a follow up change to eliminating uselists for ConstantData. In the previous revision, ConstantData had a replacement reference count instead of a uselist. This reference count was misleading, and not useful in the same way as it would be for another value. The references may not have even been in the current module, since these are shared throughout the LLVMContext. This doesn't space leak any more than we previously did; nothing was attempting to garbage collect unused constants. 
Previously the use_empty, and hasNUses type of APIs were supported through the reference count. These now behave as if the uses are always empty. Ideally it would be illegal to inspect these, but this forces API complexity into quite a few places. It may be doable to make it illegal to check these counts, but I would like there to be a targeted fuzzing effort to make sure every transform properly deals with a constant in every operand position. All tests pass if I turn the hasNUses* and getNumUses queries into assertions, only hasOneUse in particular appears to hit in some set of contexts. I've added unit tests to ensure logical consistency between these cases --- llvm/docs/ReleaseNotes.md | 4 +- llvm/include/llvm/IR/Constants.h| 3 +- llvm/include/llvm/IR/Use.h | 9 +-- llvm/include/llvm/IR/Value.h| 118 ++-- llvm/lib/IR/Instruction.cpp | 4 +- llvm/lib/IR/Value.cpp | 28 +++ llvm/unittests/IR/ConstantsTest.cpp | 36 + 7 files changed, 98 insertions(+), 104 deletions(-) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 411cefe004e16..4665302a4144c 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing. Changes to the LLVM IR -- -* It is no longer permitted to inspect the uses of ConstantData +* It is no longer permitted to inspect the uses of ConstantData. Use + count APIs will behave as if they have no uses (i.e. use_empty() is + always true). * The `nocapture` attribute has been replaced by `captures(none)`. * The constant expression variants of the following instructions have been diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 7b1dbdece43f7..07d71cf7108d2 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -51,7 +51,8 @@ template struct ConstantAggrKeyType; /// Since they can be in use by unrelated modules (and are never based on /// GlobalValues), it never makes sense to RAUW them. 
/// -/// These do not have use lists. It is illegal to inspect the uses. +/// These do not have use lists. It is illegal to inspect the uses. These behave +/// as if they have no uses (i.e. use_empty() is always true). class ConstantData : public Constant { constexpr static IntrusiveOperandsAllocMarker AllocMarker{0}; diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h index bcd1fd6677497..dc22d69ba561d 100644 --- a/llvm/include/llvm/IR/Use.h +++ b/llvm/include/llvm/IR/Use.h @@ -23,7 +23,6 @@ namespace llvm { template struct simplify_type; -class ConstantData; class User; class Value; @@ -43,7 +42,7 @@ class Use { private: /// Destructor - Only for zap() - ~Use(); + ~Use() { removeFromList(); } /// Constructor Use(User *Parent) : Parent(Parent) {} @@ -85,10 +84,8 @@ class Use { Use **Prev = nullptr; User *Parent = nullptr; - inline void addT
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#137314** https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/137314?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#137313** https://app.graphite.dev/github/pr/llvm/llvm-project/137313?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/137314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes This is a follow up change to eliminating uselists for ConstantData. In the previous revision, ConstantData had a replacement reference count instead of a uselist. This reference count was misleading, and not useful in the same way as it would be for another value. The references may not have even been in the current module, since these are shared throughout the LLVMContext. This doesn't space leak any more than we previously did; nothing was attempting to garbage collect unused constants. Previously the use_empty, and hasNUses type of APIs were supported through the reference count. These now behave as if the uses are always empty. Ideally it would be illegal to inspect these, but this forces API complexity into quite a few places. It may be doable to make it illegal to check these counts, but I would like there to be a targeted fuzzing effort to make sure every transform properly deals with a constant in every operand position. All tests pass if I turn the hasNUses* and getNumUses queries into assertions, only hasOneUse in particular appears to hit in some set of contexts. I've added unit tests to ensure logical consistency between these cases --- Full diff: https://github.com/llvm/llvm-project/pull/137314.diff 7 Files Affected: - (modified) llvm/docs/ReleaseNotes.md (+3-1) - (modified) llvm/include/llvm/IR/Constants.h (+2-1) - (modified) llvm/include/llvm/IR/Use.h (+3-6) - (modified) llvm/include/llvm/IR/Value.h (+40-78) - (modified) llvm/lib/IR/Instruction.cpp (+1-3) - (modified) llvm/lib/IR/Value.cpp (+13-15) - (modified) llvm/unittests/IR/ConstantsTest.cpp (+36) ``diff diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 411cefe004e16..4665302a4144c 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing. 
Changes to the LLVM IR -- -* It is no longer permitted to inspect the uses of ConstantData +* It is no longer permitted to inspect the uses of ConstantData. Use + count APIs will behave as if they have no uses (i.e. use_empty() is + always true). * The `nocapture` attribute has been replaced by `captures(none)`. * The constant expression variants of the following instructions have been diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 7b1dbdece43f7..07d71cf7108d2 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -51,7 +51,8 @@ template struct ConstantAggrKeyType; /// Since they can be in use by unrelated modules (and are never based on /// GlobalValues), it never makes sense to RAUW them. /// -/// These do not have use lists. It is illegal to inspect the uses. +/// These do not have use lists. It is illegal to inspect the uses. These behave +/// as if they have no uses (i.e. use_empty() is always true). class ConstantData : public Constant { constexpr static IntrusiveOperandsAllocMarker AllocMarker{0}; diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h index bcd1fd6677497..dc22d69ba561d 100644 --- a/llvm/include/llvm/IR/Use.h +++ b/llvm/include/llvm/IR/Use.h @@ -23,7 +23,6 @@ namespace llvm { template struct simplify_type; -class ConstantData; class User; class Value; @@ -43,7 +42,7 @@ class Use { private: /// Destructor - Only for zap() - ~Use(); + ~Use() { removeFromList(); } /// Constructor Use(User *Parent) : Parent(Parent) {} @@ -85,10 +84,8 @@ class Use { Use **Prev = nullptr; User *Parent = nullptr; - inline void addToList(unsigned &Count); - inline void addToList(Use *&List); - inline void removeFromList(unsigned &Count); - inline void removeFromList(Use *&List); + inline void addToList(Use **List); + inline void removeFromList(); }; /// Allow clients to treat uses just like values when using diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h 
index 180b6238eda6c..ae874304c4316 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -116,10 +116,7 @@ class Value { private: Type *VTy; - union { -Use *List = nullptr; -unsigned Count; - } Uses; + Use *UseList = nullptr; friend class ValueAsMetadata; // Allow access to IsUsedByMD. friend class ValueHandleBase; // Allow access to HasValueHandle. @@ -347,23 +344,21 @@ class Value { bool use_empty() const { assertModuleIsMaterialized(); -return hasUseList() ? Uses.List == nullptr : Uses.Count == 0; +return UseList == nullptr; } - bool materialized_use_empty() const { -return hasUseList() ? Uses.List == nullptr : !Uses.Count; - } + bool materialized_use_empty() const { return UseList == nullptr; } using use_iterator = use_iterator_impl; using const_use_iterator = use_iterator_impl; use_iterator materialized_use_begin() { assert(hasUseList())
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/137314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove uselist for constantdata (PR #134692)
arsenm wrote: Reposted as essentially the same form in https://github.com/llvm/llvm-project/pull/137313. https://github.com/llvm/llvm-project/pull/137314 takes the next step and eliminates the reference counts https://github.com/llvm/llvm-project/pull/134692 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,113 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. + +// Attributes for SD symbols. 
+struct SDAttr { + GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for ED symbols. +struct EDAttr { + bool IsReadOnly = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDAmode Amode; uweigand wrote: Just to double-check: your current code does *not* set Amode on the PR symbol. Is it necessary to do this or not? https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,113 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. + +// Attributes for SD symbols. 
+struct SDAttr { + GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for ED symbols. +struct EDAttr { + bool IsReadOnly = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDAmode Amode; + GOFF::ESDRmode Rmode; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented; + GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate; + GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial; + GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0; + GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword; +}; + +// Attributes for LD symbols. +struct LDAttr { + bool IsRenamable = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong; uweigand wrote: I see. This is not currently reflected in the HLASM output, however. How would one do this? https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,145 @@ +//===- MCSectionGOFF.cpp - GOFF Code Section Representation ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "llvm/MC/MCSectionGOFF.h" +#include "llvm/BinaryFormat/GOFF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +void emitRMode(raw_ostream &OS, GOFF::ESDRmode Rmode, bool UseParenthesis) { + if (Rmode != GOFF::ESD_RMODE_None) { +OS << "RMODE" << (UseParenthesis ? '(' : ' '); +switch (Rmode) { +case GOFF::ESD_RMODE_24: + OS << "24"; + break; +case GOFF::ESD_RMODE_31: + OS << "31"; + break; +case GOFF::ESD_RMODE_64: + OS << "64"; + break; +case GOFF::ESD_RMODE_None: + break; +} +if (UseParenthesis) + OS << ')'; + } +} + +void emitCATTR(raw_ostream &OS, StringRef Name, StringRef ParentName, + bool EmitAmodeAndRmode, GOFF::ESDAmode Amode, + GOFF::ESDRmode Rmode, GOFF::ESDAlignment Alignment, + GOFF::ESDLoadingBehavior LoadBehavior, + GOFF::ESDExecutable Executable, bool IsReadOnly, + StringRef PartName) { + if (EmitAmodeAndRmode && Amode != GOFF::ESD_AMODE_None) { +OS << ParentName << " AMODE "; +switch (Amode) { +case GOFF::ESD_AMODE_24: + OS << "24"; + break; +case GOFF::ESD_AMODE_31: + OS << "31"; + break; +case GOFF::ESD_AMODE_ANY: + OS << "ANY"; + break; +case GOFF::ESD_AMODE_64: + OS << "64"; + break; +case GOFF::ESD_AMODE_MIN: + OS << "ANY64"; + break; +case GOFF::ESD_AMODE_None: + break; +} +OS << "\n"; + } + if (EmitAmodeAndRmode && Rmode != GOFF::ESD_RMODE_None) { +OS << ParentName << ' '; +emitRMode(OS, Rmode, /*UseParenthesis=*/false); +OS << "\n"; + } + OS << Name << " CATTR "; + OS << "ALIGN(" << static_cast(Alignment) << ")"; + switch (LoadBehavior) { + case GOFF::ESD_LB_Deferred: +OS << ",DEFLOAD"; +break; + case GOFF::ESD_LB_NoLoad: +OS << ",NOLOAD"; +break; + default: +break; + } + switch (Executable) 
{ + case GOFF::ESD_EXE_CODE: +OS << ",EXECUTABLE"; +break; + case GOFF::ESD_EXE_DATA: +OS << ",NOTEXECUTABLE"; +break; + default: +break; + } + if (IsReadOnly) +OS << ",READONLY"; + if (Rmode != GOFF::ESD_RMODE_None) { +OS << ','; +emitRMode(OS, Rmode, /*UseParenthesis=*/true); + } + if (!PartName.empty()) +OS << ",PART(" << PartName << ")"; + OS << '\n'; +} +} // namespace + +void MCSectionGOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, + raw_ostream &OS, + uint32_t Subsection) const { + switch (SymbolType) { + case GOFF::ESD_ST_SectionDefinition: { +OS << Name << " CSECT\n"; +Emitted = true; +break; + } + case GOFF::ESD_ST_ElementDefinition: { +bool ParentEmitted = getParent()->Emitted; +getParent()->printSwitchToSection(MAI, T, OS, Subsection); +if (!Emitted) { + emitCATTR(OS, Name, getParent()->getName(), !ParentEmitted, +EDAttributes.Amode, EDAttributes.Rmode, EDAttributes.Alignment, +EDAttributes.LoadBehavior, EDAttributes.Executable, +EDAttributes.IsReadOnly, StringRef()); + Emitted = true; +} else + OS << Name << " CATTR ,\n"; +break; + } + case GOFF::ESD_ST_PartReference: { +MCSectionGOFF *ED = getParent(); +bool SDEmitted = ED->getParent()->Emitted; +ED->getParent()->printSwitchToSection(MAI, T, OS, Subsection); +if (!Emitted) { + emitCATTR(OS, ED->getName(), ED->getParent()->getName(), !SDEmitted, +PRAttributes.Amode, getParent()->EDAttributes.Rmode, +PRAttributes.Alignment, getParent()->EDAttributes.LoadBehavior, uweigand wrote: The HLASM docs contain the somewhat cryptic statement: "Binding attributes assigned to the class are also assigned to the part." where is not really defined anywhere (I can see) what exactly "binding attributes" mean. Do we need to replicate the HLASM behavior of setting Amode and LoadBehavior or not? https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,106 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. + +// Attributes for SD symbols. 
+struct SDAttr { + GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for ED symbols. +struct EDAttr { + bool IsReadOnly = false; + GOFF::ESDRmode Rmode; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented; + GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate; + GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial; + GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0; + GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword; +}; + +// Attributes for LD symbols. +struct LDAttr { + bool IsRenamable = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong; + GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink; + GOFF::ESDAmode Amode; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for PR symbols. +struct PRAttr { + bool IsRenamable = false; + bool IsReadOnly = false; // Not documented. uweigand wrote: Does it ever make sense for this value to differ from the ED value? Or is this one of those attributes that should be copied from the element to the part? It doesn't seem possible to specify differing values in HLASM ... https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,106 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. + +// Attributes for SD symbols. 
+struct SDAttr { + GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for ED symbols. +struct EDAttr { + bool IsReadOnly = false; + GOFF::ESDRmode Rmode; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented; + GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate; + GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial; + GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0; + GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword; +}; + +// Attributes for LD symbols. +struct LDAttr { + bool IsRenamable = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong; + GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink; + GOFF::ESDAmode Amode; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for PR symbols. +struct PRAttr { + bool IsRenamable = false; + bool IsReadOnly = false; // Not documented. + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDLinkageType Linkage = GOFF::ESD_LT_XPLink; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; + GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Byte; uweigand wrote: Same question for the alignment, can it ever differ from the ED alignment? How would you specify this in HLASM? https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -223,13 +197,95 @@ void GOFFOstream::finalizeRecord() { } namespace { +// A GOFFSymbol holds all the data required for writing an ESD record. +class GOFFSymbol { +public: + std::string Name; + uint32_t EsdId; + uint32_t ParentEsdId; + uint64_t Offset = 0; // Offset of the symbol into the section. LD only. + // Offset is only 32 bit, the larger type is used to + // enable error checking. + GOFF::ESDSymbolType SymbolType; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_ProgramManagementBinder; + + GOFF::BehavioralAttributes BehavAttrs; + GOFF::SymbolFlags SymbolFlags; + uint32_t SortKey = 0; + uint32_t SectionLength = 0; + uint32_t ADAEsdId = 0; + uint32_t EASectionEDEsdId = 0; + uint32_t EASectionOffset = 0; + uint8_t FillByteValue = 0; + + GOFFSymbol() : EsdId(0), ParentEsdId(0) {} + + GOFFSymbol(StringRef Name, uint32_t EsdID, const GOFF::SDAttr &Attr) + : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(0), +SymbolType(GOFF::ESD_ST_SectionDefinition) { +BehavAttrs.setTaskingBehavior(Attr.TaskingBehavior); +BehavAttrs.setBindingScope(Attr.BindingScope); + } + + GOFFSymbol(StringRef Name, uint32_t EsdID, uint32_t ParentEsdID, + const GOFF::EDAttr &Attr) + : Name(Name.data(), Name.size()), EsdId(EsdID), ParentEsdId(ParentEsdID), +SymbolType(GOFF::ESD_ST_ElementDefinition) { +this->NameSpace = Attr.NameSpace; +// TODO Do we need/should set the "mangled" flag? +SymbolFlags.setFillBytePresence(1); uweigand wrote: Oh, and one more thing here: by hard-coding this here, we're not emitting any HLASM output. Should we emit a CATTR FILL(0) then as well? (But that's only supported on the PART apparently, while the fill byte is supposed to go on the ED?) https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][test] Fix callcont-fallthru.s after #129481 (PR #135867)
paschalis-mpeis wrote: Hey folks, any updates on this? I spent some time experimenting with @MaskRay's suggestion. I used a mock libc shared object that had a `puts` symbol. Indeed there won't be unresolved symbols now; however, GNU `nm` still doesn't show a PLT entry when using `--synthetic`. https://github.com/llvm/llvm-project/pull/135867 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][OpenMP] Convert omp.cancel sections to LLVMIR (PR #137193)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/137193 >From 4d28bd8f0106321af6679e9b155fd36ef2f919fc Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 10 Apr 2025 11:43:18 + Subject: [PATCH] [mlir][OpenMP] Convert omp.cancel sections to LLVMIR This is quite ugly but it is the best I could think of. The old FiniCBWrapper was way too brittle depending upon the exact block structure inside of the section, and could be confused by any control flow in the section (e.g. an if clause on cancel). The wording in the comment and variable names didn't seem to match where it was actually branching too as well. Clang's (non-OpenMPIRBuilder) lowering for cancel inside of sections branches to a block containing __kmpc_for_static_fini. This was hard to achieve here because sometimes the FiniCBWrapper has to run before the worksharing loop finalization has been crated. To get around this ordering issue I created a dummy branch to a dummy block, which is then fixed later once all of the information is available. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 27 --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 6 +- mlir/test/Target/LLVMIR/openmp-cancel.mlir| 76 +++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 16 4 files changed, 97 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index be05f01c94603..3f19088e6c73d 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2172,6 +2172,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( if (!updateToLocation(Loc)) return Loc.IP; + // FiniCBWrapper needs to create a branch to the loop finalization block, but + // this has not been created yet at some times when this callback runs. 
+ SmallVector CancellationBranches; auto FiniCBWrapper = [&](InsertPointTy IP) { if (IP.getBlock()->end() != IP.getPoint()) return FiniCB(IP); @@ -2179,16 +2182,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( // will fail because that function requires the Finalization Basic Block to // have a terminator, which is already removed by EmitOMPRegionBody. // IP is currently at cancelation block. -// We need to backtrack to the condition block to fetch -// the exit block and create a branch from cancelation -// to exit block. -IRBuilder<>::InsertPointGuard IPG(Builder); -Builder.restoreIP(IP); -auto *CaseBB = IP.getBlock()->getSinglePredecessor(); -auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); -auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); -Instruction *I = Builder.CreateBr(ExitBB); -IP = InsertPointTy(I->getParent(), I->getIterator()); +BranchInst *DummyBranch = Builder.CreateBr(IP.getBlock()); +IP = InsertPointTy(DummyBranch->getParent(), DummyBranch->getIterator()); +CancellationBranches.push_back(DummyBranch); return FiniCB(IP); }; @@ -2251,6 +2247,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( return WsloopIP.takeError(); InsertPointTy AfterIP = *WsloopIP; + BasicBlock *LoopFini = AfterIP.getBlock()->getSinglePredecessor(); + assert(LoopFini && "Bad structure of static workshare loop finalization"); + // Apply the finalization callback in LoopAfterBB auto FiniInfo = FinalizationStack.pop_back_val(); assert(FiniInfo.DK == OMPD_sections && @@ -2264,6 +2263,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( AfterIP = {FiniBB, FiniBB->begin()}; } + // Now we can fix the dummy branch to point to the right place + if (!CancellationBranches.empty()) { +for (BranchInst *DummyBranch : CancellationBranches) { + assert(DummyBranch->getNumSuccessors() == 1); + DummyBranch->setSuccessor(0, LoopFini); +} + } + return AfterIP; } diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a29f9b7e2f96f..473688acdcd69 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -161,7 +161,8 @@ static LogicalResult checkImplementationStatus(Operation &op) { auto checkCancelDirective = [&todo](auto op, LogicalResult &result) { omp::ClauseCancellationConstructType cancelledDirective = op.getCancelDirective(); -if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel) +if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel && +cancelledDirective != omp::ClauseCancellationConstructType::Sections) result = todo("cancel directive construct type not yet supported"); }; auto checkDepend = [&todo](auto op, LogicalResult &result) { @@
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/137314 >From ee561eefea8e236d9c5d7e40f4b82d662df5f873 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 19 Apr 2025 21:11:23 +0200 Subject: [PATCH 1/2] IR: Remove reference counts from ConstantData This is a follow up change to eliminating uselists for ConstantData. In the previous revision, ConstantData had a replacement reference count instead of a uselist. This reference count was misleading, and not useful in the same way as it would be for another value. The references may not have even been in the current module, since these are shared throughout the LLVMContext. This doesn't space leak any more than we previously did; nothing was attempting to garbage collect unused constants. Previously the use_empty, and hasNUses type of APIs were supported through the reference count. These now behave as if the uses are always empty. Ideally it would be illegal to inspect these, but this forces API complexity into quite a few places. It may be doable to make it illegal to check these counts, but I would like there to be a targeted fuzzing effort to make sure every transform properly deals with a constant in every operand position. All tests pass if I turn the hasNUses* and getNumUses queries into assertions, only hasOneUse in particular appears to hit in some set of contexts. 
I've added unit tests to ensure logical consistency between these cases --- llvm/docs/ReleaseNotes.md | 4 +- llvm/include/llvm/IR/Constants.h | 3 +- llvm/include/llvm/IR/Use.h | 9 +- llvm/include/llvm/IR/Value.h | 118 +++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- llvm/lib/IR/AsmWriter.cpp | 3 +- llvm/lib/IR/Instruction.cpp| 4 +- llvm/lib/IR/Value.cpp | 28 +++-- llvm/unittests/IR/ConstantsTest.cpp| 36 +++ 9 files changed, 100 insertions(+), 107 deletions(-) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 411cefe004e16..4665302a4144c 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing. Changes to the LLVM IR -- -* It is no longer permitted to inspect the uses of ConstantData +* It is no longer permitted to inspect the uses of ConstantData. Use + count APIs will behave as if they have no uses (i.e. use_empty() is + always true). * The `nocapture` attribute has been replaced by `captures(none)`. * The constant expression variants of the following instructions have been diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 7b1dbdece43f7..07d71cf7108d2 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -51,7 +51,8 @@ template struct ConstantAggrKeyType; /// Since they can be in use by unrelated modules (and are never based on /// GlobalValues), it never makes sense to RAUW them. /// -/// These do not have use lists. It is illegal to inspect the uses. +/// These do not have use lists. It is illegal to inspect the uses. These behave +/// as if they have no uses (i.e. use_empty() is always true). 
class ConstantData : public Constant { constexpr static IntrusiveOperandsAllocMarker AllocMarker{0}; diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h index bcd1fd6677497..dc22d69ba561d 100644 --- a/llvm/include/llvm/IR/Use.h +++ b/llvm/include/llvm/IR/Use.h @@ -23,7 +23,6 @@ namespace llvm { template struct simplify_type; -class ConstantData; class User; class Value; @@ -43,7 +42,7 @@ class Use { private: /// Destructor - Only for zap() - ~Use(); + ~Use() { removeFromList(); } /// Constructor Use(User *Parent) : Parent(Parent) {} @@ -85,10 +84,8 @@ class Use { Use **Prev = nullptr; User *Parent = nullptr; - inline void addToList(unsigned &Count); - inline void addToList(Use *&List); - inline void removeFromList(unsigned &Count); - inline void removeFromList(Use *&List); + inline void addToList(Use **List); + inline void removeFromList(); }; /// Allow clients to treat uses just like values when using diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 180b6238eda6c..ae874304c4316 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -116,10 +116,7 @@ class Value { private: Type *VTy; - union { -Use *List = nullptr; -unsigned Count; - } Uses; + Use *UseList = nullptr; friend class ValueAsMetadata; // Allow access to IsUsedByMD. friend class ValueHandleBase; // Allow access to HasValueHandle. @@ -347,23 +344,21 @@ class Value { bool use_empty() const { assertModuleIsMaterialized(); -return hasUseList() ? Uses.List == nullptr : Uses.Count == 0; +return UseList == nullptr; } - bool materialized_use_empty() const { -
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/137338 The previous statement removed all uses. >From c62d35ffa390f7bcceb6d81d76e6dd178e68b847 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 25 Apr 2025 16:20:32 +0200 Subject: [PATCH] ARM: Remove unnecessary use_empty check The previous statement removed all uses. --- llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index e998359bd3496..ce59ae0c95dcf 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -1099,8 +1099,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, // The instruction has now been "absorbed" into the phi value Offs->replaceAllUsesWith(NewPhi); - if (Offs->use_empty()) -Offs->eraseFromParent(); + Offs->eraseFromParent(); // Clean up the old increment in case it's unused because we built a new // one if (IncInstruction->use_empty()) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/137338 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#137338** https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/137338?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#137337** https://app.graphite.dev/github/pr/llvm/llvm-project/137337?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/137338 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
llvmbot wrote: @llvm/pr-subscribers-backend-arm Author: Matt Arsenault (arsenm) Changes The previous statement removed all uses. --- Full diff: https://github.com/llvm/llvm-project/pull/137338.diff 1 Files Affected: - (modified) llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp (+1-2) ``diff diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index e998359bd3496..ce59ae0c95dcf 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -1099,8 +1099,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, // The instruction has now been "absorbed" into the phi value Offs->replaceAllUsesWith(NewPhi); - if (Offs->use_empty()) -Offs->eraseFromParent(); + Offs->eraseFromParent(); // Clean up the old increment in case it's unused because we built a new // one if (IncInstruction->use_empty()) `` https://github.com/llvm/llvm-project/pull/137338 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][OpenMP] convert wsloop cancellation to LLVMIR (PR #137194)
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/137194 >From bb1af0154c62a622775f0f05cbdc486de69266c9 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 15 Apr 2025 15:05:50 + Subject: [PATCH] [mlir][OpenMP] convert wsloop cancellation to LLVMIR Taskloop support will follow in a later patch. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 40 - mlir/test/Target/LLVMIR/openmp-cancel.mlir| 87 +++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 16 3 files changed, 125 insertions(+), 18 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 473688acdcd69..c05bc98ec7be8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -161,8 +161,7 @@ static LogicalResult checkImplementationStatus(Operation &op) { auto checkCancelDirective = [&todo](auto op, LogicalResult &result) { omp::ClauseCancellationConstructType cancelledDirective = op.getCancelDirective(); -if (cancelledDirective != omp::ClauseCancellationConstructType::Parallel && -cancelledDirective != omp::ClauseCancellationConstructType::Sections) +if (cancelledDirective == omp::ClauseCancellationConstructType::Taskgroup) result = todo("cancel directive construct type not yet supported"); }; auto checkDepend = [&todo](auto op, LogicalResult &result) { @@ -2356,6 +2355,30 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, ? llvm::omp::WorksharingLoopType::DistributeForStaticLoop : llvm::omp::WorksharingLoopType::ForStaticLoop; + SmallVector cancelTerminators; + // This callback is invoked only if there is cancellation inside of the wsloop + // body. 
+ auto finiCB = [&](llvm::OpenMPIRBuilder::InsertPointTy ip) -> llvm::Error { +llvm::IRBuilderBase &llvmBuilder = ompBuilder->Builder; +llvm::IRBuilderBase::InsertPointGuard guard(llvmBuilder); + +// ip is currently in the block branched to if cancellation occured. +// We need to create a branch to terminate that block. +llvmBuilder.restoreIP(ip); + +// We must still clean up the wsloop after cancelling it, so we need to +// branch to the block that finalizes the wsloop. +// That block has not been created yet so use this block as a dummy for now +// and fix this after creating the wsloop. +cancelTerminators.push_back(llvmBuilder.CreateBr(ip.getBlock())); +return llvm::Error::success(); + }; + // We have to add the cleanup to the OpenMPIRBuilder before the body gets + // created in case the body contains omp.cancel (which will then expect to be + // able to find this cleanup callback). + ompBuilder->pushFinalizationCB({finiCB, llvm::omp::Directive::OMPD_for, + constructIsCancellable(wsloopOp)}); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -2377,6 +2400,19 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(handleError(wsloopIP, opInst))) return failure(); + ompBuilder->popFinalizationCB(); + if (!cancelTerminators.empty()) { +// If we cancelled the loop, we should branch to the finalization block of +// the wsloop (which is always immediately before the loop continuation +// block). Now the finalization has been created, we can fix the branch. +llvm::BasicBlock *wsloopFini = wsloopIP->getBlock()->getSinglePredecessor(); +for (llvm::BranchInst *cancelBranch : cancelTerminators) { + assert(cancelBranch->getNumSuccessors() == 1 && + "cancel branch should have one target"); + cancelBranch->setSuccessor(0, wsloopFini); +} + } + // Process the reductions if required. 
if (failed(createReductionsAndCleanup( wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, diff --git a/mlir/test/Target/LLVMIR/openmp-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-cancel.mlir index fca16b936fc85..3c195a98d1000 100644 --- a/mlir/test/Target/LLVMIR/openmp-cancel.mlir +++ b/mlir/test/Target/LLVMIR/openmp-cancel.mlir @@ -156,3 +156,90 @@ llvm.func @cancel_sections_if(%cond : i1) { // CHECK: ret void // CHECK: .cncl:; preds = %[[VAL_27]] // CHECK: br label %[[VAL_19]] + +llvm.func @cancel_wsloop_if(%lb : i32, %ub : i32, %step : i32, %cond : i1) { + omp.wsloop { +omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.cancel cancellation_construct_type(loop) if(%cond) + omp.yield +} + } + llvm.return +} +// CHECK-LABEL: define void @cancel_wsloop_if +// CHECK: %[[VAL_0:.*]] = alloca i32, align 4 +// CHECK:
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133627 >From 2ee2c34b9da4fc887d33e0f6eb5402d60fdd30c3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Mar 2025 14:33:36 +0700 Subject: [PATCH 1/2] llvm-reduce: Reduce with early return of arguments Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. --- .../reduce-values-to-return-args.ll | 77 +++ llvm/tools/llvm-reduce/DeltaPasses.def| 6 +- .../deltas/ReduceValuesToReturn.cpp | 42 +- .../llvm-reduce/deltas/ReduceValuesToReturn.h | 1 + 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll new file mode 100644 index 0..abbc643822033 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll @@ -0,0 +1,77 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=arguments-to-return --test FileCheck --test-arg --check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefixes=RESULT %s < %t + + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret void +} + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return_existing_ret( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define i32 
@move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret i32 0 +} + +; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return( +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg +define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +entry: + br i1 %cond0, label %bb0, label %bb1 + +bb0: + %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ] + store i32 %arg, ptr %ptr0 + store i32 %phi, ptr %ptr1 + br label %bb1 + +bb1: + br i1 %cond1, label %bb0, label %bb2 + +bb2: + ret void +} + +; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) { +; INTERESTING: %arg1 + +; RESULT-LABEL: define ptr @keep_second_arg( +; RESULT-NEXT: ret ptr %arg1 +; RESULT-NEXT: } +define void @keep_second_arg(i32 %arg0, ptr %arg1) { + store i32 %arg0, ptr %arg1 + ret void +} + +; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; INTERESTING: i32 %arg2 + +; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg2 +define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +entry: + br i1 %arg0, label %bb0, label %bb1 + +bb0: + store i32 %arg2, ptr %arg1 + ret void + +bb1: + ret void +} diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def b/llvm/tools/llvm-reduce/DeltaPasses.def index 421e4472006b6..3aed0ccd74b84 100644 --- a/llvm/tools/llvm-reduce/DeltaPasses.def +++ b/llvm/tools/llvm-reduce/DeltaPasses.def @@ -49,7 +49,11 @@ DELTA_PASS_IR("attributes", reduceAttributesDeltaPass, "Reducing Attributes") DELTA_PASS_IR("target-features-attr", reduceTargetFeaturesAttrDeltaPass, "Reducing target-features") DELTA_PASS_IR("module-data", reduceModuleDataDeltaPass, "Reducing Module Data") DELTA_PASS_IR("opcodes", 
reduceOpcodesDeltaPass, "Reducing Opcodes") -DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, "Early return of instructions") + +DELTA_PASS_IR("arguments-to-return", reduceArgumentsToReturnDeltaPass, + "Converting arguments to function return value") +DELTA_PASS_IR("instructions-to-return", reduceInstructionsToReturnDeltaPass, + "Early return of instructions") DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing Volatile Instructions") DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing Atomic Ordering") DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic Sync Scopes") diff --git a/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp b/llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp index 9ee0af3e1a69b..3e400ffc89482 100644 --- a/llvm/tools/llvm-reduce/de
[llvm-branch-commits] [llvm] [Attributor] Use `getAssumedAddrSpace` to get address space for `AllocaInst` (PR #136865)
https://github.com/shiltian closed https://github.com/llvm/llvm-project/pull/136865 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-reduce: Change function return types if function is not called (PR #134035)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/134035 >From d65f9ca48d04cf36380bc43840527065195d9e4b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Apr 2025 11:45:24 +0700 Subject: [PATCH 1/4] llvm-reduce: Change function return types if function is not called Extend the early return on value reduction to mutate the function return type if the function has no call uses. This could be generalized to rewrite cases where all callsites are used, but it turns out that complicates the visitation order given we try to compute all opportunities up front. This is enough to cleanup the common case where we end up with one function with a return of an uninteresting constant. --- .../reduce-instructions-to-return.ll | 20 +++- ...reduce-values-to-return-new-return-type.ll | 95 +++ .../deltas/ReduceValuesToReturn.cpp | 7 +- 3 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll index 2af87aad05169..77501418f5283 100644 --- a/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll +++ b/llvm/test/tools/llvm-reduce/reduce-instructions-to-return.ll @@ -196,13 +196,25 @@ define i32 @callsite_already_new_return_type(ptr %arg) { ; INTERESTING: ret ; RESULT-LABEL: define ptr @non_void_no_op( -; RESULT: ret ptr null +; RESULT-NEXT: %load = load i32, ptr %arg +; RESULT-NEXT: store i32 %load, ptr @gv +; RESULT-NEXT: ret ptr null define ptr @non_void_no_op(ptr %arg) { %load = load i32, ptr %arg store i32 %load, ptr @gv ret ptr null } +; INTERESTING-LABEL: @non_void_no_op_caller( + +; RESULT-LABEL: define ptr @non_void_no_op_caller(ptr %arg) { +; RESULT-NEXT: %call = call ptr @non_void_no_op(ptr %arg) +; RESULT-NEXT: ret ptr %call +define ptr @non_void_no_op_caller(ptr %arg) { + %call = call ptr @non_void_no_op(ptr %arg) + ret 
ptr %call +} + ; INTERESTING-LABEL: @non_void_same_type_use( ; INTERESTING: = load ; INTERESTING: ret @@ -230,6 +242,12 @@ define i32 @non_void_bitcastable_type_use(ptr %arg) { ret i32 0 } +; INTERESTING-LABEL: @non_void_bitcastable_type_use_caller( +define i32 @non_void_bitcastable_type_use_caller(ptr %arg) { + %ret = call i32 @non_void_bitcastable_type_use(ptr %arg) + ret i32 %ret +} + ; INTERESTING-LABEL: @form_return_struct( ; INTERESTING: = load { i32, float } diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll new file mode 100644 index 0..9ddbbe3def44f --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-new-return-type.ll @@ -0,0 +1,95 @@ +; Test that llvm-reduce can move intermediate values by inserting +; early returns when the function already has a different return type +; +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + + +@gv = global i32 0, align 4 +@ptr_array = global [2 x ptr] [ptr @inst_to_return_has_different_type_but_no_func_call_use, + ptr @multiple_callsites_wrong_return_type] + +; Should rewrite this return from i64 to i32 since the function has no +; uses. 
+; INTERESTING-LABEL: @inst_to_return_has_different_type_but_no_func_call_use( +; RESULT-LABEL: define i32 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) { +; RESULT-NEXT: %load = load i32, ptr %arg, align 4 +; RESULT-NEXT: ret i32 %load +define i64 @inst_to_return_has_different_type_but_no_func_call_use(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i64 0 +} + +; INTERESTING-LABEL: @callsite_different_type_unused_0( +; RESULT-LABEL: define i64 @inst_to_return_has_different_type_but_call_result_unused( +; RESULT-NEXT: %load = load i32, ptr %arg +; RESULT-NEXT: store i32 %load, ptr @gv +; RESULT-NEXT: ret i64 0 +define void @callsite_different_type_unused_0(ptr %arg) { + %unused0 = call i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) + %unused1 = call i64 @inst_to_return_has_different_type_but_call_result_unused(ptr null) + ret void +} + +; TODO: Could rewrite this return from i64 to i32 since the callsite is unused. +; INTERESTING-LABEL: @inst_to_return_has_different_type_but_call_result_unused( +; RESULT-LABEL: define i64 @inst_to_return_has_different_type_but_call_result_unused( +; RESULT: ret i64 0 +define i64 @inst_to_return_has_different_type_but_call_result_unused(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i64 0 +} + +; INTERESTING-LABEL: @multiple_callsites_wrong_return_type( +; RESULT-LABEL: define i64 @multipl
[llvm-branch-commits] [llvm] llvm-reduce: Support exotic terminators in instructions-to-return (PR #134794)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/134794 >From 18d8c3083affbeb9d54ac5e558f427dcfd9da300 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 8 Apr 2025 11:16:01 +0700 Subject: [PATCH] llvm-reduce: Support exotic terminators in instructions-to-return Use splitBasicBlock and avoid directly dealing with the specific of how to trim the existing terminators. We just need to deal with unconditional branch to return. --- .../reduce-values-to-return-callbr.ll | 54 ++ .../reduce-values-to-return-invoke.ll | 56 +++ .../llvm-reduce/remove-bb-switch-default.ll | 6 +- .../deltas/ReduceValuesToReturn.cpp | 50 - llvm/tools/llvm-reduce/deltas/Utils.cpp | 2 +- 5 files changed, 126 insertions(+), 42 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll new file mode 100644 index 0..da2f225f0405b --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-callbr.ll @@ -0,0 +1,54 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + +@gv = global i32 0, align 4 + +; INTERESTING-LABEL: @callbr0( +; INTERESTING: %load0 = load i32, ptr %arg0 +; INTERESTING: store i32 %load0, ptr @gv + +; RESULT-LABEL: define void @callbr0(ptr %arg0) { +; RESULT: %load0 = load i32, ptr %arg0, align 4 +; RESULT-NEXT: %callbr = callbr i32 asm +define void @callbr0(ptr %arg0) { +entry: + %load0 = load i32, ptr %arg0 + %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0) + to label %one [label %two, label %three] +one: + store i32 %load0, ptr @gv + ret void + +two: + store i32 %load0, ptr @gv + ret void + 
+three: + store i32 %load0, ptr @gv + ret void +} + +; INTERESTING-LABEL: @callbr1( +; INTERESTING: %load0 = load i32, ptr %arg0 + +; RESULT-LABEL: define i32 @callbr1(ptr %arg0) { +; RESULT-NEXT: entry: +; RESULT-NEXT: %load0 = load i32, ptr %arg0 +; RESULT-NEXT: ret i32 %load0 +define void @callbr1(ptr %arg0) { +entry: + %load0 = load i32, ptr %arg0 + %callbr = callbr i32 asm "", "=r,r,!i,!i"(i32 %load0) + to label %one [label %two, label %three] +one: + store i32 %load0, ptr @gv + ret void + +two: + store i32 %load0, ptr @gv + ret void + +three: + store i32 %load0, ptr @gv + ret void +} diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll new file mode 100644 index 0..efa1e5377160e --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-invoke.ll @@ -0,0 +1,56 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=RESULT %s < %t + +@gv = global i32 0, align 4 + + +define i32 @has_invoke_user(ptr %arg) { + %load = load i32, ptr %arg + store i32 %load, ptr @gv + ret i32 9 +} + +declare i32 @__gxx_personality_v0(...) 
+ +; INTERESTING-LABEL: @invoker_keep_invoke( +; INTERESTING: %invoke +; RESULT: %invoke = invoke i32 @has_invoke_user(ptr %arg) +define void @invoker_keep_invoke(ptr %arg) personality ptr @__gxx_personality_v0 { +bb: + %invoke = invoke i32 @has_invoke_user(ptr %arg) +to label %bb3 unwind label %bb1 + +bb1: + landingpad { ptr, i32 } + catch ptr null + ret void + +bb3: + store i32 %invoke, ptr null + ret void +} + +; INTERESTING-LABEL: @invoker_drop_invoke( +; INTERESTING: %add = add i32 + +; RESULT-LABEL: define i32 @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr @__gxx_personality_v0 { +; RESULT-NEXT: bb: +; RESULT-NEXT: %add = add i32 %arg0, 9 +; RESULT-NEXT: ret i32 %add +; RESULT-NEXT: } +define void @invoker_drop_invoke(i32 %arg0, ptr %arg1) personality ptr @__gxx_personality_v0 { +bb: + %add = add i32 %arg0, 9 + %invoke = invoke i32 @has_invoke_user(ptr %arg1) +to label %bb3 unwind label %bb1 + +bb1: + landingpad { ptr, i32 } + catch ptr null + br label %bb3 + +bb3: + %phi = phi i32 [ %invoke, %bb ], [ %add, %bb1 ] + store i32 %phi, ptr null + ret void +} diff --git a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll index b509d1181f74d..27e599e45e9a3 100644 --- a/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll +++ b/llvm/test/tools/llvm-reduce/remove-bb-switch-default.ll @@ -16,13 +16,14 @@ ; RESULT0-NEXT: br i1 %arg0, label %bb1, label %bb2 ; RESULT0: bb1: -; RESULT0: %bb1.phi =
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
arsenm wrote: ### Merge activity * **Apr 25, 11:24 AM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/137338). https://github.com/llvm/llvm-project/pull/137338 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ARM: Remove unnecessary use_empty check (PR #137338)
https://github.com/tgymnich approved this pull request. https://github.com/llvm/llvm-project/pull/137338 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)
https://github.com/thetruestblue edited https://github.com/llvm/llvm-project/pull/137353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The elements are placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that its concat_vectors can be mapped to a BUILD_VECTOR and so unused elements are no longer referenced. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++ 6 files changed, 69 insertions(+), 187 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c183149b0863a..6ae1d019cad28 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1840,7 +1840,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2278,7 +2278,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9da2ba04f77cb..545da0a1fbfab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12264,7 +12264,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12957,17 +12957,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->get
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 51488b849e778d9cf7e56491a289d2d908bbf727 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 108 ++ llvm/lib/Target/X86/X86InstrCompiler.td | 7 ++ llvm/test/CodeGen/X86/atomic-load-store.ll| 81 + llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 5 files changed, 177 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 05387afaf840d..6733d4c7d31e8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = 
WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5982,6 +5985,89 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. 
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); +unsigned NumConcat = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +SmallVector ConcatOps(NumConcat); +SDValue UndefVal = DAG.getUNDEF(FirstVT); +ConcatOps[0] = LdOp; +for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; +return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); + + // Find the vector type that can load from. + st
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From bf3f6b0a6246e9e4890b320517b5f8333e638236 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG][X86] Split via Concat vector types for atomic load Vector types that aren't widened are 'split' via CONCAT_VECTORS so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 32 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 204 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..7905f5a94c579 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6733d4c7d31e8..9bbf583691106 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, 
Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1423,6 +1426,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + SDLoc dl(LD); + + EVT MemoryVT = LD->getMemoryVT(); + unsigned NumElts = MemoryVT.getVectorMinNumElements(); + + EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); + EVT ElemVT = + EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1); + + // Create a single atomic to load all the elements at once. + SDValue Atomic = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT, +LD->getChain(), LD->getBasePtr(), LD->getMemOperand()); + + // Instead of splitting, put all the elements back into a vector. + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) { +SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic, + DAG.getVectorIdxConstant(i, dl)); +Elt = DAG.getBitcast(ElemVT, Elt); +Ops.push_back(Elt); + } + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops); + + ReplaceValueWith(SDValue(LD, 0), Concat); + ReplaceValueWith(SDValue(LD, 1), LD->getChain()); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw 
$0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHECK0-NEXT:## implicit-def: $ecx +; CHECK0-NEXT:movw %dx, %cx +; CHECK0-NEXT:## implicit-def: $xmm1 +; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1 +; CHECK0-NEXT:movw %ax, %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +;
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The elements are placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that its concat_vectors can be mapped to a BUILD_VECTOR and so unused elements are no longer referenced. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++ 6 files changed, 69 insertions(+), 187 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c183149b0863a..6ae1d019cad28 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1840,7 +1840,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2278,7 +2278,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9da2ba04f77cb..545da0a1fbfab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12264,7 +12264,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12957,17 +12957,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->get
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54da9fe3c6a40..05387afaf840d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -459,6 +462,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, 
align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54da9fe3c6a40..05387afaf840d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -459,6 +462,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, 
align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +- llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a3e9700fa3089..e84d25afe620d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + if (I->getType()->getScalarType()->isPointerTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +unsigned AS = + cast<PointerType>(I->getType()->getScalarType())->getAddressSpace(); +ElementCount EC = cast<VectorType>(I->getType())->getElementCount(); +Value *BC = Builder.CreateBitCast( +Result, +VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), +EC)); +Value *IntToPtr = Builder.CreateIntToPtr( +BC, VectorType::get(PointerType::get(Ctx, AS), EC)); +V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git 
a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> 
@atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LAB
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 40f307cde75fdb15b97a38c89e5ee0d41dc09d8c Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +- llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a3e9700fa3089..e84d25afe620d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + if (I->getType()->getScalarType()->isPointerTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +unsigned AS = + cast(I->getType()->getScalarType())->getAddressSpace(); +ElementCount EC = cast(I->getType())->getElementCount(); +Value *BC = Builder.CreateBitCast( +Result, +VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), +EC)); +Value *IntToPtr = Builder.CreateIntToPtr( +BC, VectorType::get(PointerType::get(Ctx, AS), EC)); +V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git 
a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> 
@atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LAB
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From bf3f6b0a6246e9e4890b320517b5f8333e638236 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG][X86] Split via Concat vector types for atomic load Vector types that aren't widened are 'split' via CONCAT_VECTORS so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 32 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 204 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..7905f5a94c579 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6733d4c7d31e8..9bbf583691106 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1173,6 +1173,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, 
Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1423,6 +1426,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + SDLoc dl(LD); + + EVT MemoryVT = LD->getMemoryVT(); + unsigned NumElts = MemoryVT.getVectorMinNumElements(); + + EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); + EVT ElemVT = + EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1); + + // Create a single atomic to load all the elements at once. + SDValue Atomic = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT, +LD->getChain(), LD->getBasePtr(), LD->getMemOperand()); + + // Instead of splitting, put all the elements back into a vector. + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) { +SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Atomic, + DAG.getVectorIdxConstant(i, dl)); +Elt = DAG.getBitcast(ElemVT, Elt); +Ops.push_back(Elt); + } + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops); + + ReplaceValueWith(SDValue(LD, 0), Concat); + ReplaceValueWith(SDValue(LD, 1), LD->getChain()); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw 
$0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHECK0-NEXT:## implicit-def: $ecx +; CHECK0-NEXT:movw %dx, %cx +; CHECK0-NEXT:## implicit-def: $xmm1 +; CHECK0-NEXT:pinsrw $0, %ecx, %xmm1 +; CHECK0-NEXT:movw %ax, %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +;
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From e9820bfc104a419217ea5913e21a1f88a2f2c30d Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54da9fe3c6a40..05387afaf840d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -459,6 +462,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x acquire, 
align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 2dbf42a7a7bcbff59e1f55d19d6bbcf0e59cc3f7 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 20 +- llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a3e9700fa3089..e84d25afe620d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + if (I->getType()->getScalarType()->isPointerTy() && + I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { +unsigned AS = + cast(I->getType()->getScalarType())->getAddressSpace(); +ElementCount EC = cast(I->getType())->getElementCount(); +Value *BC = Builder.CreateBitCast( +Result, +VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), +EC)); +Value *IntToPtr = Builder.CreateIntToPtr( +BC, VectorType::get(PointerType::get(Ctx, AS), EC)); +V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git 
a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> 
@atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LAB
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 51488b849e778d9cf7e56491a289d2d908bbf727 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 108 ++ llvm/lib/Target/X86/X86InstrCompiler.td | 7 ++ llvm/test/CodeGen/X86/atomic-load-store.ll| 81 + llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 5 files changed, 177 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 05387afaf840d..6733d4c7d31e8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4592,6 +4592,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = 
WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5982,6 +5985,89 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. 
+assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); +unsigned NumConcat = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +SmallVector ConcatOps(NumConcat); +SDValue UndefVal = DAG.getUNDEF(FirstVT); +ConcatOps[0] = LdOp; +for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; +return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + // Allow wider loads if they are sufficiently aligned to avoid memory faults + // and if the original load is simple. + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); + + // Find the vector type that can load from. + st
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: refactor issue reporting (PR #135662)
@@ -263,8 +242,75 @@ struct GenericReport : public Report { const BinaryContext &BC) const override; }; +/// An information about an issue collected on the slower, detailed, +/// run of an analysis. +class ExtraInfo { +public: + virtual void print(raw_ostream &OS, const MCInstReference Location) const = 0; + + virtual ~ExtraInfo() {} +}; + +class ClobberingInfo : public ExtraInfo { + SmallVector ClobberingInstrs; + +public: + ClobberingInfo(const ArrayRef Instrs) + : ClobberingInstrs(Instrs) {} + + void print(raw_ostream &OS, const MCInstReference Location) const override; +}; + +/// A brief version of a report that can be further augmented with the details. +/// +/// It is common for a particular type of gadget detector to be tied to some +/// specific kind of analysis. If an issue is returned by that detector, it may +/// be further augmented with the detailed info in an analysis-specific way, +/// or just be left as-is (f.e. if a free-form warning was reported). +template struct BriefReport { + BriefReport(std::shared_ptr Issue, + const std::optional RequestedDetails) + : Issue(Issue), RequestedDetails(RequestedDetails) {} atrosinenko wrote: > If I understand the code correctly, `Report`, `GadgetReport` and > `GenericReport` are similar, but `BriefReport` and `DetailedReport` really > are something somewhat different. Yes, these are two groups of classes with misleading common "report" suffix. I like the idea of replacing `Report` with `Diagnostic` in `Report`, `GadgetReport` and `GenericReport`, thanks! This way, the "report" suffix is left for the last two classes which are closer to the issues _reported_ to the user. I'm not sure `DiagnosticUnderConstruction` is better than `BriefReport`, but `DetailedReport` can definitely be renamed to `FinalReport` for clarity. > Assuming all of my guesses above are correct, I'm also not sure if there is a > need for a `DetailedReport` class? 
Couldn't `ExtraInfo` just be added to the > `BaseDiagnostic` or `GadgetDiagnostic` classes, which would eliminate the > need of one class, and as a result make the diagnostic reporting engine a > little bit less complex? After thinking a bit more, I think the updated structure still makes sense, added a detailed explanation as a comment. Thank you for pointing this out! https://github.com/llvm/llvm-project/pull/135662 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 40f307cde75fdb15b97a38c89e5ee0d41dc09d8c Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/120385 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -462,7 +563,22 @@ class DataflowSrcSafetyAnalysis return DFParent::getStateBefore(Inst); } - void run() override { DFParent::run(); } + void run() override { +for (BinaryBasicBlock &BB : Func) { + if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) { +MCInst *LastInstOfChecker = BB.getLastNonPseudoInstr(); +LLVM_DEBUG({ + dbgs() << "Found pointer checking sequence in " << BB.getName() + << ":\n"; + traceReg(BC, "Checked register", CheckerInfo->first); + traceInst(BC, "First instruction", *CheckerInfo->second); + traceInst(BC, "Last instruction", *LastInstOfChecker); +}); +CheckerSequenceInfo[LastInstOfChecker] = *CheckerInfo; + } +} kbeyls wrote: Another nit-pick. This to me looks like it's initializing the `CheckerSequenceInfo` variable of the `SrcSafetyAnalysis` parent class. Wouldn't it be cleaner to do this initializing in the constructor for `SrcSafetyAnalysis`, rather than in this `run` method? https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -591,7 +591,9 @@ obscure_indirect_call_arg_nocfg: .globl safe_lr_at_function_entry_nocfg .type safe_lr_at_function_entry_nocfg,@function safe_lr_at_function_entry_nocfg: -// CHECK-NOT: safe_lr_at_function_entry_nocfg +// Due to state being reset after a label, paciasp is reported as +// a signing oracle - this is a known false positive, ignore it. +// CHECK-NOT: non-protected call{{.*}}safe_lr_at_function_entry_nocfg cbz x0, 1f ret// LR is safe at the start of the function 1: kbeyls wrote: [Re: lines +594 to +600] I'm wondering if this false positive pattern could end up appearing quite a few times in real code, specifically in code that has been shrink-wrap optimized? Did you run this scanner on a larger code base? How many and what kind of false positives did you see? See this comment inline on https://app.graphite.dev/github/pr/llvm/llvm-project/134146?utm_source=unchanged-line-comment";>Graphite. https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
@@ -355,6 +389,46 @@ class SrcSafetyAnalysis { return Regs; } + // Returns all registers made trusted by this instruction. + SmallVector getRegsMadeTrusted(const MCInst &Point, +const SrcState &Cur) const { +SmallVector Regs; +const MCPhysReg NoReg = BC.MIB->getNoRegister(); + +// An authenticated pointer can be checked, or +MCPhysReg CheckedReg = +BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false); +if (CheckedReg != NoReg && Cur.SafeToDerefRegs[CheckedReg]) + Regs.push_back(CheckedReg); + +if (CheckerSequenceInfo.contains(&Point)) { + MCPhysReg CheckedReg; + const MCInst *FirstCheckerInst; + std::tie(CheckedReg, FirstCheckerInst) = CheckerSequenceInfo.at(&Point); + + // FirstCheckerInst should belong to the same basic block, meaning + // it was deterministically processed a few steps before this instruction. + const SrcState &StateBeforeChecker = + getStateBefore(*FirstCheckerInst).get(); kbeyls wrote: This is a nitpick. I was trying to get my head around whether it's guaranteed to get to a fixed point in a dataflow analysis, where next to using just the previous state on the current instruction, also a state on another instruction is used as input to compute the next state for the current instruction. I'm assuming this is probably fine (probably, because as you write in the comment, this instruction should be in the same basic block). I thought I'd just ask if you ended up checking this somewhere else and, if so, you'd happen to have a pointer to something stating that you can also take state from another instruction in the same basic block, and still be guaranteed to reach a fixed point in a dataflow analysis? Orthogonal to that: would it be hard to add an assert here that `*FirstCheckerInst` is indeed in the same basic block? https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
https://github.com/kbeyls commented: I finally managed to read through this patch end-to-end. I only have 3 very nit-picky questions left. This looks almost ready to merge. https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect signing oracles (PR #134146)
https://github.com/kbeyls edited https://github.com/llvm/llvm-project/pull/134146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: use more appropriate types (NFC) (PR #135661)
https://github.com/kbeyls approved this pull request. https://github.com/llvm/llvm-project/pull/135661 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: use more appropriate types (NFC) (PR #135661)
https://github.com/kbeyls edited https://github.com/llvm/llvm-project/pull/135661 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/nikic commented: Looks like there are some polly assertion failures. https://github.com/llvm/llvm-project/pull/137314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/nikic edited https://github.com/llvm/llvm-project/pull/137314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove reference counts from ConstantData (PR #137314)
https://github.com/nikic commented: The general approach here makes sense to me. For reference, this is the diff for both PRs together, which is a bit clearer as the second undoes half of the first: https://github.com/llvm/llvm-project/compare/main...users/arsenm/ir/remove-constantdata-reference-counts https://github.com/llvm/llvm-project/pull/137314 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of text records (PR #137235)
https://github.com/uweigand commented: Why do we need all that "virtual section" stuff? Wouldn't it suffice to exit early from ::writeText if the section length is zero? https://github.com/llvm/llvm-project/pull/137235 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of text records (PR #137235)
uweigand wrote: Also, the new tests seem to be failing in CI. https://github.com/llvm/llvm-project/pull/137235 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)
https://github.com/cjappl approved this pull request. https://github.com/llvm/llvm-project/pull/137353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock (PR #137353)
https://github.com/thetruestblue created https://github.com/llvm/llvm-project/pull/137353 This cherry pick updates the OS Spin Lock logic to fix to intercept both OS Spin Lock and os_nospin_lock which needs to be intercepted due to macro replacement of OSSpinLock within atomic_load compiler_rt call. This is causing a test to fail in release bot: https://green.lab.llvm.org/job/llvm.org/job/clang-stage1-RA-release-branch/205/testReport/ rdar://145488759 >From 2453cd24fd5fb9778b55155f77bf9d05a49539f9 Mon Sep 17 00:00:00 2001 From: davidtrevelyan Date: Thu, 13 Mar 2025 10:18:25 + Subject: [PATCH 1/2] [rtsan][Apple] Add interceptor for _os_nospin_lock_lock (#131034) Follows the discussion here: https://github.com/llvm/llvm-project/pull/129309 Recently, the test `TestRtsan.AccessingALargeAtomicVariableDiesWhenRealtime` has been failing on newer MacOS versions, because the internal locking mechanism in `std::atomic::load` (for types `T` that are larger than the hardware lock-free limit), has changed to a function that wasn't being intercepted by rtsan. This PR introduces an interceptor for `_os_nospin_lock_lock`, which is the new internal locking mechanism. _Note: we'd probably do well to introduce interceptors for `_os_nospin_lock_unlock` (and `os_unfair_lock_unlock`) too, which also appear to have blocking implementations. 
This can follow in a separate PR._ (cherry picked from commit 481a55a3d9645a6bc1540d326319b78ad8ed8db1) --- .../lib/rtsan/rtsan_interceptors_posix.cpp| 11 +++ .../tests/rtsan_test_interceptors_posix.cpp | 19 +++ 2 files changed, 30 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 6816119065263..4d602a88ba9ae 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -30,6 +30,12 @@ extern "C" { typedef int32_t OSSpinLock; void OSSpinLockLock(volatile OSSpinLock *__lock); +// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but +// it's an internal implementation detail of `os/lock.c` on Darwin, and +// therefore not available in any headers. As a workaround, we forward declare +// it here, which is enough to facilitate interception of _os_nospin_lock_lock. +struct _os_nospin_lock_s; +using _os_nospin_lock_t = _os_nospin_lock_s *; } #endif // TARGET_OS_MAC @@ -642,6 +648,11 @@ INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { __rtsan_notify_intercepted_call("os_unfair_lock_lock"); return REAL(os_unfair_lock_lock)(lock); } + +INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { + __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); + return REAL(_os_nospin_lock_lock)(lock); +} #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 59663776366bb..75f723081c4b6 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1058,6 +1058,25 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } + +// We 
intercept _os_nospin_lock_lock because it's the internal +// locking mechanism for MacOS's atomic implementation for data +// types that are larger than the hardware's maximum lock-free size. +// However, it's a private implementation detail and not visible in any headers, +// so we must duplicate the required type definitions to forward declaration +// what we need here. +extern "C" { +struct _os_nospin_lock_s { + unsigned int oul_value; +}; +void _os_nospin_lock_lock(_os_nospin_lock_s *); +} +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + _os_nospin_lock_s lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} #endif #if SANITIZER_LINUX >From e2a5aa2526b06497869514287d26ea4732a14b95 Mon Sep 17 00:00:00 2001 From: thetruestblue Date: Fri, 18 Apr 2025 11:25:31 -0700 Subject: [PATCH 2/2] [RTSan][Darwin] Adjust OSSpinLock/_os_nospin_lock interceptor and tests (#132867) These changes align with these lock types and allows builds and tests to pass with various SDKS. rdar://147067322 (cherry picked from commit 7cc4472037b43971bd3ee373fe75b5043f5abca9) --- .../lib/rtsan/rtsan_interceptors_posix.cpp| 37 +++-- .../tests/rtsan_test_interceptors_posix.cpp | 40 +-- 2 files changed, 32 insertions(+), 45 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 4d602a88ba9ae..040f501ee52
[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock to release/20.x (PR #137353)
https://github.com/thetruestblue edited https://github.com/llvm/llvm-project/pull/137353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [RTSan] Cherry pick rtsan osspinlock fix to release/20.x (PR #137353)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (thetruestblue) Changes This cherry pick updates the OS Spin Lock logic to fix to intercept both OS Spin Lock and os_nospin_lock which needs to be intercepted due to macro replacement of OSSpinLock within atomic_load compiler_rt call. This is causing a test to fail in release bot: https://green.lab.llvm.org/job/llvm.org/job/clang-stage1-RA-release-branch/205/testReport/ rdar://145488759 --- Full diff: https://github.com/llvm/llvm-project/pull/137353.diff 2 Files Affected: - (modified) compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp (+15-13) - (modified) compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp (+18-3) ``diff diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 6816119065263..040f501ee52e9 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -21,18 +21,6 @@ #include "rtsan/rtsan.h" #if SANITIZER_APPLE - -#if TARGET_OS_MAC -// On MacOS OSSpinLockLock is deprecated and no longer present in the headers, -// but the symbol still exists on the system. Forward declare here so we -// don't get compilation errors. 
-#include -extern "C" { -typedef int32_t OSSpinLock; -void OSSpinLockLock(volatile OSSpinLock *__lock); -} -#endif // TARGET_OS_MAC - #include #include #endif // SANITIZER_APPLE @@ -627,21 +615,35 @@ INTERCEPTOR(mode_t, umask, mode_t cmask) { #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#undef OSSpinLockLock + INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) { __rtsan_notify_intercepted_call("OSSpinLockLock"); return REAL(OSSpinLockLock)(lock); } -#pragma clang diagnostic pop + #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK INTERCEPT_FUNCTION(OSSpinLockLock) #else #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK #endif // SANITIZER_APPLE +#if SANITIZER_APPLE +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; + +INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { + __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); + return REAL(_os_nospin_lock_lock)(lock); +} +#pragma clang diagnostic pop // "-Wdeprecated-declarations" +#endif // SANITIZER_APPLE + #if SANITIZER_APPLE INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { __rtsan_notify_intercepted_call("os_unfair_lock_lock"); return REAL(os_unfair_lock_lock)(lock); } + #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 59663776366bb..7eda884951c83 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1036,10 +1036,18 @@ TEST(TestRtsanInterceptors, PthreadJoinDiesWhenRealtime) { } #if SANITIZER_APPLE - #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored 
"-Wdeprecated-declarations" +#undef OSSpinLockLock +extern "C" { +typedef int32_t OSSpinLock; +void OSSpinLockLock(volatile OSSpinLock *__lock); +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; +void _os_nospin_lock_lock(_os_nospin_lock_t lock); +} + TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { auto Func = []() { OSSpinLock spin_lock{}; @@ -1048,7 +1056,14 @@ TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "OSSpinLockLock"); ExpectNonRealtimeSurvival(Func); } -#pragma clang diagnostic pop + +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + OSSpinLock lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} +#pragma clang diagnostic pop //"-Wdeprecated-declarations" TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { auto Func = []() { @@ -1058,7 +1073,7 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } -#endif +#endif // SANITIZER_APPLE #if SANITIZER_LINUX TEST(TestRtsanInterceptors, SpinLockLockDiesWhenRealtime) { `` https://github.com/llvm/llvm-project/pull/137353 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] Refactoring MCDXBC to support out of order storage of root parameters (PR #137284)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/137284 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Bundle operands to specify denormal modes (PR #136501)
https://github.com/spavloff updated https://github.com/llvm/llvm-project/pull/136501 >From 22742e24c1eef3ecc0fb4294dac9f42c9d160019 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Thu, 17 Apr 2025 18:42:15 +0700 Subject: [PATCH 1/3] Bundle operands to specify denormal modes Two new operands are now supported in the "fp.control" operand bundle: * "denorm.in=xxx" - specifies the inpot denormal mode. * "denorm.out=xxx" - specifies the output denormal mode. Here xxx must be one of the following values: * "ieee" - preserve denormals. * "zero" - flush to zero preserving sign. * "pzero" - flush to positive zero. * "dyn" - mode is dynamically read from a control register. These values align those permitted in the "denormal-fp-math" function attribute. --- llvm/docs/LangRef.rst | 18 +- llvm/include/llvm/ADT/FloatingPointMode.h | 33 llvm/include/llvm/IR/InstrTypes.h | 21 +++ llvm/lib/Analysis/ConstantFolding.cpp | 24 ++- llvm/lib/IR/Instructions.cpp | 168 +- llvm/lib/IR/Verifier.cpp | 14 ++ .../constant-fold-fp-denormal-strict.ll | 91 ++ llvm/test/Verifier/fp-intrinsics.ll | 36 8 files changed, 394 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal-strict.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8252971aa8a58..954f0e96b46f6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3084,7 +3084,10 @@ floating-point control modes and the treatment of status bits respectively. An operand bundle tagged with "fp.control" contains information about the control modes used for the operation execution. Operands specified in this -bundle represent particular options. Currently, only rounding mode is supported. +bundle represent particular options. The following modes are supported: + +* rounding mode, +* denormal behavior. Rounding mode is represented by a metadata string value, which specifies the the mode used for the operation evaluation. 
Possible values are: @@ -3103,6 +3106,19 @@ rounding rounding mode is taken from the control register (dynamic rounding). In the particular case of :ref:`default floating-point environment `, the operation uses rounding to nearest, ties to even. +Denormal behavior defines whether denormal values are flushed to zero during +the call's execution. This behavior is specified separately for input and +output values. Such specification is a string, which starts with +"denorm.in=" or "denorm.out=" respectively. The remainder of the string should +be one of the values: + +:: + +``"ieee"`` - preserve denormals, +``"zero"`` - flush to +0.0 or -0.0 depending on value sign, +``"pzero"`` - flush to +0.0, +``"dyn"`` - concrete mode is read from some register. + An operand bundle tagged with "fp.except" may be associated with operations that can read or write floating-point exception flags. It contains a single metadata string value, which can have one of the following values: diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h index 639d931ef88fe..5fceccfd1d0bf 100644 --- a/llvm/include/llvm/ADT/FloatingPointMode.h +++ b/llvm/include/llvm/ADT/FloatingPointMode.h @@ -234,6 +234,39 @@ void DenormalMode::print(raw_ostream &OS) const { OS << denormalModeKindName(Output) << ',' << denormalModeKindName(Input); } +/// If the specified string represents denormal mode as used in operand bundles, +/// returns the corresponding mode. +inline std::optional +parseDenormalKindFromOperandBundle(StringRef Str) { + if (Str == "ieee") +return DenormalMode::IEEE; + if (Str == "zero") +return DenormalMode::PreserveSign; + if (Str == "pzero") +return DenormalMode::PositiveZero; + if (Str == "dyn") +return DenormalMode::Dynamic; + return std::nullopt; +} + +/// Converts the specified denormal mode into string suitable for use in an +/// operand bundle. 
+inline std::optional +printDenormalForOperandBundle(DenormalMode::DenormalModeKind Mode) { + switch (Mode) { + case DenormalMode::IEEE: +return "ieee"; + case DenormalMode::PreserveSign: +return "zero"; + case DenormalMode::PositiveZero: +return "pzero"; + case DenormalMode::Dynamic: +return "dyn"; + default: +return std::nullopt; + } +} + /// Floating-point class tests, supported by 'is_fpclass' intrinsic. Actual /// test may be an OR combination of basic tests. enum FPClassTest : unsigned { diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 8425243e5efe9..8492c911ffc6a 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1092,12 +1092,24 @@ template class OperandBundleDefT { using OperandBundleDef = OperandBundleDefT; using ConstOperandBundleDef = OperandBundleDefT; +s
[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)
https://github.com/inbelic created https://github.com/llvm/llvm-project/pull/137381 Noting: - Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would naturally seem an appropriate place to place. - Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't introduce a dependency of `clangSema` onto `clangParse`. - Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing from source and `clangFrontend` when we are parsing as a command line argument. - Therefore, we are required to move this out of `clangParse` so that `clangSema` can reference it. This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be linked to all its dependencies (`clangFrontend` already depends on `clangSema`) >From 92d1d83b3fbb43523819b5be338922ab614d2054 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 25 Apr 2025 18:21:36 + Subject: [PATCH] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema Noting: - Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would naturally seem an appropriate place to place. - Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't introduce a dependency of `clangSema` onto `clangParse`. - Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing from source and `clangFrontend` when we are parsing as a command line argument. - Therefore, we are required to move this out of `clangParse` so that `clangSema` can reference it. 
This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be linked to all its dependencies (`clangFrontend` already depends on `clangSema`) --- .../{Parse => Sema}/ParseHLSLRootSignature.h | 0 clang/lib/Parse/CMakeLists.txt| 1 - clang/lib/Sema/CMakeLists.txt | 1 + .../ParseHLSLRootSignature.cpp| 2 +- clang/unittests/CMakeLists.txt| 1 - clang/unittests/Parse/CMakeLists.txt | 20 --- clang/unittests/Sema/CMakeLists.txt | 2 ++ .../ParseHLSLRootSignatureTest.cpp| 2 +- 8 files changed, 5 insertions(+), 24 deletions(-) rename clang/include/clang/{Parse => Sema}/ParseHLSLRootSignature.h (100%) rename clang/lib/{Parse => Sema}/ParseHLSLRootSignature.cpp (99%) delete mode 100644 clang/unittests/Parse/CMakeLists.txt rename clang/unittests/{Parse => Sema}/ParseHLSLRootSignatureTest.cpp (99%) diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Sema/ParseHLSLRootSignature.h similarity index 100% rename from clang/include/clang/Parse/ParseHLSLRootSignature.h rename to clang/include/clang/Sema/ParseHLSLRootSignature.h diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt index 00fde537bb9c6..22e902f7e1bc5 100644 --- a/clang/lib/Parse/CMakeLists.txt +++ b/clang/lib/Parse/CMakeLists.txt @@ -14,7 +14,6 @@ add_clang_library(clangParse ParseExpr.cpp ParseExprCXX.cpp ParseHLSL.cpp - ParseHLSLRootSignature.cpp ParseInit.cpp ParseObjc.cpp ParseOpenMP.cpp diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 4b87004e4b8ea..9ca4b8e6ab96b 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -26,6 +26,7 @@ add_clang_library(clangSema JumpDiagnostics.cpp MultiplexExternalSemaSource.cpp ParsedAttr.cpp + ParseHLSLRootSignature.cpp Scope.cpp ScopeInfo.cpp Sema.cpp diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Sema/ParseHLSLRootSignature.cpp similarity index 99% rename from clang/lib/Parse/ParseHLSLRootSignature.cpp rename to 
clang/lib/Sema/ParseHLSLRootSignature.cpp index 042aedbf1af52..87b5022cb0abe 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp @@ -6,7 +6,7 @@ // //===--===// -#include "clang/Parse/ParseHLSLRootSignature.h" +#include "clang/Sema/ParseHLSLRootSignature.h" #include "clang/Lex/LiteralSupport.h" diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index f3823ba309420..580533a97d700 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -49,7 +49,6 @@ endfunction() add_subdirectory(Basic) add_subdirectory(Lex) -add_subdirectory(Parse) add_subdirectory(Driver) if(CLANG_ENABLE_STATIC_ANALYZER) add_subdirectory(Analysis) diff --git a/clang/unittests/Parse/CMakeLists.txt b/clang/unittests/Parse/CMakeLists.txt deleted file mode 100644 index 2a31be625042e..0 --- a/clang/unittests/Parse/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Support - ) -add_clang_unittest(ParseTests - ParseHLSLRootSignatureTest.cpp - ) -clang_target_link_libraries(ParseTests - PRIVATE - clangAST - clangBasic - clangLex - clangParse - clangSema - ) -target_link_libraries(ParseTests - PRIVATE - LLVMTest
[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into `clangSema` (PR #137381)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/137381 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Finn Plummer (inbelic) Changes Noting: - Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would naturally seem an appropriate place to place. - Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't introduce a dependency of `clangSema` onto `clangParse`. - Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing from source and `clangFrontend` when we are parsing as a command line argument. - Therefore, we are required to move this out of `clangParse` so that `clangSema` can reference it. This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be linked to all its dependencies (`clangFrontend` already depends on `clangSema`) --- Full diff: https://github.com/llvm/llvm-project/pull/137381.diff 8 Files Affected: - (renamed) clang/include/clang/Sema/ParseHLSLRootSignature.h () - (modified) clang/lib/Parse/CMakeLists.txt (-1) - (modified) clang/lib/Sema/CMakeLists.txt (+1) - (renamed) clang/lib/Sema/ParseHLSLRootSignature.cpp (+1-1) - (modified) clang/unittests/CMakeLists.txt (-1) - (removed) clang/unittests/Parse/CMakeLists.txt (-20) - (modified) clang/unittests/Sema/CMakeLists.txt (+2) - (renamed) clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp (+1-1) ``diff diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Sema/ParseHLSLRootSignature.h similarity index 100% rename from clang/include/clang/Parse/ParseHLSLRootSignature.h rename to clang/include/clang/Sema/ParseHLSLRootSignature.h diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt index 00fde537bb9c6..22e902f7e1bc5 100644 --- a/clang/lib/Parse/CMakeLists.txt +++ b/clang/lib/Parse/CMakeLists.txt @@ -14,7 +14,6 @@ add_clang_library(clangParse ParseExpr.cpp ParseExprCXX.cpp ParseHLSL.cpp - ParseHLSLRootSignature.cpp ParseInit.cpp ParseObjc.cpp ParseOpenMP.cpp diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt 
index 4b87004e4b8ea..9ca4b8e6ab96b 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -26,6 +26,7 @@ add_clang_library(clangSema JumpDiagnostics.cpp MultiplexExternalSemaSource.cpp ParsedAttr.cpp + ParseHLSLRootSignature.cpp Scope.cpp ScopeInfo.cpp Sema.cpp diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Sema/ParseHLSLRootSignature.cpp similarity index 99% rename from clang/lib/Parse/ParseHLSLRootSignature.cpp rename to clang/lib/Sema/ParseHLSLRootSignature.cpp index 042aedbf1af52..87b5022cb0abe 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp @@ -6,7 +6,7 @@ // //===--===// -#include "clang/Parse/ParseHLSLRootSignature.h" +#include "clang/Sema/ParseHLSLRootSignature.h" #include "clang/Lex/LiteralSupport.h" diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index f3823ba309420..580533a97d700 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -49,7 +49,6 @@ endfunction() add_subdirectory(Basic) add_subdirectory(Lex) -add_subdirectory(Parse) add_subdirectory(Driver) if(CLANG_ENABLE_STATIC_ANALYZER) add_subdirectory(Analysis) diff --git a/clang/unittests/Parse/CMakeLists.txt b/clang/unittests/Parse/CMakeLists.txt deleted file mode 100644 index 2a31be625042e..0 --- a/clang/unittests/Parse/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Support - ) -add_clang_unittest(ParseTests - ParseHLSLRootSignatureTest.cpp - ) -clang_target_link_libraries(ParseTests - PRIVATE - clangAST - clangBasic - clangLex - clangParse - clangSema - ) -target_link_libraries(ParseTests - PRIVATE - LLVMTestingAnnotations - LLVMTestingSupport - clangTesting - ) diff --git a/clang/unittests/Sema/CMakeLists.txt b/clang/unittests/Sema/CMakeLists.txt index acc76c932afeb..a5c6b44926460 100644 --- a/clang/unittests/Sema/CMakeLists.txt +++ b/clang/unittests/Sema/CMakeLists.txt @@ -3,6 +3,7 @@ 
add_clang_unittest(SemaTests CodeCompleteTest.cpp HeuristicResolverTest.cpp GslOwnerPointerInference.cpp + ParseHLSLRootSignatureTest.cpp SemaLookupTest.cpp SemaNoloadLookupTest.cpp CLANG_LIBS @@ -10,6 +11,7 @@ add_clang_unittest(SemaTests clangASTMatchers clangBasic clangFrontend + clangLex clangParse clangSema clangSerialization diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp similarity index 99% rename from clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp rename to clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp index 2d4e37463bef3..76c437689c4d3 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp @@ -21,7 +21,7 @@ #include "c
[llvm-branch-commits] [clang] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into clangSema (PR #137381)
llvmbot wrote: @llvm/pr-subscribers-hlsl Author: Finn Plummer (inbelic) Changes Noting: - Currently, `HLSLRootSignatureParser` is defined in `clangParse`, as it would naturally seem an appropriate place to place. - Surprisingly, `clangParse` has a dependency on `clangSema`. So we can't introduce a dependency of `clangSema` onto `clangParse`. - Given the users of `HLSLRootSignatureParser` will be `SemaHLSL` when parsing from source and `clangFrontend` when we are parsing as a command line argument. - Therefore, we are required to move this out of `clangParse` so that `clangSema` can reference it. This commit moves `HLSLRootSignatureParser` into `clangSema` so it can be linked to all its dependencies (`clangFrontend` already depends on `clangSema`) --- Full diff: https://github.com/llvm/llvm-project/pull/137381.diff 8 Files Affected: - (renamed) clang/include/clang/Sema/ParseHLSLRootSignature.h () - (modified) clang/lib/Parse/CMakeLists.txt (-1) - (modified) clang/lib/Sema/CMakeLists.txt (+1) - (renamed) clang/lib/Sema/ParseHLSLRootSignature.cpp (+1-1) - (modified) clang/unittests/CMakeLists.txt (-1) - (removed) clang/unittests/Parse/CMakeLists.txt (-20) - (modified) clang/unittests/Sema/CMakeLists.txt (+2) - (renamed) clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp (+1-1) ``diff diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Sema/ParseHLSLRootSignature.h similarity index 100% rename from clang/include/clang/Parse/ParseHLSLRootSignature.h rename to clang/include/clang/Sema/ParseHLSLRootSignature.h diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt index 00fde537bb9c6..22e902f7e1bc5 100644 --- a/clang/lib/Parse/CMakeLists.txt +++ b/clang/lib/Parse/CMakeLists.txt @@ -14,7 +14,6 @@ add_clang_library(clangParse ParseExpr.cpp ParseExprCXX.cpp ParseHLSL.cpp - ParseHLSLRootSignature.cpp ParseInit.cpp ParseObjc.cpp ParseOpenMP.cpp diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt 
index 4b87004e4b8ea..9ca4b8e6ab96b 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -26,6 +26,7 @@ add_clang_library(clangSema JumpDiagnostics.cpp MultiplexExternalSemaSource.cpp ParsedAttr.cpp + ParseHLSLRootSignature.cpp Scope.cpp ScopeInfo.cpp Sema.cpp diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Sema/ParseHLSLRootSignature.cpp similarity index 99% rename from clang/lib/Parse/ParseHLSLRootSignature.cpp rename to clang/lib/Sema/ParseHLSLRootSignature.cpp index 042aedbf1af52..87b5022cb0abe 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Sema/ParseHLSLRootSignature.cpp @@ -6,7 +6,7 @@ // //===--===// -#include "clang/Parse/ParseHLSLRootSignature.h" +#include "clang/Sema/ParseHLSLRootSignature.h" #include "clang/Lex/LiteralSupport.h" diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index f3823ba309420..580533a97d700 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -49,7 +49,6 @@ endfunction() add_subdirectory(Basic) add_subdirectory(Lex) -add_subdirectory(Parse) add_subdirectory(Driver) if(CLANG_ENABLE_STATIC_ANALYZER) add_subdirectory(Analysis) diff --git a/clang/unittests/Parse/CMakeLists.txt b/clang/unittests/Parse/CMakeLists.txt deleted file mode 100644 index 2a31be625042e..0 --- a/clang/unittests/Parse/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Support - ) -add_clang_unittest(ParseTests - ParseHLSLRootSignatureTest.cpp - ) -clang_target_link_libraries(ParseTests - PRIVATE - clangAST - clangBasic - clangLex - clangParse - clangSema - ) -target_link_libraries(ParseTests - PRIVATE - LLVMTestingAnnotations - LLVMTestingSupport - clangTesting - ) diff --git a/clang/unittests/Sema/CMakeLists.txt b/clang/unittests/Sema/CMakeLists.txt index acc76c932afeb..a5c6b44926460 100644 --- a/clang/unittests/Sema/CMakeLists.txt +++ b/clang/unittests/Sema/CMakeLists.txt @@ -3,6 +3,7 @@ 
add_clang_unittest(SemaTests CodeCompleteTest.cpp HeuristicResolverTest.cpp GslOwnerPointerInference.cpp + ParseHLSLRootSignatureTest.cpp SemaLookupTest.cpp SemaNoloadLookupTest.cpp CLANG_LIBS @@ -10,6 +11,7 @@ add_clang_unittest(SemaTests clangASTMatchers clangBasic clangFrontend + clangLex clangParse clangSema clangSerialization diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp similarity index 99% rename from clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp rename to clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp index 2d4e37463bef3..76c437689c4d3 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Sema/ParseHLSLRootSignatureTest.cpp @@ -21,7 +21,7 @@ #include "cl
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add parsing of remaining Descriptor Table params (PR #137038)
https://github.com/bogner approved this pull request. https://github.com/llvm/llvm-project/pull/137038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][HLSL][RootSignature] Move `HLSLRootSignatureParser` into `clangSema` (PR #137381)
https://github.com/inbelic closed https://github.com/llvm/llvm-project/pull/137381 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From ed8d4f8750143defbbc331379cdb4c1c85749d0a Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The elements are placed back into a concat_vectors. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that its concat_vectors can be mapped to a BUILD_VECTOR and so unused elements are no longer referenced. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 167 ++ 6 files changed, 69 insertions(+), 187 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c183149b0863a..6ae1d019cad28 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1840,7 +1840,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2278,7 +2278,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9da2ba04f77cb..545da0a1fbfab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12264,7 +12264,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12957,17 +12957,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->get
[llvm-branch-commits] [llvm] release/20.x: [LoongArch] Don't crash on instruction prefetch intrinsics (#135760) (PR #135923)
github-actions[bot] wrote: @leecheechen (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/135923 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AutoUpgrade][AMDGPU] Adjust AS7 address width to 48 bits (PR #137418)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff c91038c28ff91461b8b8e9e56a56e771ce39fb7a bafdd8b6606c79e63a25e8c80f28abd761a32f85 --extensions c,cpp -- clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGen/target-data.c llvm/lib/IR/AutoUpgrade.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 6ce66e6465..4f5b48547d 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,7 +33,8 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" +"-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" +"v32:" "32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" "-ni:7:8:9"; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b16a9ef0cd..63c76b3be3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -677,7 +677,8 @@ static StringRef computeDataLayout(const Triple &TT) { // space 8) which cannot be non-trivilally accessed by LLVM memory operations // like getelementptr. 
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" + "-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:" + "32-" "v32:32-v48:64-v96:" "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" "G1-ni:7:8:9"; diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index a50e523379..cbe82e3f48 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -41,12 +41,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), +"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:" +"256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), +"e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:" +"256:32"); // but that r600 does not. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -60,7 +60,8 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "amdgcn"), "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" - "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-" + "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32:48-p8:128:" + "128-" "p9:192:256:256:32"); // Check that RISCV64 upgrades -n64 to -n32:64. 
@@ -144,15 +145,15 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), +"e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:" +"256:32"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), +"G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32:48-p8:128:128-p9:192:256:" +"256:32"); + EXPECT_EQ(Up
[llvm-branch-commits] [clang] 62072e7 - [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc()
Author: Nathan Ridge Date: 2025-04-25T16:29:08-07:00 New Revision: 62072e7f877e444f386ea78dce3581904bdb727d URL: https://github.com/llvm/llvm-project/commit/62072e7f877e444f386ea78dce3581904bdb727d DIFF: https://github.com/llvm/llvm-project/commit/62072e7f877e444f386ea78dce3581904bdb727d.diff LOG: [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc() Added: Modified: clang/lib/AST/Expr.cpp clang/test/SemaCXX/cxx2b-deducing-this.cpp Removed: diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 8571b617c70eb..a5b7ef8c4271b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1665,8 +1665,11 @@ SourceLocation CallExpr::getBeginLoc() const { Method && Method->isExplicitObjectMemberFunction()) { bool HasFirstArg = getNumArgs() > 0 && getArg(0); assert(HasFirstArg); - if (HasFirstArg) -return getArg(0)->getBeginLoc(); + if (HasFirstArg) { +if (auto FirstArgLoc = getArg(0)->getBeginLoc(); FirstArgLoc.isValid()) { + return FirstArgLoc; +} + } } } diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 6f17ce7275456..7e392213710a4 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -1134,3 +1134,10 @@ struct S { static_assert((S{} << 11) == a); // expected-error@-1 {{use of undeclared identifier 'a'}} } + +namespace GH135522 { +struct S { + auto f(this auto) -> S; + bool g() { return f(); } // expected-error {{no viable conversion from returned value of type 'S' to function return type 'bool'}} +}; +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc() (PR #135927)
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/135927 >From 62072e7f877e444f386ea78dce3581904bdb727d Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Tue, 15 Apr 2025 03:40:37 -0400 Subject: [PATCH] [clang][AST] Handle implicit first argument in CallExpr::getBeginLoc() --- clang/lib/AST/Expr.cpp | 7 +-- clang/test/SemaCXX/cxx2b-deducing-this.cpp | 7 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 8571b617c70eb..a5b7ef8c4271b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1665,8 +1665,11 @@ SourceLocation CallExpr::getBeginLoc() const { Method && Method->isExplicitObjectMemberFunction()) { bool HasFirstArg = getNumArgs() > 0 && getArg(0); assert(HasFirstArg); - if (HasFirstArg) -return getArg(0)->getBeginLoc(); + if (HasFirstArg) { +if (auto FirstArgLoc = getArg(0)->getBeginLoc(); FirstArgLoc.isValid()) { + return FirstArgLoc; +} + } } } diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 6f17ce7275456..7e392213710a4 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -1134,3 +1134,10 @@ struct S { static_assert((S{} << 11) == a); // expected-error@-1 {{use of undeclared identifier 'a'}} } + +namespace GH135522 { +struct S { + auto f(this auto) -> S; + bool g() { return f(); } // expected-error {{no viable conversion from returned value of type 'S' to function return type 'bool'}} +}; +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Handle scalarized reductions in getArithmeticReductionCost (PR #136688)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/136688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/136808 >From 182e8b7f8a710f5a08bd329d1ab299ad1709cafb Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 22 Apr 2025 21:08:56 -0700 Subject: [PATCH] [clang-format] Correctly annotate kw_operator in using decls (#136545) Fix #136541 (cherry picked from commit 037657de7e5ccd4a37054829874a209b82fb8be7) --- clang/lib/Format/TokenAnnotator.cpp | 6 -- clang/unittests/Format/TokenAnnotatorTest.cpp | 5 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 44580d8624684..11b941c5a0411 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3961,8 +3961,10 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { FormatToken *AfterLastAttribute = nullptr; FormatToken *ClosingParen = nullptr; - for (auto *Tok = FirstNonComment ? FirstNonComment->Next : nullptr; Tok; - Tok = Tok->Next) { + for (auto *Tok = FirstNonComment && FirstNonComment->isNot(tok::kw_using) + ? FirstNonComment->Next + : nullptr; + Tok; Tok = Tok->Next) { if (Tok->is(TT_StartOfName)) SeenName = true; if (Tok->Previous->EndsCppAttributeGroup) diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b7b8a21b726b6..757db66c3e298 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1073,6 +1073,11 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) { ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen); + + Tokens = annotate("using std::operator==;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + // Not TT_FunctionDeclarationName. 
+ EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown); } TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 182e8b7 - [clang-format] Correctly annotate kw_operator in using decls (#136545)
Author: Owen Pan Date: 2025-04-25T16:49:42-07:00 New Revision: 182e8b7f8a710f5a08bd329d1ab299ad1709cafb URL: https://github.com/llvm/llvm-project/commit/182e8b7f8a710f5a08bd329d1ab299ad1709cafb DIFF: https://github.com/llvm/llvm-project/commit/182e8b7f8a710f5a08bd329d1ab299ad1709cafb.diff LOG: [clang-format] Correctly annotate kw_operator in using decls (#136545) Fix #136541 (cherry picked from commit 037657de7e5ccd4a37054829874a209b82fb8be7) Added: Modified: clang/lib/Format/TokenAnnotator.cpp clang/unittests/Format/TokenAnnotatorTest.cpp Removed: diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 44580d8624684..11b941c5a0411 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3961,8 +3961,10 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { FormatToken *AfterLastAttribute = nullptr; FormatToken *ClosingParen = nullptr; - for (auto *Tok = FirstNonComment ? FirstNonComment->Next : nullptr; Tok; - Tok = Tok->Next) { + for (auto *Tok = FirstNonComment && FirstNonComment->isNot(tok::kw_using) + ? FirstNonComment->Next + : nullptr; + Tok; Tok = Tok->Next) { if (Tok->is(TT_StartOfName)) SeenName = true; if (Tok->Previous->EndsCppAttributeGroup) diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b7b8a21b726b6..757db66c3e298 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1073,6 +1073,11 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) { ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen); + + Tokens = annotate("using std::operator==;"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + // Not TT_FunctionDeclarationName. 
+ EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_Unknown); } TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] [test] Extend mingw workarounds for armv7/aarch64 too (#136419) (PR #136752)
tstellar wrote: Are these legitimate test failures? https://github.com/llvm/llvm-project/pull/136752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/136808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate kw_operator in using decls (#136545) (PR #136808)
github-actions[bot] wrote: @owenca (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/136808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AutoUpgrade][AMDGPU] Adjust AS7 address width to 48 bits (PR #137418)
krzysz00 wrote: You'll want to get p9 as well, and _maybe_ p8 - though p8 is a very weird kind of "pointer" that's just the resource and can't - for example - be GEP'd https://github.com/llvm/llvm-project/pull/137418 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 425d1aa - [RISCV] Handle scalarized reductions in getArithmeticReductionCost
Author: Luke Lau Date: 2025-04-25T16:46:30-07:00 New Revision: 425d1aad294f1132ed90d79ff51320ac2dfcb72d URL: https://github.com/llvm/llvm-project/commit/425d1aad294f1132ed90d79ff51320ac2dfcb72d DIFF: https://github.com/llvm/llvm-project/commit/425d1aad294f1132ed90d79ff51320ac2dfcb72d.diff LOG: [RISCV] Handle scalarized reductions in getArithmeticReductionCost This fixes a crash reported at https://github.com/llvm/llvm-project/pull/114250#issuecomment-2813686061 If the vector type isn't legal at all, e.g. bfloat with +zvfbfmin, then the legalized type will be scalarized. So use getScalarType() instead of getVectorElement() when checking for f16/bf16. (cherry picked from commit 053451cb3502144564b4d0b30a9046045d1820d4) Added: Modified: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index add82dc80c429..8f1094413a756 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1658,9 +1658,8 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, break; case ISD::FADD: // We can't promote f16/bf16 fadd reductions. 
-if ((LT.second.getVectorElementType() == MVT::f16 && - !ST->hasVInstructionsF16()) || -LT.second.getVectorElementType() == MVT::bf16) +if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) || +LT.second.getScalarType() == MVT::bf16) return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); if (TTI::requiresOrderedReduction(FMF)) { Opcodes.push_back(RISCV::VFMV_S_F); diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 1762f701a9b2d..71685b4acc822 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -1,25 +1,60 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-NO-ZFHMIN-NO-ZFBFMIN ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE define void @reduce_fadd_bfloat() { -; FP-REDUCE-LABEL: 'reduce_fadd_bfloat' -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR, <1 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR, <2 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast 
bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR, <4 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR, <8 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR, <16 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR, <32 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR, <64 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR, <128 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR, undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vect
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Split via Concat vector types for atomic load (PR #120640)
@@ -1421,6 +1424,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { arsenm wrote: Bump https://github.com/llvm/llvm-project/pull/120640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: refactor issue reporting (PR #135662)
https://github.com/atrosinenko edited https://github.com/llvm/llvm-project/pull/135662 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 47d8c3acda357d0ac2d8cb067f78442fac5e3fda Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0fc50dc1a87b6..d604db17cb8cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2650,6 +2650,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 78f6719 - [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801)
Author: Nikita Popov Date: 2025-04-25T16:19:10-07:00 New Revision: 78f6719ca9b515d416ec67430466d896305d7b81 URL: https://github.com/llvm/llvm-project/commit/78f6719ca9b515d416ec67430466d896305d7b81 DIFF: https://github.com/llvm/llvm-project/commit/78f6719ca9b515d416ec67430466d896305d7b81.diff LOG: [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801) The llvm.metadata section is not emitted and has special semantics. We should not merge globals in it, similarly to how we already skip merging of `llvm.xyz` globals. Fixes https://github.com/llvm/llvm-project/issues/131394. (cherry picked from commit 9356091a98c24718572f99b51553838ed664b67a) Added: llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll Modified: llvm/lib/CodeGen/GlobalMerge.cpp Removed: diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 5993fc939a08a..b4650a4851c3c 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -711,7 +711,8 @@ bool GlobalMergeImpl::run(Module &M) { continue; // Ignore all 'special' globals. 
-if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm.")) +if (GV.getName().starts_with("llvm.") || +GV.getName().starts_with(".llvm.") || Section == "llvm.metadata") continue; // Ignore all "required" globals: diff --git a/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll new file mode 100644 index 0..7db092e13afeb --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +@index = global i32 0, align 4 +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, section "llvm.metadata" +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata" +@llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @index, ptr @.str, ptr @.str.1, i32 1, ptr null }], section "llvm.metadata" + +; CHECK-NOT: .set +; CHECK-NOT: _MergedGlobals ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801) (PR #134052)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/134052 >From 78f6719ca9b515d416ec67430466d896305d7b81 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Apr 2025 16:40:53 +0800 Subject: [PATCH] [GlobalMerge][PPC] Don't merge globals in llvm.metadata section (#131801) The llvm.metadata section is not emitted and has special semantics. We should not merge globals in it, similarly to how we already skip merging of `llvm.xyz` globals. Fixes https://github.com/llvm/llvm-project/issues/131394. (cherry picked from commit 9356091a98c24718572f99b51553838ed664b67a) --- llvm/lib/CodeGen/GlobalMerge.cpp| 3 ++- llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll | 9 + 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 5993fc939a08a..b4650a4851c3c 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -711,7 +711,8 @@ bool GlobalMergeImpl::run(Module &M) { continue; // Ignore all 'special' globals. 
-if (GV.getName().starts_with("llvm.") || GV.getName().starts_with(".llvm.")) +if (GV.getName().starts_with("llvm.") || +GV.getName().starts_with(".llvm.") || Section == "llvm.metadata") continue; // Ignore all "required" globals: diff --git a/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll new file mode 100644 index 0..7db092e13afeb --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/global-merge-llvm-metadata.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +@index = global i32 0, align 4 +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, section "llvm.metadata" +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata" +@llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @index, ptr @.str, ptr @.str.1, i32 1, ptr null }], section "llvm.metadata" + +; CHECK-NOT: .set +; CHECK-NOT: _MergedGlobals ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits