https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713
>From fb2ed73b44facf865312d7efe32053718fcd6458 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Thu, 12 Sep 2024 15:25:43 -0400 Subject: [PATCH] [Attributor] Use more appropriate approach to check flat address space --- llvm/include/llvm/Transforms/IPO/Attributor.h | 7 ++--- .../Transforms/IPO/AttributorAttributes.cpp | 26 ++++++++++++++----- .../CodeGen/AMDGPU/simple-indirect-call.ll | 5 ++-- .../Attributor/address_space_info.ll | 4 ++- .../Attributor/memory_locations_gpu.ll | 8 +++--- .../test/Transforms/Attributor/nocapture-1.ll | 4 +-- .../reduced/openmp_opt_constant_type_crash.ll | 1 - .../Transforms/Attributor/value-simplify.ll | 3 +-- .../Transforms/OpenMP/nested_parallelism.ll | 4 +-- .../OpenMP/spmdization_kernel_env_dep.ll | 25 +++++++++--------- 10 files changed, 51 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 921fe945539510..59bae547522ea7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6267,11 +6267,12 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> { return (AA->getIdAddr() == &ID); } - // No address space which indicates the associated value is dead. - static const uint32_t NoAddressSpace = ~0U; - /// Unique ID (due to the unique address) static const char ID; + +protected: + // Invalid address space which indicates the associated value is dead. + static const uint32_t InvalidAddressSpace = ~0U; }; struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 217c7cccb5775a..b2888f556d7d0d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); - if (getAssociatedType()->getPointerAddressSpace()) + + if (!A.getInfoCache().getDL().getFlatAddressSpace().has_value()) { + indicatePessimisticFixpoint(); + return; + } + + unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value(); + unsigned AS = getAssociatedType()->getPointerAddressSpace(); + if (AS != FlatAS) { + [[maybe_unused]] bool R = takeAddressSpace(AS); + assert(R && "The take should happen"); indicateOptimisticFixpoint(); + } } ChangeStatus updateImpl(Attributor &A) override { @@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { - Value *AssociatedValue = &getAssociatedValue(); - Value *OriginalValue = peelAddrspacecast(AssociatedValue); - if (getAddressSpace() == NoAddressSpace || + if (getAddressSpace() == InvalidAddressSpace || getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; + Value *AssociatedValue = &getAssociatedValue(); + Value *OriginalValue = peelAddrspacecast(AssociatedValue); + PointerType *NewPtrTy = PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); bool UseOriginalValue = @@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace { if (!isValidState()) return "addrspace(<invalid>)"; return "addrspace(" + - (AssumedAddressSpace == NoAddressSpace + (AssumedAddressSpace == InvalidAddressSpace ? "none" : std::to_string(AssumedAddressSpace)) + ")"; } private: - uint32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = InvalidAddressSpace; bool takeAddressSpace(uint32_t AS) { - if (AssumedAddressSpace == NoAddressSpace) { + if (AssumedAddressSpace == InvalidAddressSpace) { AssumedAddressSpace = AS; return true; } diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index cca7b49996ff3b..971161a1c59855 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -34,8 +34,9 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr +; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 ; ATTRIBUTOR_GCN-NEXT: call void @indirect() ; ATTRIBUTOR_GCN-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll index 73dd93c55b819b..0c8b06ac6666a4 100644 --- a/llvm/test/Transforms/Attributor/address_space_info.ll +++ b/llvm/test/Transforms/Attributor/address_space_info.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK + +; REQUIRES: amdgpu-registered-target @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4 diff --git a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll index c10883b54ad591..db4647232c882e 100644 --- a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll +++ b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll @@ -29,7 +29,7 @@ define i32 @test_const_as_global2() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test_const_as_global2 ; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(4) @G, align 4 +; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr), align 4 ; CHECK-NEXT: ret i32 [[L2]] ; %l2 = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr) @@ -41,7 +41,8 @@ define i32 @test_const_as_call1() { ; CHECK-LABEL: define {{[^@]+}}@test_const_as_call1 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(4) @ptr_to_const() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(4) [[P1]], align 4 +; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(4) [[P1]] to ptr +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4 ; CHECK-NEXT: ret i32 [[L1]] ; %p1 = call ptr addrspace(4) @ptr_to_const() @@ -71,7 +72,8 @@ define i32 @test_shared_as_call1() { ; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call1 ; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(3) @ptr_to_shared() #[[ATTR4]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(3) [[P1]], align 4 +; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(3) [[P1]] to ptr +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4 ; CHECK-NEXT: ret i32 [[L1]] ; %p1 = call ptr addrspace(3) @ptr_to_shared() diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 3401ddfdd7d758..de5f31e470edfc 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -257,7 +257,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) { ; TUNIT-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr ; TUNIT-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]] ; TUNIT-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4 -; TUNIT-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4 +; TUNIT-NEXT: store i32 0, ptr [[TMP]], align 4 ; TUNIT-NEXT: store ptr [[Q]], ptr @g, align 8 ; TUNIT-NEXT: ret i32 [[VAL]] ; @@ -272,7 +272,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) { ; CGSCC-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr ; CGSCC-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]] ; CGSCC-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4 -; CGSCC-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4 +; CGSCC-NEXT: store i32 0, ptr [[TMP]], align 4 ; CGSCC-NEXT: store ptr [[Q]], ptr @g, align 8 ; CGSCC-NEXT: ret i32 [[VAL]] ; diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll index 518ed97f42bc10..fda72a6e31a0c7 100644 --- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll @@ -116,7 +116,6 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized ; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5) ; CHECK-NEXT: ret i1 false ; ; diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 68f179c88116e4..a5789790cc92a1 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -838,8 +838,7 @@ define void @user() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user ; TUNIT-SAME: () #[[ATTR5]] { -; TUNIT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr) to ptr addrspace(3) -; TUNIT-NEXT: store i32 0, ptr addrspace(3) [[TMP1]], align 4 +; TUNIT-NEXT: store i32 0, ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr), align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write) diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll index 5c4386b24a3d5a..4f4a87cbddfec1 100644 --- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll +++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll @@ -60,7 +60,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]] ; CHECK: region.guarded.i: ; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32 -; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16 +; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16 ; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]] ; CHECK: _Z3fooi.internalized.exit: ; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]] @@ -140,7 +140,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, ; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]] -; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16 +; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16 ; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8 ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]]) diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll index 52be16c41f872d..ce7b4f89b893ff 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll @@ -27,22 +27,21 @@ define i32 @fputs() { define internal i32 @__kmpc_target_init(ptr %0, ptr %dyn) { ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init ; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[DYN:%.*]]) #[[ATTR1:[0-9]+]] { -; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1) -; AMDGPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2 -; AMDGPU-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 2 -; AMDGPU-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0 -; AMDGPU-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false -; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; AMDGPU: 8: +; AMDGPU-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2 +; AMDGPU-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 2 +; AMDGPU-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 +; AMDGPU-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false +; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; AMDGPU: 7: ; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648 -; AMDGPU-NEXT: br label [[TMP9]] +; AMDGPU-NEXT: br label [[TMP8]] +; AMDGPU: 8: +; AMDGPU-NEXT: br label [[TMP10:%.*]] ; AMDGPU: 9: -; AMDGPU-NEXT: br label [[TMP11:%.*]] -; AMDGPU: 10: ; AMDGPU-NEXT: unreachable -; AMDGPU: 11: +; AMDGPU: 10: ; AMDGPU-NEXT: ret i32 0 ; %2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits