https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/101593
>From 9b743f5bd577be07c858b1357da8d39264bc34db Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Thu, 1 Aug 2024 20:30:07 -0400 Subject: [PATCH] [WIP][AMDGPU] Enable `AAAddressSpace` in `AMDGPUAttributor` --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 13 ++++++- .../AMDGPU/annotate-kernel-features-hsa.ll | 36 +++++++++++-------- .../CodeGen/AMDGPU/simple-indirect-call.ll | 15 ++++---- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index de1f3421cce4e..39c52140dfbd2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1038,7 +1038,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, - &AAUnderlyingObjects::ID}); + &AAUnderlyingObjects::ID, &AAAddressSpace::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; @@ -1064,6 +1064,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { } else if (CC == CallingConv::AMDGPU_KERNEL) { addPreloadKernArgHint(F, TM); } + + for (auto &I : instructions(F)) { + if (auto *LI = dyn_cast<LoadInst>(&I)) { + A.getOrCreateAAFor<AAAddressSpace>( + IRPosition::value(*LI->getPointerOperand())); + } + if (auto *SI = dyn_cast<StoreInst>(&I)) { + A.getOrCreateAAFor<AAAddressSpace>( + IRPosition::value(*SI->getPointerOperand())); + } + } } ChangeStatus Change = A.run(); diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 43cdf85ed3818..879bceaef97c0 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -425,8 +425,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 +; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(3) %ptr to ptr @@ -443,8 +442,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast ; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 +; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(5) %ptr to ptr @@ -478,11 +476,16 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 { ; No-op addrspacecast should not use queue ptr define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 { -; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast -; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { -; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr -; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast +; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4 +; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(1) %ptr to ptr store volatile i32 0, ptr %stof @@ -490,11 +493,16 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt } define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 { -; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast -; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { -; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr -; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast +; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr +; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast +; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[PTR]], align 4 +; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(4) %ptr to ptr %ld = load volatile i32, ptr %stof diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index e86ee1adef3d0..3a6b0485d2417 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -34,9 +34,8 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 ; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; @@ -75,12 +74,16 @@ define amdgpu_kernel void @test_simple_indirect_call() { ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +;. +; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +;. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits