llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> This did not consider the IR change to allow a scalar base with a vector offset part. Reject any users that are not explicitly handled. In this situation we could handle the vector GEP, but that is a larger change. This just avoids the IR verifier error by rejecting it. --- Full diff: https://github.com/llvm/llvm-project/pull/114113.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+14-4) - (added) llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll (+44) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index f8744d6a483cff..7dd7388376f474 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) { if (LI->isVolatile()) return false; - continue; } @@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // Reject if the stored value is not the pointer operand. if (SI->getPointerOperand() != Val) return false; - } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) { + continue; + } + + if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) { if (RMW->isVolatile()) return false; - } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) { + continue; + } + + if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) { if (CAS->isVolatile()) return false; + continue; } // Only promote a select if we know that the other select operand @@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // May need to rewrite constant operands. WorkList.push_back(ICmp); + continue; } // TODO: If we know the address is only observed through flat pointers, we @@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User)) return false; + // TODO: Handle vectors of pointers. if (!User->getType()->isPointerTy()) - continue; + return false; if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) { // Be conservative if an address could be computed outside the bounds of @@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS); + assert(isa<PointerType>(V->getType())); + // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll new file mode 100644 index 00000000000000..b0d578e421e280 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s + +; Check that invalid IR is not produced on a vector typed +; getelementptr with a scalar alloca pointer base. + +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3> + %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0 + store i32 0, ptr addrspace(5) %extractelement + ret void +} + +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 3, i64 2, i64 1, i64 0> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]] +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3> + %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0> + %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1 + %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1 + store i32 0, ptr addrspace(5) %extractelement + ret void +} `````````` </details> https://github.com/llvm/llvm-project/pull/114113 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits