https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102007
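The patch teaches InferAddressSpaces to look through an addrspacecast feeding
llvm.masked.load and llvm.masked.store, the same way it already handles
llvm.masked.gather and llvm.masked.scatter: the intrinsic is remangled for the
original (non-flat) address space and the cast pointer operand is replaced. A
minimal sketch of the intended effect (the <4 x float> type and the names
@sketch, %p, %m are illustrative, not taken from the patch; the new test below
uses <32 x i32>):

  ; Input: a masked load through a flat pointer derived from a global pointer.
  define <4 x float> @sketch(ptr addrspace(1) %p, <4 x i1> %m) {
    %flat = addrspacecast ptr addrspace(1) %p to ptr
    %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %flat, i32 16, <4 x i1> %m, <4 x float> zeroinitializer)
    ret <4 x float> %v
  }

  ; Expected after opt -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces:
  ; the cast is folded away and the intrinsic is remangled for addrspace(1):
  ;   %v = call <4 x float> @llvm.masked.load.v4f32.p1(ptr addrspace(1) %p, i32 16, <4 x i1> %m, <4 x float> zeroinitializer)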
>From 0cd0fd76cf0c4bd5139f5ae138c32da5c0c154c4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Mon, 5 Aug 2024 19:49:31 +0400
Subject: [PATCH] InferAddressSpaces: Handle masked load and store intrinsics

---
 .../Transforms/Scalar/InferAddressSpaces.cpp | 16 +++--
 .../AMDGPU/masked-load-store.ll              | 68 +++++++++++++++++++
 2 files changed, 77 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 87b885447cc02..2ddf24be67702 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                       Value *OldV,
                                                       Value *NewV) const {
   Module *M = II->getParent()->getParent()->getParent();
-
-  switch (II->getIntrinsicID()) {
-  case Intrinsic::objectsize: {
+  Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::objectsize:
+  case Intrinsic::masked_load: {
     Type *DestTy = II->getType();
     Type *SrcTy = NewV->getType();
-    Function *NewDecl =
-        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+    Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy});
     II->setArgOperand(0, NewV);
     II->setCalledFunction(NewDecl);
     return true;
@@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
   case Intrinsic::masked_gather: {
     Type *RetTy = II->getType();
     Type *NewPtrTy = NewV->getType();
-    Function *NewDecl =
-        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+    Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy});
     II->setArgOperand(0, NewV);
     II->setCalledFunction(NewDecl);
     return true;
   }
+  case Intrinsic::masked_store:
   case Intrinsic::masked_scatter: {
     Type *ValueTy = II->getOperand(0)->getType();
     Type *NewPtrTy = NewV->getType();
@@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
+  case Intrinsic::masked_load:
   case Intrinsic::masked_gather:
   case Intrinsic::prefetch:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
+  case Intrinsic::masked_store:
   case Intrinsic::masked_scatter:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
                                                  PostorderStack, Visited);
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
new file mode 100644
index 0000000000000..e14dfd055cbe8
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+
+define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+  %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+  %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
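For local verification, the new test's RUN line can be replayed by hand
(assuming opt and FileCheck from a local build are on PATH, run from the
source tree root):

  opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces \
      llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll \
    | FileCheck llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll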