https://github.com/hekota updated https://github.com/llvm/llvm-project/pull/155053
>From 75a7511da94a609d7b2c944d3719a60057f9fa53 Mon Sep 17 00:00:00 2001 From: Helena Kotas <heko...@microsoft.com> Date: Thu, 14 Aug 2025 23:32:05 -0700 Subject: [PATCH 1/3] [HLSL] Codegen for indexing of sub-arrays of multi-dimensional resource arrays Closes #145426 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 134 ++++++++++++++---- .../res-array-global-subarray-many.hlsl | 102 +++++++++++++ .../res-array-global-subarray-one.hlsl | 62 ++++++++ 3 files changed, 270 insertions(+), 28 deletions(-) create mode 100644 clang/test/CodeGenHLSL/resources/res-array-global-subarray-many.hlsl create mode 100644 clang/test/CodeGenHLSL/resources/res-array-global-subarray-one.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 12ece217ba563..c41601159dd2c 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -18,6 +18,7 @@ #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Alignment.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" +#include <cstdint> using namespace clang; using namespace CodeGen; @@ -190,6 +192,71 @@ static void createResourceCtorArgs(CodeGenModule &CGM, CXXConstructorDecl *CD, Args.add(RValue::get(NameStr), AST.getPointerType(AST.CharTy.withConst())); } +// Initializes local resource array variable. For multi-dimensional arrays it +// calls itself recursively to initialize its sub-arrays. The Index used in the +// resource constructor calls will begin at StartIndex and will be incremented +// for each array element. The last used resource Index is returned to the +// caller. 
+static Value *initializeLocalResourceArray( + CodeGenFunction &CGF, AggValueSlot &ValueSlot, + const ConstantArrayType *ArrayTy, CXXConstructorDecl *CD, + llvm::Value *Range, llvm::Value *StartIndex, StringRef ResourceName, + HLSLResourceBindingAttr *RBA, HLSLVkBindingAttr *VkBinding, + ArrayRef<llvm::Value *> PrevGEPIndices, SourceLocation ArraySubsExprLoc) { + + llvm::IntegerType *IntTy = CGF.CGM.IntTy; + llvm::Value *Index = StartIndex; + llvm::Value *One = llvm::ConstantInt::get(IntTy, 1); + uint64_t ArraySize = ArrayTy->getSExtSize(); + QualType ElemType = ArrayTy->getElementType(); + Address TmpArrayAddr = ValueSlot.getAddress(); + + // Add additional index to the getelementptr call indices. + // This index will be updated for each array element in the loops below. + SmallVector<llvm::Value *> GEPIndices(PrevGEPIndices); + GEPIndices.push_back(llvm::ConstantInt::get(IntTy, 0)); + + // array of arrays - recursively initialize the sub-arrays + if (ElemType->isArrayType()) { + const ConstantArrayType *SubArrayTy = cast<ConstantArrayType>(ElemType); + for (uint64_t I = 0; I < ArraySize; I++) { + if (I > 0) { + Index = CGF.Builder.CreateAdd(Index, One); + GEPIndices.back() = llvm::ConstantInt::get(IntTy, I); + } + // recursively initialize the sub-array + Index = initializeLocalResourceArray( + CGF, ValueSlot, SubArrayTy, CD, Range, Index, ResourceName, RBA, + VkBinding, GEPIndices, ArraySubsExprLoc); + } + return Index; + } + + // array of resources - initialize each resource in the array + llvm::Type *Ty = CGF.ConvertTypeForMem(ElemType); + CharUnits ElemSize = CD->getASTContext().getTypeSizeInChars(ElemType); + CharUnits Align = + TmpArrayAddr.getAlignment().alignmentOfArrayElement(ElemSize); + + for (uint64_t I = 0; I < ArraySize; I++) { + if (I > 0) { + Index = CGF.Builder.CreateAdd(Index, One); + GEPIndices.back() = llvm::ConstantInt::get(IntTy, I); + } + Address ThisAddress = + CGF.Builder.CreateGEP(TmpArrayAddr, GEPIndices, Ty, Align); + llvm::Value 
*ThisPtr = CGF.getAsNaturalPointerTo(ThisAddress, ElemType); + + CallArgList Args; + createResourceCtorArgs(CGF.CGM, CD, ThisPtr, Range, Index, ResourceName, + RBA, VkBinding, Args); + CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, ThisAddress, + Args, ValueSlot.mayOverlap(), ArraySubsExprLoc, + ValueSlot.isSanitizerChecked()); + } + return Index; +} + } // namespace llvm::Type * @@ -802,16 +869,14 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr( ArraySubsExpr->getType()->isHLSLResourceRecordArray() && "expected resource array subscript expression"); - // let clang codegen handle local resource array subscripts - const VarDecl *ArrayDecl = dyn_cast<VarDecl>(getArrayDecl(ArraySubsExpr)); + // Let clang codegen handle local resource array subscripts, + // or when the subscript references an opaque expression (as part of + // ArrayInitLoopExpr AST node). + const VarDecl *ArrayDecl = + dyn_cast_or_null<VarDecl>(getArrayDecl(ArraySubsExpr)); if (!ArrayDecl || !ArrayDecl->hasGlobalStorage()) return std::nullopt; - if (ArraySubsExpr->getType()->isArrayType()) - // FIXME: this is not yet implemented (llvm/llvm-project#145426) - llvm_unreachable( - "indexing of sub-arrays of multidimensional arrays not supported yet"); - // get the resource array type ASTContext &AST = ArrayDecl->getASTContext(); const Type *ResArrayTy = ArrayDecl->getType().getTypePtr(); @@ -832,26 +897,30 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr( CGM.IntTy, AST.getConstantArrayElementCount(ArrayTy)); SubIndex = CGF.Builder.CreateMul(SubIndex, Multiplier); } - Index = Index ? 
CGF.Builder.CreateAdd(Index, SubIndex) : SubIndex; ASE = dyn_cast<ArraySubscriptExpr>(ASE->getBase()->IgnoreParenImpCasts()); } // find binding info for the resource array (for implicit binding // an HLSLResourceBindingAttr should have been added by SemaHLSL) - QualType ResourceTy = ArraySubsExpr->getType(); HLSLVkBindingAttr *VkBinding = ArrayDecl->getAttr<HLSLVkBindingAttr>(); HLSLResourceBindingAttr *RBA = ArrayDecl->getAttr<HLSLResourceBindingAttr>(); assert((VkBinding || RBA) && "resource array must have a binding attribute"); + // Find the individual resource type + QualType ResultTy = ArraySubsExpr->getType(); + QualType ResourceTy = + ResultTy->isArrayType() ? AST.getBaseElementType(ResultTy) : ResultTy; + // lookup the resource class constructor based on the resource type and // binding CXXConstructorDecl *CD = findResourceConstructorDecl( AST, ResourceTy, VkBinding || RBA->hasRegisterSlot()); - // create a temporary variable for the resource class instance (we need to + // create a temporary variable for the result, which is either going + // to be a single resource instance or a local array of resources (we need to // return an LValue) - RawAddress TmpVar = CGF.CreateMemTemp(ResourceTy); + RawAddress TmpVar = CGF.CreateMemTemp(ResultTy); if (CGF.EmitLifetimeStart(TmpVar.getPointer())) CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>( NormalEHLifetimeMarker, TmpVar); @@ -860,26 +929,35 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr( TmpVar, Qualifiers(), AggValueSlot::IsDestructed_t(true), AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsAliased_t(false), AggValueSlot::DoesNotOverlap); - - Address ThisAddress = ValueSlot.getAddress(); - llvm::Value *ThisPtr = CGF.getAsNaturalPointerTo( - ThisAddress, CD->getThisType()->getPointeeType()); + Address TmpVarAddress = ValueSlot.getAddress(); // get total array size (= range size) llvm::Value *Range = llvm::ConstantInt::get(CGM.IntTy, getTotalArraySize(AST, ResArrayTy)); - 
// assemble the constructor parameters - CallArgList Args; - createResourceCtorArgs(CGM, CD, ThisPtr, Range, Index, ArrayDecl->getName(), - RBA, VkBinding, Args); - - // call the constructor - CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, ThisAddress, Args, - ValueSlot.mayOverlap(), - ArraySubsExpr->getExprLoc(), - ValueSlot.isSanitizerChecked()); - - return CGF.MakeAddrLValue(TmpVar, ArraySubsExpr->getType(), - AlignmentSource::Decl); + // if the result of the subscript operation is a single resource - call the + // constructor + if (ResultTy == ResourceTy) { + QualType ThisType = CD->getThisType()->getPointeeType(); + llvm::Value *ThisPtr = CGF.getAsNaturalPointerTo(TmpVarAddress, ThisType); + + // assemble the constructor parameters + CallArgList Args; + createResourceCtorArgs(CGM, CD, ThisPtr, Range, Index, ArrayDecl->getName(), + RBA, VkBinding, Args); + // call the constructor + CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, TmpVarAddress, + Args, ValueSlot.mayOverlap(), + ArraySubsExpr->getExprLoc(), + ValueSlot.isSanitizerChecked()); + } else { + // result of the subscript operation is a local resource array + const ConstantArrayType *ArrayTy = + cast<ConstantArrayType>(ResultTy.getTypePtr()); + initializeLocalResourceArray(CGF, ValueSlot, ArrayTy, CD, Range, Index, + ArrayDecl->getName(), RBA, VkBinding, + {llvm::ConstantInt::get(CGM.IntTy, 0)}, + ArraySubsExpr->getExprLoc()); + } + return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl); } diff --git a/clang/test/CodeGenHLSL/resources/res-array-global-subarray-many.hlsl b/clang/test/CodeGenHLSL/resources/res-array-global-subarray-many.hlsl new file mode 100644 index 0000000000000..dbd03b4c034af --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/res-array-global-subarray-many.hlsl @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// CHECK: 
@[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1 + +RWBuffer<float> A[5][4][3][2] : register(u10, space2); +RWStructuredBuffer<float> Out; + +float foo(RWBuffer<float> Arr[3][2]) { + return Arr[1][0][0]; +} + +// NOTE: +// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding +// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name). +// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> + +// CHECK: define internal void @_Z4mainj(i32 noundef %GI) +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[GI_alloca:.*]] = alloca i32, align 4 +// CHECK-NEXT: %Sub = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 +// CHECK-NEXT: %[[Tmp0:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 +// CHECK-NEXT: %a = alloca float, align 4 +// CHECK-NEXT: %b = alloca float, align 4 +// CHECK-NEXT: %[[Tmp1:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 +// CHECK-NEXT: %[[Tmp2:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4 +// CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4 +[numthreads(4,1,1)] +void main(uint GI : SV_GroupThreadID) { +// Codegen for "A[4][1]" - create local array [[Tmp0]] of size 3 x 2 and initialize +// each element by a call to the resource constructor +// The resource index for A[4][1][0][0] is 102 = 4 * (4 * 3 * 2) + 1 * (3 * 2) +// (index in the resource array as if it was flattened) +// CHECK-NEXT: %[[Ptr_Tmp0_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 102, ptr noundef @A.str) #6 +// CHECK-NEXT: %[[Ptr_Tmp0_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 
120, i32 noundef 103, ptr noundef @A.str) #6 +// CHECK-NEXT: %[[Ptr_Tmp0_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 104, ptr noundef @A.str) #6 +// CHECK-NEXT: %[[Ptr_Tmp0_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 105, ptr noundef @A.str) #6 +// CHECK-NEXT: %[[Ptr_Tmp0_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 106, ptr noundef @A.str) #6 +// CHECK-NEXT: %[[Ptr_Tmp0_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 107, ptr noundef @A.str) #6 +// After this Tmp0 values are copied to %Sub using the standard array loop initialization +// (generated from ArrayInitLoopExpr AST node) + RWBuffer<float> Sub[3][2] = A[4][1]; + +// CHECK: %[[Ptr_Sub_2:.*]] = getelementptr inbounds [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %Sub, i32 0, i32 2 +// CHECK: %[[Ptr_Sub_2_1:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Ptr_Sub_2]], i32 0, i32 1 +// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Ptr_Sub_2_1]], i32 noundef 0) +// CHECK-NEXT: %[[Sub_2_1_0_Value:.*]] = load float, ptr %[[BufPtr]], align 4 +// CHECK-NEXT: store float %[[Sub_2_1_0_Value]], ptr %a, align 4 +float a = Sub[2][1][0]; + +// Codegen for "foo(A[2][GI])" - create local array [[Tmp2]] of 
size 3 x 2 and initialize +// each element by a call to the resource constructor with dynamic index, and then +// copy-in the array as an argument of "foo" + +// Calculate the resource index for A[2][GI][0][0] (index in the resource array as if it was flattened) +// The index is 2 * (4 * 3 * 2) + GI * (3 * 2) = 48 + GI * 6 +// CHECK: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4 +// CHECK-NEXT: %[[Index_A_2_GI_Tmp:.*]] = mul i32 %[[GI]], 6 +// CHECK-NEXT: %[[Index_A_2_GI_0_0:.*]] = add i32 %[[Index_A_2_GI_Tmp]], 48 + +// A[2][GI][0][0] +// CHECK-NEXT: %[[Ptr_Tmp2_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_0]], ptr noundef @A.str) + +// A[2][GI][0][1] +// CHECK-NEXT: %[[Index_A_2_GI_0_1:.*]] = add i32 %[[Index_A_2_GI_0_0]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_1]], ptr noundef @A.str) + +// A[2][GI][1][0] +// CHECK-NEXT: %[[Index_A_2_GI_1_0:.*]] = add i32 %[[Index_A_2_GI_0_1]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_0]], ptr noundef @A.str) + +// A[2][GI][1][1] +// CHECK-NEXT: %[[Index_A_2_GI_1_1:.*]] = add i32 %[[Index_A_2_GI_1_0]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_1]], 
i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_1]], ptr noundef @A.str) + +// A[2][GI][2][0] +// CHECK-NEXT: %[[Index_A_2_GI_2_0:.*]] = add i32 %[[Index_A_2_GI_1_1]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_0]], ptr noundef @A.str) + +// A[2][GI][2][1] +// CHECK-NEXT: %[[Index_A_2_GI_2_1:.*]] = add i32 %[[Index_A_2_GI_2_0]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_1]], ptr noundef @A.str) + +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 24, i1 false) +// CHECK-NEXT: %[[FooReturned:.*]] = call {{.*}} float @_Z3fooA3_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %[[Tmp1]]) +// CHECK-NEXT: store float %[[FooReturned]], ptr %b, align 4 + float b = foo(A[2][GI]); + + Out[0] = a + b; +} diff --git a/clang/test/CodeGenHLSL/resources/res-array-global-subarray-one.hlsl b/clang/test/CodeGenHLSL/resources/res-array-global-subarray-one.hlsl new file mode 100644 index 0000000000000..c73873206d54c --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/res-array-global-subarray-one.hlsl @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// CHECK: @[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1 + +RWBuffer<float> A[4][2] : register(u10, space2); +RWStructuredBuffer<float> Out; + +float foo(RWBuffer<float> Arr[2]) { + return 
Arr[1][0]; +} + +// NOTE: +// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding +// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name). +// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float> + +// CHECK: define internal void @_Z4mainj(i32 noundef %GI) +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[GI_alloca:.*]] = alloca i32, align 4 +// CHECK-NEXT: %Sub = alloca [2 x %"class.hlsl::RWBuffer"], align 4 +// CHECK-NEXT: %[[Tmp0:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4 +// CHECK-NEXT: %a = alloca float, align 4 +// CHECK-NEXT: %b = alloca float, align 4 +// CHECK-NEXT: %[[Tmp1:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4 +// CHECK-NEXT: %[[Tmp2:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4 +// CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4 +[numthreads(4,1,1)] +void main(uint GI : SV_GroupThreadID) { +// Codegen for "A[3]" - create local array [[Tmp0]] of size 2 and initialize +// each element by a call to the resource constructor +// CHECK-NEXT: %[[Ptr_Tmp0_0:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp0]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef 6, ptr noundef @A.str) +// CHECK-NEXT: %[[Ptr_Tmp0_1:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp0]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef 7, ptr noundef @A.str) +// After this Tmp0 values are copied to %Sub using the standard array loop initialization +// (generated from ArrayInitLoopExpr AST node) + RWBuffer<float> Sub[2] = A[3]; + +// CHECK: %[[Ptr_Sub_1:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %Sub, i32 0, i32 1 +// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} 
%[[Ptr_Sub_1]], i32 noundef 0) +// CHECK-NEXT: %[[Sub_1_0_Value:.*]] = load float, ptr %[[BufPtr]], align 4 +// CHECK-NEXT: store float %[[Sub_1_0_Value]], ptr %a, align 4 +float a = Sub[1][0]; + +// Codegen for "foo(A[GI])" - create local array [[Tmp2]] of size 2 and initialize +// each element by a call to the resource constructor with dynamic index, and then +// copy-in the array as an argument of "foo" +// CHECK: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4 +// CHECK-NEXT: %[[Index_A_GI_0:.*]] = mul i32 %[[GI]], 2 +// CHECK-NEXT: %[[Ptr_Tmp2_GI_0:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 0 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_GI_0]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef %[[Index_A_GI_0]], ptr noundef @A.str) +// CHECK-NEXT: %[[Index_A_GI_1:.*]] = add i32 %[[Index_A_GI_0]], 1 +// CHECK-NEXT: %[[Ptr_Tmp2_GI_1:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_GI_1]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef %[[Index_A_GI_1]], ptr noundef @A.str) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 8, i1 false) +// CHECK-NEXT: %[[FooReturned:.*]] = call {{.*}} float @_Z3fooA2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp1]]) +// CHECK-NEXT: store float %[[FooReturned]], ptr %b, align 4 + float b = foo(A[GI]); + + Out[0] = a + b; +} >From 7afa5cc2fcd1c7690e263e83632232a84cf85ad2 Mon Sep 17 00:00:00 2001 From: Helena Kotas <heko...@microsoft.com> Date: Fri, 22 Aug 2025 17:30:13 -0700 Subject: [PATCH 2/3] [HLSL] Enable unbounded resource arrays at global scope Closes #145427 --- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/AST/Type.cpp | 4 +- clang/lib/Sema/SemaDecl.cpp | 17 ++++- clang/lib/Sema/SemaHLSL.cpp | 18 +++--- 
.../resources/res-array-global-unbounded.hlsl | 64 +++++++++++++++++++ .../SemaHLSL/unbounded_resource_arrays.hlsl | 21 ++++++ 6 files changed, 114 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGenHLSL/resources/res-array-global-unbounded.hlsl create mode 100644 clang/test/SemaHLSL/unbounded_resource_arrays.hlsl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c733e8823cea6..ed9c772035511 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13159,6 +13159,9 @@ def err_hlsl_resource_range_overlap: Error< "%select{All|Vertex|Hull|Domain|Geometry|Pixel|Amplification|Mesh}9">; def note_hlsl_resource_range_here: Note<"overlapping resource range here">; +def err_hlsl_incomplete_resource_array_in_function_param: Error< + "incomplete resource array in a function parameter">; + // Layout randomization diagnostics. def err_non_designated_init_used : Error< "a randomized struct can only be initialized with a designated initializer">; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 5fbf1999ed725..1815eba4eedba 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -5419,7 +5419,7 @@ bool Type::isHLSLResourceRecordArray() const { const Type *Ty = getUnqualifiedDesugaredType(); if (!Ty->isArrayType()) return false; - while (isa<ConstantArrayType>(Ty)) + while (isa<ArrayType>(Ty)) Ty = Ty->getArrayElementTypeNoTypeQual(); return Ty->isHLSLResourceRecord(); } @@ -5432,7 +5432,7 @@ bool Type::isHLSLIntangibleType() const { return Ty->isHLSLBuiltinIntangibleType(); // unwrap arrays - while (isa<ConstantArrayType>(Ty)) + while (isa<ArrayType>(Ty)) Ty = Ty->getArrayElementTypeNoTypeQual(); const RecordType *RT = diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 98485cf9e72be..09ba88eef741d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ 
-14380,9 +14380,12 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { return; } - // Provide a specific diagnostic for uninitialized variable - // definitions with incomplete array type. - if (Type->isIncompleteArrayType()) { + // Provide a specific diagnostic for uninitialized variable definitions + // with incomplete array type, unless it is a global unbounded HLSL resource + // array. + if (Type->isIncompleteArrayType() && + !(getLangOpts().HLSL && Var->hasGlobalStorage() && + Type->isHLSLResourceRecordArray())) { if (Var->isConstexpr()) Diag(Var->getLocation(), diag::err_constexpr_var_requires_const_init) << Var; @@ -15471,6 +15474,14 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D, } } + // Incomplete resource arrays are not allowed as function parameters in HLSL + if (getLangOpts().HLSL && parmDeclType->isIncompleteArrayType() && + parmDeclType->isHLSLResourceRecordArray()) { + Diag(D.getIdentifierLoc(), + diag::err_hlsl_incomplete_resource_array_in_function_param); + D.setInvalidType(true); + } + // Temporarily put parameter variables in the translation unit, not // the enclosing context. This prevents them from accidentally // looking like class members in C++. 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index f87715950c74c..0028be459d13d 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -351,8 +351,8 @@ getResourceArrayHandleType(VarDecl *VD) { assert(VD->getType()->isHLSLResourceRecordArray() && "expected array of resource records"); const Type *Ty = VD->getType()->getUnqualifiedDesugaredType(); - while (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) - Ty = CAT->getArrayElementTypeNoTypeQual()->getUnqualifiedDesugaredType(); + while (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) + Ty = AT->getArrayElementTypeNoTypeQual()->getUnqualifiedDesugaredType(); return HLSLAttributedResourceType::findHandleTypeOnResource(Ty); } @@ -2008,9 +2008,11 @@ static bool DiagnoseHLSLRegisterAttribute(Sema &S, SourceLocation &ArgLoc, } void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) { - if (isa<VarDecl>(TheDecl)) { - if (SemaRef.RequireCompleteType(TheDecl->getBeginLoc(), - cast<ValueDecl>(TheDecl)->getType(), + if (VarDecl *VD = dyn_cast<VarDecl>(TheDecl)) { + QualType Ty = VD->getType(); + if (Ty->isIncompleteArrayType()) + Ty = cast<IncompleteArrayType>(Ty)->getElementType(); + if (SemaRef.RequireCompleteType(TheDecl->getBeginLoc(), Ty, diag::err_incomplete_type)) return; } @@ -3820,9 +3822,9 @@ void SemaHLSL::collectResourceBindingsOnVarDecl(VarDecl *VD) { // Unwrap arrays // FIXME: Calculate array size while unwrapping const Type *Ty = VD->getType()->getUnqualifiedDesugaredType(); - while (Ty->isConstantArrayType()) { - const ConstantArrayType *CAT = cast<ConstantArrayType>(Ty); - Ty = CAT->getElementType()->getUnqualifiedDesugaredType(); + while (Ty->isArrayType()) { + const ArrayType *AT = cast<ArrayType>(Ty); + Ty = AT->getElementType()->getUnqualifiedDesugaredType(); } // Resource (or array of resources) diff --git a/clang/test/CodeGenHLSL/resources/res-array-global-unbounded.hlsl 
b/clang/test/CodeGenHLSL/resources/res-array-global-unbounded.hlsl new file mode 100644 index 0000000000000..631e4fcc0e4e5 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/res-array-global-unbounded.hlsl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -check-prefixes=CHECK,DXIL +// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -check-prefixes=CHECK,SPV + +// CHECK: @[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1 +// CHECK: @[[BufB:.*]] = private unnamed_addr constant [2 x i8] c"B\00", align 1 + +RWBuffer<float> A[] : register(u10, space1); +RWBuffer<int> B[][5][4]; + +RWStructuredBuffer<float> Out; + +float foo(RWBuffer<int> Arr[4], uint Index) { + return (float)Arr[Index][0]; +} + +// NOTE: +// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding for RWBuffer<float> +// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name). +// - _ZN4hlsl8RWBufferIiEC1EjijjPKc is the constructor call for implicit binding for RWBuffer<int> +// (has "jijj" in the mangled name) and the arguments are (space, range_size, index, order_id, name). 
+// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator on RWBuffer<float> + +[numthreads(4,1,1)] +void main(uint GI : SV_GroupIndex) { + // CHECK: define internal {{.*}}void @_Z4mainj(i32 noundef %GI) + // CHECK: %[[GI_alloca:.*]] = alloca i32, align 4 + // CHECK-NEXT: %a = alloca float, align 4 + // CHECK-NEXT: %[[Tmp0:.*]] = alloca %"class.hlsl::RWBuffer + // CHECK-NEXT: %b = alloca float, align 4 + // CHECK-NEXT: %[[Tmp1:.*]] = alloca [4 x %"class.hlsl::RWBuffer"] + // CHECK-NEXT: %[[Tmp2:.*]] = alloca [4 x %"class.hlsl::RWBuffer"] + // CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4 + + // Make sure A[100] is translated to a RWBuffer<float> constructor call with range -1 and index 100 + // and explicit binding (u10, space1) + // CHECK: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Tmp0]], i32 noundef 10, i32 noundef 1, i32 noundef -1, i32 noundef 100, ptr noundef @A.str) + // CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr{{.*}} @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Tmp0]], i32 noundef 0) + // CHECK-NEXT: %[[Value1:.*]] = load float, ptr{{.*}} %[[BufPtr]], align 4 + // CHECK-NEXT: store float %[[Value1]], ptr %a, align 4 + float a = A[100][0]; + + // Make sure B[2][3] is translated to a local RWBuffer<int>[4] array where each array element + // is initialized by a constructor call with range -1 and index 52-55 + // and implicit binding (space 0, order_id 0) + // CHECK-NEXT: %[[Ptr_Tmp2_0:.*]] = getelementptr [4 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 0 + // CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1EjijjPKc(ptr {{.*}} %[[Ptr_Tmp2_0]], i32 noundef 0, i32 noundef -1, i32 noundef 52, i32 noundef 0, ptr noundef @B.str) + // CHECK-NEXT: %[[Ptr_Tmp2_1:.*]] = getelementptr [4 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 1 + // CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1EjijjPKc(ptr {{.*}} %[[Ptr_Tmp2_1]], i32 noundef 0, i32 noundef -1, i32 noundef 53, i32 noundef 0, ptr noundef @B.str) + // CHECK-NEXT: %[[Ptr_Tmp2_2:.*]] = 
getelementptr [4 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 2 + // CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1EjijjPKc(ptr {{.*}} %[[Ptr_Tmp2_2]], i32 noundef 0, i32 noundef -1, i32 noundef 54, i32 noundef 0, ptr noundef @B.str) + // CHECK-NEXT: %[[Ptr_Tmp2_3:.*]] = getelementptr [4 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 3 + // CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1EjijjPKc(ptr {{.*}} %[[Ptr_Tmp2_3]], i32 noundef 0, i32 noundef -1, i32 noundef 55, i32 noundef 0, ptr noundef @B.str) + // DXIL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 16, i1 false) + // SPV-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[Tmp1]], ptr align 8 %[[Tmp2]], i64 32, i1 false) + // CHECK-NEXT: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4 + // DXIL-NEXT: %[[Value2:.*]] = call {{.*}} float @_Z3fooA4_N4hlsl8RWBufferIiEEj(ptr noundef byval([4 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp1]], i32 noundef %[[GI]]) + // SPV-NEXT: %[[Value2:.*]] = call {{.*}} float @_Z3fooA4_N4hlsl8RWBufferIiEEj(ptr noundef byval([4 x %"class.hlsl::RWBuffer"]) align 8 %[[Tmp1]], i32 noundef %[[GI]]) + // CHECK-NEXT: store float %[[Value2]], ptr %b, align 4 + float b = foo(B[2][3], GI); + + Out[0] = a + b; +} diff --git a/clang/test/SemaHLSL/unbounded_resource_arrays.hlsl b/clang/test/SemaHLSL/unbounded_resource_arrays.hlsl new file mode 100644 index 0000000000000..b65deeda9a4ff --- /dev/null +++ b/clang/test/SemaHLSL/unbounded_resource_arrays.hlsl @@ -0,0 +1,21 @@ +// RUN: not %clang_cc1 -triple dxil-pc-shadermodel6.3-compute -finclude-default-header --o - %s -verify + +// unbounded resource array at a global scope +RWBuffer<float> unbounded_array[]; // no_error + +// expected-error@+1 {{incomplete resource array in a function parameter}} +void foo(RWBuffer<float> array_arg[]) {} + +RWBuffer<float> A, B; + +[numthreads(4,1,1)] +void main() { + // expected-error@+1{{definition of variable with array type needs an explicit size 
or an initializer}} + RWBuffer<float> res_local_array1[]; + + // expected-error@+1{{array initializer must be an initialzer list}} + RWBuffer<float> res_local_array2[] = unbounded_array; + + // local incomplete resource array with initializer + RWBuffer<float> res_local_array3[] = { A, B }; // no error +} >From ad85d72bc6000f5a864dfc6b5930ccfac17748bb Mon Sep 17 00:00:00 2001 From: Helena Kotas <heko...@microsoft.com> Date: Thu, 4 Sep 2025 21:04:59 -0700 Subject: [PATCH 3/3] code review feedback - use dyncast --- clang/lib/Sema/SemaHLSL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 0028be459d13d..f739b31c04bf0 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2010,8 +2010,8 @@ static bool DiagnoseHLSLRegisterAttribute(Sema &S, SourceLocation &ArgLoc, void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) { if (VarDecl *VD = dyn_cast<VarDecl>(TheDecl)) { QualType Ty = VD->getType(); - if (Ty->isIncompleteArrayType()) - Ty = cast<IncompleteArrayType>(Ty)->getElementType(); + if (const auto *IAT = dyn_cast<IncompleteArrayType>(Ty)) + Ty = IAT->getElementType(); if (SemaRef.RequireCompleteType(TheDecl->getBeginLoc(), Ty, diag::err_incomplete_type)) return; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits