https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/141633
Clang does not instantiate amdgpu_max_num_work_groups attribute with one template argument, causing assertion codegen. Fixes: https://github.com/llvm/llvm-project/issues/139570 >From f4849e095e750195d072484cb282c5f5feb9bb24 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <yaxun....@amd.com> Date: Tue, 27 May 2025 13:03:32 -0400 Subject: [PATCH] [AMDGPU] fix amdgpu_max_num_work_groups in templates Clang does not instantiate amdgpu_max_num_work_groups attribute with one template argument, causing assertion codegen. Fixes: https://github.com/llvm/llvm-project/issues/139570 --- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 35 ++++++++++++------- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu | 6 ++++ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 44700a446dfac..174c8fc59e4fa 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -648,21 +648,30 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); - ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs); - if (!ResultX.isUsable()) - return; - ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs); - if (!ResultY.isUsable()) - return; - ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs); - if (!ResultZ.isUsable()) - return; + Expr *XExpr = nullptr; + Expr *YExpr = nullptr; + Expr *ZExpr = nullptr; + + if (Attr.getMaxNumWorkGroupsX()) { + ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs); + if (ResultX.isUsable()) + XExpr = ResultX.getAs<Expr>(); + } + + if (Attr.getMaxNumWorkGroupsY()) { + ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs); + if (ResultY.isUsable()) + YExpr = ResultY.getAs<Expr>(); + } - Expr *XExpr = ResultX.getAs<Expr>(); - Expr *YExpr = ResultY.getAs<Expr>(); - Expr *ZExpr = ResultZ.getAs<Expr>(); + if (Attr.getMaxNumWorkGroupsZ()) { + ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs); + if (ResultZ.isUsable()) + ZExpr = ResultZ.getAs<Expr>(); + } - S.AMDGPU().addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr); + if (XExpr) + S.AMDGPU().addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr); } // This doesn't take any template parameters, but we have a custom action that diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index 253ac0898f546..ced0059e69d9b 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -78,6 +78,12 @@ __global__ void template_32_4_a_max_num_work_groups() {} template __global__ void template_32_4_a_max_num_work_groups<2>(); // CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_4_a_max_num_work_groupsILj2EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]] +template<unsigned a> +__attribute__((amdgpu_max_num_work_groups(a))) +__global__ void template_a_max_num_work_groups() {} +template __global__ void template_a_max_num_work_groups<32>(); +// CHECK: define{{.*}} amdgpu_kernel void @_Z30template_a_max_num_work_groupsILj32EEvv() [[MAX_NUM_WORK_GROUPS_32_1_1]] + // Make sure this is silently accepted on other targets. // NAMD-NOT: "amdgpu-flat-work-group-size" // NAMD-NOT: "amdgpu-waves-per-eu" _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits