https://github.com/skc7 created https://github.com/llvm/llvm-project/pull/188007
Upstreaming clangIR PR: https://github.com/llvm/clangir/pull/2091 This patch adds support for AMDGPU-specific function attributes for HIP kernels Added setTargetAttributes for AMDGPUTargetCIRGenInfo to set kernel attributes Added generic string attribute handler in amendFunction to translate string-values with "cir." prefix function attributes to LLVM function attributes Follows OGCG AMDGPU implementation from "clang/lib/CodeGen/Targets/AMDGPU.cpp". >From 8e53f91820aa1158951de2ae7beac94fcaed0545 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Mon, 23 Mar 2026 15:20:23 +0530 Subject: [PATCH] [CIR][AMDGPU] Add AMDGPU-specific function attributes for HIP kernels --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 14 +- clang/lib/CIR/CodeGen/CMakeLists.txt | 1 + clang/lib/CIR/CodeGen/TargetInfo.cpp | 10 + clang/lib/CIR/CodeGen/TargetInfo.h | 5 + clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp | 256 ++++++++++++++++++ .../Lowering/DirectToLLVM/LowerToLLVMIR.cpp | 27 +- clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip | 82 ++++++ 7 files changed, 386 insertions(+), 9 deletions(-) create mode 100644 clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp create mode 100644 clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index f3ab733bf4c6a..4be669777bb26 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -669,7 +669,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *op) { assert(!cir::MissingFeatures::opFuncCPUAndFeaturesAttributes()); assert(!cir::MissingFeatures::opFuncSection()); - assert(!cir::MissingFeatures::setTargetAttributes()); + getTargetCIRGenInfo().setTargetAttributes(gd.getDecl(), op, *this); } std::optional<cir::SourceLanguage> CIRGenModule::getCIRSourceLanguage() const { @@ -2557,12 +2557,15 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, // represent them in dedicated ops. The correct attributes are ensured during // translation to LLVM. Thus, we don't need to check for them here. + const auto *funcDecl = cast<FunctionDecl>(globalDecl.getDecl()); + if (!isIncompleteFunction) setCIRFunctionAttributes(globalDecl, getTypes().arrangeGlobalDeclaration(globalDecl), func, isThunk); - assert(!cir::MissingFeatures::setTargetAttributes()); + if (!isIncompleteFunction && func.isDeclaration()) + getTargetCIRGenInfo().setTargetAttributes(funcDecl, func, *this); // TODO(cir): This needs a lot of work to better match CodeGen. That // ultimately ends up in setGlobalVisibility, which already has the linkage of @@ -2574,17 +2577,16 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, } // If we plan on emitting this inline builtin, we can't treat it as a builtin. - const auto *fd = cast<FunctionDecl>(globalDecl.getDecl()); - if (fd->isInlineBuiltinDeclaration()) { + if (funcDecl->isInlineBuiltinDeclaration()) { const FunctionDecl *fdBody; - bool hasBody = fd->hasBody(fdBody); + bool hasBody = funcDecl->hasBody(fdBody); (void)hasBody; assert(hasBody && "Inline builtin declarations should always have an " "available body!"); assert(!cir::MissingFeatures::attributeNoBuiltin()); } - if (fd->isReplaceableGlobalAllocationFunction()) { + if (funcDecl->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. func->setAttr(cir::CIRDialect::getNoBuiltinAttrName(), diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 8548cc8424527..9b8fdf551ef10 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -51,6 +51,7 @@ add_clang_library(clangCIR CIRGenTypes.cpp CIRGenVTables.cpp TargetInfo.cpp + Targets/AMDGPU.cpp DEPENDS MLIRCIR diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index ee68d9c329b83..3859588c5cfaf 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -1,6 +1,7 @@ #include "TargetInfo.h" #include "ABIInfo.h" #include "CIRGenFunction.h" +#include "CIRGenModule.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -53,6 +54,15 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { public: AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {} + + void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global, + CIRGenModule &cgm) const override { + auto func = mlir::dyn_cast<cir::FuncOp>(global); + if (!func) + return; + + setAMDGPUTargetFunctionAttributes(decl, func, cgm); + } }; } // namespace diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index b397d8cd7fab8..868af0e8343fb 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include <memory> @@ -135,6 +136,10 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt); +/// Set AMDGPU-specific function attributes for HIP kernels. +void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, + cir::FuncOp func, CIRGenModule &cgm); + std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); diff --git a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp new file mode 100644 index 0000000000000..280cb6ae5865c --- /dev/null +++ b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp @@ -0,0 +1,256 @@ +//===---- AMDGPU.cpp - AMDGPU-specific CIR CodeGen ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides AMDGPU-specific CIR CodeGen logic for function attributes. +// +//===----------------------------------------------------------------------===// + +#include "../CIRGenModule.h" +#include "../TargetInfo.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::CIRGen; + +namespace { + +/// Check if AMDGPU protected visibility is required. +static bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, + cir::FuncOp func) { + if (func.getGlobalVisibility() != cir::VisibilityKind::Hidden) + return false; + + if (d->hasAttr<OMPDeclareTargetDeclAttr>()) + return false; + + return d->hasAttr<DeviceKernelAttr>() || + (clang::isa<clang::FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()); +} + +/// Handle amdgpu-flat-work-group-size attribute. +static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, + CIRGenModule &cgm, + bool isOpenCLKernel) { + auto &builder = cgm.getBuilder(); + const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + const auto *reqdWGS = + cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + + if (flatWGS || reqdWGS) { + unsigned min = 0, max = 0; + if (flatWGS) { + min = flatWGS->getMin() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + max = flatWGS->getMax() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + } + if (reqdWGS && min == 0 && max == 0) { + min = max = reqdWGS->getXDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() * + reqdWGS->getYDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() * + reqdWGS->getZDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + } + if (min != 0) { + assert(min <= max && "Min must be less than or equal Max"); + std::string attrVal = llvm::utostr(min) + "," + llvm::utostr(max); + func->setAttr("cir.amdgpu-flat-work-group-size", + builder.getStringAttr(attrVal)); + } else { + assert(max == 0 && "Max must be zero"); + } + } else { + const unsigned defaultMax = + isOpenCLKernel ? 256 : cgm.getLangOpts().GPUMaxThreadsPerBlock; + std::string attrVal = std::string("1,") + llvm::utostr(defaultMax); + func->setAttr("cir.amdgpu-flat-work-group-size", + builder.getStringAttr(attrVal)); + } +} + +/// Handle amdgpu-waves-per-eu attribute. +static void handleAMDGPUWavesPerEUAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>(); + if (!attr) + return; + + auto &builder = cgm.getBuilder(); + unsigned min = + attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue(); + unsigned max = attr->getMax() + ? attr->getMax() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 0; + + if (min != 0) { + assert((max == 0 || min <= max) && "Min must be less than or equal Max"); + std::string attrVal = llvm::utostr(min); + if (max != 0) + attrVal = attrVal + "," + llvm::utostr(max); + func->setAttr("cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal)); + } else { + assert(max == 0 && "Max must be zero"); + } +} + +/// Handle amdgpu-num-sgpr attribute. +static void handleAMDGPUNumSGPRAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>(); + if (!attr) + return; + + uint32_t numSGPR = attr->getNumSGPR(); + if (numSGPR != 0) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-num-sgpr", + builder.getStringAttr(llvm::utostr(numSGPR))); + } +} + +/// Handle amdgpu-num-vgpr attribute. +static void handleAMDGPUNumVGPRAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>(); + if (!attr) + return; + + uint32_t numVGPR = attr->getNumVGPR(); + if (numVGPR != 0) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-num-vgpr", + builder.getStringAttr(llvm::utostr(numVGPR))); + } +} + +/// Handle amdgpu-max-num-workgroups attribute. +static void handleAMDGPUMaxNumWorkGroupsAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, + CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>(); + if (!attr) + return; + + auto &builder = cgm.getBuilder(); + uint32_t x = attr->getMaxNumWorkGroupsX() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + uint32_t y = attr->getMaxNumWorkGroupsY() + ? attr->getMaxNumWorkGroupsY() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 1; + uint32_t z = attr->getMaxNumWorkGroupsZ() + ? attr->getMaxNumWorkGroupsZ() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 1; + + llvm::SmallString<32> attrVal; + llvm::raw_svector_ostream os(attrVal); + os << x << ',' << y << ',' << z; + func->setAttr("cir.amdgpu-max-num-workgroups", + builder.getStringAttr(attrVal.str())); +} + +/// Handle amdgpu-cluster-dims attribute. +static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm, + bool isOpenCLKernel) { + auto &builder = cgm.getBuilder(); + + if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) { + auto getExprVal = [&](const Expr *e) { + return e ? e->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue() + : 1; + }; + unsigned x = getExprVal(attr->getX()); + unsigned y = getExprVal(attr->getY()); + unsigned z = getExprVal(attr->getZ()); + + llvm::SmallString<32> attrVal; + llvm::raw_svector_ostream os(attrVal); + os << x << ',' << y << ',' << z; + func->setAttr("cir.amdgpu-cluster-dims", + builder.getStringAttr(attrVal.str())); + } + + const clang::TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo(); + if ((isOpenCLKernel && + targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap, + "clusters")) || + fd->hasAttr<CUDANoClusterAttr>()) { + func->setAttr("cir.amdgpu-cluster-dims", builder.getStringAttr("0,0,0")); + } +} + +/// Handle amdgpu-ieee attribute. +static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm) { + if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false")); + } +} + +} // anonymous namespace + +void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, + cir::FuncOp func, + CIRGenModule &cgm) { + const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl); + if (!fd) + return; + + if (func.isDeclaration()) + return; + + // Set protected visibility for AMDGPU kernels + if (requiresAMDGPUProtectedVisibility(decl, func)) { + func.setGlobalVisibility(cir::VisibilityKind::Protected); + func.setDSOLocal(true); + } + + const bool isOpenCLKernel = + cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>(); + const bool isHIPKernel = + cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>(); + + if (!isOpenCLKernel && !isHIPKernel) + return; + + // Set HIP kernel calling convention + if (isHIPKernel) { + // TODO(CIR) : Add amdgpu calling conv. + func.setVisibility(mlir::SymbolTable::Visibility::Public); + func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( + func.getContext(), cir::GlobalLinkageKind::ExternalLinkage)); + } + + handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUWavesPerEUAttr(fd, func, cgm); + handleAMDGPUNumSGPRAttr(fd, func, cgm); + handleAMDGPUNumVGPRAttr(fd, func, cgm); + handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm); + handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUIEEEAttr(func, cgm); +} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp index 2a95cfb9371b1..dbedbb5647aa5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp @@ -15,9 +15,7 @@ #include "mlir/IR/DialectRegistry.h" #include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" -#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" -#include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Constant.h" #include "llvm/IR/GlobalVariable.h" @@ -54,7 +52,11 @@ class CIRDialectLLVMIRTranslationInterface mlir::Operation *op, llvm::ArrayRef<llvm::Instruction *> instructions, mlir::NamedAttribute attribute, mlir::LLVM::ModuleTranslation &moduleTranslation) const override { - if (auto mod = dyn_cast<mlir::ModuleOp>(op)) { + if (auto func = dyn_cast<mlir::LLVM::LLVMFuncOp>(op)) { + if (mlir::failed( + amendFunction(func, instructions, attribute, moduleTranslation))) + return mlir::failure(); + } else if (auto mod = dyn_cast<mlir::ModuleOp>(op)) { if (mlir::failed(amendModule(mod, attribute, moduleTranslation))) return mlir::failure(); } @@ -62,6 +64,25 @@ class CIRDialectLLVMIRTranslationInterface } private: + // Translate CIR function attributes to LLVM function attributes. + mlir::LogicalResult + amendFunction(mlir::LLVM::LLVMFuncOp func, + llvm::ArrayRef<llvm::Instruction *> instructions, + mlir::NamedAttribute attribute, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName()); + llvm::StringRef attrName = attribute.getName().strref(); + + // Strip the "cir." prefix to get the LLVM attribute name. + llvm::StringRef llvmAttrName = attrName.substr(strlen("cir.")); + if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue())) { + llvmFunc->addFnAttr(llvmAttrName, strAttr.getValue()); + return mlir::success(); + } + + return mlir::success(); + } + // Translate CIR's module attributes to LLVM's module metadata mlir::LogicalResult amendModule(mlir::ModuleOp mod, mlir::NamedAttribute attribute, diff --git a/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip new file mode 100644 index 0000000000000..5a15f62899cf8 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip @@ -0,0 +1,82 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll +// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ogcg.ll + +// Test that AMDGPU-specific attributes are generated for HIP kernels + +// Test: Default attributes for simple kernel +// CIR: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-flat-work-group-size" = "1,1024" +// LLVM: define{{.*}} void @_Z13kernel_simplev(){{.*}} #[[SIMPLE_ATTR:[0-9]+]] +__global__ void kernel_simple() {} + +// Test: Explicit flat work group size attribute +// CIR: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-flat-work-group-size" = "64,128" +// LLVM: define{{.*}} void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[FLAT_WG_ATTR:[0-9]+]] +__attribute__((amdgpu_flat_work_group_size(64, 128))) +__global__ void kernel_flat_wg_size_1() {} + +// Test: Waves per EU attribute +// CIR: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-waves-per-eu" = "2" +// LLVM: define{{.*}} void @_Z19kernel_waves_per_euv(){{.*}} #[[WAVES_ATTR:[0-9]+]] +__attribute__((amdgpu_waves_per_eu(2))) +__global__ void kernel_waves_per_eu() {} + +// Test: Waves per EU with min and max +// CIR: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-waves-per-eu" = "2,4" +// LLVM: define{{.*}} void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[WAVES_MM_ATTR:[0-9]+]] +__attribute__((amdgpu_waves_per_eu(2, 4))) +__global__ void kernel_waves_per_eu_mm() {} + +// Test: Num SGPR attribute +// CIR: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-num-sgpr" = "32" +// LLVM: define{{.*}} void @_Z15kernel_num_sgprv(){{.*}} #[[SGPR_ATTR:[0-9]+]] +__attribute__((amdgpu_num_sgpr(32))) +__global__ void kernel_num_sgpr() {} + +// Test: Num VGPR attribute +// CIR: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-num-vgpr" = "64" +// LLVM: define{{.*}} void @_Z15kernel_num_vgprv(){{.*}} #[[VGPR_ATTR:[0-9]+]] +__attribute__((amdgpu_num_vgpr(64))) +__global__ void kernel_num_vgpr() {} + +// Test: Max num workgroups attribute +// CIR: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-max-num-workgroups" = "8,4,2" +// LLVM: define{{.*}} void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[MAX_WG_ATTR:[0-9]+]] +__attribute__((amdgpu_max_num_work_groups(8, 4, 2))) +__global__ void kernel_max_num_wgroups() {} + +// Test: Combined attributes +// CIR: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-flat-work-group-size" = "256,256"{{.*}}"cir.amdgpu-num-sgpr" = "48"{{.*}}"cir.amdgpu-num-vgpr" = "32"{{.*}}"cir.amdgpu-waves-per-eu" = "1,2" +// LLVM: define{{.*}} void @_Z15kernel_combinedv(){{.*}} #[[COMBINED_ATTR:[0-9]+]] +__attribute__((amdgpu_flat_work_group_size(256, 256))) +__attribute__((amdgpu_waves_per_eu(1, 2))) +__attribute__((amdgpu_num_sgpr(48))) +__attribute__((amdgpu_num_vgpr(32))) +__global__ void kernel_combined() {} + +// Test: Device function should NOT have kernel attributes +// CIR: cir.func{{.*}} @_Z9device_fnv() +// CIR-NOT: cir.amdgpu-flat-work-group-size +// LLVM: define{{.*}} void @_Z9device_fnv() +__device__ void device_fn() {} + +// Verify LLVM attributes +// LLVM-DAG: attributes #[[SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// LLVM-DAG: attributes #[[FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128" +// LLVM-DAG: attributes #[[WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2" +// LLVM-DAG: attributes #[[WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4" +// LLVM-DAG: attributes #[[SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32" +// LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64" +// LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2" +// LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
