https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/188007
>From 8e53f91820aa1158951de2ae7beac94fcaed0545 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Mon, 23 Mar 2026 15:20:23 +0530 Subject: [PATCH 1/3] [CIR][AMDGPU] Add AMDGPU-specific function attributes for HIP kernels --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 14 +- clang/lib/CIR/CodeGen/CMakeLists.txt | 1 + clang/lib/CIR/CodeGen/TargetInfo.cpp | 10 + clang/lib/CIR/CodeGen/TargetInfo.h | 5 + clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp | 256 ++++++++++++++++++ .../Lowering/DirectToLLVM/LowerToLLVMIR.cpp | 27 +- clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip | 82 ++++++ 7 files changed, 386 insertions(+), 9 deletions(-) create mode 100644 clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp create mode 100644 clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index f3ab733bf4c6a..4be669777bb26 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -669,7 +669,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *op) { assert(!cir::MissingFeatures::opFuncCPUAndFeaturesAttributes()); assert(!cir::MissingFeatures::opFuncSection()); - assert(!cir::MissingFeatures::setTargetAttributes()); + getTargetCIRGenInfo().setTargetAttributes(gd.getDecl(), op, *this); } std::optional<cir::SourceLanguage> CIRGenModule::getCIRSourceLanguage() const { @@ -2557,12 +2557,15 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, // represent them in dedicated ops. The correct attributes are ensured during // translation to LLVM. Thus, we don't need to check for them here. + const auto *funcDecl = cast<FunctionDecl>(globalDecl.getDecl()); + if (!isIncompleteFunction) setCIRFunctionAttributes(globalDecl, getTypes().arrangeGlobalDeclaration(globalDecl), func, isThunk); - assert(!cir::MissingFeatures::setTargetAttributes()); + if (!isIncompleteFunction && func.isDeclaration()) + getTargetCIRGenInfo().setTargetAttributes(funcDecl, func, *this); // TODO(cir): This needs a lot of work to better match CodeGen. That // ultimately ends up in setGlobalVisibility, which already has the linkage of @@ -2574,17 +2577,16 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, } // If we plan on emitting this inline builtin, we can't treat it as a builtin. - const auto *fd = cast<FunctionDecl>(globalDecl.getDecl()); - if (fd->isInlineBuiltinDeclaration()) { + if (funcDecl->isInlineBuiltinDeclaration()) { const FunctionDecl *fdBody; - bool hasBody = fd->hasBody(fdBody); + bool hasBody = funcDecl->hasBody(fdBody); (void)hasBody; assert(hasBody && "Inline builtin declarations should always have an " "available body!"); assert(!cir::MissingFeatures::attributeNoBuiltin()); } - if (fd->isReplaceableGlobalAllocationFunction()) { + if (funcDecl->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. func->setAttr(cir::CIRDialect::getNoBuiltinAttrName(), diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 8548cc8424527..9b8fdf551ef10 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -51,6 +51,7 @@ add_clang_library(clangCIR CIRGenTypes.cpp CIRGenVTables.cpp TargetInfo.cpp + Targets/AMDGPU.cpp DEPENDS MLIRCIR diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index ee68d9c329b83..3859588c5cfaf 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -1,6 +1,7 @@ #include "TargetInfo.h" #include "ABIInfo.h" #include "CIRGenFunction.h" +#include "CIRGenModule.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -53,6 +54,15 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { public: AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {} + + void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global, + CIRGenModule &cgm) const override { + auto func = mlir::dyn_cast<cir::FuncOp>(global); + if (!func) + return; + + setAMDGPUTargetFunctionAttributes(decl, func, cgm); + } }; } // namespace diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index b397d8cd7fab8..868af0e8343fb 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "clang/Basic/AddressSpaces.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" #include <memory> @@ -135,6 +136,10 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt); +/// Set AMDGPU-specific function attributes for HIP kernels. +void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, + cir::FuncOp func, CIRGenModule &cgm); + std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt); std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt); diff --git a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp new file mode 100644 index 0000000000000..280cb6ae5865c --- /dev/null +++ b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp @@ -0,0 +1,256 @@ +//===---- AMDGPU.cpp - AMDGPU-specific CIR CodeGen ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides AMDGPU-specific CIR CodeGen logic for function attributes. +// +//===----------------------------------------------------------------------===// + +#include "../CIRGenModule.h" +#include "../TargetInfo.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::CIRGen; + +namespace { + +/// Check if AMDGPU protected visibility is required. +static bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, + cir::FuncOp func) { + if (func.getGlobalVisibility() != cir::VisibilityKind::Hidden) + return false; + + if (d->hasAttr<OMPDeclareTargetDeclAttr>()) + return false; + + return d->hasAttr<DeviceKernelAttr>() || + (clang::isa<clang::FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()); +} + +/// Handle amdgpu-flat-work-group-size attribute. +static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, + CIRGenModule &cgm, + bool isOpenCLKernel) { + auto &builder = cgm.getBuilder(); + const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + const auto *reqdWGS = + cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + + if (flatWGS || reqdWGS) { + unsigned min = 0, max = 0; + if (flatWGS) { + min = flatWGS->getMin() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + max = flatWGS->getMax() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + } + if (reqdWGS && min == 0 && max == 0) { + min = max = reqdWGS->getXDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() * + reqdWGS->getYDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() * + reqdWGS->getZDim() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + } + if (min != 0) { + assert(min <= max && "Min must be less than or equal Max"); + std::string attrVal = llvm::utostr(min) + "," + llvm::utostr(max); + func->setAttr("cir.amdgpu-flat-work-group-size", + builder.getStringAttr(attrVal)); + } else { + assert(max == 0 && "Max must be zero"); + } + } else { + const unsigned defaultMax = + isOpenCLKernel ? 256 : cgm.getLangOpts().GPUMaxThreadsPerBlock; + std::string attrVal = std::string("1,") + llvm::utostr(defaultMax); + func->setAttr("cir.amdgpu-flat-work-group-size", + builder.getStringAttr(attrVal)); + } +} + +/// Handle amdgpu-waves-per-eu attribute. +static void handleAMDGPUWavesPerEUAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>(); + if (!attr) + return; + + auto &builder = cgm.getBuilder(); + unsigned min = + attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue(); + unsigned max = attr->getMax() + ? attr->getMax() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 0; + + if (min != 0) { + assert((max == 0 || min <= max) && "Min must be less than or equal Max"); + std::string attrVal = llvm::utostr(min); + if (max != 0) + attrVal = attrVal + "," + llvm::utostr(max); + func->setAttr("cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal)); + } else { + assert(max == 0 && "Max must be zero"); + } +} + +/// Handle amdgpu-num-sgpr attribute. +static void handleAMDGPUNumSGPRAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>(); + if (!attr) + return; + + uint32_t numSGPR = attr->getNumSGPR(); + if (numSGPR != 0) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-num-sgpr", + builder.getStringAttr(llvm::utostr(numSGPR))); + } +} + +/// Handle amdgpu-num-vgpr attribute. +static void handleAMDGPUNumVGPRAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>(); + if (!attr) + return; + + uint32_t numVGPR = attr->getNumVGPR(); + if (numVGPR != 0) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-num-vgpr", + builder.getStringAttr(llvm::utostr(numVGPR))); + } +} + +/// Handle amdgpu-max-num-workgroups attribute. +static void handleAMDGPUMaxNumWorkGroupsAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, + CIRGenModule &cgm) { + const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>(); + if (!attr) + return; + + auto &builder = cgm.getBuilder(); + uint32_t x = attr->getMaxNumWorkGroupsX() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue(); + uint32_t y = attr->getMaxNumWorkGroupsY() + ? attr->getMaxNumWorkGroupsY() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 1; + uint32_t z = attr->getMaxNumWorkGroupsZ() + ? attr->getMaxNumWorkGroupsZ() + ->EvaluateKnownConstInt(cgm.getASTContext()) + .getExtValue() + : 1; + + llvm::SmallString<32> attrVal; + llvm::raw_svector_ostream os(attrVal); + os << x << ',' << y << ',' << z; + func->setAttr("cir.amdgpu-max-num-workgroups", + builder.getStringAttr(attrVal.str())); +} + +/// Handle amdgpu-cluster-dims attribute. +static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd, + cir::FuncOp func, CIRGenModule &cgm, + bool isOpenCLKernel) { + auto &builder = cgm.getBuilder(); + + if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) { + auto getExprVal = [&](const Expr *e) { + return e ? e->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue() + : 1; + }; + unsigned x = getExprVal(attr->getX()); + unsigned y = getExprVal(attr->getY()); + unsigned z = getExprVal(attr->getZ()); + + llvm::SmallString<32> attrVal; + llvm::raw_svector_ostream os(attrVal); + os << x << ',' << y << ',' << z; + func->setAttr("cir.amdgpu-cluster-dims", + builder.getStringAttr(attrVal.str())); + } + + const clang::TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo(); + if ((isOpenCLKernel && + targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap, + "clusters")) || + fd->hasAttr<CUDANoClusterAttr>()) { + func->setAttr("cir.amdgpu-cluster-dims", builder.getStringAttr("0,0,0")); + } +} + +/// Handle amdgpu-ieee attribute. +static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm) { + if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false")); + } +} + +} // anonymous namespace + +void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, + cir::FuncOp func, + CIRGenModule &cgm) { + const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl); + if (!fd) + return; + + if (func.isDeclaration()) + return; + + // Set protected visibility for AMDGPU kernels + if (requiresAMDGPUProtectedVisibility(decl, func)) { + func.setGlobalVisibility(cir::VisibilityKind::Protected); + func.setDSOLocal(true); + } + + const bool isOpenCLKernel = + cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>(); + const bool isHIPKernel = + cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>(); + + if (!isOpenCLKernel && !isHIPKernel) + return; + + // Set HIP kernel calling convention + if (isHIPKernel) { + // TODO(CIR) : Add amdgpu calling conv. + func.setVisibility(mlir::SymbolTable::Visibility::Public); + func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( + func.getContext(), cir::GlobalLinkageKind::ExternalLinkage)); + } + + handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUWavesPerEUAttr(fd, func, cgm); + handleAMDGPUNumSGPRAttr(fd, func, cgm); + handleAMDGPUNumVGPRAttr(fd, func, cgm); + handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm); + handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUIEEEAttr(func, cgm); +} diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp index 2a95cfb9371b1..dbedbb5647aa5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp @@ -15,9 +15,7 @@ #include "mlir/IR/DialectRegistry.h" #include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" -#include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" -#include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Constant.h" #include "llvm/IR/GlobalVariable.h" @@ -54,7 +52,11 @@ class CIRDialectLLVMIRTranslationInterface mlir::Operation *op, llvm::ArrayRef<llvm::Instruction *> instructions, mlir::NamedAttribute attribute, mlir::LLVM::ModuleTranslation &moduleTranslation) const override { - if (auto mod = dyn_cast<mlir::ModuleOp>(op)) { + if (auto func = dyn_cast<mlir::LLVM::LLVMFuncOp>(op)) { + if (mlir::failed( + amendFunction(func, instructions, attribute, moduleTranslation))) + return mlir::failure(); + } else if (auto mod = dyn_cast<mlir::ModuleOp>(op)) { if (mlir::failed(amendModule(mod, attribute, moduleTranslation))) return mlir::failure(); } @@ -62,6 +64,25 @@ class CIRDialectLLVMIRTranslationInterface } private: + // Translate CIR function attributes to LLVM function attributes. + mlir::LogicalResult + amendFunction(mlir::LLVM::LLVMFuncOp func, + llvm::ArrayRef<llvm::Instruction *> instructions, + mlir::NamedAttribute attribute, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName()); + llvm::StringRef attrName = attribute.getName().strref(); + + // Strip the "cir." prefix to get the LLVM attribute name. + llvm::StringRef llvmAttrName = attrName.substr(strlen("cir.")); + if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue())) { + llvmFunc->addFnAttr(llvmAttrName, strAttr.getValue()); + return mlir::success(); + } + + return mlir::success(); + } + // Translate CIR's module attributes to LLVM's module metadata mlir::LogicalResult amendModule(mlir::ModuleOp mod, mlir::NamedAttribute attribute, diff --git a/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip new file mode 100644 index 0000000000000..5a15f62899cf8 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip @@ -0,0 +1,82 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll +// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ogcg.ll + +// Test that AMDGPU-specific attributes are generated for HIP kernels + +// Test: Default attributes for simple kernel +// CIR: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-flat-work-group-size" = "1,1024" +// LLVM: define{{.*}} void @_Z13kernel_simplev(){{.*}} #[[SIMPLE_ATTR:[0-9]+]] +__global__ void kernel_simple() {} + +// Test: Explicit flat work group size attribute +// CIR: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-flat-work-group-size" = "64,128" +// LLVM: define{{.*}} void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[FLAT_WG_ATTR:[0-9]+]] +__attribute__((amdgpu_flat_work_group_size(64, 128))) +__global__ void kernel_flat_wg_size_1() {} + +// Test: Waves per EU attribute +// CIR: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-waves-per-eu" = "2" +// LLVM: define{{.*}} void @_Z19kernel_waves_per_euv(){{.*}} #[[WAVES_ATTR:[0-9]+]] +__attribute__((amdgpu_waves_per_eu(2))) +__global__ void kernel_waves_per_eu() {} + +// Test: Waves per EU with min and max +// CIR: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-waves-per-eu" = "2,4" +// LLVM: define{{.*}} void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[WAVES_MM_ATTR:[0-9]+]] +__attribute__((amdgpu_waves_per_eu(2, 4))) +__global__ void kernel_waves_per_eu_mm() {} + +// Test: Num SGPR attribute +// CIR: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-num-sgpr" = "32" +// LLVM: define{{.*}} void @_Z15kernel_num_sgprv(){{.*}} #[[SGPR_ATTR:[0-9]+]] +__attribute__((amdgpu_num_sgpr(32))) +__global__ void kernel_num_sgpr() {} + +// Test: Num VGPR attribute +// CIR: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-num-vgpr" = "64" +// LLVM: define{{.*}} void @_Z15kernel_num_vgprv(){{.*}} #[[VGPR_ATTR:[0-9]+]] +__attribute__((amdgpu_num_vgpr(64))) +__global__ void kernel_num_vgpr() {} + +// Test: Max num workgroups attribute +// CIR: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-max-num-workgroups" = "8,4,2" +// LLVM: define{{.*}} void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[MAX_WG_ATTR:[0-9]+]] +__attribute__((amdgpu_max_num_work_groups(8, 4, 2))) +__global__ void kernel_max_num_wgroups() {} + +// Test: Combined attributes +// CIR: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-flat-work-group-size" = "256,256"{{.*}}"cir.amdgpu-num-sgpr" = "48"{{.*}}"cir.amdgpu-num-vgpr" = "32"{{.*}}"cir.amdgpu-waves-per-eu" = "1,2" +// LLVM: define{{.*}} void @_Z15kernel_combinedv(){{.*}} #[[COMBINED_ATTR:[0-9]+]] +__attribute__((amdgpu_flat_work_group_size(256, 256))) +__attribute__((amdgpu_waves_per_eu(1, 2))) +__attribute__((amdgpu_num_sgpr(48))) +__attribute__((amdgpu_num_vgpr(32))) +__global__ void kernel_combined() {} + +// Test: Device function should NOT have kernel attributes +// CIR: cir.func{{.*}} @_Z9device_fnv() +// CIR-NOT: cir.amdgpu-flat-work-group-size +// LLVM: define{{.*}} void @_Z9device_fnv() +__device__ void device_fn() {} + +// Verify LLVM attributes +// LLVM-DAG: attributes #[[SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// LLVM-DAG: attributes #[[FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128" +// LLVM-DAG: attributes #[[WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2" +// LLVM-DAG: attributes #[[WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4" +// LLVM-DAG: attributes #[[SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32" +// LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64" +// LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2" +// LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2" >From 682aedb605f1c8f36c2be1c9003a9f5603671230 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Mon, 23 Mar 2026 16:04:13 +0530 Subject: [PATCH 2/3] add support for amdgpu-expand-waitcnt-profiling --- clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp | 76 +++++++++++-------- .../Lowering/DirectToLLVM/LowerToLLVMIR.cpp | 5 +- clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip | 16 ++++ 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp index 280cb6ae5865c..b1d92c40d6178 100644 --- a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp @@ -42,7 +42,8 @@ static bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, cir::FuncOp func, CIRGenModule &cgm, - bool isOpenCLKernel) { + bool isOpenCLKernel, + bool isHIPKernel) { auto &builder = cgm.getBuilder(); const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); const auto *reqdWGS = @@ -77,10 +78,15 @@ static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, } else { assert(max == 0 && "Max must be zero"); } - } else { - const unsigned defaultMax = - isOpenCLKernel ? 256 : cgm.getLangOpts().GPUMaxThreadsPerBlock; - std::string attrVal = std::string("1,") + llvm::utostr(defaultMax); + } else if (isOpenCLKernel || isHIPKernel) { + // By default, restrict the maximum size to a value specified by + // --gpu-max-threads-per-block=n or its default value for HIP. + const unsigned openCLDefaultMaxWorkGroupSize = 256; + const unsigned defaultMaxWorkGroupSize = + isOpenCLKernel ? openCLDefaultMaxWorkGroupSize + : cgm.getLangOpts().GPUMaxThreadsPerBlock; + std::string attrVal = + std::string("1,") + llvm::utostr(defaultMaxWorkGroupSize); func->setAttr("cir.amdgpu-flat-work-group-size", builder.getStringAttr(attrVal)); } @@ -212,45 +218,51 @@ static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm) { } } +/// Handle amdgpu-expand-waitcnt-profiling attribute. +static void handleAMDGPUExpandWaitcntProfilingAttr(cir::FuncOp func, + CIRGenModule &cgm) { + if (cgm.getCodeGenOpts().AMDGPUExpandWaitcntProfiling) { + auto &builder = cgm.getBuilder(); + func->setAttr("cir.amdgpu-expand-waitcnt-profiling", + builder.getStringAttr("")); + } +} + } // anonymous namespace void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, cir::FuncOp func, CIRGenModule &cgm) { - const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl); - if (!fd) - return; - - if (func.isDeclaration()) - return; - - // Set protected visibility for AMDGPU kernels if (requiresAMDGPUProtectedVisibility(decl, func)) { func.setGlobalVisibility(cir::VisibilityKind::Protected); func.setDSOLocal(true); } - const bool isOpenCLKernel = - cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>(); - const bool isHIPKernel = - cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>(); - - if (!isOpenCLKernel && !isHIPKernel) + if (func.isDeclaration()) return; - // Set HIP kernel calling convention - if (isHIPKernel) { - // TODO(CIR) : Add amdgpu calling conv. - func.setVisibility(mlir::SymbolTable::Visibility::Public); - func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( - func.getContext(), cir::GlobalLinkageKind::ExternalLinkage)); - } + const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl); + if (fd) { + const bool isOpenCLKernel = + cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>(); + const bool isHIPKernel = + cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>(); + + if (isHIPKernel) { + // TODO(CIR) : Add amdgpu calling conv. + func.setVisibility(mlir::SymbolTable::Visibility::Public); + func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( + func.getContext(), cir::GlobalLinkageKind::ExternalLinkage)); + } - handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel); - handleAMDGPUWavesPerEUAttr(fd, func, cgm); - handleAMDGPUNumSGPRAttr(fd, func, cgm); - handleAMDGPUNumVGPRAttr(fd, func, cgm); - handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm); - handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel, + isHIPKernel); + handleAMDGPUWavesPerEUAttr(fd, func, cgm); + handleAMDGPUNumSGPRAttr(fd, func, cgm); + handleAMDGPUNumVGPRAttr(fd, func, cgm); + handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm); + handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel); + } handleAMDGPUIEEEAttr(func, cgm); + handleAMDGPUExpandWaitcntProfilingAttr(func, cgm); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp index dbedbb5647aa5..dbcd0aed88056 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp @@ -75,11 +75,8 @@ class CIRDialectLLVMIRTranslationInterface // Strip the "cir." prefix to get the LLVM attribute name. llvm::StringRef llvmAttrName = attrName.substr(strlen("cir.")); - if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue())) { + if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue())) llvmFunc->addFnAttr(llvmAttrName, strAttr.getValue()); - return mlir::success(); - } - return mlir::success(); } diff --git a/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip index 5a15f62899cf8..8c020ccf9c58b 100644 --- a/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip +++ b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip @@ -13,6 +13,11 @@ // RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll // RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ogcg.ll +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -mamdgpu-expand-waitcnt-profiling \ +// RUN: -emit-cir %s -o %t.prof.cir +// RUN: FileCheck --check-prefix=PROF %s --input-file=%t.prof.cir + // Test that AMDGPU-specific attributes are generated for HIP kernels // Test: Default attributes for simple kernel @@ -80,3 +85,14 @@ __device__ void device_fn() {} // LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64" // LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2" // LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2" + +// Test: amdgpu-expand-waitcnt-profiling is set on all functions when enabled +// PROF: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" +// PROF: cir.func{{.*}} @_Z9device_fnv(){{.*}}"cir.amdgpu-expand-waitcnt-profiling" >From ad1bd2085fb1af97c24978736ab6f682d94ca029 Mon Sep 17 00:00:00 2001 From: skc7 <[email protected]> Date: Tue, 24 Mar 2026 11:13:37 +0530 Subject: [PATCH 3/3] update requiresAMDGPUProtectedVisibility and other minor fixes --- clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/TargetInfo.cpp | 17 +++- clang/lib/CIR/CodeGen/TargetInfo.h | 4 + clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp | 115 ++++++++++------------ 4 files changed, 67 insertions(+), 70 deletions(-) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 68db08a5580ca..5051044eb5d50 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -327,7 +327,6 @@ struct MissingFeatures { static bool setDLLStorageClass() { return false; } static bool setNonGC() { return false; } static bool setObjCGCLValueClass() { return false; } - static bool setTargetAttributes() { return false; } static bool shouldSplitConstantStore() { return false; } static bool shouldUseBZeroPlusStoresToInitialize() { return false; } static bool shouldUseMemSetToInitialize() { return false; } diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index 3859588c5cfaf..a5cc74b18a8a0 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -57,11 +57,18 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo { void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global, CIRGenModule &cgm) const override { - auto func = mlir::dyn_cast<cir::FuncOp>(global); - if (!func) - return; - - setAMDGPUTargetFunctionAttributes(decl, func, cgm); + if (auto func = mlir::dyn_cast<cir::FuncOp>(global)) { + if (requiresAMDGPUProtectedVisibility(decl, func.getGlobalVisibility())) { + func.setGlobalVisibility(cir::VisibilityKind::Protected); + func.setDSOLocal(true); + } + setAMDGPUTargetFunctionAttributes(decl, func, cgm); + } else if (auto gv = mlir::dyn_cast<cir::GlobalOp>(global)) { + if (requiresAMDGPUProtectedVisibility(decl, gv.getGlobalVisibility())) { + gv.setGlobalVisibility(cir::VisibilityKind::Protected); + gv.setDSOLocal(true); + } + } } }; diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 868af0e8343fb..5e0103093827b 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -136,6 +136,10 @@ class TargetCIRGenInfo { std::unique_ptr<TargetCIRGenInfo> createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt); +/// Check if AMDGPU protected visibility is required. +bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, + cir::VisibilityKind visibility); + /// Set AMDGPU-specific function attributes for HIP kernels. void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, cir::FuncOp func, CIRGenModule &cgm); diff --git a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp index b1d92c40d6178..566bca14c4d59 100644 --- a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp @@ -23,28 +23,27 @@ using namespace clang; using namespace clang::CIRGen; -namespace { - -/// Check if AMDGPU protected visibility is required. -static bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, - cir::FuncOp func) { - if (func.getGlobalVisibility() != cir::VisibilityKind::Hidden) - return false; - - if (d->hasAttr<OMPDeclareTargetDeclAttr>()) +bool clang::CIRGen::requiresAMDGPUProtectedVisibility( + const Decl *d, cir::VisibilityKind visibility) { + if (visibility != cir::VisibilityKind::Hidden) return false; - return d->hasAttr<DeviceKernelAttr>() || - (clang::isa<clang::FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()); + return !d->hasAttr<OMPDeclareTargetDeclAttr>() && + (d->hasAttr<DeviceKernelAttr>() || + (isa<FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()) || + (isa<VarDecl>(d) && + (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() || + cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinSurfaceType() || + cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinTextureType()))); } +namespace { + /// Handle amdgpu-flat-work-group-size attribute. -static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, - cir::FuncOp func, - CIRGenModule &cgm, - bool isOpenCLKernel, - bool isHIPKernel) { - auto &builder = cgm.getBuilder(); +static void +handleAMDGPUFlatWorkGroupSizeAttr(const FunctionDecl *fd, cir::FuncOp func, + CIRGenModule &cgm, CIRGenBuilderTy &builder, + bool isOpenCLKernel, bool isHIPKernel) { const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); const auto *reqdWGS = cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; @@ -93,13 +92,12 @@ static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd, } /// Handle amdgpu-waves-per-eu attribute. -static void handleAMDGPUWavesPerEUAttr(const clang::FunctionDecl *fd, - cir::FuncOp func, CIRGenModule &cgm) { +static void handleAMDGPUWavesPerEUAttr(const FunctionDecl *fd, cir::FuncOp func, + CIRGenModule &cgm, + CIRGenBuilderTy &builder) { const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>(); if (!attr) return; - - auto &builder = cgm.getBuilder(); unsigned min = attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue(); unsigned max = attr->getMax() @@ -120,44 +118,43 @@ static void handleAMDGPUWavesPerEUAttr(const clang::FunctionDecl *fd, } /// Handle amdgpu-num-sgpr attribute. -static void handleAMDGPUNumSGPRAttr(const clang::FunctionDecl *fd, - cir::FuncOp func, CIRGenModule &cgm) { +static void handleAMDGPUNumSGPRAttr(const FunctionDecl *fd, cir::FuncOp func, + CIRGenModule &cgm, + CIRGenBuilderTy &builder) { const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>(); if (!attr) return; uint32_t numSGPR = attr->getNumSGPR(); if (numSGPR != 0) { - auto &builder = cgm.getBuilder(); func->setAttr("cir.amdgpu-num-sgpr", builder.getStringAttr(llvm::utostr(numSGPR))); } } /// Handle amdgpu-num-vgpr attribute. -static void handleAMDGPUNumVGPRAttr(const clang::FunctionDecl *fd, - cir::FuncOp func, CIRGenModule &cgm) { +static void handleAMDGPUNumVGPRAttr(const FunctionDecl *fd, cir::FuncOp func, + CIRGenModule &cgm, + CIRGenBuilderTy &builder) { const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>(); if (!attr) return; uint32_t numVGPR = attr->getNumVGPR(); if (numVGPR != 0) { - auto &builder = cgm.getBuilder(); func->setAttr("cir.amdgpu-num-vgpr", builder.getStringAttr(llvm::utostr(numVGPR))); } } /// Handle amdgpu-max-num-workgroups attribute. -static void handleAMDGPUMaxNumWorkGroupsAttr(const clang::FunctionDecl *fd, +static void handleAMDGPUMaxNumWorkGroupsAttr(const FunctionDecl *fd, cir::FuncOp func, - CIRGenModule &cgm) { + CIRGenModule &cgm, + CIRGenBuilderTy &builder) { const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>(); if (!attr) return; - - auto &builder = cgm.getBuilder(); uint32_t x = attr->getMaxNumWorkGroupsX() ->EvaluateKnownConstInt(cgm.getASTContext()) .getExtValue(); @@ -180,10 +177,10 @@ static void handleAMDGPUMaxNumWorkGroupsAttr(const clang::FunctionDecl *fd, } /// Handle amdgpu-cluster-dims attribute. -static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd, +static void handleAMDGPUClusterDimsAttr(const FunctionDecl *fd, cir::FuncOp func, CIRGenModule &cgm, + CIRGenBuilderTy &builder, bool isOpenCLKernel) { - auto &builder = cgm.getBuilder(); if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) { auto getExprVal = [&](const Expr *e) { @@ -201,7 +198,7 @@ static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd, builder.getStringAttr(attrVal.str())); } - const clang::TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo(); + const TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo(); if ((isOpenCLKernel && targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap, "clusters")) || @@ -211,58 +208,48 @@ static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd, } /// Handle amdgpu-ieee attribute. -static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm) { - if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts) { - auto &builder = cgm.getBuilder(); +static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm, + CIRGenBuilderTy &builder) { + if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts) func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false")); - } } /// Handle amdgpu-expand-waitcnt-profiling attribute. static void handleAMDGPUExpandWaitcntProfilingAttr(cir::FuncOp func, - CIRGenModule &cgm) { - if (cgm.getCodeGenOpts().AMDGPUExpandWaitcntProfiling) { - auto &builder = cgm.getBuilder(); + CIRGenModule &cgm, + CIRGenBuilderTy &builder) { + if (cgm.getCodeGenOpts().AMDGPUExpandWaitcntProfiling) func->setAttr("cir.amdgpu-expand-waitcnt-profiling", builder.getStringAttr("")); - } } -} // anonymous namespace +} // namespace -void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, +void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const Decl *decl, cir::FuncOp func, CIRGenModule &cgm) { - if (requiresAMDGPUProtectedVisibility(decl, func)) { - func.setGlobalVisibility(cir::VisibilityKind::Protected); - func.setDSOLocal(true); - } - if (func.isDeclaration()) return; - const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl); + CIRGenBuilderTy &builder = cgm.getBuilder(); + + const auto *fd = dyn_cast_or_null<FunctionDecl>(decl); if (fd) { const bool isOpenCLKernel = cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>(); const bool isHIPKernel = cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>(); - if (isHIPKernel) { - // TODO(CIR) : Add amdgpu calling conv. - func.setVisibility(mlir::SymbolTable::Visibility::Public); - func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( - func.getContext(), cir::GlobalLinkageKind::ExternalLinkage)); - } + // TODO(CIR): Set amdgpu_kernel calling convention for HIP kernels. - handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel, + handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, builder, isOpenCLKernel, isHIPKernel); - handleAMDGPUWavesPerEUAttr(fd, func, cgm); - handleAMDGPUNumSGPRAttr(fd, func, cgm); - handleAMDGPUNumVGPRAttr(fd, func, cgm); - handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm); - handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel); + handleAMDGPUWavesPerEUAttr(fd, func, cgm, builder); + handleAMDGPUNumSGPRAttr(fd, func, cgm, builder); + handleAMDGPUNumVGPRAttr(fd, func, cgm, builder); + handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm, builder); + handleAMDGPUClusterDimsAttr(fd, func, cgm, builder, isOpenCLKernel); } - handleAMDGPUIEEEAttr(func, cgm); - handleAMDGPUExpandWaitcntProfilingAttr(func, cgm); + handleAMDGPUIEEEAttr(func, cgm, builder); + handleAMDGPUExpandWaitcntProfilingAttr(func, cgm, builder); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
