https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/115821
>From 107f8dbc6b2fa157ce3936a086093882012b9d62 Mon Sep 17 00:00:00 2001 From: anikelal <anike...@amd.com> Date: Mon, 25 Nov 2024 14:18:36 +0530 Subject: [PATCH 1/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel This feature is currently not supported in the compiler. To facilitate this, we emit a stub version of each kernel function body with a different name mangling scheme, and replace the respective kernel call-sites appropriately. Fixes https://github.com/llvm/llvm-project/issues/60313 D120566 was an earlier attempt made to upstream a solution for this issue. --- clang/include/clang/AST/GlobalDecl.h | 37 +++++++++---- clang/lib/AST/Expr.cpp | 3 +- clang/lib/AST/ItaniumMangle.cpp | 15 ++++++ clang/lib/AST/Mangle.cpp | 2 +- clang/lib/AST/MicrosoftMangle.cpp | 6 +++ clang/lib/CodeGen/CGBlocks.cpp | 16 +++--- clang/lib/CodeGen/CGCall.cpp | 11 +++- clang/lib/CodeGen/CGExpr.cpp | 5 +- clang/lib/CodeGen/CGOpenCLRuntime.cpp | 11 +++- clang/lib/CodeGen/CGOpenCLRuntime.h | 4 +- clang/lib/CodeGen/CodeGenModule.cpp | 7 +++ .../test/CodeGenOpenCL/opencl-kernel-call.cl | 43 +++++++++++++++ clang/test/CodeGenOpenCL/reflect.cl | 2 +- clang/test/CodeGenOpenCL/spir-calling-conv.cl | 4 +- clang/test/CodeGenOpenCL/visibility.cl | 53 ++++++++++++++----- 15 files changed, 178 insertions(+), 41 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/opencl-kernel-call.cl diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index 386693cabb1fbb..8a9f4b4c60e5e5 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -71,6 +71,10 @@ class GlobalDecl { GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0) : MultiVersionIndex(MVIndex) { if (!D->hasAttr<CUDAGlobalAttr>()) { + if (D->hasAttr<OpenCLKernelAttr>()) { + Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel)); + return; + } Init(D); return; } @@ -78,7 +82,8 @@ class GlobalDecl { } GlobalDecl(const FunctionDecl *D, 
KernelReferenceKind Kind) : Value(D, unsigned(Kind)) { - assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!"); + assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") || + (D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!")); } GlobalDecl(const NamedDecl *D) { Init(D); } GlobalDecl(const BlockDecl *D) { Init(D); } @@ -130,13 +135,15 @@ class GlobalDecl { } KernelReferenceKind getKernelReferenceKind() const { - assert(((isa<FunctionDecl>(getDecl()) && - cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) || - (isa<FunctionTemplateDecl>(getDecl()) && - cast<FunctionTemplateDecl>(getDecl()) - ->getTemplatedDecl() - ->hasAttr<CUDAGlobalAttr>())) && - "Decl is not a GPU kernel!"); + assert((((isa<FunctionDecl>(getDecl()) && + cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) || + (isa<FunctionTemplateDecl>(getDecl()) && + cast<FunctionTemplateDecl>(getDecl()) + ->getTemplatedDecl() + ->hasAttr<CUDAGlobalAttr>())) && + "Decl is not a GPU kernel!") || + (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!")); + return static_cast<KernelReferenceKind>(Value.getInt()); } @@ -196,13 +203,21 @@ class GlobalDecl { } GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) { - assert(isa<FunctionDecl>(getDecl()) && - cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && - "Decl is not a GPU kernel!"); + assert((isa<FunctionDecl>(getDecl()) && + cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && + "Decl is not a GPU kernel!") || + (isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!")); GlobalDecl Result(*this); Result.Value.setInt(unsigned(Kind)); return Result; } + + bool isDeclOpenCLKernel() const { + auto FD = dyn_cast<FunctionDecl>(getDecl()); + if (FD) + return FD->hasAttr<OpenCLKernelAttr>(); + return FD; + } }; } // namespace clang diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index a4fb4d5a1f2ec4..ddd88ac6a21050 100644 --- a/clang/lib/AST/Expr.cpp +++ 
b/clang/lib/AST/Expr.cpp @@ -693,7 +693,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, GD = GlobalDecl(CD, Ctor_Base); else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND)) GD = GlobalDecl(DD, Dtor_Base); - else if (ND->hasAttr<CUDAGlobalAttr>()) + else if (ND->hasAttr<CUDAGlobalAttr>() || + ND->hasAttr<OpenCLKernelAttr>()) GD = GlobalDecl(cast<FunctionDecl>(ND)); else GD = GlobalDecl(ND); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 27a993a631dae9..7e46d6c520fb69 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -526,6 +526,7 @@ class CXXNameMangler { void mangleSourceName(const IdentifierInfo *II); void mangleRegCallName(const IdentifierInfo *II); void mangleDeviceStubName(const IdentifierInfo *II); + void mangleOCLDeviceStubName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(GlobalDecl GD, @@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName( bool IsDeviceStub = FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; + bool IsOCLDeviceStub = + FD && FD->hasAttr<OpenCLKernelAttr>() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleDeviceStubName(II); + else if (IsOCLDeviceStub) + mangleOCLDeviceStubName(II); else if (IsRegCall) mangleRegCallName(II); else @@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { << II->getName(); } +void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) { + // <source-name> ::= <positive length number> __clang_ocl_kern_imp_ + // <identifier> <number> ::= [n] <non-negative decimal integer> <identifier> + // ::= <unqualified source code identifier> + StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_"; + Out << II->getLength() + OCLDeviceStubNamePrefix.size() - 1 + << 
OCLDeviceStubNamePrefix << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // <source-name> ::= <positive length number> <identifier> // <number> ::= [n] <non-negative decimal integer> diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 15be9c62bf8880..1d1c4dd0e39b7a 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -539,7 +539,7 @@ class ASTNameGenerator::Implementation { GD = GlobalDecl(CtorD, Ctor_Complete); else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D)) GD = GlobalDecl(DtorD, Dtor_Complete); - else if (D->hasAttr<CUDAGlobalAttr>()) + else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>()) GD = GlobalDecl(cast<FunctionDecl>(D)); else GD = GlobalDecl(D); diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 94a7ce6c1321d3..e439875a2538ba 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1161,9 +1161,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, ->getTemplatedDecl() ->hasAttr<CUDAGlobalAttr>())) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; + bool IsOCLDeviceStub = + ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleSourceName( (llvm::Twine("__device_stub__") + II->getName()).str()); + else if (IsOCLDeviceStub) + mangleSourceName( + (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str()); else mangleSourceName(II->getName()); break; diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index a7584a95c8ca7b..7f98a897c36907 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -48,7 +48,7 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name) BlockByrefHelpers::~BlockByrefHelpers() {} /// Build the given block as a global block. 
-static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, +static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD, const CGBlockInfo &blockInfo, llvm::Constant *blockFn); @@ -1085,8 +1085,10 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); if (IsOpenCL) { - CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, - result, blockInfo.StructureType); + CGM.getOpenCLRuntime().recordBlockInfo( + blockInfo.BlockExpression, InvokeFn, result, blockInfo.StructureType, + CurGD && CurGD.isDeclOpenCLKernel() && + (CurGD.getKernelReferenceKind() == KernelReferenceKind::Kernel)); } return result; @@ -1285,7 +1287,7 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE, return getAddrOfGlobalBlockIfEmitted(BE); } -static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, +static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, GlobalDecl GD, const CGBlockInfo &blockInfo, llvm::Constant *blockFn) { assert(blockInfo.CanBeGlobal); @@ -1378,7 +1380,9 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, CGM.getOpenCLRuntime().recordBlockInfo( blockInfo.BlockExpression, cast<llvm::Function>(blockFn->stripPointerCasts()), Result, - literal->getValueType()); + literal->getValueType(), + GD && GD.isDeclOpenCLKernel() && + (GD.getKernelReferenceKind() == KernelReferenceKind::Kernel)); return Result; } @@ -1487,7 +1491,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction( auto GenVoidPtrTy = getContext().getLangOpts().OpenCL ? 
CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; - buildGlobalBlock(CGM, blockInfo, + buildGlobalBlock(CGM, CurGD, blockInfo, llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy)); } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 20455dbb820914..215fe5bd9da8dd 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2343,6 +2343,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // Collect function IR attributes from the CC lowering. // We'll collect the paramete and result attributes later. CallingConv = FI.getEffectiveCallingConvention(); + GlobalDecl GD = CalleeInfo.getCalleeDecl(); + const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); + if (TargetDecl) { + if (auto FD = dyn_cast<FunctionDecl>(TargetDecl)) { + if (FD->hasAttr<OpenCLKernelAttr>() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) + CallingConv = llvm::CallingConv::C; + } + } if (FI.isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); if (FI.isCmseNSCall()) @@ -2352,8 +2361,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); - const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); - // Attach assumption attributes to the declaration. If this is a call // site, attach assumptions from the caller to the call as well. AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 229f0e29f02341..e2222a6393b9b1 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5692,7 +5692,10 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { // Resolve direct calls. } else if (auto DRE = dyn_cast<DeclRefExpr>(E)) { if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { - return EmitDirectCallee(*this, FD); + auto CalleeDecl = FD->hasAttr<OpenCLKernelAttr>() + ? 
GlobalDecl(FD, KernelReferenceKind::Stub) + : FD; + return EmitDirectCallee(*this, CalleeDecl); } } else if (auto ME = dyn_cast<MemberExpr>(E)) { if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) { diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 115b618056a445..a78d783831293e 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -126,14 +126,21 @@ static const BlockExpr *getBlockExpr(const Expr *E) { /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, - llvm::Value *Block, llvm::Type *BlockTy) { - assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice"); + llvm::Value *Block, llvm::Type *BlockTy, + bool isBlkExprInOCLKern) { + + // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub + // version), its constituent BlockExpr will also be emitted twice. + assert((!EnqueuedBlockMap.contains(E) || + EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) && + "Block expression emitted twice"); assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function"); assert(Block->getType()->isPointerTy() && "Invalid block literal type"); EnqueuedBlockMap[E].InvokeFunc = InvokeF; EnqueuedBlockMap[E].BlockArg = Block; EnqueuedBlockMap[E].BlockTy = BlockTy; EnqueuedBlockMap[E].KernelHandle = nullptr; + EnqueuedBlockMap[E].isBlkExprInOCLKern = isBlkExprInOCLKern; } llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h index 34613c3516f374..78bb5980cd87dc 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.h +++ b/clang/lib/CodeGen/CGOpenCLRuntime.h @@ -46,6 +46,7 @@ class CGOpenCLRuntime { llvm::Value *KernelHandle; /// Enqueued block kernel reference. llvm::Value *BlockArg; /// The first argument to enqueued block kernel. llvm::Type *BlockTy; /// Type of the block argument. 
+ bool isBlkExprInOCLKern; /// Does the BlockExpr reside in an OpenCL Kernel. }; /// Maps block expression to block information. llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap; @@ -93,7 +94,8 @@ class CGOpenCLRuntime { /// \param InvokeF invoke function emitted for the block expression. /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, - llvm::Value *Block, llvm::Type *BlockTy); + llvm::Value *Block, llvm::Type *BlockTy, + bool isBlkExprInOCLKern); /// \return LLVM block invoke function emitted for an expression derived from /// the block expression. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 7189a4689e8156..162752241a45e6 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1887,6 +1887,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); + } else if (FD && FD->hasAttr<OpenCLKernelAttr>() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + Out << "__clang_ocl_kern_imp_" << II->getName(); } else { Out << II->getName(); } @@ -3850,6 +3853,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { + + if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody()) + addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub)); + // Update deferred annotations with the latest declaration if the function // function was already used or defined. 
if (FD->hasAttr<AnnotateAttr>()) { diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl new file mode 100644 index 00000000000000..f575728f237630 --- /dev/null +++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s + +// CHECK: define dso_local amdgpu_kernel void @callee_kern({{.*}}) +__attribute__((noinline)) kernel void callee_kern(global int *A){ + *A = 1; +} + +__attribute__((noinline)) kernel void ext_callee_kern(global int *A); + +// CHECK: define dso_local void @callee_func({{.*}}) +__attribute__((noinline)) void callee_func(global int *A){ + *A = 2; +} + +// CHECK: define dso_local amdgpu_kernel void @caller_kern({{.*}}) +kernel void caller_kern(global int* A){ + callee_kern(A); + // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) + ext_callee_kern(A); + // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) + callee_func(A); + // CHECK: tail call void @callee_func({{.*}}) + +} + +// CHECK: define dso_local void @__clang_ocl_kern_imp_callee_kern({{.*}}) + +// CHECK: declare void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) + +// CHECK: define dso_local void @caller_func({{.*}}) +void caller_func(global int* A){ + callee_kern(A); + // CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) #7 + ext_callee_kern(A); + // CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) #8 + callee_func(A); + // CHECK: tail call void @callee_func({{.*}}) +} + +// CHECK: define dso_local void @__clang_ocl_kern_imp_caller_kern({{.*}}) +// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) +// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) +// CHECK: tail call void @callee_func({{.*}}) diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl index 9ae4a5f027d358..0e3e50be9745eb 100644 --- 
a/clang/test/CodeGenOpenCL/reflect.cl +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -13,7 +13,7 @@ bool device_function() { } // CHECK-LABEL: define dso_local spir_kernel void @kernel_function( -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 { // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl index 569ea0cbe1af60..6c8f20511b8bd6 100644 --- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl +++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl @@ -11,8 +11,8 @@ kernel void foo(global int *A) // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0) A[id] = id; bar(A); - // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A) + // CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A) } // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef) -// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4) +// CHECK: declare void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4) diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl index addfe33377f939..e5dc5b29c5140b 100644 --- a/clang/test/CodeGenOpenCL/visibility.cl +++ b/clang/test/CodeGenOpenCL/visibility.cl @@ -85,31 +85,42 @@ __attribute__((visibility("default"))) extern void ext_func_default(); void use() { glob = ext + ext_hidden + ext_protected + ext_default; ext_kern(); + // FVIS-DEFAULT: tail call void 
@__clang_ocl_kern_imp_ext_kern() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern() ext_kern_hidden(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() ext_kern_protected(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected() ext_kern_default(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default() ext_func(); ext_func_hidden(); ext_func_protected(); ext_func_default(); } -// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern() -// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern() +// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern() -// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden() -// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden() +// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() -// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected() -// FVIS-PROTECTED: declare protected amdgpu_kernel void 
@ext_kern_protected() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected() - -// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default() -// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default() -// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default() +// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default() +// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default() +// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default() // FVIS-DEFAULT: declare void @ext_func() // FVIS-PROTECTED: declare protected void @ext_func() @@ -126,3 +137,19 @@ void use() { // FVIS-DEFAULT: declare void @ext_func_default() // FVIS-PROTECTED: declare void @ext_func_default() // FVIS-HIDDEN: declare void @ext_func_default() + +// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern() + +// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden() + +// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected() + +// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default() +// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default() +// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default() >From 013801b1fc82ec6806b876d93da24d4f0f2ed098 Mon 
Sep 17 00:00:00 2001 From: anikelal <anike...@amd.com> Date: Fri, 29 Nov 2024 14:03:57 +0530 Subject: [PATCH 2/2] [Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel Simplifying isDeclOpenCLKernel() and removing resolved comments --- clang/include/clang/AST/GlobalDecl.h | 5 ++--- clang/lib/CodeGen/CGOpenCLRuntime.cpp | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index 8a9f4b4c60e5e5..342232dcf56418 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -213,10 +213,9 @@ class GlobalDecl { } bool isDeclOpenCLKernel() const { - auto FD = dyn_cast<FunctionDecl>(getDecl()); - if (FD) + if (auto FD = dyn_cast<FunctionDecl>(getDecl())) return FD->hasAttr<OpenCLKernelAttr>(); - return FD; + return false; } }; diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp index a78d783831293e..31d40a8774b7b1 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -129,8 +129,6 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, llvm::Value *Block, llvm::Type *BlockTy, bool isBlkExprInOCLKern) { - // FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub - // version), its constituent BlockExpr will also be emitted twice. assert((!EnqueuedBlockMap.contains(E) || EnqueuedBlockMap[E].isBlkExprInOCLKern != isBlkExprInOCLKern) && "Block expression emitted twice"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits