[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
@@ -65,7 +65,7 @@ class CXXFieldCollector { /// getCurFields - Pointer to array of fields added to the currently parsed /// class. - FieldDecl **getCurFields() { return &*(Fields.end() - getCurNumFields()); } Keenuts wrote: Thanks for the review and linked pointers! This seems to be the case, and the generated code only does the pointer arithmetic, no actual load. Closing this as this is not an issue. btw, I like your C++ trivia on twitter 😊 https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
https://github.com/Keenuts closed https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
@@ -1328,6 +1331,31 @@ VersionTuple Triple::getDriverKitVersion() const { } } +VersionTuple Triple::getVulkanVersion() const { + if (getArch() != spirv || getOS() != Vulkan) +llvm_unreachable("invalid Vulkan SPIR-V triple"); + + VersionTuple VulkanVersion = getOSVersion(); + SubArchType SpirvVersion = getSubArch(); + + llvm::DenseMap ValidVersionMap = { Keenuts wrote: Shall we have a way to differentiate between an unsupported Vulkan version and an unsupported SPIR-V/Vulkan mix? Or have a longer message saying we only support v1.2+spv1.5 and v1.3+spv1.6? https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
@@ -4236,20 +4236,35 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, // TODO: Revisit restricting SPIR-V to logical once we've figured out how to // handle PhysicalStorageBuffer64 memory model if (T.isDXIL() || T.isSPIRVLogical()) { - enum { ShaderModel, ShaderStage }; + enum { ShaderModel, VulkanEnv, ShaderStage }; + enum { OS, Environment }; + + int ExpectedOS = T.isSPIRVLogical() ? VulkanEnv : ShaderModel; + if (T.getOSName().empty()) { Diags.Report(diag::err_drv_hlsl_bad_shader_required_in_target) -<< ShaderModel << T.str(); - } else if (!T.isShaderModelOS() || T.getOSVersion() == VersionTuple(0)) { -Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported) -<< ShaderModel << T.getOSName() << T.str(); +<< ExpectedOS << OS << T.str(); } else if (T.getEnvironmentName().empty()) { Diags.Report(diag::err_drv_hlsl_bad_shader_required_in_target) -<< ShaderStage << T.str(); +<< ShaderStage << Environment << T.str(); } else if (!T.isShaderStageEnvironment()) { Diags.Report(diag::err_drv_hlsl_bad_shader_unsupported) << ShaderStage << T.getEnvironmentName() << T.str(); } + + if (T.isDXIL()) { Keenuts wrote: Shall this be an: ```cpp if (T.isDXIL()) { else if (T.isSPIRVLogical()) { } else { llvm_unreachable(); } ``` (In case the condition line 4239 gets changed but not this) https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
@@ -3,29 +3,39 @@ // Supported targets // // RUN: %clang -target dxil-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s -// RUN: %clang -target spirv-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv-unknown-vulkan-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv-unknown-vulkan1.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv-unknown-vulkan1.3-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv1.5-unknown-vulkan1.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv1.6-unknown-vulkan1.3-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s -// Empty shader model +// Empty Vulkan environment // // RUN: not %clang -target spirv %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-NO-OS %s -// Invalid shader models +// Invalid Vulkan environment // -// RUN: not %clang -target spirv--unknown %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-BAD-OS %s +// RUN: not %clang -target spirv--shadermodel %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-BAD-OS %s +// RUN: not %clang -target spirv-unknown-vulkan1.0-compute %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-BAD-OS %s Keenuts wrote: I wonder where we should document that we explicitly don't want to support Vulkan < 1.2 for now, hence this limit. Maybe add a comment here saying "vulkan1.0 is valid, but we chose not to support it"? Same for the tests which check an invalid SPIR-V/Vulkan mix? https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
https://github.com/Keenuts commented: LGTM for the logic, and choices. Just some small nits https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][SPIR-V] Add Vulkan to target triple (PR #76749)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/76749 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From 818ccfd0258602fdd0630823bb2b8af0507749d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/6] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 20c35757939152..ba5e27a5d4668c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5698,6 +5787,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1295,11 +1295,13 @@ double4 trunc(double4); /// true, across all active lanes in the current wave. _HLSL_AVAILABILITY(shadermodel, 6.0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) +__attribute__((convergent)) Keenuts wrote: @llvm-beanz FYI. Seems like I'll remove it then. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From 818ccfd0258602fdd0630823bb2b8af0507749d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/7] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 20c35757939152..ba5e27a5d4668c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5698,6 +5787,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1295,11 +1295,13 @@ double4 trunc(double4); /// true, across all active lanes in the current wave. _HLSL_AVAILABILITY(shadermodel, 6.0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) +__attribute__((convergent)) Keenuts wrote: Right, so in that case, I'll add the convergent attribute again, and later down the road, once we have the noconvergent-default in place, we'll be able to flip this back. Thanks all for the context and explanations! https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From afbe709931942b3970f92884022e250c1e7eb84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/8] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2eaceeba617700..9cc630cd05785a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5801,6 +5890,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: Rebased on main (almost — HEAD is slightly broken), and added back the convergence attribute. The backend changes are ready for this intrinsic. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: @arsenm would you be fine with those codegen changes as-is? Given that the convergent/no-convergent switch will be done later, depending on when the required IR change is merged? https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: @ssahasra it is up to you then 😊 https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From afbe709931942b3970f92884022e250c1e7eb84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/9] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2eaceeba617700..9cc630cd05785a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5801,6 +5890,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,92 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullptr otherwise. +llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return nullptr; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, Keenuts wrote: Thanks, done, Added `Token` to function name end, changed `Attr` to `Token`, and fixed case the first letters. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: Thanks all for the reviews! We have 3 LGTMs and an ack from Arsenm, so I'm going to rebase on main, wait for the bots & tests, and if all is green, merge this. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From dc008167980ca0a479d2cdceeeb1ab6cd4983ec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/9] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index fdb517eb254d3b..76fe8c1d40a419 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1131,8 +1131,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5803,6 +5892,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: Local tests for SPIR-V & DXIL pass https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts closed https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][clang] Move hlsl_wave_get_lane_index to EmitHLSLBuiltinExpr (PR #87131)
Keenuts wrote: Hi! Thanks @farzonl and @marcauberer for making sure SPIR-V backend didn't break, really appreciate it! 😊 https://github.com/llvm/llvm-project/pull/87131 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
@@ -0,0 +1,44 @@ + +//===- CGHLSLUtils.h - Utility functions for HLSL CodeGen ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This File Provides utility function for HLSL code generation. +// It is used to abstract away implementation details of backends. +// +//===--===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLUTILS_H +#define LLVM_CLANG_LIB_CODEGEN_CGHLSLUTILS_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/IntrinsicsSPIRV.h" + +// Define the function generator macro +#define GENERATE_HLSL_INTRINSIC_FUNCTION(name) \ + static llvm::Intrinsic::ID get_hlsl_##name##_intrinsic( \ + const llvm::Triple::ArchType Arch) { \ +switch (Arch) { \ +case llvm::Triple::dxil: \ + return llvm::Intrinsic::dx_##name; \ +case llvm::Triple::spirv: \ + return llvm::Intrinsic::spv_##name; \ +default: \ + llvm_unreachable("Input semantic not supported by target"); \ Keenuts wrote: Shall the error be `"Intrinsic " #name " not supported by target architecture"`? https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
@@ -0,0 +1,95 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; Note: The validator is wrong it wants the return to be a bool vector when it is bool scalar return Keenuts wrote: AFAIK the validator is not wrong, it complains for 2 reasons: `OpAll` cannot be used with a single boolean as input: It **must** be a vector of boolean. And SPIR-V doesn't allow vector of 1 value. Hence, `all(my_bool)` shall be translated to `return my_bool`. Then, for int/long intrinsics: HLSL only checks if the value (any type) is non-zero for all vector items. for SPIR-V, `OpAll` only works for boolean vectors. This means the translation is not that straightforward. What is being done in DXC is the following: ``` %const = OpConstantComposite %v2int %int_0 %int_0 %tmp = OpINotEqual %v2bool %variable %const %res = OpAll %bool %tmp ``` https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
@@ -100,6 +100,118 @@ double3 abs(double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) double4 abs(double4); +//===--===// +// all builtins +//===--===// + +/// \fn bool all(T x) +/// \brief Returns True if all components of the \a x parameter are non-zero; +/// otherwise, false. \param x The input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint16_t4); +#endif + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(bool); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(bool2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(bool3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(bool4); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t4); Keenuts wrote: Not sure if that's considered cleaner, but could those repetition be avoid with something like: ```cpp #define _HLSL_ALIAS_BUILTIN_FOR_VECTOR(BuiltIn, BaseType) \ BuiltIn(BaseType); \ BuiltIn(BaseType ## 2); \ BuiltIn(BaseType ## 3); \ BuiltIn(BaseType ## 4) ``` Which would be used like so: ```cpp #define _DEFINE_BUILTIN_HLSL_ELEMENTWISE_ALL(Type) \ _HLSL_AVAILABILITY(shadermodel, 6.2)\ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) \ bool all(Type) _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ALL, int); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ALL, uint); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ALL, float); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ALL, double); #define 
_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ANY(Type) \ _HLSL_AVAILABILITY(shadermodel, 6.2)\ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_any) \ bool all(Type) _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ANY, int); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ANY, uint); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ANY, float); _HLSL_ALIAS_BUILTIN_FOR_VECTOR(_DEFINE_BUILTIN_HLSL_ELEMENTWISE_ANY, double); ``` https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
@@ -0,0 +1,44 @@ + +//===- CGHLSLUtils.h - Utility functions for HLSL CodeGen ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This File Provides utility function for HLSL code generation. Keenuts wrote: nit: ```suggestion // This file provides utility functions for HLSL code generation. ``` https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
@@ -0,0 +1,44 @@ + +//===- CGHLSLUtils.h - Utility functions for HLSL CodeGen ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This File Provides utility function for HLSL code generation. +// It is used to abstract away implementation details of backends. +// +//===--===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLUTILS_H +#define LLVM_CLANG_LIB_CODEGEN_CGHLSLUTILS_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/IntrinsicsSPIRV.h" + +// Define the function generator macro +#define GENERATE_HLSL_INTRINSIC_FUNCTION(name) \ + static llvm::Intrinsic::ID get_hlsl_##name##_intrinsic( \ + const llvm::Triple::ArchType Arch) { \ +switch (Arch) { \ +case llvm::Triple::dxil: \ + return llvm::Intrinsic::dx_##name; \ +case llvm::Triple::spirv: \ + return llvm::Intrinsic::spv_##name; \ +default: \ + llvm_unreachable("Input semantic not supported by target"); \ +} \ + } + +class CGHLSLUtils { +public: + GENERATE_HLSL_INTRINSIC_FUNCTION(all) Keenuts wrote: Why are those functions in a class and not a namespace? Does the class provide something a namespace wouldn't? https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][DXIL][SPIRV] Implementation of an abstraction for intrinsic selection of HLSL backends (PR #87171)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/87171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][SPIR-V] Add create.handle intrinsic (PR #81038)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/81038 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From 818ccfd0258602fdd0630823bb2b8af0507749d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/3] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 20c35757939152..ba5e27a5d4668c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5698,6 +5787,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts ready_for_review https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
Keenuts wrote: Hi, thanks for the reviews so far! I believe we are ready to move forward on our side. Adding @llvm-beanz for the HLSL part. The builtin I added is mostly to get something we can generate those intrinsics for. I am fine changing the name, or the implementation around it, it's just to have an easy wave intrinsic to use/test with (no input, simplest one) https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { Keenuts wrote: No strong opinion. My rationale is that if you read "optional", you know it's not always there. But I'm fine with the pointer, changed to pointer. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { Keenuts wrote: Yep, but I'm not sure I have any other option other than some kind of caching (either attribute of a BB, or BB->I map). If it's this kind of alternative you are thinking about, maybe this should be left in a subsequent PR as an optimization? https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; Keenuts wrote: yep, thanks https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; Keenuts wrote: well, no... tried, and there is an overload which takes either an ArrayRef or a vector, so the inline array will cause an ambiguity :/ https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); Keenuts wrote: good catch, I have no idea why this is here. I recall something around validation, but today reading it, it does not make sense. Thanks! https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -5686,6 +5686,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!CI->getType()->isVoidTy()) CI->setName("call"); + if (getTarget().getTriple().isSPIRVLogical() && + CI->getCalledFunction()->isConvergent()) Keenuts wrote: Yes, thanks https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From 818ccfd0258602fdd0630823bb2b8af0507749d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/4] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 20c35757939152..ba5e27a5d4668c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5698,6 +5787,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -4554,6 +4554,13 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> { let Prototype = "unsigned int(bool)"; } +// HLSL +def HLSLWaveGetLaneIndex : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_wave_get_lane_index"]; + let Attributes = [NoThrow, Const]; Keenuts wrote: Yes, but this implies touching to the mangling pattern.. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From 818ccfd0258602fdd0630823bb2b8af0507749d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/5] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 20c35757939152..ba5e27a5d4668c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1130,8 +1130,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5698,6 +5787,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1297,5 +1297,10 @@ _HLSL_AVAILABILITY(shadermodel, 6.0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) uint WaveActiveCountBits(bool Val); +/// \brief Returns the index of the current lane within the current wave. +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_index) +uint WaveGetLaneIndex(); Keenuts wrote: Yes, good catch, done https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1130,8 +1130,96 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { Keenuts wrote: Good to know! I'd be in favor to implement this map once we have a merged use case. -> This way, we can have an independent NFC PR which focuses on this optimization/refactoring by adding either an analysis or the bits in `llvm::BasicBlock` to quickly fetch those. https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts created https://github.com/llvm/llvm-project/pull/80680 HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. From 8d653d1af6f624f341e88997682fc271195d8a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. 
Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f17e4a83305bf..0de350dc65485 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1129,8 +1129,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. 
+ I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. + F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::
[clang] [HLSL][SPIR-V] Add support -fspv-target-env opt (PR #78611)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/78611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
https://github.com/Keenuts created https://github.com/llvm/llvm-project/pull/70193 This was found by doing bound-checking on SmallVector iterator usage. When the count is 0, the end iterator is dereferenced to get its address. This doesn't seem to be an issue in practice as most of the time, and we are allowed to deref this address, but I don't think this is correct. From cde1bc9613fa384e4355d39ea29b705b1140dc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Wed, 25 Oct 2023 12:40:22 +0200 Subject: [PATCH] [NFC][Clang] Fix potential deref of end iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was found by doing bound-checking on SmallVector iterator usage. When the count is 0, the end iterator is dereferenced to get its address. This doesn't seem to be an issue in practice as most of the time, and we are allowed to deref this address, but I don't think this is correct. Signed-off-by: Nathan Gauër --- clang/include/clang/Sema/CXXFieldCollector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Sema/CXXFieldCollector.h b/clang/include/clang/Sema/CXXFieldCollector.h index f6ecd9f46e5ebdb..ce066581c93fda7 100644 --- a/clang/include/clang/Sema/CXXFieldCollector.h +++ b/clang/include/clang/Sema/CXXFieldCollector.h @@ -65,7 +65,7 @@ class CXXFieldCollector { /// getCurFields - Pointer to array of fields added to the currently parsed /// class. - FieldDecl **getCurFields() { return &*(Fields.end() - getCurNumFields()); } + FieldDecl **getCurFields() { return Fields.end() - getCurNumFields(); } /// FinishClass - Called by Sema::ActOnFinishCXXClassDef. void FinishClass() { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
Keenuts wrote: CI seems OK, except clang format which complains about an unrelated line. Marking as ready. https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
https://github.com/Keenuts ready_for_review https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC][Clang] Fix potential deref of end iterator (PR #70193)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/70193 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][SPIR-V] Fix clang driver lang target test (PR #70330)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/70330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][SPIR-V] Fix clang driver lang target test (PR #70330)
@@ -2,8 +2,8 @@ // Supported targets // -// RUN: %clang -target dxil-unknown-shadermodel6.2-pixel %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s -// RUN: %clang -target spirv-unknown-shadermodel6.2-library %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s +// RUN: %clang -target dxil-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s Keenuts wrote: why is the `--allow-empty` required? Isn't the `CHECK-VALID-NOT` enough to not require this option? https://github.com/llvm/llvm-project/pull/70330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][SPIR-V] Fix clang driver lang target test (PR #70330)
https://github.com/Keenuts commented: One question, otherwise LGTM, thanks for this! https://github.com/llvm/llvm-project/pull/70330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][SPIR-V] Fix clang driver lang target test (PR #70330)
@@ -2,8 +2,8 @@ // Supported targets // -// RUN: %clang -target dxil-unknown-shadermodel6.2-pixel %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s -// RUN: %clang -target spirv-unknown-shadermodel6.2-library %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s +// RUN: %clang -target dxil-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s Keenuts wrote: Ahh, thanks for the explanation. Yes, sounds better not to rely on an unchecked warning! Thanks! https://github.com/llvm/llvm-project/pull/70330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL][SPIR-V] Fix clang driver lang target test (PR #70330)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/70330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [SPIRV] Add -spirv option to DXC driver (PR #65989)
@@ -2,8 +2,8 @@ // Supported targets // -// RUN: %clang -target dxil-unknown-shadermodel6.2-pixel %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s -// RUN: %clang -target spirv-unknown-shadermodel6.2-library %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-VALID %s +// RUN: %clang -target dxil-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s +// RUN: %clang -target spirv-unknown-shadermodel6.2-compute %s -S -o /dev/null 2>&1 | FileCheck --allow-empty --check-prefix=CHECK-VALID %s Keenuts wrote: Shall shadermodel version be replaced with unknown on the spirv line? (and triple checked to refuse it when explicitly used with spirv for now?) https://github.com/llvm/llvm-project/pull/65989 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 53b6a16 - [SPIR-V] Add SPIR-V logical triple.
Author: Nathan Gauër Date: 2023-09-11T10:15:24+02:00 New Revision: 53b6a169e453a2a91d3713ca16fa089853c670a8 URL: https://github.com/llvm/llvm-project/commit/53b6a169e453a2a91d3713ca16fa089853c670a8 DIFF: https://github.com/llvm/llvm-project/commit/53b6a169e453a2a91d3713ca16fa089853c670a8.diff LOG: [SPIR-V] Add SPIR-V logical triple. Clang implements SPIR-V with both Physical32 and Physical64 addressing models. This commit adds a new triple value for the Logical addressing model. Differential Revision: https://reviews.llvm.org/D155978 Added: Modified: clang/lib/Basic/Targets.cpp clang/lib/Basic/Targets/SPIR.cpp clang/lib/Basic/Targets/SPIR.h clang/lib/Frontend/CompilerInvocation.cpp llvm/include/llvm/TargetParser/Triple.h llvm/lib/TargetParser/Triple.cpp llvm/unittests/TargetParser/TripleTest.cpp Removed: diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 2afffc463d18be1..69576dbc458d9a1 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -665,6 +665,9 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, return nullptr; return std::make_unique(Triple, Opts); } + case llvm::Triple::spirv: { +return std::make_unique(Triple, Opts); + } case llvm::Triple::spirv32: { if (os != llvm::Triple::UnknownOS || Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index 09d482a8b9ef594..dc920177d3a9107 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -33,19 +33,24 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts, DefineStd(Builder, "SPIR64", Opts); } +void BaseSPIRVTargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + DefineStd(Builder, "SPIRV", Opts); +} + void SPIRVTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { - DefineStd(Builder, "SPIRV", Opts); + BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder); 
} void SPIRV32TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { - SPIRVTargetInfo::getTargetDefines(Opts, Builder); + BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder); DefineStd(Builder, "SPIRV32", Opts); } void SPIRV64TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { - SPIRVTargetInfo::getTargetDefines(Opts, Builder); + BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder); DefineStd(Builder, "SPIRV64", Opts); } diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index a7ea03e7a5dd32b..9ab2b7c60936392 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -93,10 +93,6 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { : TargetInfo(Triple) { assert((Triple.isSPIR() || Triple.isSPIRV()) && "Invalid architecture for SPIR or SPIR-V."); -assert(getTriple().getOS() == llvm::Triple::UnknownOS && - "SPIR(-V) target must use unknown OS"); -assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && - "SPIR(-V) target must use unknown environment type"); TLSSupported = false; VLASupported = false; LongWidth = LongAlign = 64; @@ -284,31 +280,53 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo { MacroBuilder &Builder) const override; }; -class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRTargetInfo { +class LLVM_LIBRARY_VISIBILITY BaseSPIRVTargetInfo : public BaseSPIRTargetInfo { public: - SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + BaseSPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : BaseSPIRTargetInfo(Triple, Opts) { assert(Triple.isSPIRV() && "Invalid architecture for SPIR-V."); -assert(getTriple().getOS() == llvm::Triple::UnknownOS && - "SPIR-V target must use unknown OS"); -assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && - "SPIR-V target must use unknown environment type"); + } + + bool 
hasFeature(StringRef Feature) const override { +return Feature == "spirv"; } void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; +}; - bool hasFeature(StringRef Feature) const override { -return Feature == "spirv"; +class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo { +public: + SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : BaseSPIRVTargetInfo(Triple,
[clang] [SPIRV] Add -spirv option to DXC driver (PR #65989)
https://github.com/Keenuts review_requested https://github.com/llvm/llvm-project/pull/65989 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
@@ -1129,8 +1129,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. +std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. + F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); Keenuts wrote: Oh right, thanks! 
https://github.com/llvm/llvm-project/pull/80680 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From f43134787dc5b1120e3722e03028d0071de22331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/2] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d8b2115f1e5e3c..82e652959238b4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1129,8 +1129,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5696,6 +5785,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/80680 From f43134787dc5b1120e3722e03028d0071de22331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 2 Feb 2024 16:38:46 +0100 Subject: [PATCH 1/2] [clang][HLSL][SPRI-V] Add convergence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HLSL has wave operations and other kind of function which required the control flow to either be converged, or respect certain constraints as where and how to re-converge. At the HLSL level, the convergence are mostly obvious: the control flow is expected to re-converge at the end of a scope. Once translated to IR, HLSL scopes disapear. This means we need a way to communicate convergence restrictions down to the backend. For this, the SPIR-V backend uses convergence intrinsics. So this commit adds some code to generate convergence intrinsics when required. This commit is not to be submitted as-is (lacks testing), but should serve as a basis for an upcoming RFC. Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 102 +++ clang/lib/CodeGen/CGCall.cpp | 4 ++ clang/lib/CodeGen/CGLoopInfo.h | 8 ++- clang/lib/CodeGen/CodeGenFunction.h | 19 + llvm/include/llvm/IR/IntrinsicInst.h | 13 5 files changed, 145 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d8b2115f1e5e3c..82e652959238b4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1129,8 +1129,97 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + +// Returns the first convergence entry/loop/anchor instruction found in |BB|. +// std::nullopt otherwise. 
+std::optional getConvergenceToken(llvm::BasicBlock *BB) { + for (auto &I : *BB) { +auto *II = dyn_cast(&I); +if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) + return II; + } + return std::nullopt; +} + } // namespace +llvm::CallBase * +CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input, + llvm::Value *ParentToken) { + llvm::Value *bundleArgs[] = {ParentToken}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + auto Output = llvm::CallBase::addOperandBundle( + Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); + Input->replaceAllUsesWith(Output); + Input->eraseFromParent(); + return Output; +} + +llvm::IntrinsicInst * +CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB, + llvm::Value *ParentToken) { + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto CB = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_loop, {}, {}); + Builder.restoreIP(IP); + + auto I = AddConvergenceControlAttr(CB, ParentToken); + // Controlled convergence is incompatible with uncontrolled convergence. + // Removing any old attributes. + I->setNotConvergent(); + + assert(isa(I)); + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { + auto *BB = &F->getEntryBlock(); + auto token = getConvergenceToken(BB); + if (token.has_value()) +return token.value(); + + // Adding a convergence token requires the function to be marked as + // convergent. 
+ F->setConvergent(); + + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(&BB->front()); + auto I = Builder.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + assert(isa(I)); + Builder.restoreIP(IP); + + return dyn_cast(I); +} + +llvm::IntrinsicInst * +CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { + assert(LI != nullptr); + + auto token = getConvergenceToken(LI->getHeader()); + if (token.has_value()) +return *token; + + llvm::IntrinsicInst *PII = + LI->getParent() + ? EmitConvergenceLoop(LI->getHeader(), +getOrEmitConvergenceLoopToken(LI->getParent())) + : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); + + return EmitConvergenceLoop(LI->getHeader(), PII); +} + +llvm::CallBase * +CodeGenFunction::AddControlledConvergenceAttr(llvm::CallBase *Input) { + llvm::Value *ParentToken = + LoopStack.hasInfo() + ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) + : getOrEmitConvergenceEntryToken(Input->getFunction()); + return AddConvergenceControlAttr(Input, ParentToken); +} + BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -5696,6 +5785,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, {NDRange, Kernel, Block})); } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { +llvm::Typ
[clang] [llvm] [SPIRV][HLSL] Add lowering of frac to SPIR-V (PR #97111)
https://github.com/Keenuts approved this pull request. LGTM on the SPIR-V side, leaving MS look at the HLSL side https://github.com/llvm/llvm-project/pull/97111 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
Keenuts wrote: Merging to unblock the structurizer work. Let me know if you had a specific SEMA check in mind to add in the end! https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
https://github.com/Keenuts closed https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/107408 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
https://github.com/Keenuts deleted https://github.com/llvm/llvm-project/pull/107408 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -744,79 +744,139 @@ static void insertSpirvDecorations(MachineFunction &MF, MachineIRBuilder MIB) { MI->eraseFromParent(); } -// Find basic blocks of the switch and replace registers in spv_switch() by its -// MBB equivalent. -static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, -MachineIRBuilder MIB) { - DenseMap BB2MBB; - SmallVector>> - Switches; +// LLVM allows the switches to use registers as cases, while SPIR-V required +// those to be immediate values. This function replaces such operands with the +// equivalent immediate constant. +static void processSwitchesConstants(MachineFunction &MF, + SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineBasicBlock &MBB : MF) { -MachineRegisterInfo &MRI = MF.getRegInfo(); -BB2MBB[MBB.getBasicBlock()] = &MBB; for (MachineInstr &MI : MBB) { if (!isSpvIntrinsic(MI, Intrinsic::spv_switch)) continue; - // Calls to spv_switch intrinsics representing IR switches. - SmallVector NewOps; - for (unsigned i = 2; i < MI.getNumOperands(); ++i) { + + SmallVector NewOperands; + NewOperands.push_back(MI.getOperand(0)); // Opcode + NewOperands.push_back(MI.getOperand(1)); // Condition + NewOperands.push_back(MI.getOperand(2)); // Default + for (unsigned i = 3; i < MI.getNumOperands(); i += 2) { Register Reg = MI.getOperand(i).getReg(); -if (i % 2 == 1) { - MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); - NewOps.push_back(ConstInstr); -} else { - MachineInstr *BuildMBB = MRI.getVRegDef(Reg); - assert(BuildMBB && - BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR && - BuildMBB->getOperand(1).isBlockAddress() && - BuildMBB->getOperand(1).getBlockAddress()); - NewOps.push_back(BuildMBB); -} +MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); +NewOperands.push_back( +MachineOperand::CreateCImm(ConstInstr->getOperand(1).getCImm())); + +NewOperands.push_back(MI.getOperand(i + 1)); } - Switches.push_back(std::make_pair(&MI, NewOps)); 
+ + assert(MI.getNumOperands() == NewOperands.size()); + while (MI.getNumOperands() > 0) +MI.removeOperand(0); + for (auto &MO : NewOperands) +MI.addOperand(MO); } } +} +// Some instructions are used during CodeGen but should never be emitted. +// Cleaning up those. +static void cleanupHelperInstructions(MachineFunction &MF) { SmallPtrSet ToEraseMI; + for (MachineBasicBlock &MBB : MF) { +for (MachineInstr &MI : MBB) { + if (isSpvIntrinsic(MI, Intrinsic::spv_track_constant) || + MI.getOpcode() == TargetOpcode::G_BRINDIRECT) +ToEraseMI.insert(&MI); +} + } + + for (MachineInstr *MI : ToEraseMI) +MI->eraseFromParent(); +} + +// Find all usages of G_BLOCK_ADDR in our intrinsics and replace those +// operands/registers by the actual MBB it references. +static void processBlockAddr(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + // Gather the reverse-mapping BB -> MBB. + DenseMap BB2MBB; + for (MachineBasicBlock &MBB : MF) +BB2MBB[MBB.getBasicBlock()] = &MBB; + + // Gather instructions requiring patching. For now, only those can use + // G_BLOCK_ADDR. + SmallVector InstructionsToPatch; + for (MachineBasicBlock &MBB : MF) { +for (MachineInstr &MI : MBB) { + if (isSpvIntrinsic(MI, Intrinsic::spv_switch) || + isSpvIntrinsic(MI, Intrinsic::spv_loop_merge) || + isSpvIntrinsic(MI, Intrinsic::spv_selection_merge)) +InstructionsToPatch.push_back(&MI); +} + } + + // For each instruction to fix, we replace all the G_BLOCK_ADDR operands by + // the actual MBB it references. Once those references updated, we can cleanup + // remaining G_BLOCK_ADDR references. 
SmallPtrSet ClearAddressTaken; - for (auto &SwIt : Switches) { -MachineInstr &MI = *SwIt.first; -MachineBasicBlock *MBB = MI.getParent(); -SmallVector &Ins = SwIt.second; + SmallPtrSet ToEraseMI; + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineInstr *MI : InstructionsToPatch) { SmallVector NewOps; -for (unsigned i = 0; i < Ins.size(); ++i) { - if (Ins[i]->getOpcode() == TargetOpcode::G_BLOCK_ADDR) { -BasicBlock *CaseBB = -Ins[i]->getOperand(1).getBlockAddress()->getBasicBlock(); -auto It = BB2MBB.find(CaseBB); -if (It == BB2MBB.end()) - report_fatal_error("cannot find a machine basic block by a basic " - "block in a switch statement"); -MachineBasicBlock *Succ = It->second; -ClearAddressTaken.insert(Succ); -Ne
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -744,79 +744,139 @@ static void insertSpirvDecorations(MachineFunction &MF, MachineIRBuilder MIB) { MI->eraseFromParent(); } -// Find basic blocks of the switch and replace registers in spv_switch() by its -// MBB equivalent. -static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, -MachineIRBuilder MIB) { - DenseMap BB2MBB; - SmallVector>> - Switches; +// LLVM allows the switches to use registers as cases, while SPIR-V required +// those to be immediate values. This function replaces such operands with the +// equivalent immediate constant. +static void processSwitchesConstants(MachineFunction &MF, + SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineBasicBlock &MBB : MF) { -MachineRegisterInfo &MRI = MF.getRegInfo(); -BB2MBB[MBB.getBasicBlock()] = &MBB; for (MachineInstr &MI : MBB) { if (!isSpvIntrinsic(MI, Intrinsic::spv_switch)) continue; - // Calls to spv_switch intrinsics representing IR switches. - SmallVector NewOps; - for (unsigned i = 2; i < MI.getNumOperands(); ++i) { + + SmallVector NewOperands; + NewOperands.push_back(MI.getOperand(0)); // Opcode + NewOperands.push_back(MI.getOperand(1)); // Condition + NewOperands.push_back(MI.getOperand(2)); // Default + for (unsigned i = 3; i < MI.getNumOperands(); i += 2) { Register Reg = MI.getOperand(i).getReg(); -if (i % 2 == 1) { - MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); - NewOps.push_back(ConstInstr); -} else { - MachineInstr *BuildMBB = MRI.getVRegDef(Reg); - assert(BuildMBB && - BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR && - BuildMBB->getOperand(1).isBlockAddress() && - BuildMBB->getOperand(1).getBlockAddress()); - NewOps.push_back(BuildMBB); -} +MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); +NewOperands.push_back( +MachineOperand::CreateCImm(ConstInstr->getOperand(1).getCImm())); + +NewOperands.push_back(MI.getOperand(i + 1)); } - Switches.push_back(std::make_pair(&MI, NewOps)); 
+ + assert(MI.getNumOperands() == NewOperands.size()); + while (MI.getNumOperands() > 0) +MI.removeOperand(0); + for (auto &MO : NewOperands) +MI.addOperand(MO); } } +} +// Some instructions are used during CodeGen but should never be emitted. +// Cleaning up those. +static void cleanupHelperInstructions(MachineFunction &MF) { SmallPtrSet ToEraseMI; Keenuts wrote: That's correct! updated. https://github.com/llvm/llvm-project/pull/107408 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -0,0 +1,1410 @@ +//===-- SPIRVStructurizer.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +//===--===// + +#include "Analysis/SPIRVConvergenceRegionAnalysis.h" +#include "SPIRV.h" +#include "SPIRVSubtarget.h" +#include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include +#include + +using namespace llvm; +using namespace SPIRV; + +namespace llvm { + +void initializeSPIRVStructurizerPass(PassRegistry &); + +namespace { + +using BlockSet = std::unordered_set; +using Edge = std::pair; + +// This class implements a partial ordering visitor, which visits a cyclic graph +// in natural topological-like ordering. Topological ordering is not defined for +// directed graphs with cycles, so this assumes cycles are a single node, and +// ignores back-edges. The cycle is visited from the entry in the same +// topological-like ordering. +// +// This means once we visit a node, we know all the possible ancestors have been +// visited. 
+// +// clang-format off +// +// Given this graph: +// +// ,-> B -\ +// A -++---> D > E -> F -> G -> H +// `-> C -/ ^ | +// +-+ +// +// Visit order is: +// A, [B, C in any order], D, E, F, G, H +// +// clang-format on +// +// Changing the function CFG between the construction of the visitor and +// visiting is undefined. The visitor can be reused, but if the CFG is updated, +// the visitor must be rebuilt. +class PartialOrderingVisitor { + DomTreeBuilder::BBDomTree DT; + LoopInfo LI; + BlockSet Visited; + std::unordered_map B2R; + std::vector> Order; + + // Get all basic-blocks reachable from Start. + BlockSet getReachableFrom(BasicBlock *Start) { +std::queue ToVisit; +ToVisit.push(Start); + +BlockSet Output; +while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); + + if (Output.count(BB) != 0) +continue; + Output.insert(BB); + + for (BasicBlock *Successor : successors(BB)) { +if (DT.dominates(Successor, BB)) + continue; +ToVisit.push(Successor); + } +} + +return Output; + } + + size_t visit(BasicBlock *BB, size_t Rank) { +if (Visited.count(BB) != 0) + return Rank; + +Loop *L = LI.getLoopFor(BB); +const bool isLoopHeader = LI.isLoopHeader(BB); + +if (B2R.count(BB) == 0) { + B2R.emplace(BB, Rank); +} else { + B2R[BB] = std::max(B2R[BB], Rank); +} + +for (BasicBlock *Predecessor : predecessors(BB)) { + if (isLoopHeader && L->contains(Predecessor)) { +continue; + } + + if (B2R.count(Predecessor) == 0) { +return Rank; + } +} + +Visited.insert(BB); + +SmallVector OtherSuccessors; +BasicBlock *LoopSuccessor = nullptr; + +for (BasicBlock *Successor : successors(BB)) { + // Ignoring back-edges. 
+ if (DT.dominates(Successor, BB)) +continue; + + if (isLoopHeader && L->contains(Successor)) { +assert(LoopSuccessor == nullptr); +LoopSuccessor = Successor; + } else +OtherSuccessors.push_back(Successor); +} + +if (LoopSuccessor) + Rank = visit(LoopSuccessor, Rank + 1); + +size_t OutputRank = Rank; +for (BasicBlock *Item : OtherSuccessors) + OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); +return OutputRank; + }; + +public: + // Build the visitor to operate on the function F. + PartialOrderingVisitor(Function &F) { +DT.recalculate(F); +LI = LoopInfo(DT); + +visit(&*F.begin(), 0); + +for (auto &[BB, Rank] : B2R) + Order.emplace_back(BB, Rank); + +std::sort(Order.begin(), Order.end(), [](const auto &LHS, const auto &RHS) { + return LHS.second < RHS.second; +}); + +for (size_t i = 0; i < Order.size(); i++) + B2R[Order[i].first] = i; + } + + // Visit the function starting from the basic block |Start|, and calling |Op| + // on each visited BB. This traversal ignores back-edges, meaning this won't + // visit a node to which |Start| is not an ancestor. + void partialOrder
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
Keenuts wrote: Given the ongoing discussion around spirv-sim, I updated all the hlsl tests to be llvm-ir -> SPIR-V tests. All tests now use both FileCheck and spirv-sim. https://github.com/llvm/llvm-project/pull/107408 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIR-V] Add SPIR-V structurizer (PR #107408)
@@ -744,79 +744,139 @@ static void insertSpirvDecorations(MachineFunction &MF, MachineIRBuilder MIB) { MI->eraseFromParent(); } -// Find basic blocks of the switch and replace registers in spv_switch() by its -// MBB equivalent. -static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, -MachineIRBuilder MIB) { - DenseMap BB2MBB; - SmallVector>> - Switches; +// LLVM allows the switches to use registers as cases, while SPIR-V required +// those to be immediate values. This function replaces such operands with the +// equivalent immediate constant. +static void processSwitchesConstants(MachineFunction &MF, + SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineBasicBlock &MBB : MF) { -MachineRegisterInfo &MRI = MF.getRegInfo(); -BB2MBB[MBB.getBasicBlock()] = &MBB; for (MachineInstr &MI : MBB) { if (!isSpvIntrinsic(MI, Intrinsic::spv_switch)) continue; - // Calls to spv_switch intrinsics representing IR switches. - SmallVector NewOps; - for (unsigned i = 2; i < MI.getNumOperands(); ++i) { + + SmallVector NewOperands; + NewOperands.push_back(MI.getOperand(0)); // Opcode + NewOperands.push_back(MI.getOperand(1)); // Condition + NewOperands.push_back(MI.getOperand(2)); // Default + for (unsigned i = 3; i < MI.getNumOperands(); i += 2) { Register Reg = MI.getOperand(i).getReg(); -if (i % 2 == 1) { - MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); - NewOps.push_back(ConstInstr); -} else { - MachineInstr *BuildMBB = MRI.getVRegDef(Reg); - assert(BuildMBB && - BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR && - BuildMBB->getOperand(1).isBlockAddress() && - BuildMBB->getOperand(1).getBlockAddress()); - NewOps.push_back(BuildMBB); -} +MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); +NewOperands.push_back( +MachineOperand::CreateCImm(ConstInstr->getOperand(1).getCImm())); + +NewOperands.push_back(MI.getOperand(i + 1)); } - Switches.push_back(std::make_pair(&MI, NewOps)); 
+ + assert(MI.getNumOperands() == NewOperands.size()); + while (MI.getNumOperands() > 0) +MI.removeOperand(0); + for (auto &MO : NewOperands) +MI.addOperand(MO); } } +} +// Some instructions are used during CodeGen but should never be emitted. +// Cleaning up those. +static void cleanupHelperInstructions(MachineFunction &MF) { SmallPtrSet ToEraseMI; + for (MachineBasicBlock &MBB : MF) { +for (MachineInstr &MI : MBB) { + if (isSpvIntrinsic(MI, Intrinsic::spv_track_constant) || + MI.getOpcode() == TargetOpcode::G_BRINDIRECT) +ToEraseMI.insert(&MI); +} + } + + for (MachineInstr *MI : ToEraseMI) +MI->eraseFromParent(); +} + +// Find all usages of G_BLOCK_ADDR in our intrinsics and replace those +// operands/registers by the actual MBB it references. +static void processBlockAddr(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + // Gather the reverse-mapping BB -> MBB. + DenseMap BB2MBB; + for (MachineBasicBlock &MBB : MF) +BB2MBB[MBB.getBasicBlock()] = &MBB; + + // Gather instructions requiring patching. For now, only those can use + // G_BLOCK_ADDR. + SmallVector InstructionsToPatch; + for (MachineBasicBlock &MBB : MF) { +for (MachineInstr &MI : MBB) { + if (isSpvIntrinsic(MI, Intrinsic::spv_switch) || + isSpvIntrinsic(MI, Intrinsic::spv_loop_merge) || + isSpvIntrinsic(MI, Intrinsic::spv_selection_merge)) +InstructionsToPatch.push_back(&MI); +} + } + + // For each instruction to fix, we replace all the G_BLOCK_ADDR operands by + // the actual MBB it references. Once those references updated, we can cleanup + // remaining G_BLOCK_ADDR references. 
SmallPtrSet ClearAddressTaken; - for (auto &SwIt : Switches) { -MachineInstr &MI = *SwIt.first; -MachineBasicBlock *MBB = MI.getParent(); -SmallVector &Ins = SwIt.second; + SmallPtrSet ToEraseMI; + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineInstr *MI : InstructionsToPatch) { SmallVector NewOps; -for (unsigned i = 0; i < Ins.size(); ++i) { - if (Ins[i]->getOpcode() == TargetOpcode::G_BLOCK_ADDR) { -BasicBlock *CaseBB = -Ins[i]->getOperand(1).getBlockAddress()->getBasicBlock(); -auto It = BB2MBB.find(CaseBB); -if (It == BB2MBB.end()) - report_fatal_error("cannot find a machine basic block by a basic " - "block in a switch statement"); -MachineBasicBlock *Succ = It->second; -ClearAddressTaken.insert(Succ); -Ne
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
https://github.com/Keenuts created https://github.com/llvm/llvm-project/pull/103299 This commits add the WaveIsFirstLane() hlsl intrinsinc. This intrinsic uses the convergence intrinsincs for the SPIR-V backend. On the DXIL side, I'm not sure what the strategy is. (DXC didn't used convergence intrinsincs for DXIL). From 3c65e014ff038d20fe1fb8229157737306bb89e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Tue, 13 Aug 2024 14:39:03 +0200 Subject: [PATCH] [clang][HLSL] Add WaveIsLaneFirst() intrinsic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commits add the WaveIsLaneFirst() hlsl intrinsinc. This intrinsic uses the convergence intrinsincs for the SPIR-V backend. On the DXIL side, I'm not sure what the strategy is. (DXC didn't used convergence intrinsincs for DXIL). Signed-off-by: Nathan Gauër --- clang/include/clang/Basic/Builtins.td | 6 +++ clang/lib/CodeGen/CGBuiltin.cpp | 4 ++ clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 4 ++ .../builtins/wave_is_first_lane.hlsl | 34 llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 + llvm/lib/Target/DirectX/DXIL.td | 9 .../Target/SPIRV/SPIRVInstructionSelector.cpp | 8 +++ .../SPIRV/SPIRVStripConvergentIntrinsics.cpp | 53 +++ .../CodeGen/DirectX/wave_is_first_lane.ll | 13 + .../SPIRV/hlsl-intrinsics/WaveIsFirstLane.ll | 27 ++ 12 files changed, 141 insertions(+), 22 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl create mode 100644 llvm/test/CodeGen/DirectX/wave_is_first_lane.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveIsFirstLane.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index b025a7681bfac3..b047669ff3c53f 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4677,6 +4677,12 @@ def HLSLWaveGetLaneIndex : 
LangBuiltin<"HLSL_LANG"> { let Prototype = "unsigned int()"; } +def HLSLWaveIsFirstLane : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_wave_is_first_lane"]; + let Attributes = [NoThrow, Const]; + let Prototype = "bool()"; +} + def HLSLClamp : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_clamp"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7fe80b0cbdfbfa..0b96fe9d29b595 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18660,6 +18660,10 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", {}, false, true)); } + case Builtin::BI__builtin_hlsl_wave_is_first_lane: { +Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic(); +return EmitRuntimeCall(Intrinsic::getDeclaration(&CGM.getModule(), ID)); + } } return nullptr; } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 527e73a0e21fc4..d856b03debc063 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -79,6 +79,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp) GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) + GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) //===--===// // End of reserved area for HLSL intrinsic getters. 
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index e35a5262f92809..d7b5d8c40a0889 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1725,5 +1725,9 @@ _HLSL_AVAILABILITY(shadermodel, 6.0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_get_lane_index) __attribute__((convergent)) uint WaveGetLaneIndex(); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_is_first_lane) +__attribute__((convergent)) bool WaveIsFirstLane(); + } // namespace hlsl #endif //_HLSL_HLSL_INTRINSICS_H_ diff --git a/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl new file mode 100644 index 00..18860c321eb912 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shade
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
Keenuts wrote: > We have this work tracked here: #99158 > > there should be some dxil specific tasks. Seems like most boxes are checked, except Sema checks: - what kind of Sema checks would be required for this one? Also, the intrinsic name in the issue is using camel case vs snake case for this PR. But seems like existing ones like thread_id are using snake case (same thing on the SPIR-V backend). So shouldn't we remain consistent? https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
Keenuts wrote: > > We have this work tracked here: #99158 > > there should be some dxil specific tasks. > > Seems like most boxes would be checked by this PR, except Sema checks: > > * what kind of Sema checks would be required for this one? > Also, the intrinsic name in the issue is using camel case vs snake case for > this PR. But seems like existing ones like thread_id are using snake case > (same thing on the SPIR-V backend). So shouldn't we remain consistent? https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
Keenuts wrote: > This commit adds the WaveIsFirstLane() hlsl intrinsic. This intrinsic uses > the convergence intrinsics for the SPIR-V backend. On the DXIL side, I'm not > sure what the strategy is, so this is implemented like in DXC: a simple > builtin function. (DXC didn't use convergence intrinsics for DXIL). https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang][HLSL] Add WaveIsFirstLane() intrinsic (PR #103299)
https://github.com/Keenuts edited https://github.com/llvm/llvm-project/pull/103299 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SPIRV][HLSL] Add lowering of `rsqrt` to SPIRV (PR #95849)
https://github.com/Keenuts approved this pull request. Thanks! https://github.com/llvm/llvm-project/pull/95849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CodeGen] Add AS for Globals to SPIR & SPIRV datalayouts (PR #88455)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/88455 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL][SPIRV] Add any intrinsic lowering (PR #88325)
https://github.com/Keenuts approved this pull request. https://github.com/llvm/llvm-project/pull/88325 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
https://github.com/Keenuts created https://github.com/llvm/llvm-project/pull/88918 PR #80680 added bits in the codegen to lazily add convergence intrinsics when required. This logic relied on the LoopStack. The issue is when parsing the condition, the loopstack doesn't yet reflect the correct values, as expected since we are not yet in the loop. However, convergence tokens should sometimes already be available. The solution which seemed the simplest is to greedily generate the tokens when we generate SPIR-V. Fixes #88144 From 94d76dcdfac88d1d50fe705406c0280c33766e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 15 Apr 2024 17:05:40 +0200 Subject: [PATCH] [clang][SPIR-V] Always add convervence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #80680 added bits in the codegen to lazily add convergence intrinsics when required. This logic relied on the LoopStack. The issue is when parsing the condition, the loopstack doesn't yet reflect the correct values, as expected since we are not yet in the loop. However, convergence tokens should sometimes already be available. The solution which seemed the simplest is to greedily generate the tokens when we generate SPIR-V. 
Fixes #88144 Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 88 + clang/lib/CodeGen/CGCall.cpp | 3 + clang/lib/CodeGen/CGStmt.cpp | 94 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 9 ++ clang/lib/CodeGen/CodeGenFunction.h | 9 +- .../builtins/RWBuffer-constructor.hlsl| 1 - .../CodeGenHLSL/convergence/do.while.hlsl | 90 + clang/test/CodeGenHLSL/convergence/for.hlsl | 121 ++ clang/test/CodeGenHLSL/convergence/while.hlsl | 119 + 9 files changed, 445 insertions(+), 89 deletions(-) create mode 100644 clang/test/CodeGenHLSL/convergence/do.while.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/for.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/while.hlsl diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index df7502b8def531..f5d40a1555fcb5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1133,91 +1133,8 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; -// Returns the first convergence entry/loop/anchor instruction found in |BB|. -// std::nullptr otherwise. 
-llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { - for (auto &I : *BB) { -auto *II = dyn_cast(&I); -if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) - return II; - } - return nullptr; -} - } // namespace -llvm::CallBase * -CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, -llvm::Value *ParentToken) { - llvm::Value *bundleArgs[] = {ParentToken}; - llvm::OperandBundleDef OB("convergencectrl", bundleArgs); - auto Output = llvm::CallBase::addOperandBundle( - Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); - Input->replaceAllUsesWith(Output); - Input->eraseFromParent(); - return Output; -} - -llvm::IntrinsicInst * -CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken) { - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto CB = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_loop, {}, {}); - Builder.restoreIP(IP); - - auto I = addConvergenceControlToken(CB, ParentToken); - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { - auto *BB = &F->getEntryBlock(); - auto *token = getConvergenceToken(BB); - if (token) -return token; - - // Adding a convergence token requires the function to be marked as - // convergent. - F->setConvergent(); - - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto I = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_entry, {}, {}); - assert(isa(I)); - Builder.restoreIP(IP); - - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { - assert(LI != nullptr); - - auto *token = getConvergenceToken(LI->getHeader()); - if (token) -return token; - - llvm::IntrinsicInst *PII = - LI->getParent() - ? 
emitConvergenceLoopToken( -LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent())) - : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); - - return emitConvergenceLoopToken(LI->getHeader(), PII); -} - -llvm::CallBase * -CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) { - llvm::Value *ParentToken = - LoopStack.hasInfo
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
@@ -4987,7 +4990,11 @@ class CodeGenFunction : public CodeGenTypeCache { const llvm::Twine &Name = ""); // Adds a convergence_ctrl token to |Input| and emits the required parent // convergence instructions. - llvm::CallBase *addControlledConvergenceToken(llvm::CallBase *Input); + template + CallType *addControlledConvergenceToken(CallType *Input) { +return dyn_cast( Keenuts wrote: Forgot to fix the use, and yes, should be a cast. Bad habits... https://github.com/llvm/llvm-project/pull/88918 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/88918 From 94d76dcdfac88d1d50fe705406c0280c33766e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 15 Apr 2024 17:05:40 +0200 Subject: [PATCH 1/2] [clang][SPIR-V] Always add convervence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #80680 added bits in the codegen to lazily add convergence intrinsics when required. This logic relied on the LoopStack. The issue is when parsing the condition, the loopstack doesn't yet reflect the correct values, as expected since we are not yet in the loop. However, convergence tokens should sometimes already be available. The solution which seemed the simplest is to greedily generate the tokens when we generate SPIR-V. Fixes #88144 Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 88 + clang/lib/CodeGen/CGCall.cpp | 3 + clang/lib/CodeGen/CGStmt.cpp | 94 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 9 ++ clang/lib/CodeGen/CodeGenFunction.h | 9 +- .../builtins/RWBuffer-constructor.hlsl| 1 - .../CodeGenHLSL/convergence/do.while.hlsl | 90 + clang/test/CodeGenHLSL/convergence/for.hlsl | 121 ++ clang/test/CodeGenHLSL/convergence/while.hlsl | 119 + 9 files changed, 445 insertions(+), 89 deletions(-) create mode 100644 clang/test/CodeGenHLSL/convergence/do.while.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/for.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/while.hlsl diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index df7502b8def531..f5d40a1555fcb5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1133,91 +1133,8 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; -// Returns the first convergence entry/loop/anchor instruction found in |BB|. -// std::nullptr otherwise. 
-llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { - for (auto &I : *BB) { -auto *II = dyn_cast(&I); -if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) - return II; - } - return nullptr; -} - } // namespace -llvm::CallBase * -CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, -llvm::Value *ParentToken) { - llvm::Value *bundleArgs[] = {ParentToken}; - llvm::OperandBundleDef OB("convergencectrl", bundleArgs); - auto Output = llvm::CallBase::addOperandBundle( - Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); - Input->replaceAllUsesWith(Output); - Input->eraseFromParent(); - return Output; -} - -llvm::IntrinsicInst * -CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken) { - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto CB = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_loop, {}, {}); - Builder.restoreIP(IP); - - auto I = addConvergenceControlToken(CB, ParentToken); - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { - auto *BB = &F->getEntryBlock(); - auto *token = getConvergenceToken(BB); - if (token) -return token; - - // Adding a convergence token requires the function to be marked as - // convergent. - F->setConvergent(); - - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto I = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_entry, {}, {}); - assert(isa(I)); - Builder.restoreIP(IP); - - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { - assert(LI != nullptr); - - auto *token = getConvergenceToken(LI->getHeader()); - if (token) -return token; - - llvm::IntrinsicInst *PII = - LI->getParent() - ? 
emitConvergenceLoopToken( -LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent())) - : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); - - return emitConvergenceLoopToken(LI->getHeader(), PII); -} - -llvm::CallBase * -CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) { - llvm::Value *ParentToken = - LoopStack.hasInfo() - ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) - : getOrEmitConvergenceEntryToken(Input->getFunction()); - return addConvergenceControlToken(Input, ParentToken); -} - BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -18306,12 +18223,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef{Op0}, nullptr, "dx.rsqrt"
[clang] [clang][SPIR-V] Set AS for the SPIR-V logical triple (PR #88939)
Keenuts wrote: Thanks all! Agree with Bogner, let's unblock the tests first. As for the address space for globals, this isn't something we have looked into yet, so I'd be in favor of keeping the same behavior as the SPIRV flavor until we have a reason to diverge (as in "thought about this issue" 😊) https://github.com/llvm/llvm-project/pull/88939 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
@@ -1109,6 +1124,10 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = CondDest.getBlock(); EmitBlock(CondBlock); + if (getTarget().getTriple().isSPIRVLogical()) Keenuts wrote: Right, something at the module level, which hides this in a more generic function, like the `shouldEmitRTTI`. Will fix. https://github.com/llvm/llvm-project/pull/88918 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
https://github.com/Keenuts updated https://github.com/llvm/llvm-project/pull/88918 From 94d76dcdfac88d1d50fe705406c0280c33766e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 15 Apr 2024 17:05:40 +0200 Subject: [PATCH 1/3] [clang][SPIR-V] Always add convervence intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #80680 added bits in the codegen to lazily add convergence intrinsics when required. This logic relied on the LoopStack. The issue is when parsing the condition, the loopstack doesn't yet reflect the correct values, as expected since we are not yet in the loop. However, convergence tokens should sometimes already be available. The solution which seemed the simplest is to greedily generate the tokens when we generate SPIR-V. Fixes #88144 Signed-off-by: Nathan Gauër --- clang/lib/CodeGen/CGBuiltin.cpp | 88 + clang/lib/CodeGen/CGCall.cpp | 3 + clang/lib/CodeGen/CGStmt.cpp | 94 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 9 ++ clang/lib/CodeGen/CodeGenFunction.h | 9 +- .../builtins/RWBuffer-constructor.hlsl| 1 - .../CodeGenHLSL/convergence/do.while.hlsl | 90 + clang/test/CodeGenHLSL/convergence/for.hlsl | 121 ++ clang/test/CodeGenHLSL/convergence/while.hlsl | 119 + 9 files changed, 445 insertions(+), 89 deletions(-) create mode 100644 clang/test/CodeGenHLSL/convergence/do.while.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/for.hlsl create mode 100644 clang/test/CodeGenHLSL/convergence/while.hlsl diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index df7502b8def531..f5d40a1555fcb5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1133,91 +1133,8 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; -// Returns the first convergence entry/loop/anchor instruction found in |BB|. -// std::nullptr otherwise. 
-llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) { - for (auto &I : *BB) { -auto *II = dyn_cast(&I); -if (II && isConvergenceControlIntrinsic(II->getIntrinsicID())) - return II; - } - return nullptr; -} - } // namespace -llvm::CallBase * -CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input, -llvm::Value *ParentToken) { - llvm::Value *bundleArgs[] = {ParentToken}; - llvm::OperandBundleDef OB("convergencectrl", bundleArgs); - auto Output = llvm::CallBase::addOperandBundle( - Input, llvm::LLVMContext::OB_convergencectrl, OB, Input); - Input->replaceAllUsesWith(Output); - Input->eraseFromParent(); - return Output; -} - -llvm::IntrinsicInst * -CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB, - llvm::Value *ParentToken) { - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto CB = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_loop, {}, {}); - Builder.restoreIP(IP); - - auto I = addConvergenceControlToken(CB, ParentToken); - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { - auto *BB = &F->getEntryBlock(); - auto *token = getConvergenceToken(BB); - if (token) -return token; - - // Adding a convergence token requires the function to be marked as - // convergent. - F->setConvergent(); - - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(&BB->front()); - auto I = Builder.CreateIntrinsic( - llvm::Intrinsic::experimental_convergence_entry, {}, {}); - assert(isa(I)); - Builder.restoreIP(IP); - - return cast(I); -} - -llvm::IntrinsicInst * -CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) { - assert(LI != nullptr); - - auto *token = getConvergenceToken(LI->getHeader()); - if (token) -return token; - - llvm::IntrinsicInst *PII = - LI->getParent() - ? 
emitConvergenceLoopToken( -LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent())) - : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent()); - - return emitConvergenceLoopToken(LI->getHeader(), PII); -} - -llvm::CallBase * -CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) { - llvm::Value *ParentToken = - LoopStack.hasInfo() - ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo()) - : getOrEmitConvergenceEntryToken(Input->getFunction()); - return addConvergenceControlToken(Input, ParentToken); -} - BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. @@ -18306,12 +18223,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef{Op0}, nullptr, "dx.rsqrt"
[clang] [clang][SPIR-V] Always add convervence intrinsics (PR #88918)
@@ -1109,6 +1124,10 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = CondDest.getBlock(); EmitBlock(CondBlock); + if (getTarget().getTriple().isSPIRVLogical()) Keenuts wrote: Changed those for a module-level function which hides the target specific bit behind a more generic check. Let me know if that's not what you had in mind! https://github.com/llvm/llvm-project/pull/88918 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits