[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)
https://github.com/owenca created https://github.com/llvm/llvm-project/pull/107531 This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports commit 616a8ce6203d8c7569266bfaf163e74df1f440ad. >From 4d8827c9b63ecbc9de984e19621cafad025e4380 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Thu, 5 Sep 2024 23:59:11 -0700 Subject: [PATCH] [clang-format] Correctly annotate braces in macro definition (#107352) This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports commit 616a8ce6203d8c7569266bfaf163e74df1f440ad. --- clang/lib/Format/UnwrappedLineParser.cpp | 6 -- clang/unittests/Format/TokenAnnotatorTest.cpp | 15 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 60e65aaa83e9c1..7813d86ff0ea10 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)); ProbablyBracedList = - ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren)); + ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || + NextTok->is(tok::l_paren))); // If there is a comma, semicolon or right paren after the closing // brace, we assume this is a braced initializer list. @@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { ProbablyBracedList = NextTok->isNot(tok::l_square); } - // Cpp macro definition body containing nonempty braced list or block: + // Cpp macro definition body that is a nonempty braced list or block: if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && + !FormatTok->Previous && NextTok->is(tok::eof) && // A statement can end with only `;` (simple statement), a block // closing brace (compound statement), or `:` (label statement). // If PrevTok is a block opening brace, Tok ends an empty block. 
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index db580d70058811..dd58fbc70cb91e 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3219,6 +3219,21 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace); EXPECT_BRACE_KIND(Tokens[11], BK_Block); + Tokens = annotate("#define MACRO\\\n" +" struct hash {\\\n" +"void f() { return; } \\\n" +" };"); + ASSERT_EQ(Tokens.size(), 20u) << Tokens; + EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace); + EXPECT_BRACE_KIND(Tokens[8], BK_Block); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); + EXPECT_BRACE_KIND(Tokens[13], BK_Block); + EXPECT_BRACE_KIND(Tokens[16], BK_Block); + EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace); + EXPECT_BRACE_KIND(Tokens[17], BK_Block); + Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)
llvmbot wrote: @llvm/pr-subscribers-clang-format Author: Owen Pan (owenca) Changes This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports commit 616a8ce6203d8c7569266bfaf163e74df1f440ad. --- Full diff: https://github.com/llvm/llvm-project/pull/107531.diff 2 Files Affected: - (modified) clang/lib/Format/UnwrappedLineParser.cpp (+4-2) - (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+15) ``diff diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 60e65aaa83e9c1..7813d86ff0ea10 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)); ProbablyBracedList = - ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren)); + ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || + NextTok->is(tok::l_paren))); // If there is a comma, semicolon or right paren after the closing // brace, we assume this is a braced initializer list. @@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { ProbablyBracedList = NextTok->isNot(tok::l_square); } - // Cpp macro definition body containing nonempty braced list or block: + // Cpp macro definition body that is a nonempty braced list or block: if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && + !FormatTok->Previous && NextTok->is(tok::eof) && // A statement can end with only `;` (simple statement), a block // closing brace (compound statement), or `:` (label statement). // If PrevTok is a block opening brace, Tok ends an empty block. 
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index db580d70058811..dd58fbc70cb91e 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3219,6 +3219,21 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace); EXPECT_BRACE_KIND(Tokens[11], BK_Block); + Tokens = annotate("#define MACRO\\\n" +" struct hash {\\\n" +"void f() { return; } \\\n" +" };"); + ASSERT_EQ(Tokens.size(), 20u) << Tokens; + EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace); + EXPECT_BRACE_KIND(Tokens[8], BK_Block); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); + EXPECT_BRACE_KIND(Tokens[13], BK_Block); + EXPECT_BRACE_KIND(Tokens[16], BK_Block); + EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace); + EXPECT_BRACE_KIND(Tokens[17], BK_Block); + Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit); `` https://github.com/llvm/llvm-project/pull/107531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definition (#106662) (PR #107058)
owenca wrote: @tru, I've created #107531 which reverts 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports #107352. https://github.com/llvm/llvm-project/pull/107058 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)
https://github.com/owenca milestoned https://github.com/llvm/llvm-project/pull/107531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/106941 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/106941 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Add TTI support for cpop with LSX (PR #106961)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/106961 >From 456935df7a65147dce6fbb8da8e60094ed647161 Mon Sep 17 00:00:00 2001 From: wanglei Date: Mon, 2 Sep 2024 17:59:38 +0800 Subject: [PATCH] remove debug msg Created using spr 1.3.5-bogner --- llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index 3b227fd7e4345c..5fbc7c734168d1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -86,7 +86,6 @@ const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const { TargetTransformInfo::PopcntSupportKind LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - llvm::errs() << "XXX: " << TyWidth << "\n"; return ST->hasExtLSX() ? TTI::PSK_FastHardware : TTI::PSK_Software; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Add TTI support for cpop with LSX (PR #106961)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/106961 >From 456935df7a65147dce6fbb8da8e60094ed647161 Mon Sep 17 00:00:00 2001 From: wanglei Date: Mon, 2 Sep 2024 17:59:38 +0800 Subject: [PATCH] remove debug msg Created using spr 1.3.5-bogner --- llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index 3b227fd7e4345c..5fbc7c734168d1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -86,7 +86,6 @@ const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const { TargetTransformInfo::PopcntSupportKind LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - llvm::errs() << "XXX: " << TyWidth << "\n"; return ST->hasExtLSX() ? TTI::PSK_FastHardware : TTI::PSK_Software; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102599 >From 4558e8ed9d3f57c10d626c081bcae87e6d3ce41e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 14:51:41 +0400 Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics This will be needed to continue generating the raw instruction in the flat case. --- llvm/lib/IR/AutoUpgrade.cpp| 13 - llvm/test/Bitcode/amdgcn-atomic.ll | 45 -- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 69dae5e32dbbe8..e6ecb0936a4a07 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4107,13 +4109,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { MDNode *EmptyMD = MDNode::get(F->getContext(), {}); RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git 
a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index d642372799f56b..87ca1e3a617ed9 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 
@llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK:
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ilya-biryukov wrote: We did manage to run another round of testing and it fails, with somewhat familiar module-related issues: ```cpp [third_party/absl/container/internal/compressed_tuple.h:250]:24: error: 'absl::container_internal::CompressedTuple>>::get' from module '//third_party/absl/container:compressed_tuple.third_party/absl/container/internal/compressed_tuple.h' is not present in definition of 'absl::container_internal::CompressedTuple>>' provided earlier 250 | constexpr ElemT&& get() && { ``` I am progressing towards a reproducer, hope to share something early next week. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/107329 >From 22e94e4f30c0b3f4c895e789961bff03db745980 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 3 Sep 2024 21:28:05 -0700 Subject: [PATCH] [ctx_prof] Flattened profile lowering pass --- llvm/include/llvm/ProfileData/ProfileCommon.h | 6 +- .../Instrumentation/PGOCtxProfFlattening.h| 25 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/PGOCtxProfFlattening.cpp | 350 ++ .../flatten-always-removes-instrumentation.ll | 12 + .../CtxProfAnalysis/flatten-and-annotate.ll | 112 ++ 9 files changed, 506 insertions(+), 3 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h index eaab59484c947a..edd8e1f644ad12 100644 --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -79,13 +79,13 @@ class ProfileSummaryBuilder { class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { uint64_t MaxInternalBlockCount = 0; - inline void addEntryCount(uint64_t Count); - inline void addInternalCount(uint64_t Count); - public: InstrProfSummaryBuilder(std::vector Cutoffs) : ProfileSummaryBuilder(std::move(Cutoffs)) {} + void addEntryCount(uint64_t Count); + void addInternalCount(uint64_t Count); + void addRecord(const InstrProfRecord &); std::unique_ptr getSummary(); }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h 
b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h new file mode 100644 index 00..0eab3aaf6fcad3 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h @@ -0,0 +1,25 @@ +//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file declares the PGOCtxProfFlattening class. +// +//===--===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H + +#include "llvm/IR/PassManager.h" +namespace llvm { + +class PGOCtxProfFlatteningPass +: public PassInfoMixin { +public: + explicit PGOCtxProfFlatteningPass() = default; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index a22abed8051a11..d87e64eff08966 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -198,6 +198,7 @@ #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 1fd7ef929c87d5..38297dc02b8be6 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -76,6 +76,7 @@ #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include 
"llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d6067089c6b5c1..2b0624cb9874da 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("ctx-instr-gen", PGOInstrumentationGen(PGOInstrum
[llvm-branch-commits] [llvm] [ctx_prof] Insert the ctx prof flattener after the module inliner (PR #107499)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/107499 >From 3cd88ecfa05613ce4f8e4d9671ca3e1d4169fe82 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 5 Sep 2024 12:52:56 -0700 Subject: [PATCH] [ctx_prof] Insert the ctx prof flattener after the module inliner --- llvm/lib/Passes/PassBuilderPipelines.cpp | 18 +- llvm/lib/Transforms/IPO/ModuleInliner.cpp| 6 -- llvm/test/Analysis/CtxProfAnalysis/inline.ll | 17 + llvm/test/Other/opt-hot-cold-split.ll| 2 +- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 38297dc02b8be6..f9b5f584e00c07 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1017,6 +1017,11 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, IP.EnableDeferral = false; MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); + if (!UseCtxProfile.empty()) { +MPM.addPass(GlobalOptPass()); +MPM.addPass(GlobalDCEPass()); +MPM.addPass(PGOCtxProfFlatteningPass()); + } MPM.addPass(createModuleToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase), @@ -1744,11 +1749,14 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( MPM.addPass(GlobalDCEPass()); return MPM; } - - // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline( - Level, ThinOrFullLTOPhase::ThinLTOPostLink)); - + if (!UseCtxProfile.empty()) { +MPM.addPass( +buildModuleInlinerPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink)); + } else { +// Add the core simplification pipeline. +MPM.addPass(buildModuleSimplificationPipeline( +Level, ThinOrFullLTOPhase::ThinLTOPostLink)); + } // Now add the optimization pipeline. 
MPM.addPass(buildModuleOptimizationPipeline( Level, ThinOrFullLTOPhase::ThinLTOPostLink)); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index b7e4531c8e390d..542c319b880747 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -241,8 +241,10 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // the post-inline cleanup and the next DevirtSCCRepeatedPass // iteration because the next iteration may not happen and we may // miss inlining it. - if (tryPromoteCall(*ICB)) -NewCallee = ICB->getCalledFunction(); + // FIXME: enable for ctxprof. + if (!CtxProf) +if (tryPromoteCall(*ICB)) + NewCallee = ICB->getCalledFunction(); } if (NewCallee) if (!NewCallee->isDeclaration()) diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll index 875bc4938653b9..9381418c4e3f12 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll @@ -31,6 +31,23 @@ ; CHECK-NEXT:%call2 = call i32 @a(i32 %x) #1 ; CHECK-NEXT:br label %exit +; Make sure the postlink thinlto pipeline is aware of ctxprof +; RUN: opt -passes='thinlto' -use-ctx-profile=%t/profile.ctxprofdata \ +; RUN: %t/module.ll -S -o - | FileCheck %s --check-prefix=PIPELINE + +; PIPELINE-LABEL: define i32 @entrypoint +; PIPELINE-SAME: !prof ![[ENTRYPOINT_COUNT:[0-9]+]] +; PIPELINE-LABEL: loop.i: +; PIPELINE: br i1 %cond.i, label %loop.i, label %exit, !prof ![[LOOP_BW_INL:[0-9]+]] +; PIPELINE-LABEL: define i32 @a +; PIPELINE-LABEL: loop: +; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]] + +; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10} +; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop) +; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2} +; These are the weights of the un-inlined @a, where the counters were 8, 
500 (8 for entry, 500 for loop) +; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8} ;--- module.ll define i32 @entrypoint(i32 %x) !guid !0 { diff --git a/llvm/test/Other/opt-hot-cold-split.ll b/llvm/test/Other/opt-hot-cold-split.ll index 21c713d35bb746..cd290dcc306570 100644 --- a/llvm/test/Other/opt-hot-cold-split.ll +++ b/llvm/test/Other/opt-hot-cold-split.ll @@ -2,7 +2,7 @@ ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto-pre-link' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='thinlto-pre-link' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-POSTLINK-Os -;
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
zygoloid wrote: > what the code does is: when we write an on-disk hash table, try to write the > imported merged hash table in the same process so that we don't need to read > these tables again. However, in line 329 the function will try to omit the > data from imported table with the same key which was already emitted by the > current module file. This is the root cause of the problem. It's been a while since I looked at this, but as I recall, a fundamental assumption of MultiOnDiskHashTable is that if we have a lookup result for a key K in the current file, that result supersedes any results from dependency files. So lookup won't look in those files if we have a local result (they are overridden) and merging doesn't take results from those files either. So I think the problem probably is that when we form a local result, we need to (but presumably don't) add all the imported results with the same key to the local result. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add pseudo probe inline tree to YAML profile (PR #107137)
aaupov wrote: Update on profile size reduction: - What I reported as a baseline (221M) is with pseudo probes but no inline tree (produced by BOLT trunk). - What I reported as new size (404M) is with pseudo probes and inline tree encoded for each top-level function (this diff at [85c8e9e](https://github.com/llvm/llvm-project/pull/107137/commits/85c8e9e851ca26e853b57504b18a2816cc4a5d67)) - The proper **baseline** is the **profile without pseudo probe information** (61M). - With better pseudo probe encoding, I've reduced the size of the profile without inline tree to 117M. - With better inline tree encoding, the total size is 174M (2.85x). Compressed is down to 24M (1.2x). https://github.com/llvm/llvm-project/pull/107137 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)
https://github.com/david-xl approved this pull request. https://github.com/llvm/llvm-project/pull/107329 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)
mtrofin wrote: ### Merge activity * **Sep 6, 4:40 PM EDT**: @mtrofin started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/107329). https://github.com/llvm/llvm-project/pull/107329 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/107659 Update the GPU to NVVM lowerings to correctly propagate range information on IDs and dimension queries, etiher from known_{block,grid}_size attributes or from `upperBound` annotations on the operations themselves. >From f50dcd32b4ce02dc5046f8a3df3628b4b2096030 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Fri, 6 Sep 2024 23:45:52 + Subject: [PATCH] [mlir][GPU] Plumb range information through the NVVM lowterings Update the GPU to NVVM lowerings to correctly propagate range information on IDs and dimension queries, etiher from known_{block,grid}_size attributes or from `upperBound` annotations on the operations themselves. --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 283 +- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp| 50 +++- .../Dialect/NVVM/LLVMIRToNVVMTranslation.cpp | 1 + .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 18 +- mlir/test/Target/LLVMIR/Import/nvvmir.ll | 3 + mlir/test/Target/LLVMIR/nvvmir.mlir | 7 +- 6 files changed, 207 insertions(+), 155 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 709dd922b8fa2f..66ac9f289d233b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -86,8 +86,8 @@ class NVVM_Op traits = []> : LLVM_OpBase { } -/// Base class that defines BasicPtxBuilderOpInterface. -class NVVM_PTXBuilder_Op traits = [DeclareOpInterfaceMethods]> : LLVM_OpBase { } @@ -123,52 +123,67 @@ class NVVM_SpecialRegisterOp traits = []> : let assemblyFormat = "attr-dict `:` type($res)"; } +class NVVM_SpecialRangeableRegisterOp traits = []> : + NVVM_SpecialRegisterOp { + let arguments = (ins OptionalAttr:$range); + let assemblyFormat = "(`range` $range^)? 
attr-dict `:` type($res)"; + let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda; + let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda; + + // Backwards-compatibility builder for an unspecified range. + let builders = [ +OpBuilder<(ins "Type":$resultType), [{ + build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{}); +}]> + ]; +} + //===--===// // Lane index and range -def NVVM_LaneIdOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.laneid">; -def NVVM_WarpSizeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.warpsize">; +def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">; +def NVVM_WarpSizeOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">; //===--===// // Thread index and range -def NVVM_ThreadIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.x">; -def NVVM_ThreadIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.y">; -def NVVM_ThreadIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.z">; -def NVVM_BlockDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.x">; -def NVVM_BlockDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.y">; -def NVVM_BlockDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.z">; +def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">; +def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">; +def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">; +def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">; +def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">; +def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">; //===--===// // Block index and range -def NVVM_BlockIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.x">; -def NVVM_BlockIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.y">; -def NVVM_BlockIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.z">; -def NVVM_GridDimXOp : 
NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.x">; -def NVVM_GridDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.y">; -def NVVM_GridDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.z">; +def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">; +def NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">; +def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">; +def NVVM_GridDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">; +def NVVM_GridDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">; +def NVVM_GridDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">; //===--===// // CTA Cluster index and range -def NVVM_ClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.x">;
[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)
krzysz00 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/107659?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#107659** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/107659?utm_source=stack-comment-icon) 👈 * **#107658** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/107658?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment). Join @krzysz00 and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment) https://github.com/llvm/llvm-project/pull/107659 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)
https://github.com/krzysz00 ready_for_review https://github.com/llvm/llvm-project/pull/107659 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)
llvmbot wrote: @llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: Krzysztof Drewniak (krzysz00) Changes Update the GPU to NVVM lowerings to correctly propagate range information on IDs and dimension queries, etiher from known_{block,grid}_size attributes or from `upperBound` annotations on the operations themselves. --- Patch is 37.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107659.diff 6 Files Affected: - (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+149-134) - (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+34-16) - (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp (+1) - (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+15-3) - (modified) mlir/test/Target/LLVMIR/Import/nvvmir.ll (+3) - (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (+5-2) ``diff diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 709dd922b8fa2f..66ac9f289d233b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -86,8 +86,8 @@ class NVVM_Op traits = []> : LLVM_OpBase { } -/// Base class that defines BasicPtxBuilderOpInterface. -class NVVM_PTXBuilder_Op traits = [DeclareOpInterfaceMethods]> : LLVM_OpBase { } @@ -123,52 +123,67 @@ class NVVM_SpecialRegisterOp traits = []> : let assemblyFormat = "attr-dict `:` type($res)"; } +class NVVM_SpecialRangeableRegisterOp traits = []> : + NVVM_SpecialRegisterOp { + let arguments = (ins OptionalAttr:$range); + let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)"; + let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda; + let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda; + + // Backwards-compatibility builder for an unspecified range. 
+ let builders = [ +OpBuilder<(ins "Type":$resultType), [{ + build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{}); +}]> + ]; +} + //===--===// // Lane index and range -def NVVM_LaneIdOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.laneid">; -def NVVM_WarpSizeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.warpsize">; +def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">; +def NVVM_WarpSizeOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">; //===--===// // Thread index and range -def NVVM_ThreadIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.x">; -def NVVM_ThreadIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.y">; -def NVVM_ThreadIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.z">; -def NVVM_BlockDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.x">; -def NVVM_BlockDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.y">; -def NVVM_BlockDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.z">; +def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">; +def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">; +def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">; +def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">; +def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">; +def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">; //===--===// // Block index and range -def NVVM_BlockIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.x">; -def NVVM_BlockIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.y">; -def NVVM_BlockIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.z">; -def NVVM_GridDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.x">; -def NVVM_GridDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.y">; -def NVVM_GridDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.z">; +def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">; +def 
NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">; +def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">; +def NVVM_GridDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">; +def NVVM_GridDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">; +def NVVM_GridDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">; //===--===// // CTA Cluster index and range -def NVVM_ClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.x">; -def NVVM_ClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.y">; -def NVVM_ClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.z">; -def NVVM_ClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ncl
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/104252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)
@@ -0,0 +1,102 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +declare void @scalar_user(float) +declare void @vector_user(<4 x float>) + +define void @loadfloats() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding + ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; The temporary casts should all have been cleaned up + ; CHECK-NOT: %dx.cast_handle + + ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) + %data0 = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0) + + ; The extract order depends on the users, so don't enforce that here. + ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 0 + %data0_0 = extractelement <4 x float> %data0, i32 0 + ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 2 + %data0_2 = extractelement <4 x float> %data0, i32 2 + + ; If all of the uses are extracts, we skip creating a vector + ; CHECK-NOT: insertelement + call void @scalar_user(float %data0_0) + call void @scalar_user(float %data0_2) + + ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef) + %data4 = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4) + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + 
; CHECK: insertelement <4 x float> + call void @vector_user(<4 x float> %data4) + + ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef) + %data12 = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12) + + ; CHECK: [[DATA12_3:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA12]], 3 + %data12_3 = extractelement <4 x float> %data12, i32 3 bogner wrote: There is now a test with a non-immediate access (and a bunch of logic so that that matches DXC...) https://github.com/llvm/llvm-project/pull/104252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)
@@ -0,0 +1,102 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +declare void @scalar_user(float) +declare void @vector_user(<4 x float>) + +define void @loadfloats() { + ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding + ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] + %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; The temporary casts should all have been cleaned up + ; CHECK-NOT: %dx.cast_handle + + ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) + %data0 = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0) + + ; The extract order depends on the users, so don't enforce that here. + ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 0 + %data0_0 = extractelement <4 x float> %data0, i32 0 + ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 2 + %data0_2 = extractelement <4 x float> %data0, i32 2 + + ; If all of the uses are extracts, we skip creating a vector + ; CHECK-NOT: insertelement + call void @scalar_user(float %data0_0) + call void @scalar_user(float %data0_2) + + ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef) + %data4 = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4) + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + 
; CHECK: insertelement <4 x float> + call void @vector_user(<4 x float> %data4) + + ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef) + %data12 = call <4 x float> @llvm.dx.typedBufferLoad( bogner wrote: This changed in the latest ;) https://github.com/llvm/llvm-project/pull/104252 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] wip: [DirectX] Return a struct from llvm.dx.typedBufferLoad (PR #106645)
https://github.com/bogner closed https://github.com/llvm/llvm-project/pull/106645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] wip: [DirectX] Return a struct from llvm.dx.typedBufferLoad (PR #106645)
bogner wrote: This isn't the direction we ended up going. https://github.com/llvm/llvm-project/pull/106645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] a7a4eb8 - Revert "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form …"
Author: dyung Date: 2024-09-06T19:52:24-07:00 New Revision: a7a4eb83468683ae6115bf0c153f54cf46eec45e URL: https://github.com/llvm/llvm-project/commit/a7a4eb83468683ae6115bf0c153f54cf46eec45e DIFF: https://github.com/llvm/llvm-project/commit/a7a4eb83468683ae6115bf0c153f54cf46eec45e.diff LOG: Revert "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form …" This reverts commit 2cb4d1b1bd7bde2724b79976e859684bd3f5c771. Added: Modified: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll Removed: diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index f966ccaa838422..3ca3818938fd26 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2186,12 +2186,6 @@ class LSRInstance { /// Induction variables that were generated and inserted by the SCEV Expander. SmallVector ScalarEvolutionIVs; - // Inserting instructions in the loop and using them as PHI's input could - // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the - // corresponding incoming block is not loop exiting). So collect all such - // instructions to form LCSSA for them later. 
- SmallSetVector InsertedNonLCSSAInsts; - void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); @@ -2282,9 +2276,9 @@ class LSRInstance { SmallVectorImpl &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts) const; void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts) const; void ImplementSolution(const SmallVectorImpl &Solution); public: @@ -5864,11 +5858,17 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. -void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, -const LSRFixup &LF, const Formula &F, -SmallVectorImpl &DeadInsts) { +void LSRInstance::RewriteForPHI( +PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, +SmallVectorImpl &DeadInsts) const { DenseMap Inserted; + // Inserting instructions in the loop and using them as PHI's input could + // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the + // corresponding incoming block is not loop exiting). So collect all such + // instructions to form LCSSA for them later. + SmallVector InsertedNonLCSSAInsts; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { bool needUpdateFixups = false; @@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, // the inserted value. 
if (auto *I = dyn_cast(FullV)) if (L->contains(I) && !L->contains(BB)) -InsertedNonLCSSAInsts.insert(I); +InsertedNonLCSSAInsts.push_back(I); PN->setIncomingValue(i, FullV); Pair.first->second = FullV; @@ -5983,6 +5983,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, } } } + + formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE); } /// Emit instructions for the leading candidate expression for this LSRUse (this @@ -5990,7 +5992,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, /// expanded value. void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast(LF.UserInst)) { @@ -6078,9 +6080,6 @@ void LSRInstance::ImplementSolution( Changed = true; } - auto InsertedInsts = InsertedNonLCSSAInsts.take