[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)

2024-09-06 Thread Owen Pan via llvm-branch-commits

https://github.com/owenca created 
https://github.com/llvm/llvm-project/pull/107531

This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports 
commit 616a8ce6203d8c7569266bfaf163e74df1f440ad.

>From 4d8827c9b63ecbc9de984e19621cafad025e4380 Mon Sep 17 00:00:00 2001
From: Owen Pan 
Date: Thu, 5 Sep 2024 23:59:11 -0700
Subject: [PATCH] [clang-format] Correctly annotate braces in macro definition
 (#107352)

This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports
commit 616a8ce6203d8c7569266bfaf163e74df1f440ad.
---
 clang/lib/Format/UnwrappedLineParser.cpp  |  6 --
 clang/unittests/Format/TokenAnnotatorTest.cpp | 15 +++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp 
b/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9c1..7813d86ff0ea10 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool 
ExpectClassBody) {
 NextTok->isOneOf(Keywords.kw_of, 
Keywords.kw_in,
  Keywords.kw_as));
   ProbablyBracedList =
-  ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+  ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+   NextTok->is(tok::l_paren)));
 
   // If there is a comma, semicolon or right paren after the closing
   // brace, we assume this is a braced initializer list.
@@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool 
ExpectClassBody) {
 ProbablyBracedList = NextTok->isNot(tok::l_square);
   }
 
-  // Cpp macro definition body containing nonempty braced list or 
block:
+  // Cpp macro definition body that is a nonempty braced list or block:
   if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+  !FormatTok->Previous && NextTok->is(tok::eof) &&
   // A statement can end with only `;` (simple statement), a block
   // closing brace (compound statement), or `:` (label statement).
   // If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp 
b/clang/unittests/Format/TokenAnnotatorTest.cpp
index db580d70058811..dd58fbc70cb91e 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3219,6 +3219,21 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
 
+  Tokens = annotate("#define MACRO\\\n"
+"  struct hash {\\\n"
+"void f() { return; } \\\n"
+"  };");
+  ASSERT_EQ(Tokens.size(), 20u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace);
+  EXPECT_BRACE_KIND(Tokens[8], BK_Block);
+  EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace);
+  EXPECT_BRACE_KIND(Tokens[13], BK_Block);
+  EXPECT_BRACE_KIND(Tokens[16], BK_Block);
+  EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace);
+  EXPECT_BRACE_KIND(Tokens[17], BK_Block);
+
   Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)

2024-09-06 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-format

Author: Owen Pan (owenca)


Changes

This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports 
commit 616a8ce6203d8c7569266bfaf163e74df1f440ad.

---
Full diff: https://github.com/llvm/llvm-project/pull/107531.diff


2 Files Affected:

- (modified) clang/lib/Format/UnwrappedLineParser.cpp (+4-2) 
- (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+15) 


``diff
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp 
b/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9c1..7813d86ff0ea10 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool 
ExpectClassBody) {
 NextTok->isOneOf(Keywords.kw_of, 
Keywords.kw_in,
  Keywords.kw_as));
   ProbablyBracedList =
-  ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+  ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+   NextTok->is(tok::l_paren)));
 
   // If there is a comma, semicolon or right paren after the closing
   // brace, we assume this is a braced initializer list.
@@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool 
ExpectClassBody) {
 ProbablyBracedList = NextTok->isNot(tok::l_square);
   }
 
-  // Cpp macro definition body containing nonempty braced list or 
block:
+  // Cpp macro definition body that is a nonempty braced list or block:
   if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+  !FormatTok->Previous && NextTok->is(tok::eof) &&
   // A statement can end with only `;` (simple statement), a block
   // closing brace (compound statement), or `:` (label statement).
   // If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp 
b/clang/unittests/Format/TokenAnnotatorTest.cpp
index db580d70058811..dd58fbc70cb91e 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3219,6 +3219,21 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
 
+  Tokens = annotate("#define MACRO\\\n"
+"  struct hash {\\\n"
+"void f() { return; } \\\n"
+"  };");
+  ASSERT_EQ(Tokens.size(), 20u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace);
+  EXPECT_BRACE_KIND(Tokens[8], BK_Block);
+  EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace);
+  EXPECT_BRACE_KIND(Tokens[13], BK_Block);
+  EXPECT_BRACE_KIND(Tokens[16], BK_Block);
+  EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace);
+  EXPECT_BRACE_KIND(Tokens[17], BK_Block);
+
   Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);

``




https://github.com/llvm/llvm-project/pull/107531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Correctly annotate braces in macro definition (#106662) (PR #107058)

2024-09-06 Thread Owen Pan via llvm-branch-commits

owenca wrote:

@tru, I've created #107531 which reverts 
2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports #107352.

https://github.com/llvm/llvm-project/pull/107058
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-format] Correctly annotate braces in macro definition (#107352) (PR #107531)

2024-09-06 Thread Owen Pan via llvm-branch-commits

https://github.com/owenca milestoned 
https://github.com/llvm/llvm-project/pull/107531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)

2024-09-06 Thread via llvm-branch-commits

https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/106941


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)

2024-09-06 Thread via llvm-branch-commits

https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/106941


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Add TTI support for cpop with LSX (PR #106961)

2024-09-06 Thread via llvm-branch-commits

https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/106961

>From 456935df7a65147dce6fbb8da8e60094ed647161 Mon Sep 17 00:00:00 2001
From: wanglei 
Date: Mon, 2 Sep 2024 17:59:38 +0800
Subject: [PATCH] remove debug msg

Created using spr 1.3.5-bogner
---
 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp 
b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 3b227fd7e4345c..5fbc7c734168d1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -86,7 +86,6 @@ const char *LoongArchTTIImpl::getRegisterClassName(unsigned 
ClassID) const {
 TargetTransformInfo::PopcntSupportKind
 LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  llvm::errs() << "XXX: " << TyWidth << "\n";
   return ST->hasExtLSX() ? TTI::PSK_FastHardware : TTI::PSK_Software;
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Add TTI support for cpop with LSX (PR #106961)

2024-09-06 Thread via llvm-branch-commits

https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/106961

>From 456935df7a65147dce6fbb8da8e60094ed647161 Mon Sep 17 00:00:00 2001
From: wanglei 
Date: Mon, 2 Sep 2024 17:59:38 +0800
Subject: [PATCH] remove debug msg

Created using spr 1.3.5-bogner
---
 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp 
b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 3b227fd7e4345c..5fbc7c734168d1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -86,7 +86,6 @@ const char *LoongArchTTIImpl::getRegisterClassName(unsigned 
ClassID) const {
 TargetTransformInfo::PopcntSupportKind
 LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  llvm::errs() << "XXX: " << TyWidth << "\n";
   return ST->hasExtLSX() ? TTI::PSK_FastHardware : TTI::PSK_Software;
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)

2024-09-06 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102599

>From 4558e8ed9d3f57c10d626c081bcae87e6d3ce41e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 9 Aug 2024 14:51:41 +0400
Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading
 atomic intrinsics

This will be needed to continue generating the raw instruction in the flat case.
---
 llvm/lib/IR/AutoUpgrade.cpp| 13 -
 llvm/test/Bitcode/amdgcn-atomic.ll | 45 --
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 69dae5e32dbbe8..e6ecb0936a4a07 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -34,9 +34,11 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
@@ -4107,13 +4109,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef 
Name, CallBase *CI,
   AtomicRMWInst *RMW =
   Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
-  if (PtrTy->getAddressSpace() != 3) {
+  unsigned AddrSpace = PtrTy->getAddressSpace();
+  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
   RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
+  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
+MDBuilder MDB(F->getContext());
+MDNode *RangeNotPrivate =
+MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
+APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
+RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
+  }
+
   if (IsVolatile)
 RMW->setVolatile(true);
 
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll 
b/llvm/test/Bitcode/amdgcn-atomic.ll
index d642372799f56b..87ca1e3a617ed9 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -2,10 +2,10 @@
 
 
 define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 }
 
 define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 
 ; Test some invalid ordering handling
 define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 
-1, i32 0, i1 true)
 
-  ; CHECK:

[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)

2024-09-06 Thread Ilya Biryukov via llvm-branch-commits

ilya-biryukov wrote:

We did manage to run another round of testing and it fails, with somewhat 
familiar module-related issues:

```cpp
[third_party/absl/container/internal/compressed_tuple.h:250]:24: error: 
'absl::container_internal::CompressedTuple>>::get' from module 
'//third_party/absl/container:compressed_tuple.third_party/absl/container/internal/compressed_tuple.h'
 is not present in definition of 
'absl::container_internal::CompressedTuple>>' provided earlier
  250 |   constexpr ElemT&& get() && {
```

I am progressing towards a reproducer, hope to share something early next week.

https://github.com/llvm/llvm-project/pull/83237
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)

2024-09-06 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/107329

>From 22e94e4f30c0b3f4c895e789961bff03db745980 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 3 Sep 2024 21:28:05 -0700
Subject: [PATCH] [ctx_prof] Flattened profile lowering pass

---
 llvm/include/llvm/ProfileData/ProfileCommon.h |   6 +-
 .../Instrumentation/PGOCtxProfFlattening.h|  25 ++
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |   1 +
 llvm/lib/Passes/PassRegistry.def  |   1 +
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 350 ++
 .../flatten-always-removes-instrumentation.ll |  12 +
 .../CtxProfAnalysis/flatten-and-annotate.ll   | 112 ++
 9 files changed, 506 insertions(+), 3 deletions(-)
 create mode 100644 
llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
 create mode 100644 
llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll

diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h 
b/llvm/include/llvm/ProfileData/ProfileCommon.h
index eaab59484c947a..edd8e1f644ad12 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -79,13 +79,13 @@ class ProfileSummaryBuilder {
 class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
   uint64_t MaxInternalBlockCount = 0;
 
-  inline void addEntryCount(uint64_t Count);
-  inline void addInternalCount(uint64_t Count);
-
 public:
   InstrProfSummaryBuilder(std::vector Cutoffs)
   : ProfileSummaryBuilder(std::move(Cutoffs)) {}
 
+  void addEntryCount(uint64_t Count);
+  void addInternalCount(uint64_t Count);
+
   void addRecord(const InstrProfRecord &);
   std::unique_ptr getSummary();
 };
diff --git 
a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h 
b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
new file mode 100644
index 00..0eab3aaf6fcad3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
@@ -0,0 +1,25 @@
+//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file declares the PGOCtxProfFlattening class.
+//
+//===--===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+
+#include "llvm/IR/PassManager.h"
+namespace llvm {
+
+class PGOCtxProfFlatteningPass
+: public PassInfoMixin {
+public:
+  explicit PGOCtxProfFlatteningPass() = default;
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a22abed8051a11..d87e64eff08966 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -198,6 +198,7 @@
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1fd7ef929c87d5..38297dc02b8be6 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d6067089c6b5c1..2b0624cb9874da 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass())
 MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
 MODULE_PASS("ctx-instr-gen",
 PGOInstrumentationGen(PGOInstrum

[llvm-branch-commits] [llvm] [ctx_prof] Insert the ctx prof flattener after the module inliner (PR #107499)

2024-09-06 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/107499

>From 3cd88ecfa05613ce4f8e4d9671ca3e1d4169fe82 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Thu, 5 Sep 2024 12:52:56 -0700
Subject: [PATCH] [ctx_prof] Insert the ctx prof flattener after the module
 inliner

---
 llvm/lib/Passes/PassBuilderPipelines.cpp | 18 +-
 llvm/lib/Transforms/IPO/ModuleInliner.cpp|  6 --
 llvm/test/Analysis/CtxProfAnalysis/inline.ll | 17 +
 llvm/test/Other/opt-hot-cold-split.ll|  2 +-
 4 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 38297dc02b8be6..f9b5f584e00c07 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1017,6 +1017,11 @@ 
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
   IP.EnableDeferral = false;
 
   MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
+  if (!UseCtxProfile.empty()) {
+MPM.addPass(GlobalOptPass());
+MPM.addPass(GlobalDCEPass());
+MPM.addPass(PGOCtxProfFlatteningPass());
+  }
 
   MPM.addPass(createModuleToFunctionPassAdaptor(
   buildFunctionSimplificationPipeline(Level, Phase),
@@ -1744,11 +1749,14 @@ ModulePassManager 
PassBuilder::buildThinLTODefaultPipeline(
 MPM.addPass(GlobalDCEPass());
 return MPM;
   }
-
-  // Add the core simplification pipeline.
-  MPM.addPass(buildModuleSimplificationPipeline(
-  Level, ThinOrFullLTOPhase::ThinLTOPostLink));
-
+  if (!UseCtxProfile.empty()) {
+MPM.addPass(
+buildModuleInlinerPipeline(Level, 
ThinOrFullLTOPhase::ThinLTOPostLink));
+  } else {
+// Add the core simplification pipeline.
+MPM.addPass(buildModuleSimplificationPipeline(
+Level, ThinOrFullLTOPhase::ThinLTOPostLink));
+  }
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(
   Level, ThinOrFullLTOPhase::ThinLTOPostLink));
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp 
b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index b7e4531c8e390d..542c319b880747 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -241,8 +241,10 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
   // the post-inline cleanup and the next DevirtSCCRepeatedPass
   // iteration because the next iteration may not happen and we may
   // miss inlining it.
-  if (tryPromoteCall(*ICB))
-NewCallee = ICB->getCalledFunction();
+  // FIXME: enable for ctxprof.
+  if (!CtxProf)
+if (tryPromoteCall(*ICB))
+  NewCallee = ICB->getCalledFunction();
 }
 if (NewCallee)
   if (!NewCallee->isDeclaration())
diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll 
b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
index 875bc4938653b9..9381418c4e3f12 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
@@ -31,6 +31,23 @@
 ; CHECK-NEXT:%call2 = call i32 @a(i32 %x) #1
 ; CHECK-NEXT:br label %exit
 
+; Make sure the postlink thinlto pipeline is aware of ctxprof
+; RUN: opt -passes='thinlto' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:   %t/module.ll -S -o - | FileCheck %s --check-prefix=PIPELINE
+
+; PIPELINE-LABEL: define i32 @entrypoint
+; PIPELINE-SAME: !prof ![[ENTRYPOINT_COUNT:[0-9]+]]
+; PIPELINE-LABEL: loop.i:
+; PIPELINE: br i1 %cond.i, label %loop.i, label %exit, !prof 
![[LOOP_BW_INL:[0-9]+]]
+; PIPELINE-LABEL: define i32 @a
+; PIPELINE-LABEL: loop:
+; PIPELINE: br i1 %cond, label %loop, label %exit, !prof 
![[LOOP_BW_ORIG:[0-9]+]]
+
+; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10}
+; These are the weights of the inlined @a, where the counters were 2, 100 (2 
for entry, 100 for loop)
+; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2}
+; These are the weights of the un-inlined @a, where the counters were 8, 500 
(8 for entry, 500 for loop)
+; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8}
 
 ;--- module.ll
 define i32 @entrypoint(i32 %x) !guid !0 {
diff --git a/llvm/test/Other/opt-hot-cold-split.ll 
b/llvm/test/Other/opt-hot-cold-split.ll
index 21c713d35bb746..cd290dcc306570 100644
--- a/llvm/test/Other/opt-hot-cold-split.ll
+++ b/llvm/test/Other/opt-hot-cold-split.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto-pre-link' 
-debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s 
-check-prefix=LTO-PRELINK-Os
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true 
-passes='thinlto-pre-link' -debug-pass-manager < %s -o /dev/null 2>&1 | 
FileCheck %s -check-prefix=THINLTO-PRELINK-Os
 ; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto' 
-debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s 
-check-prefix=LTO-POSTLINK-Os
-; 

[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)

2024-09-06 Thread Richard Smith via llvm-branch-commits

zygoloid wrote:

> what the code does is: when we write a on-disk hash table, try to write the 
> imported merged hash table in the same process so that we don't need to read 
> these tables again. However, in line 329 the function will try to omit the 
> data from imported table with the same key which already emitted by the 
> current module file. This is the root cause of the problem.

It's been a while since I looked at this, but as I recall, a fundamental 
assumption of MultiOnDiskHashTable is that if we have a lookup result for a key 
K in the current file, that result supersedes any results from dependency 
files. So lookup won't look in those files if we have a local result (they are 
overridden) and merging doesn't take results from those files either.

So I think the problem probably is that when we form a local result, we need to 
(but presumably don't) add all the imported results with the same key to the 
local result.

https://github.com/llvm/llvm-project/pull/83237
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Add pseudo probe inline tree to YAML profile (PR #107137)

2024-09-06 Thread Amir Ayupov via llvm-branch-commits

aaupov wrote:

Update on profile size reduction:
- What I reported as a baseline (221M) is with pseudo probes but no inline tree 
(produced by BOLT trunk).
- What I reported as new size (404M) is with pseudo probes and inline tree 
encoded for each top-level function (this diff at 
[85c8e9e](https://github.com/llvm/llvm-project/pull/107137/commits/85c8e9e851ca26e853b57504b18a2816cc4a5d67))
- The proper **baseline** is the **profile without pseudo probe information** 
(61M).
- With better pseudo probe encoding, I've reduced the size of profile without 
inline tree to 117M.
- With better inline tree encoding, the total size is 174M (2.85x). Compressed 
is down to 24M (1.2x).

https://github.com/llvm/llvm-project/pull/107137
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)

2024-09-06 Thread David Li via llvm-branch-commits

https://github.com/david-xl approved this pull request.


https://github.com/llvm/llvm-project/pull/107329
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)

2024-09-06 Thread Mircea Trofin via llvm-branch-commits

mtrofin wrote:

### Merge activity

* **Sep 6, 4:40 PM EDT**: @mtrofin started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/107329).


https://github.com/llvm/llvm-project/pull/107329
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowterings (PR #107659)

2024-09-06 Thread Krzysztof Drewniak via llvm-branch-commits

https://github.com/krzysz00 created 
https://github.com/llvm/llvm-project/pull/107659

Update the GPU to NVVM lowerings to correctly propagate range
information on IDs and dimension queries, either from
known_{block,grid}_size attributes or from `upperBound` annotations on
the operations themselves.

>From f50dcd32b4ce02dc5046f8a3df3628b4b2096030 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak 
Date: Fri, 6 Sep 2024 23:45:52 +
Subject: [PATCH] [mlir][GPU] Plumb range information through the NVVM
 lowterings

Update the GPU to NVVM lowerings to correctly propagate range
information on IDs and dimension queries, either from
known_{block,grid}_size attributes or from `upperBound` annotations on
the operations themselves.
---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td   | 283 +-
 .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp|  50 +++-
 .../Dialect/NVVM/LLVMIRToNVVMTranslation.cpp  |   1 +
 .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir |  18 +-
 mlir/test/Target/LLVMIR/Import/nvvmir.ll  |   3 +
 mlir/test/Target/LLVMIR/nvvmir.mlir   |   7 +-
 6 files changed, 207 insertions(+), 155 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td 
b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 709dd922b8fa2f..66ac9f289d233b 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -86,8 +86,8 @@ class NVVM_Op traits = []> :
   LLVM_OpBase {
 }
 
-/// Base class that defines BasicPtxBuilderOpInterface. 
-class NVVM_PTXBuilder_Op traits = 
[DeclareOpInterfaceMethods]> :
   LLVM_OpBase {
 }
@@ -123,52 +123,67 @@ class NVVM_SpecialRegisterOp 
traits = []> :
   let assemblyFormat = "attr-dict `:` type($res)";
 }
 
+class NVVM_SpecialRangeableRegisterOp traits = 
[]> :
+  NVVM_SpecialRegisterOp {
+  let arguments = (ins OptionalAttr:$range);
+  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";
+  let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # 
baseLlvmBuilderCoda;
+  let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # 
baseMlirBuilderCoda;
+
+  // Backwards-compatibility builder for an unspecified range.
+  let builders = [
+OpBuilder<(ins "Type":$resultType), [{
+  build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
+}]>
+  ];
+}
+
 
//===--===//
 // Lane index and range
-def NVVM_LaneIdOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.laneid">;
-def NVVM_WarpSizeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.warpsize">;
+def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">;
+def NVVM_WarpSizeOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">;
 
 
//===--===//
 // Thread index and range
-def NVVM_ThreadIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.x">;
-def NVVM_ThreadIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.y">;
-def NVVM_ThreadIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.z">;
-def NVVM_BlockDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.x">;
-def NVVM_BlockDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.y">;
-def NVVM_BlockDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.z">;
+def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">;
+def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">;
+def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">;
+def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">;
+def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">;
+def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">;
 
 
//===--===//
 // Block index and range
-def NVVM_BlockIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.x">;
-def NVVM_BlockIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.y">;
-def NVVM_BlockIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.z">;
-def NVVM_GridDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.x">;
-def NVVM_GridDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.y">;
-def NVVM_GridDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.z">;
+def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">;
+def NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">;
+def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">;
+def NVVM_GridDimXOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">;
+def NVVM_GridDimYOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">;
+def NVVM_GridDimZOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">;
 
 
//===--===//
 // CTA Cluster index and range
-def NVVM_ClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.x">;

[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)

2024-09-06 Thread Krzysztof Drewniak via llvm-branch-commits

krzysz00 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/107659?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#107659** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/107659?utm_source=stack-comment-icon) 👈
* **#107658** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/107658?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by Graphite. 
[Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


Join @krzysz00 and the rest of your teammates on 
[![Graphite](https://static.graphite.dev/graphite-32x32-black.png) **Graphite**](https://graphite.dev?utm-source=stack-comment)
  

https://github.com/llvm/llvm-project/pull/107659
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)

2024-09-06 Thread Krzysztof Drewniak via llvm-branch-commits

https://github.com/krzysz00 ready_for_review 
https://github.com/llvm/llvm-project/pull/107659
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][GPU] Plumb range information through the NVVM lowerings (PR #107659)

2024-09-06 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-llvm

Author: Krzysztof Drewniak (krzysz00)


Changes

Update the GPU to NVVM lowerings to correctly propagate range
information on IDs and dimension queries, either from
known_{block,grid}_size attributes or from `upperBound` annotations on
the operations themselves.

---

Patch is 37.14 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/107659.diff


6 Files Affected:

- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+149-134) 
- (modified) mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp (+34-16) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp 
(+1) 
- (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+15-3) 
- (modified) mlir/test/Target/LLVMIR/Import/nvvmir.ll (+3) 
- (modified) mlir/test/Target/LLVMIR/nvvmir.mlir (+5-2) 


``diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td 
b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 709dd922b8fa2f..66ac9f289d233b 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -86,8 +86,8 @@ class NVVM_Op traits = []> :
   LLVM_OpBase {
 }
 
-/// Base class that defines BasicPtxBuilderOpInterface. 
-class NVVM_PTXBuilder_Op traits = 
[DeclareOpInterfaceMethods]> :
   LLVM_OpBase {
 }
@@ -123,52 +123,67 @@ class NVVM_SpecialRegisterOp 
traits = []> :
   let assemblyFormat = "attr-dict `:` type($res)";
 }
 
+class NVVM_SpecialRangeableRegisterOp traits = 
[]> :
+  NVVM_SpecialRegisterOp {
+  let arguments = (ins OptionalAttr:$range);
+  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";
+  let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # 
baseLlvmBuilderCoda;
+  let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # 
baseMlirBuilderCoda;
+
+  // Backwards-compatibility builder for an unspecified range.
+  let builders = [
+OpBuilder<(ins "Type":$resultType), [{
+  build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
+}]>
+  ];
+}
+
 
//===--===//
 // Lane index and range
-def NVVM_LaneIdOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.laneid">;
-def NVVM_WarpSizeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.warpsize">;
+def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">;
+def NVVM_WarpSizeOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">;
 
 
//===--===//
 // Thread index and range
-def NVVM_ThreadIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.x">;
-def NVVM_ThreadIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.y">;
-def NVVM_ThreadIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.z">;
-def NVVM_BlockDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.x">;
-def NVVM_BlockDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.y">;
-def NVVM_BlockDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.z">;
+def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">;
+def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">;
+def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">;
+def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">;
+def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">;
+def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">;
 
 
//===--===//
 // Block index and range
-def NVVM_BlockIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.x">;
-def NVVM_BlockIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.y">;
-def NVVM_BlockIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.z">;
-def NVVM_GridDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.x">;
-def NVVM_GridDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.y">;
-def NVVM_GridDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.z">;
+def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">;
+def NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">;
+def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">;
+def NVVM_GridDimXOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">;
+def NVVM_GridDimYOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">;
+def NVVM_GridDimZOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">;
 
 
//===--===//
 // CTA Cluster index and range
-def NVVM_ClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.x">;
-def NVVM_ClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.y">;
-def NVVM_ClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.z">;
-def NVVM_ClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ncl

[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-09-06 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/104252


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-09-06 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner edited 
https://github.com/llvm/llvm-project/pull/104252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-09-06 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/104252


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-09-06 Thread Justin Bogner via llvm-branch-commits


@@ -0,0 +1,102 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+declare void @scalar_user(float)
+declare void @vector_user(<4 x float>)
+
+define void @loadfloats() {
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
217, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0)
+  @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
+  i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.cast_handle
+
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 
68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  %data0 = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
+
+  ; The extract order depends on the users, so don't enforce that here.
+  ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
+  %data0_0 = extractelement <4 x float> %data0, i32 0
+  ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 2
+  %data0_2 = extractelement <4 x float> %data0, i32 2
+
+  ; If all of the uses are extracts, we skip creating a vector
+  ; CHECK-NOT: insertelement
+  call void @scalar_user(float %data0_0)
+  call void @scalar_user(float %data0_2)
+
+  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 
68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
+  %data4 = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
+
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 2
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 3
+  ; CHECK: insertelement <4 x float> undef
+  ; CHECK: insertelement <4 x float>
+  ; CHECK: insertelement <4 x float>
+  ; CHECK: insertelement <4 x float>
+  call void @vector_user(<4 x float> %data4)
+
+  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 
@dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
+  %data12 = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12)
+
+  ; CHECK: [[DATA12_3:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA12]], 3
+  %data12_3 = extractelement <4 x float> %data12, i32 3

bogner wrote:

There is now a test with a non-immediate access (and a bunch of logic so that 
that matches DXC...)

https://github.com/llvm/llvm-project/pull/104252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-09-06 Thread Justin Bogner via llvm-branch-commits


@@ -0,0 +1,102 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+declare void @scalar_user(float)
+declare void @vector_user(<4 x float>)
+
+define void @loadfloats() {
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
217, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0)
+  @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
+  i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.cast_handle
+
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 
68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  %data0 = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
+
+  ; The extract order depends on the users, so don't enforce that here.
+  ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 0
+  %data0_0 = extractelement <4 x float> %data0, i32 0
+  ; CHECK-DAG: extractvalue %dx.types.ResRet.f32 [[DATA0]], 2
+  %data0_2 = extractelement <4 x float> %data0, i32 2
+
+  ; If all of the uses are extracts, we skip creating a vector
+  ; CHECK-NOT: insertelement
+  call void @scalar_user(float %data0_0)
+  call void @scalar_user(float %data0_2)
+
+  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 
68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
+  %data4 = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
+
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 0
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 1
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 2
+  ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA4]], 3
+  ; CHECK: insertelement <4 x float> undef
+  ; CHECK: insertelement <4 x float>
+  ; CHECK: insertelement <4 x float>
+  ; CHECK: insertelement <4 x float>
+  call void @vector_user(<4 x float> %data4)
+
+  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 
@dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
+  %data12 = call <4 x float> @llvm.dx.typedBufferLoad(

bogner wrote:

This changed in the latest ;)

https://github.com/llvm/llvm-project/pull/104252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] wip: [DirectX] Return a struct from llvm.dx.typedBufferLoad (PR #106645)

2024-09-06 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner closed 
https://github.com/llvm/llvm-project/pull/106645
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] wip: [DirectX] Return a struct from llvm.dx.typedBufferLoad (PR #106645)

2024-09-06 Thread Justin Bogner via llvm-branch-commits

bogner wrote:

This isn't the direction we ended up going.

https://github.com/llvm/llvm-project/pull/106645
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] a7a4eb8 - Revert "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form …"

2024-09-06 Thread via llvm-branch-commits

Author: dyung
Date: 2024-09-06T19:52:24-07:00
New Revision: a7a4eb83468683ae6115bf0c153f54cf46eec45e

URL: 
https://github.com/llvm/llvm-project/commit/a7a4eb83468683ae6115bf0c153f54cf46eec45e
DIFF: 
https://github.com/llvm/llvm-project/commit/a7a4eb83468683ae6115bf0c153f54cf46eec45e.diff

LOG: Revert "[LSR] Do not create duplicated PHI nodes while preserving LCSSA 
form …"

This reverts commit 2cb4d1b1bd7bde2724b79976e859684bd3f5c771.

Added: 


Modified: 
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll
llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll
llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll
llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp 
b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index f966ccaa838422..3ca3818938fd26 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2186,12 +2186,6 @@ class LSRInstance {
   /// Induction variables that were generated and inserted by the SCEV 
Expander.
   SmallVector ScalarEvolutionIVs;
 
-  // Inserting instructions in the loop and using them as PHI's input could
-  // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
-  // corresponding incoming block is not loop exiting). So collect all such
-  // instructions to form LCSSA for them later.
-  SmallSetVector InsertedNonLCSSAInsts;
-
   void OptimizeShadowIV();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2282,9 +2276,9 @@ class LSRInstance {
 SmallVectorImpl &DeadInsts) const;
   void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  const Formula &F,
- SmallVectorImpl &DeadInsts);
+ SmallVectorImpl &DeadInsts) const;
   void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
-   SmallVectorImpl &DeadInsts);
+   SmallVectorImpl &DeadInsts) const;
   void ImplementSolution(const SmallVectorImpl &Solution);
 
 public:
@@ -5864,11 +5858,17 @@ Value *LSRInstance::Expand(const LSRUse &LU, const 
LSRFixup &LF,
 /// Helper for Rewrite. PHI nodes are special because the use of their operands
 /// effectively happens in their predecessor blocks, so the expression may need
 /// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
-const LSRFixup &LF, const Formula &F,
-SmallVectorImpl &DeadInsts) {
+void LSRInstance::RewriteForPHI(
+PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+SmallVectorImpl &DeadInsts) const {
   DenseMap Inserted;
 
+  // Inserting instructions in the loop and using them as PHI's input could
+  // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
+  // corresponding incoming block is not loop exiting). So collect all such
+  // instructions to form LCSSA for them later.
+  SmallVector InsertedNonLCSSAInsts;
+
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
 if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
   bool needUpdateFixups = false;
@@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse 
&LU,
 // the inserted value.
 if (auto *I = dyn_cast(FullV))
   if (L->contains(I) && !L->contains(BB))
-InsertedNonLCSSAInsts.insert(I);
+InsertedNonLCSSAInsts.push_back(I);
 
 PN->setIncomingValue(i, FullV);
 Pair.first->second = FullV;
@@ -5983,6 +5983,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse 
&LU,
 }
   }
 }
+
+  formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
 }
 
 /// Emit instructions for the leading candidate expression for this LSRUse 
(this
@@ -5990,7 +5992,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse 
&LU,
 /// expanded value.
 void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
   const Formula &F,
-  SmallVectorImpl &DeadInsts) {
+  SmallVectorImpl &DeadInsts) const {
   // First, find an insertion point that dominates UserInst. For PHI nodes,
   // find the nearest block which dominates all the relevant uses.
   if (PHINode *PN = dyn_cast(LF.UserInst)) {
@@ -6078,9 +6080,6 @@ void LSRInstance::ImplementSolution(
   Changed = true;
 }
 
-  auto InsertedInsts = InsertedNonLCSSAInsts.take