[llvm-branch-commits] [clang] 392b77d - [clang-format] Fix misannotations of `<` in ternary expressions (#100980)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Owen Pan
Date: 2024-07-30T10:40:15+02:00
New Revision: 392b77d58a91049a155f3390ec16941a848aa766

URL: 
https://github.com/llvm/llvm-project/commit/392b77d58a91049a155f3390ec16941a848aa766
DIFF: 
https://github.com/llvm/llvm-project/commit/392b77d58a91049a155f3390ec16941a848aa766.diff

LOG: [clang-format] Fix misannotations of `<` in ternary expressions (#100980)

Fixes #100300.

(cherry picked from commit 73c961a3345c697f40e2148318f34f5f347701c1)

Added: 


Modified: 
clang/lib/Format/TokenAnnotator.cpp
clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 




diff  --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index 5c11f3cb1a874..63c8699fd62d1 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -154,8 +154,8 @@ class AnnotatingParser {
 if (NonTemplateLess.count(CurrentToken->Previous) > 0)
   return false;
 
-const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
-if (Previous.Previous) {
+if (const auto &Previous = *CurrentToken->Previous; // The '<'.
+Previous.Previous) {
   if (Previous.Previous->Tok.isLiteral())
 return false;
   if (Previous.Previous->is(tok::r_brace))
@@ -175,11 +175,13 @@ class AnnotatingParser {
 FormatToken *Left = CurrentToken->Previous;
 Left->ParentBracket = Contexts.back().ContextKind;
 ScopedContextCreator ContextCreator(*this, tok::less, 12);
-
 Contexts.back().IsExpression = false;
+
+const auto *BeforeLess = Left->Previous;
+
 // If there's a template keyword before the opening angle bracket, this is 
a
 // template parameter, not an argument.
-if (Left->Previous && Left->Previous->isNot(tok::kw_template))
+if (BeforeLess && BeforeLess->isNot(tok::kw_template))
   Contexts.back().ContextType = Context::TemplateArgument;
 
 if (Style.Language == FormatStyle::LK_Java &&
@@ -187,19 +189,24 @@ class AnnotatingParser {
   next();
 }
 
-while (CurrentToken) {
+for (bool SeenTernaryOperator = false; CurrentToken;) {
+  const bool InExpr = Contexts[Contexts.size() - 2].IsExpression;
   if (CurrentToken->is(tok::greater)) {
+const auto *Next = CurrentToken->Next;
 // Try to do a better job at looking for ">>" within the condition of
 // a statement. Conservatively insert spaces between consecutive ">"
 // tokens to prevent splitting right bitshift operators and potentially
 // altering program semantics. This check is overly conservative and
 // will prevent spaces from being inserted in select nested template
 // parameter cases, but should not alter program semantics.
-if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
+if (Next && Next->is(tok::greater) &&
 Left->ParentBracket != tok::less &&
 CurrentToken->getStartOfNonWhitespace() ==
-CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
--1)) {
+Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
+  return false;
+}
+if (InExpr && SeenTernaryOperator &&
+(!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
   return false;
 }
 Left->MatchingParen = CurrentToken;
@@ -210,14 +217,14 @@ class AnnotatingParser {
 //   msg: < item: data >
 // In TT_TextProto, map does not occur.
 if (Style.Language == FormatStyle::LK_TextProto ||
-(Style.Language == FormatStyle::LK_Proto && Left->Previous &&
- Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
+(Style.Language == FormatStyle::LK_Proto && BeforeLess &&
+ BeforeLess->isOneOf(TT_SelectorName, TT_DictLiteral))) {
   CurrentToken->setType(TT_DictLiteral);
 } else {
   CurrentToken->setType(TT_TemplateCloser);
   CurrentToken->Tok.setLength(1);
 }
-if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
+if (Next && Next->Tok.isLiteral())
   return false;
 next();
 return true;
@@ -229,18 +236,21 @@ class AnnotatingParser {
   }
   if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace))
 return false;
+  const auto &Prev = *CurrentToken->Previous;
   // If a && or || is found and interpreted as a binary operator, this set
   // of angles is likely part of something like "a < b && c > d". If the
   // angles are inside an expression, the ||/&& might also be a binary
   // operator that was misinterpreted because we are parsing template
   // parameters.
   // FIXME: This is getting out of hand, write a decent parser.
-  if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
-  CurrentToken->Previo

[llvm-branch-commits] [llvm] 63d44ea - [NVPTX] Fix DwarfFrameBase construction (#101000)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Nikita Popov
Date: 2024-07-30T12:01:33+02:00
New Revision: 63d44ea32a28ed49e99572ca46b03eb92706433e

URL: 
https://github.com/llvm/llvm-project/commit/63d44ea32a28ed49e99572ca46b03eb92706433e
DIFF: 
https://github.com/llvm/llvm-project/commit/63d44ea32a28ed49e99572ca46b03eb92706433e.diff

LOG: [NVPTX] Fix DwarfFrameBase construction (#101000)

The `{0}` here was initializing the first union member `Register`,
rather than the union member used by CFA, which is `Offset`. Prior to
https://github.com/llvm/llvm-project/pull/99263 this was harmless, but
now the two members have different layouts, leading to test failures on
some platforms (at least i686 and s390x).

(cherry picked from commit 842a332f11f53c698fa0560505e533ecdca28876)

Added: 


Modified: 
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp 
b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 10ae81e0460e3..9abe0e3186f20 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -93,5 +93,8 @@ MachineBasicBlock::iterator 
NVPTXFrameLowering::eliminateCallFramePseudoInstr(
 
 TargetFrameLowering::DwarfFrameBase
 NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
-  return {DwarfFrameBase::CFA, {0}};
+  DwarfFrameBase FrameBase;
+  FrameBase.Kind = DwarfFrameBase::CFA;
+  FrameBase.Location.Offset = 0;
+  return FrameBase;
 }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 146fc62 - [clang][ARM64EC] Add support for hybrid_patchable attribute. (#99478)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Jacek Caban
Date: 2024-07-30T12:03:34+02:00
New Revision: 146fc62f508ba12026f712d9576c80ea95fc6747

URL: 
https://github.com/llvm/llvm-project/commit/146fc62f508ba12026f712d9576c80ea95fc6747
DIFF: 
https://github.com/llvm/llvm-project/commit/146fc62f508ba12026f712d9576c80ea95fc6747.diff

LOG: [clang][ARM64EC] Add support for hybrid_patchable attribute. (#99478)

(cherry picked from commit ea98dc8b8f508b8393651992830e5e51d3876728)

Added: 
clang/test/CodeGen/arm64ec-hybrid-patchable.c

Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/CodeGen/CodeGenFunction.cpp
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/test/Misc/pragma-attribute-supported-attributes-list.test

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 71d615553c613..610061406a1ec 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -629,6 +629,9 @@ Attribute Changes in Clang
   The attributes declare constraints about a function's behavior pertaining to 
blocking and
   heap memory allocation.
 
+- The ``hybrid_patchable`` attribute is now supported on ARM64EC targets. It 
can be used to specify
+  that a function requires an additional x86-64 thunk, which may be patched at 
runtime.
+
 Improvements to Clang's diagnostics
 ---
 - Clang now emits an error instead of a warning for ``-Wundefined-internal``

diff  --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 4825979a974d2..46d0a66d59c37 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -477,6 +477,9 @@ def TargetELF : TargetSpec {
 def TargetELFOrMachO : TargetSpec {
   let ObjectFormats = ["ELF", "MachO"];
 }
+def TargetWindowsArm64EC : TargetSpec {
+  let CustomCode = [{ Target.getTriple().isWindowsArm64EC() }];
+}
 
 def TargetSupportsInitPriority : TargetSpec {
   let CustomCode = [{ !Target.getTriple().isOSzOS() }];
@@ -4027,6 +4030,12 @@ def SelectAny : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def HybridPatchable : InheritableAttr, 
TargetSpecificAttr {
+  let Spellings = [Declspec<"hybrid_patchable">, Clang<"hybrid_patchable">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [HybridPatchableDocs];
+}
+
 def Thread : Attr {
   let Spellings = [Declspec<"thread">];
   let LangOpts = [MicrosoftExt];

diff  --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 99738812c8157..b5d468eb5ec95 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5985,6 +5985,16 @@ For more information see
 or `msvc documentation `_.
 }]; }
 
+def HybridPatchableDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``hybrid_patchable`` attribute declares an ARM64EC function with an 
additional
+x86-64 thunk, which may be patched at runtime.
+
+For more information see
+`ARM64EC ABI documentation 
`_.
+}]; }
+
 def WebAssemblyExportNameDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eb0506e71fe3f..95ce4166ceb66 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3677,6 +3677,9 @@ def err_attribute_weak_static : Error<
   "weak declaration cannot have internal linkage">;
 def err_attribute_selectany_non_extern_data : Error<
   "'selectany' can only be applied to data items with external linkage">;
+def warn_attribute_hybrid_patchable_non_extern : Warning<
+  "'hybrid_patchable' is ignored on functions without external linkage">,
+  InGroup;
 def err_declspec_thread_on_thread_variable : Error<
   "'__declspec(thread)' applied to variable that already has a "
   "thread-local storage specifier">;

diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp 
b/clang/lib/CodeGen/CodeGenFunction.cpp
index d6078696a7d91..af201554898f3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -991,6 +991,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType 
RetTy,
   if (D && D->hasAttr())
 Fn->addFnAttr(llvm::Attribute::NoProfile);
 
+  if (D && D->hasAttr())
+Fn->addFnAttr(llvm::Attribute::HybridPatchable);
+
   if (D) {
 // Function attributes take precedence over command line flags.
 if (auto *A = D->getAttr()) {

diff  --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index bb25a0b3a45ae..f60cc78be4f92 100644
--- a/clang/lib/Sema/SemaDecl.cpp
++

[llvm-branch-commits] [compiler-rt] 67f509a - [sanitizer_common][test] Always skip select allocator tests on SPARC V9 (#100530)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Rainer Orth
Date: 2024-07-30T12:07:31+02:00
New Revision: 67f509a93be67aab643ab2ca333a2f8149f49be2

URL: 
https://github.com/llvm/llvm-project/commit/67f509a93be67aab643ab2ca333a2f8149f49be2
DIFF: 
https://github.com/llvm/llvm-project/commit/67f509a93be67aab643ab2ca333a2f8149f49be2.diff

LOG: [sanitizer_common][test] Always skip select allocator tests on SPARC V9 
(#100530)

Two allocator tests `FAIL` on Linux/sparc64:
```
  SanitizerCommon-Unit :: 
./Sanitizer-sparcv9-Test/SanitizerCommon/CombinedAllocator32Compact
  SanitizerCommon-Unit :: 
./Sanitizer-sparcv9-Test/SanitizerCommon/SizeClassAllocator32Iteration
```
The failure mode is the same on Solaris/sparcv9, where those tests are
already disabled since 0f69cbe2694a4740e6db5b99bd81a26746403072.
Therefore, this patch skips them on SPARC in general.

Tested on `sparc64-unknown-linux-gnu` and `sparcv9-sun-solaris2.11`.

(cherry picked from commit 3d149123f46cee5ac8d961c6bf77c5c566f1e410)

Added: 


Modified: 
compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp

Removed: 




diff  --git 
a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp 
b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
index 1a1ccce82d259..601897a64f051 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
@@ -28,12 +28,13 @@
 
 using namespace __sanitizer;
 
-#if SANITIZER_SOLARIS && defined(__sparcv9)
+#if defined(__sparcv9)
 // FIXME: These tests probably fail because Solaris/sparcv9 uses the full
-// 64-bit address space.  Needs more investigation
-#define SKIP_ON_SOLARIS_SPARCV9(x) DISABLED_##x
+// 64-bit address space.  Same on Linux/sparc64, so probably a general SPARC
+// issue.  Needs more investigation
+#  define SKIP_ON_SPARCV9(x) DISABLED_##x
 #else
-#define SKIP_ON_SOLARIS_SPARCV9(x) x
+#  define SKIP_ON_SPARCV9(x) x
 #endif
 
 // On 64-bit systems with small virtual address spaces (e.g. 39-bit) we can't
@@ -781,7 +782,7 @@ TEST(SanitizerCommon, CombinedAllocator64VeryCompact) {
 }
 #endif
 
-TEST(SanitizerCommon, SKIP_ON_SOLARIS_SPARCV9(CombinedAllocator32Compact)) {
+TEST(SanitizerCommon, SKIP_ON_SPARCV9(CombinedAllocator32Compact)) {
   TestCombinedAllocator();
 }
 
@@ -1028,7 +1029,7 @@ TEST(SanitizerCommon, 
SizeClassAllocator64DynamicPremappedIteration) {
 #endif
 #endif
 
-TEST(SanitizerCommon, SKIP_ON_SOLARIS_SPARCV9(SizeClassAllocator32Iteration)) {
+TEST(SanitizerCommon, SKIP_ON_SPARCV9(SizeClassAllocator32Iteration)) {
   TestSizeClassAllocatorIteration();
 }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] release/19.x: [libunwind][AIX] Fix the wrong traceback from signal handler (#101069) (PR #101182)

2024-07-30 Thread Saleem Abdulrasool via llvm-branch-commits

https://github.com/compnerd approved this pull request.

This is good to include on the release branch; it should be safe, and it fixes 
the behavior on AIX.

https://github.com/llvm/llvm-project/pull/101182
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][spaceship] Marks P1614 as complete. (#99375) (PR #100770)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100770

>From 3389604cd95d4d12eb975f4057ed21828f5b53ce Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Thu, 25 Jul 2024 18:37:36 +0200
Subject: [PATCH] [libc++][spaceship] Marks P1614 as complete. (#99375)

Implements parts of:
- P1902R1 Missing feature-test macros 2017-2019

Completes:
- P1614R2 The Mothership has Landed

Fixes #100018
---
 libcxx/docs/FeatureTestMacroTable.rst  |  2 +-
 libcxx/docs/ReleaseNotes/19.rst|  1 +
 libcxx/docs/Status/Cxx20.rst   |  1 +
 libcxx/docs/Status/Cxx20Papers.csv |  2 +-
 libcxx/docs/Status/SpaceshipPapers.csv |  2 +-
 libcxx/include/version |  4 ++--
 .../compare.version.compile.pass.cpp   | 14 +++---
 .../version.version.compile.pass.cpp   | 14 +++---
 .../generate_feature_test_macro_components.py  |  3 +--
 9 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index 262da3f8937d2..a1506e115fe70 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -290,7 +290,7 @@ Status
 -- 
-
 ``__cpp_lib_syncbuf``  ``201803L``
 -- 
-
-``__cpp_lib_three_way_comparison`` ``201711L``
+``__cpp_lib_three_way_comparison`` ``201907L``
 -- 
-
 ``__cpp_lib_to_address``   ``201711L``
 -- 
-
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index c2c2bfbed4ac3..92896f6b0d11e 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -53,6 +53,7 @@ Implemented Papers
 --
 
 - P1132R8 - ``out_ptr`` - a scalable output pointer abstraction
+- P1614R2 - The Mothership has Landed
 - P2637R3 - Member ``visit``
 - P2652R2 - Disallow User Specialization of ``allocator_traits``
 - P2819R2 - Add ``tuple`` protocol to ``complex``
diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst
index c00d6fb237286..b76e30fbb3712 100644
--- a/libcxx/docs/Status/Cxx20.rst
+++ b/libcxx/docs/Status/Cxx20.rst
@@ -48,6 +48,7 @@ Paper Status
.. [#note-P0883.1] P0883: shared_ptr and floating-point changes weren't 
applied as they themselves aren't implemented yet.
.. [#note-P0883.2] P0883: ``ATOMIC_FLAG_INIT`` was marked deprecated in 
version 14.0, but was undeprecated with the implementation of LWG3659 in 
version 15.0.
.. [#note-P0660] P0660: The paper is implemented but the features are 
experimental and can be enabled via ``-fexperimental-library``.
+   .. [#note-P1614] P1614: ``std::strong_order(long double, long double)`` is 
partly implemented.
.. [#note-P0355] P0355: The implementation status is:
 
   * ``Calendars`` mostly done in Clang 7
diff --git a/libcxx/docs/Status/Cxx20Papers.csv 
b/libcxx/docs/Status/Cxx20Papers.csv
index 34fc5586f74d9..4015d7ad48b06 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -123,7 +123,7 @@
 "`P1522R1 `__","LWG","Iterator Difference Type and 
Integer Overflow","Cologne","|Complete|","15.0","|ranges|"
 "`P1523R1 `__","LWG","Views and Size 
Types","Cologne","|Complete|","15.0","|ranges|"
 "`P1612R1 `__","LWG","Relocate Endian's 
Specification","Cologne","|Complete|","10.0"
-"`P1614R2 `__","LWG","The Mothership has 
Landed","Cologne","|In Progress|",""
+"`P1614R2 `__","LWG","The Mothership has 
Landed","Cologne","|Complete| [#note-P1614]_","19.0"
 "`P1638R1 `__","LWG","basic_istream_view::iterator 
should not be copyable","Cologne","|Complete|","16.0","|ranges|"
 "`P1643R1 `__","LWG","Add wait/notify to 
atomic_ref","Cologne","|Complete|","19.0"
 "`P1644R0 `__","LWG","Add wait/notify to 
atomic","Cologne","",""
diff --git a/libcxx/docs/Status/SpaceshipPapers.csv 
b/libcxx/docs/Status/SpaceshipPapers.csv
index 39e1f968c1754..1ab64a9caf86a 100644
--- a/libcxx/docs/Status/SpaceshipPapers.csv
+++ b/libcxx/docs/Status/SpaceshipPapers.csv
@@ -1,5 +1,5 @@
 "Number","Name","Status","First released version"
-`P1614R2 `_,The Mothership has Landed,|In Progress|,
+`P1614R2 `_,The Mothership has 
Landed,|Complete|,19.0
 `P2404R3 `_,"Relaxing 
``equality_comparable_with``

[llvm-branch-commits] [libcxx] 3389604 - [libc++][spaceship] Marks P1614 as complete. (#99375)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Mark de Wever
Date: 2024-07-30T17:06:54+02:00
New Revision: 3389604cd95d4d12eb975f4057ed21828f5b53ce

URL: 
https://github.com/llvm/llvm-project/commit/3389604cd95d4d12eb975f4057ed21828f5b53ce
DIFF: 
https://github.com/llvm/llvm-project/commit/3389604cd95d4d12eb975f4057ed21828f5b53ce.diff

LOG: [libc++][spaceship] Marks P1614 as complete. (#99375)

Implements parts of:
- P1902R1 Missing feature-test macros 2017-2019

Completes:
- P1614R2 The Mothership has Landed

Fixes #100018

Added: 


Modified: 
libcxx/docs/FeatureTestMacroTable.rst
libcxx/docs/ReleaseNotes/19.rst
libcxx/docs/Status/Cxx20.rst
libcxx/docs/Status/Cxx20Papers.csv
libcxx/docs/Status/SpaceshipPapers.csv
libcxx/include/version

libcxx/test/std/language.support/support.limits/support.limits.general/compare.version.compile.pass.cpp

libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
libcxx/utils/generate_feature_test_macro_components.py

Removed: 




diff  --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index 262da3f8937d2..a1506e115fe70 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -290,7 +290,7 @@ Status
 -- 
-
 ``__cpp_lib_syncbuf``  ``201803L``
 -- 
-
-``__cpp_lib_three_way_comparison`` ``201711L``
+``__cpp_lib_three_way_comparison`` ``201907L``
 -- 
-
 ``__cpp_lib_to_address``   ``201711L``
 -- 
-

diff  --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index c2c2bfbed4ac3..92896f6b0d11e 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -53,6 +53,7 @@ Implemented Papers
 --
 
 - P1132R8 - ``out_ptr`` - a scalable output pointer abstraction
+- P1614R2 - The Mothership has Landed
 - P2637R3 - Member ``visit``
 - P2652R2 - Disallow User Specialization of ``allocator_traits``
 - P2819R2 - Add ``tuple`` protocol to ``complex``

diff  --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst
index c00d6fb237286..b76e30fbb3712 100644
--- a/libcxx/docs/Status/Cxx20.rst
+++ b/libcxx/docs/Status/Cxx20.rst
@@ -48,6 +48,7 @@ Paper Status
.. [#note-P0883.1] P0883: shared_ptr and floating-point changes weren't 
applied as they themselves aren't implemented yet.
.. [#note-P0883.2] P0883: ``ATOMIC_FLAG_INIT`` was marked deprecated in 
version 14.0, but was undeprecated with the implementation of LWG3659 in 
version 15.0.
.. [#note-P0660] P0660: The paper is implemented but the features are 
experimental and can be enabled via ``-fexperimental-library``.
+   .. [#note-P1614] P1614: ``std::strong_order(long double, long double)`` is 
partly implemented.
.. [#note-P0355] P0355: The implementation status is:
 
   * ``Calendars`` mostly done in Clang 7

diff  --git a/libcxx/docs/Status/Cxx20Papers.csv 
b/libcxx/docs/Status/Cxx20Papers.csv
index 34fc5586f74d9..4015d7ad48b06 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -123,7 +123,7 @@
 "`P1522R1 `__","LWG","Iterator Difference Type and 
Integer Overflow","Cologne","|Complete|","15.0","|ranges|"
 "`P1523R1 `__","LWG","Views and Size 
Types","Cologne","|Complete|","15.0","|ranges|"
 "`P1612R1 `__","LWG","Relocate Endian's 
Specification","Cologne","|Complete|","10.0"
-"`P1614R2 `__","LWG","The Mothership has 
Landed","Cologne","|In Progress|",""
+"`P1614R2 `__","LWG","The Mothership has 
Landed","Cologne","|Complete| [#note-P1614]_","19.0"
 "`P1638R1 `__","LWG","basic_istream_view::iterator 
should not be copyable","Cologne","|Complete|","16.0","|ranges|"
 "`P1643R1 `__","LWG","Add wait/notify to 
atomic_ref","Cologne","|Complete|","19.0"
 "`P1644R0 `__","LWG","Add wait/notify to 
atomic","Cologne","",""

diff  --git a/libcxx/docs/Status/SpaceshipPapers.csv 
b/libcxx/docs/Status/SpaceshipPapers.csv
index 39e1f968c1754..1ab64a9caf86a 100644
--- a/libcxx/docs/Status/SpaceshipPapers.csv
+++ b/libcxx/docs/Status/SpaceshipPapers.csv
@@ -1,5 +1,5 @@
 "Number","Name","Status","First released version"
-`P1614R2 `_,The Mothership has Landed,|In Progress|,
+`P1614R2 `_,The Mothership has 
Landed,|Complete|,

[llvm-branch-commits] [libcxx] [libc++][spaceship] Marks P1614 as complete. (#99375) (PR #100770)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/100770
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][spaceship] Marks P1614 as complete. (#99375) (PR #100770)

2024-07-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@mordante (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/100770
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839) (PR #101071)

2024-07-30 Thread Quentin Colombet via llvm-branch-commits

https://github.com/qcolombet approved this pull request.

Thanks for the back port.

LGTM

https://github.com/llvm/llvm-project/pull/101071
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839) (PR #101071)

2024-07-30 Thread Stefan Pintilie via llvm-branch-commits

https://github.com/stefanp-ibm updated 
https://github.com/llvm/llvm-project/pull/101071

>From 47c4d91cceb783c7b3e561c68150fac32a4a27c1 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie 
Date: Tue, 23 Jul 2024 21:59:27 -0400
Subject: [PATCH] [RegisterCoalescer] Fix SUBREG_TO_REG handling in the
 RegisterCoalescer. (#96839)

The issue with the handling of the SUBREG_TO_REG is that we don't join
the subranges correctly when we join live ranges across the
SUBREG_TO_REG. For example when joining across this:
```
32B   %2:gr64_nosp = SUBREG_TO_REG 0, %0:gr32, %subreg.sub_32bit
```
we want to join these live ranges:
```
%0 [16r,32r:0) 0@16r  weight:0.00e+00
%2 [32r,112r:0) 0@32r  weight:0.00e+00
```
Before the fix the range for the resulting merged `%2` is:
```
%2 [16r,112r:0) 0@16r  weight:0.00e+00
```
After the fix it is now this:
```
%2 [16r,112r:0) 0@16r  L000F [16r,112r:0) 0@16r  weight:0.00e+00
```

Two tests are added with this fix. The X86 test fails without the patch.
The PowerPC test passes with and without the patch but is added as a way
to track possible future failures when register classes are changed in a
future patch.

(cherry picked from commit 26fa399012da00fbf806f50ad72a3b5f0ee63eab)
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp|  7 
 .../test/CodeGen/PowerPC/subreg-coalescer.mir | 34 +
 llvm/test/CodeGen/X86/subreg-fail.mir | 37 +++
 3 files changed, 78 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
 create mode 100644 llvm/test/CodeGen/X86/subreg-fail.mir

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp 
b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 1c35a88b4dc4a..043ea20191487 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
+LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
+   CP.getNewRC()->getLaneMask(), LHS);
+mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), 
CP,
+  CP.getDstIdx());
+LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir 
b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
new file mode 100644
index 0..39eab1f562e71
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s \
+# RUN:   -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on PowerPC. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name: check_subregs
+alignment:   16
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $x3
+
+; CHECK-LABEL: name: check_subregs
+; CHECK: liveins: $x3
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead 
[[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
+; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], 
implicit $rm
+; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
+; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
+; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
+%0:g8rc_and_g8rc_nox0 = COPY $x3
+%1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+%3:f4rc = FRSP killed %1, implicit $rm
+%4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
+%5:vrrc = XVCVDPSP killed %4, implicit $rm
+$v2 = COPY %5
+BLR8 implicit $lr8, implicit $rm, implicit $v2
+...
+
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir 
b/llvm/test/CodeGen/X86/subreg-fail.mir
new file mode 100644
index 0..c8146f099b814
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple x86_64-unknown-unknown %s \
+# RUN:   -verify-coalescing -enable-subreg-liveness \
+# RUN:   --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on X86. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name:test1
+alignment:   16
+tracksRegLiveness: true
+body: |
+  bb.0:
+; CHECK-LABEL: name: test1
+; 

[llvm-branch-commits] [clang] [clang][FMV][AArch64] Improve streaming mode compatibility (PR #101007)

2024-07-30 Thread Alexandros Lamprineas via llvm-branch-commits

labrinea wrote:

> We used to diagnose streaming functions that attempted to be multiversioned

but we did not diagnose a streaming caller of a versioned function, and since 
streaming compatible versions were not diagnosed either it was possible to mix 
calling conventions between versions:

- Streaming function calls streaming compatible version: 
https://godbolt.org/z/E6vnWd33x. Note that if bar._sve() was declared 
non-streaming, then the call would be surrounded by smstop/smstart: 
https://godbolt.org/z/Wdf8oWoxh.
- In a separate translation unit, another version is defined (the default) with 
a different calling convention (non streaming). With llvm18 this compiles just 
fine: https://godbolt.org/z/GxzEGr6Yx
- The same as before in llvm trunk yields a semantic error: 
https://godbolt.org/z/fzbb5hTGb

https://github.com/llvm/llvm-project/pull/101007
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [CodeGen][ARM64EC] Use alias symbol for exporting hybrid_patchable functions. (#100872) (PR #101178)

2024-07-30 Thread Eli Friedman via llvm-branch-commits

efriedma-quic wrote:

LGTM

https://github.com/llvm/llvm-project/pull/101178
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)

2024-07-30 Thread Amir Ayupov via llvm-branch-commits


@@ -223,6 +223,22 @@ static cl::opt TopCalledLimit(
  "functions section"),
 cl::init(100), cl::Hidden, cl::cat(BoltCategory));
 
+// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
+static cl::opt ShowDensity("show-density", cl::init(false),

aaupov wrote:

Let's keep it disabled by default until we find good threshold value.

https://github.com/llvm/llvm-project/pull/101094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits


@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl 
&Clones,
   for (DbgVariableRecord *DVR : DbgVariableRecords)
 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
false /*UseEntryValue*/);
-  return Shape;
-}
 
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
-  if (Shape.ABI != coro::ABI::Switch) {
-for (auto *End : Shape.CoroEnds) {
-  replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
-}
-  } else {
-for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
-  auto &Context = End->getContext();
-  End->replaceAllUsesWith(ConstantInt::getFalse(Context));
-  End->eraseFromParent();
-}
+  removeCoroEndsFromRampFunction(Shape);
+
+  if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) {

yuxuanchen1997 wrote:

> Yes, I would be in favor of adding a second attribute. What do you & others 
> think? CC @ChuanqiXu9

I can put up another PR for that. Wdyt?

https://github.com/llvm/llvm-project/pull/99283
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99283

>From f1b66d6376ad890119449b93e20e9cdc36c770a6 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] Implement noalloc in CoroSplit

---
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   4 +
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 121 ++
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  27 
 llvm/test/Transforms/Coroutines/ArgAddr.ll|   2 +-
 .../Transforms/Coroutines/coro-alloca-07.ll   |   2 +-
 .../coro-alloca-loop-carried-address.ll   |   2 +-
 .../Coroutines/coro-lifetime-end.ll   |   6 +-
 .../Coroutines/coro-spill-after-phi.ll|   2 +-
 .../Transforms/Coroutines/coro-split-00.ll|   7 +
 9 files changed, 140 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h 
b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5716fd0ea4ab9..d91cccd99a703 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M,
 const std::initializer_list);
 void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
 
+void suppressCoroAllocs(CoroIdInst *CoroId);
+void suppressCoroAllocs(LLVMContext &Context,
+ArrayRef CoroAllocs);
+
 /// Attempts to rewrite the location operand of debug intrinsics in terms of
 /// the coroutine frame pointer, folding pointer offsets into the DIExpression
 /// of the intrinsic.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9e4da5f8ca961..f78c03e9687c9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
@@ -1179,6 +1180,14 @@ static void 
updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
+static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
+  // In the same function all coro.sizes should have the same result type.
+  auto *SizeIntrin = Shape.CoroSizes.back();
+  Module *M = SizeIntrin->getModule();
+  const DataLayout &DL = M->getDataLayout();
+  return DL.getTypeAllocSize(Shape.FrameTy);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
 updateAsyncFuncPointerContextSize(Shape);
@@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape 
&Shape) {
 
   // In the same function all coro.sizes should have the same result type.
   auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+  auto *SizeConstant =
+  ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));
 
   for (CoroSizeInst *CS : Shape.CoroSizes) {
 CS->replaceAllUsesWith(SizeConstant);
@@ -1455,6 +1462,62 @@ struct SwitchCoroutineSplitter {
 setCoroInfo(F, Shape, Clones);
   }
 
+  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
+SmallVectorImpl &Clones) {
+auto *OrigFnTy = F.getFunctionType();
+auto OldParams = OrigFnTy->params();
+
+SmallVector NewParams;
+NewParams.reserve(OldParams.size() + 1);
+NewParams.append(OldParams.begin(), OldParams.end());
+NewParams.push_back(PointerType::getUnqual(Shape.FrameTy));
+
+auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
+  OrigFnTy->isVarArg());
+Function *NoAllocF =
+Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");
+ValueToValueMapTy VMap;
+unsigned int Idx = 0;
+for (const auto &I : F.args()) {
+  VMap[&I] = NoAllocF->getArg(Idx++);
+}
+SmallVector Returns;
+CloneFunctionInto(NoAllocF, &F, VMap,
+  CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+if (Shape.CoroBegin) {
+  auto *NewCoroBegin =
+  cast_if_present(VMap[Shape.CoroBegin]);
+  auto *NewCoroId = cast(NewCoroBegin->getId());
+  coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
+  coro::suppressCoroAllocs(NewCoroId);
+  NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx));
+  NewCoroBegin->eraseFromParent();
+}
+
+Module *M = F.getParent();
+M->getFunctionList().insert(M->end(), NoAllocF);
+
+removeUnreachableBlocks(*NoAllocF);
+auto NewAttrs = NoAllocF->getAttributes();
+// We just appended the

[llvm-branch-commits] [llvm] [LLVM][Coroutines] Transform "coro_must_elide" calls to switch ABI coroutines to the `noalloc` variant (PR #99285)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99285

>From 3421b3f7ba207c864d8e71ad9adf5bebc91f3f2d Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] add CoroAnnotationElidePass

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D60250514
---
 .../Coroutines/CoroAnnotationElide.h  |  34 +
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |  10 +-
 llvm/lib/Passes/PassRegistry.def  |   1 +
 llvm/lib/Transforms/Coroutines/CMakeLists.txt |   1 +
 .../Coroutines/CoroAnnotationElide.cpp| 136 ++
 llvm/test/Other/new-pm-defaults.ll|   1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |   1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |   1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |   1 +
 .../Coroutines/coro-transform-must-elide.ll   |  76 ++
 11 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 
llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
 create mode 100644 llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
 create mode 100644 llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll

diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h 
b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
new file mode 100644
index 0..dae1cc0c689a3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
@@ -0,0 +1,34 @@
+//===- CoroAnnotationElide.h - Optimizing a coro_must_elide call 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// \file
+// This pass transforms all Call or Invoke instructions that are annotated
+// "coro_must_elide" to call the `.noalloc` variant of coroutine instead.
+//
+//===--===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct CoroAnnotationElidePass : PassInfoMixin {
+  CoroAnnotationElidePass() {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+  static bool isRequired() { return false; }
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5dbb1e2f49871..8349ea728520f 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -135,6 +135,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a6118726945e8..eb26ecdb9c4a7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
@@ -979,8 +980,10 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
   RequireAnalysisPass()));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+MainCGPipeline.addPass(CoroAnnotationElidePass());
+  }
 
   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
@@ -1022,9 +1025,12 @@ 
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
   buildFunctionSimplificationPipeline(Level, Phase),
   PTO.EagerlyInvalidateAnalyses));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MPM.addPass(createM

[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99283

>From 2c3902bbcc80bbd546b3a47c7d20a0fcf0c2c127 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] Implement noalloc in CoroSplit

---
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   4 +
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 121 ++
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  27 
 llvm/test/Transforms/Coroutines/ArgAddr.ll|   2 +-
 .../Transforms/Coroutines/coro-alloca-07.ll   |   2 +-
 .../coro-alloca-loop-carried-address.ll   |   2 +-
 .../Coroutines/coro-lifetime-end.ll   |   6 +-
 .../Coroutines/coro-spill-after-phi.ll|   2 +-
 .../Transforms/Coroutines/coro-split-00.ll|   7 +
 9 files changed, 140 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h 
b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5716fd0ea4ab9..d91cccd99a703 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M,
 const std::initializer_list);
 void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
 
+void suppressCoroAllocs(CoroIdInst *CoroId);
+void suppressCoroAllocs(LLVMContext &Context,
+ArrayRef CoroAllocs);
+
 /// Attempts to rewrite the location operand of debug intrinsics in terms of
 /// the coroutine frame pointer, folding pointer offsets into the DIExpression
 /// of the intrinsic.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9e4da5f8ca961..f78c03e9687c9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
@@ -1179,6 +1180,14 @@ static void 
updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
+static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
+  // In the same function all coro.sizes should have the same result type.
+  auto *SizeIntrin = Shape.CoroSizes.back();
+  Module *M = SizeIntrin->getModule();
+  const DataLayout &DL = M->getDataLayout();
+  return DL.getTypeAllocSize(Shape.FrameTy);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
 updateAsyncFuncPointerContextSize(Shape);
@@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape 
&Shape) {
 
   // In the same function all coro.sizes should have the same result type.
   auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+  auto *SizeConstant =
+  ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));
 
   for (CoroSizeInst *CS : Shape.CoroSizes) {
 CS->replaceAllUsesWith(SizeConstant);
@@ -1455,6 +1462,62 @@ struct SwitchCoroutineSplitter {
 setCoroInfo(F, Shape, Clones);
   }
 
+  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
+SmallVectorImpl &Clones) {
+auto *OrigFnTy = F.getFunctionType();
+auto OldParams = OrigFnTy->params();
+
+SmallVector NewParams;
+NewParams.reserve(OldParams.size() + 1);
+NewParams.append(OldParams.begin(), OldParams.end());
+NewParams.push_back(PointerType::getUnqual(Shape.FrameTy));
+
+auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
+  OrigFnTy->isVarArg());
+Function *NoAllocF =
+Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");
+ValueToValueMapTy VMap;
+unsigned int Idx = 0;
+for (const auto &I : F.args()) {
+  VMap[&I] = NoAllocF->getArg(Idx++);
+}
+SmallVector Returns;
+CloneFunctionInto(NoAllocF, &F, VMap,
+  CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+if (Shape.CoroBegin) {
+  auto *NewCoroBegin =
+  cast_if_present(VMap[Shape.CoroBegin]);
+  auto *NewCoroId = cast(NewCoroBegin->getId());
+  coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
+  coro::suppressCoroAllocs(NewCoroId);
+  NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx));
+  NewCoroBegin->eraseFromParent();
+}
+
+Module *M = F.getParent();
+M->getFunctionList().insert(M->end(), NoAllocF);
+
+removeUnreachableBlocks(*NoAllocF);
+auto NewAttrs = NoAllocF->getAttributes();
+// We just appended the

[llvm-branch-commits] [llvm] [LLVM][Coroutines] Transform "coro_must_elide" calls to switch ABI coroutines to the `noalloc` variant (PR #99285)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99285

>From 828463c9951daff698db417d1f2e2b8fe24d6243 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] add CoroAnnotationElidePass

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D60250514
---
 .../Coroutines/CoroAnnotationElide.h  |  34 +
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |  10 +-
 llvm/lib/Passes/PassRegistry.def  |   1 +
 llvm/lib/Transforms/Coroutines/CMakeLists.txt |   1 +
 .../Coroutines/CoroAnnotationElide.cpp| 136 ++
 llvm/test/Other/new-pm-defaults.ll|   1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |   1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |   1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |   1 +
 .../Coroutines/coro-transform-must-elide.ll   |  76 ++
 11 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 
llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
 create mode 100644 llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
 create mode 100644 llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll

diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h 
b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
new file mode 100644
index 0..dae1cc0c689a3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
@@ -0,0 +1,34 @@
+//===- CoroAnnotationElide.h - Optimizing a coro_must_elide call 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// \file
+// This pass transforms all Call or Invoke instructions that are annotated
+// "coro_must_elide" to call the `.noalloc` variant of coroutine instead.
+//
+//===--===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct CoroAnnotationElidePass : PassInfoMixin {
+  CoroAnnotationElidePass() {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+  static bool isRequired() { return false; }
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5dbb1e2f49871..8349ea728520f 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -135,6 +135,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a6118726945e8..eb26ecdb9c4a7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
@@ -979,8 +980,10 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
   RequireAnalysisPass()));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+MainCGPipeline.addPass(CoroAnnotationElidePass());
+  }
 
   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
@@ -1022,9 +1025,12 @@ 
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
   buildFunctionSimplificationPipeline(Level, Phase),
   PTO.EagerlyInvalidateAnalyses));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MPM.addPass(createM

[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/101212

#100690 introduces an allocator registry with the ability to store the allocator
index in the descriptor. This patch adds an attribute to fir.embox and
fircg.ext_embox so that the allocator index can be set while populating the
descriptor fields.

>From 77727fdf40e1164d9975378bb6951bc49baaf04a Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 29 Jul 2024 14:50:05 -0700
Subject: [PATCH] [flang] Add allocator_idx attribute on fir.embox and
 fircg.ext_embox

---
 .../include/flang/Optimizer/CodeGen/CGOps.td  |  4 ++-
 .../include/flang/Optimizer/Dialect/FIROps.td | 10 +--
 .../flang/Runtime}/allocator-registry.h   |  6 +++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp   | 28 ++-
 flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp  |  4 +--
 flang/runtime/allocator-registry.cpp  |  2 +-
 flang/runtime/descriptor.cpp  |  2 +-
 flang/test/Fir/embox.fir  | 24 
 8 files changed, 64 insertions(+), 16 deletions(-)
 rename flang/{runtime => include/flang/Runtime}/allocator-registry.h (90%)

diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td 
b/flang/include/flang/Optimizer/CodeGen/CGOps.td
index f4740a263ffd2..34c5dc07284f0 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -48,6 +48,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
  describe an correspond to the elemental derived type.
+   - allocator_idx: specify special allocator to use.
 
 The memref and shape arguments are mandatory. The rest are optional.
   }];
@@ -60,7 +61,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
 Variadic:$subcomponent,
 Variadic:$substr,
 Variadic:$lenParams,
-Optional:$sourceBox
+Optional:$sourceBox,
+OptionalAttr:$allocator_idx
   );
   let results = (outs BoxOrClassType);
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bee8e8f603ce3..7856fa7d90184 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -783,6 +783,7 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 - slice: an array section can be described with a slice triple,
 - typeparams: for emboxing a derived type with LEN type parameters,
 - accessMap: unused/experimental.
+- allocator_idx: specify special allocator to use.
   }];
 
   let arguments = (ins
@@ -791,7 +792,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 Optional:$slice,
 Variadic:$typeparams,
 Optional:$sourceBox,
-OptionalAttr:$accessMap
+OptionalAttr:$accessMap,
+OptionalAttr:$allocator_idx
   );
 
   let results = (outs BoxOrClassType);
@@ -801,9 +803,11 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
   "mlir::Value":$memref, CArg<"mlir::Value", "{}">:$shape,
   CArg<"mlir::Value", "{}">:$slice,
   CArg<"mlir::ValueRange", "{}">:$typeparams,
-  CArg<"mlir::Value", "{}">:$sourceBox),
+  CArg<"mlir::Value", "{}">:$sourceBox,
+  CArg<"mlir::IntegerAttr", "{}">:$allocator_idx),
 [{ return build($_builder, $_state, resultTypes, memref, shape, slice,
-typeparams, sourceBox, mlir::AffineMapAttr{}); }]>
+typeparams, sourceBox, mlir::AffineMapAttr{},
+allocator_idx); }]>
   ];
 
   let assemblyFormat = [{
diff --git a/flang/runtime/allocator-registry.h 
b/flang/include/flang/Runtime/allocator-registry.h
similarity index 90%
rename from flang/runtime/allocator-registry.h
rename to flang/include/flang/Runtime/allocator-registry.h
index 3243e1deab630..c481bec8e8e51 100644
--- a/flang/runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+static constexpr unsigned kDefaultAllocator = 0;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
@@ -37,7 +39,9 @@ struct AllocatorRegistry {
   RT_API_ATTRS constexpr AllocatorRegistry()
   : allocators{{&MallocWrapper, &FreeWrapper}} {}
 #else
-  constexpr AllocatorRegistry() { allocators[0] = {&std::malloc, &std::free}; 
};
+  constexpr AllocatorRegistry() {
+allocators[kDefaultAllocator] = {&std::malloc, &std::free};
+  };
 #endif
   RT_API_ATTRS void Register(int, Allocator_t);
   RT_API_ATTRS AllocFct GetAllocator(int pos);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a98585c34c7d..412cc4f1a020c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/li

[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-codegen

Author: Valentin Clement (バレンタイン クレメン) (clementval)


Changes

#100690 introduces an allocator registry with the ability to store the
allocator index in the descriptor. This patch adds an attribute to fir.embox
and fircg.ext_embox so that the allocator index can be set while populating the
descriptor fields.

---
Full diff: https://github.com/llvm/llvm-project/pull/101212.diff


8 Files Affected:

- (modified) flang/include/flang/Optimizer/CodeGen/CGOps.td (+3-1) 
- (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+7-3) 
- (renamed) flang/include/flang/Runtime/allocator-registry.h (+5-1) 
- (modified) flang/lib/Optimizer/CodeGen/CodeGen.cpp (+21-7) 
- (modified) flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp (+2-2) 
- (modified) flang/runtime/allocator-registry.cpp (+1-1) 
- (modified) flang/runtime/descriptor.cpp (+1-1) 
- (modified) flang/test/Fir/embox.fir (+24) 


``diff
diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td 
b/flang/include/flang/Optimizer/CodeGen/CGOps.td
index f4740a263ffd2..34c5dc07284f0 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -48,6 +48,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
  describe an correspond to the elemental derived type.
+   - allocator_idx: specify special allocator to use.
 
 The memref and shape arguments are mandatory. The rest are optional.
   }];
@@ -60,7 +61,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
 Variadic:$subcomponent,
 Variadic:$substr,
 Variadic:$lenParams,
-Optional:$sourceBox
+Optional:$sourceBox,
+OptionalAttr:$allocator_idx
   );
   let results = (outs BoxOrClassType);
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bee8e8f603ce3..7856fa7d90184 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -783,6 +783,7 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 - slice: an array section can be described with a slice triple,
 - typeparams: for emboxing a derived type with LEN type parameters,
 - accessMap: unused/experimental.
+- allocator_idx: specify special allocator to use.
   }];
 
   let arguments = (ins
@@ -791,7 +792,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 Optional:$slice,
 Variadic:$typeparams,
 Optional:$sourceBox,
-OptionalAttr:$accessMap
+OptionalAttr:$accessMap,
+OptionalAttr:$allocator_idx
   );
 
   let results = (outs BoxOrClassType);
@@ -801,9 +803,11 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
   "mlir::Value":$memref, CArg<"mlir::Value", "{}">:$shape,
   CArg<"mlir::Value", "{}">:$slice,
   CArg<"mlir::ValueRange", "{}">:$typeparams,
-  CArg<"mlir::Value", "{}">:$sourceBox),
+  CArg<"mlir::Value", "{}">:$sourceBox,
+  CArg<"mlir::IntegerAttr", "{}">:$allocator_idx),
 [{ return build($_builder, $_state, resultTypes, memref, shape, slice,
-typeparams, sourceBox, mlir::AffineMapAttr{}); }]>
+typeparams, sourceBox, mlir::AffineMapAttr{},
+allocator_idx); }]>
   ];
 
   let assemblyFormat = [{
diff --git a/flang/runtime/allocator-registry.h 
b/flang/include/flang/Runtime/allocator-registry.h
similarity index 90%
rename from flang/runtime/allocator-registry.h
rename to flang/include/flang/Runtime/allocator-registry.h
index 3243e1deab630..c481bec8e8e51 100644
--- a/flang/runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+static constexpr unsigned kDefaultAllocator = 0;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
@@ -37,7 +39,9 @@ struct AllocatorRegistry {
   RT_API_ATTRS constexpr AllocatorRegistry()
   : allocators{{&MallocWrapper, &FreeWrapper}} {}
 #else
-  constexpr AllocatorRegistry() { allocators[0] = {&std::malloc, &std::free}; 
};
+  constexpr AllocatorRegistry() {
+allocators[kDefaultAllocator] = {&std::malloc, &std::free};
+  };
 #endif
   RT_API_ATTRS void Register(int, Allocator_t);
   RT_API_ATTRS AllocFct GetAllocator(int pos);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a98585c34c7d..412cc4f1a020c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/TypeCode.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Ru

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/101216

Add allocators for CUDA fortran allocation on the device. 3 allocators are 
added for pinned, device and managed/unified memory allocation. 
`CUFRegisterAllocator()` is called to register the allocators in the allocator 
registry added in #100690.


Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to
conditionally build these.

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)


Changes

Add allocators for CUDA Fortran allocation on the device. Three allocators are 
added for pinned, device and managed/unified memory allocation. 
`CUFRegisterAllocator()` is called to register the allocators in the allocator 
registry added in #100690.


 Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to 
conditionally build these.  

---
Full diff: https://github.com/llvm/llvm-project/pull/101216.diff


8 Files Affected:

- (modified) flang/CMakeLists.txt (+7) 
- (added) flang/include/flang/Runtime/CUDA/allocator.h (+43) 
- (modified) flang/runtime/CMakeLists.txt (+3) 
- (added) flang/runtime/CUDA/CMakeLists.txt (+18) 
- (added) flang/runtime/CUDA/allocator.cpp (+62) 
- (modified) flang/unittests/Runtime/CMakeLists.txt (+2) 
- (added) flang/unittests/Runtime/CUDA/AllocatorCUF.cpp (+87) 
- (added) flang/unittests/Runtime/CUDA/CMakeLists.txt (+15) 


``diff
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../allocator-registry.h"
+#include "../derived.h"
+#include "../stat.h"
+#include "../ter

[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)

2024-07-30 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/100953

>From 9980c1fbe9da05695f30e15005119b000a19da3f Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 28 Jul 2024 15:28:09 -0400
Subject: [PATCH] [LLVM][PassBuilder] Extend the function signature of callback
 for optimizer pipeline extension point

These callbacks can be invoked in multiple places when building an optimization
pipeline, both at compile time and at link time. However, there is no indication
of which pipeline is currently being built.

In this patch, an extra argument is added to indicate its (Thin)LTO stage such
that the callback can check it if needed. There is no test expected from this,
and the benefit of this change will be demonstrated in 
https://github.com/llvm/llvm-project/pull/66488.
---
 clang/lib/CodeGen/BackendUtil.cpp | 19 +-
 llvm/include/llvm/Passes/PassBuilder.h| 20 +++
 llvm/lib/Passes/PassBuilderPipelines.cpp  | 36 +--
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  7 +++-
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 11 +++---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 15 
 llvm/tools/opt/NewPMDriver.cpp|  2 +-
 7 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index e765bbf637a66..64f0020a170aa 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -643,7 +643,7 @@ static void addKCFIPass(const Triple &TargetTriple, const 
LangOptions &LangOpts,
 
   // Ensure we lower KCFI operand bundles with -O0.
   PB.registerOptimizerLastEPCallback(
-  [&](ModulePassManager &MPM, OptimizationLevel Level) {
+  [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) 
{
 if (Level == OptimizationLevel::O0 &&
 LangOpts.Sanitize.has(SanitizerKind::KCFI))
   MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
@@ -662,8 +662,8 @@ static void addKCFIPass(const Triple &TargetTriple, const 
LangOptions &LangOpts,
 static void addSanitizers(const Triple &TargetTriple,
   const CodeGenOptions &CodeGenOpts,
   const LangOptions &LangOpts, PassBuilder &PB) {
-  auto SanitizersCallback = [&](ModulePassManager &MPM,
-OptimizationLevel Level) {
+  auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel 
Level,
+ThinOrFullLTOPhase) {
 if (CodeGenOpts.hasSanitizeCoverage()) {
   auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
   MPM.addPass(SanitizerCoveragePass(
@@ -749,7 +749,7 @@ static void addSanitizers(const Triple &TargetTriple,
 PB.registerOptimizerEarlyEPCallback(
 [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
   ModulePassManager NewMPM;
-  SanitizersCallback(NewMPM, Level);
+  SanitizersCallback(NewMPM, Level, ThinOrFullLTOPhase::None);
   if (!NewMPM.isEmpty()) {
 // Sanitizers can abandon.
 NewMPM.addPass(RequireAnalysisPass());
@@ -1018,11 +1018,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 // TODO: Consider passing the MemoryProfileOutput to the pass builder via
 // the PGOOptions, and set this up there.
 if (!CodeGenOpts.MemoryProfileOutput.empty()) {
-  PB.registerOptimizerLastEPCallback(
-  [](ModulePassManager &MPM, OptimizationLevel Level) {
-MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-MPM.addPass(ModuleMemProfilerPass());
-  });
+  PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM,
+OptimizationLevel Level,
+ThinOrFullLTOPhase) {
+MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+MPM.addPass(ModuleMemProfilerPass());
+  });
 }
 
 if (CodeGenOpts.FatLTO) {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h 
b/llvm/include/llvm/Passes/PassBuilder.h
index e1d78a8685aed..ad3901902f784 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -246,8 +246,9 @@ class PassBuilder {
   /// optimization and code generation without any link-time optimization. It
   /// typically correspond to frontend "-O[123]" options for optimization
   /// levels \c O1, \c O2 and \c O3 resp.
-  ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
-  bool LTOPreLink = false);
+  ModulePassManager buildPerModuleDefaultPipeline(
+  OptimizationLevel Level,
+  ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::None);
 
   /// Build a fat object default optimization pipeline.
   ///
@@ -297,8 +298,9 @@ class PassBuilder {
   /// Build an O0 pipeline with the minimal semantica

[llvm-branch-commits] [clang] [llvm] [WIP][Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)

2024-07-30 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian closed 
https://github.com/llvm/llvm-project/pull/100954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [WIP][Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)

2024-07-30 Thread Shilei Tian via llvm-branch-commits

shiltian wrote:

Moved most of the code to #100952, so this one is no longer needed. Will open a 
new PR if there is anything we need to do after the two patches have landed.

https://github.com/llvm/llvm-project/pull/100954
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits

https://github.com/vzakhari edited 
https://github.com/llvm/llvm-project/pull/101212
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits

https://github.com/vzakhari commented:

Thank you, Valentin.  It looks good to me with one minor comment.

https://github.com/llvm/llvm-project/pull/101212
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits


@@ -103,3 +103,27 @@ func.func @fir_dev_issue_1416(%arg0: 
!fir.ref>, %low: index
 fir.call @do_something(%3) : (!fir.box>) -> ()
 return
 }
+
+// CHECK-LABEL: define void @_QPtest_allocator1()
+func.func @_QPtest_allocator1() {
+  %c20 = arith.constant 20 : index
+  %0 = fir.alloca !fir.array<20xi32> {bindc_name = "x", uniq_name = 
"_QFtest_sliceEx"}
+  %1 = fir.shape %c20 : (index) -> !fir.shape<1>
+  %3 = fir.embox %0(%1) {allocator_idx = 1 : i32} : 
(!fir.ref>, !fir.shape<1>) -> !fir.box>
+  fir.call @_QPtest_callee(%3) : (!fir.box>) -> ()
+  return
+}
+
+// %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { 
ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 
20240719, i8 1, i8 9, i8 0, i8 2, [1 x [3 x i64]] [[3 x i64] [i64 1, i64 20, 
i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }

vzakhari wrote:

Missing `CHECK:`?

https://github.com/llvm/llvm-project/pull/101212
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

2024-07-30 Thread Amy Kwan via llvm-branch-commits

https://github.com/amy-kwan created 
https://github.com/llvm/llvm-project/pull/101224



This patch aims to reduce TOC usage by merging internal and private global data.

Moreover, we also add the GlobalMerge pass within the PPCTargetMachine 
pipeline, which is disabled by default. This transformation can be enabled by 
-ppc-global-merge.

>From 61a41df3ee9f90484f7a7f0fd576cd4646710d7c Mon Sep 17 00:00:00 2001
From: Amy Kwan 
Date: Tue, 30 Jul 2024 12:53:15 -0500
Subject: [PATCH] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal
 and private global data

This patch aims to reduce TOC usage by merging internal and private global data.

Moreover, we also add the GlobalMerge pass within the PPCTargetMachine pipeline,
which is disabled by default. This transformation can be enabled by 
-ppc-global-merge.
---
 llvm/include/llvm/CodeGen/GlobalMerge.h  |  4 +++
 llvm/include/llvm/CodeGen/Passes.h   |  4 ++-
 llvm/lib/CodeGen/GlobalMerge.cpp | 27 
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 13 ++
 llvm/test/CodeGen/PowerPC/merge-private.ll   | 20 +++
 5 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h 
b/llvm/include/llvm/CodeGen/GlobalMerge.h
index 13ad67d4544bc..ef767d548dc6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalMerge.h
+++ b/llvm/include/llvm/CodeGen/GlobalMerge.h
@@ -28,6 +28,10 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
+  /// Whether we should merge global variables that have private linkage.
+  bool MergePrivateGlobals = false;
+  /// Whether we should merge constant global variables.
+  bool MergeConstantGlobals = false;
   /// Whether we should try to optimize for size only.
   /// Currently, this applies a dead simple heuristic: only consider globals
   /// used in minsize functions for merging.
diff --git a/llvm/include/llvm/CodeGen/Passes.h 
b/llvm/include/llvm/CodeGen/Passes.h
index cafb9781698a2..b401a8d9f10fd 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -476,7 +476,9 @@ namespace llvm {
   ///
   Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
   bool OnlyOptimizeForSize = false,
-  bool MergeExternalByDefault = false);
+  bool MergeExternalByDefault = false,
+  bool MergePrivateByDefault = false,
+  bool MergeConstantByDefault = false);
 
   /// This pass splits the stack into a safe stack and an unsafe stack to
   /// protect against stack-based overflow vulnerabilities.
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index e420c2bb7a25e..4056860fddd93 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -196,11 +196,14 @@ class GlobalMerge : public FunctionPass {
   }
 
   explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
-   bool OnlyOptimizeForSize, bool MergeExternalGlobals)
+   bool OnlyOptimizeForSize, bool MergeExternalGlobals,
+   bool MergePrivateGlobals, bool MergeConstantGlobals)
   : FunctionPass(ID), TM(TM) {
 Opt.MaxOffset = MaximalOffset;
 Opt.SizeOnly = OnlyOptimizeForSize;
 Opt.MergeExternal = MergeExternalGlobals;
+Opt.MergePrivateGlobals = MergePrivateGlobals;
+Opt.MergeConstantGlobals = MergeConstantGlobals;
 initializeGlobalMergePass(*PassRegistry::getPassRegistry());
   }
 
@@ -475,7 +478,8 @@ bool GlobalMergeImpl::doMerge(const 
SmallVectorImpl &Globals,
   auto &DL = M.getDataLayout();
 
   LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
-<< GlobalSet.find_first() << "\n");
+<< GlobalSet.find_first() << ", total of " << 
Globals.size()
+<< "\n");
 
   bool Changed = false;
   ssize_t i = GlobalSet.find_first();
@@ -551,6 +555,8 @@ bool GlobalMergeImpl::doMerge(const 
SmallVectorImpl &Globals,
 MergedGV->setAlignment(MaxAlign);
 MergedGV->setSection(Globals[i]->getSection());
 
+LLVM_DEBUG(dbgs() << "MergedGV:  " << *MergedGV << "\n");
+
 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
   GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
@@ -700,6 +706,11 @@ bool GlobalMergeImpl::run(Module &M) {
   else
 Globals[{AddressSpace, Section}].push_back(&GV);
 }
+LLVM_DEBUG(dbgs() << "GV "
+  << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
+  ? "to merge: "
+  : "not to merge: ")
+  << GV << "\n");
   }
 
   for (auto &P : Globals)

[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-powerpc

Author: Amy Kwan (amy-kwan)


Changes



This patch aims to reduce TOC usage by merging internal and private global data.

Moreover, we also add the GlobalMerge pass within the PPCTargetMachine 
pipeline, which is disabled by default. This transformation can be enabled by 
-ppc-global-merge.

---
Full diff: https://github.com/llvm/llvm-project/pull/101224.diff


5 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalMerge.h (+4) 
- (modified) llvm/include/llvm/CodeGen/Passes.h (+3-1) 
- (modified) llvm/lib/CodeGen/GlobalMerge.cpp (+22-5) 
- (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+13) 
- (modified) llvm/test/CodeGen/PowerPC/merge-private.ll (+10-10) 


``diff
diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h 
b/llvm/include/llvm/CodeGen/GlobalMerge.h
index 13ad67d4544bc..ef767d548dc6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalMerge.h
+++ b/llvm/include/llvm/CodeGen/GlobalMerge.h
@@ -28,6 +28,10 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
+  /// Whether we should merge global variables that have private linkage.
+  bool MergePrivateGlobals = false;
+  /// Whether we should merge constant global variables.
+  bool MergeConstantGlobals = false;
   /// Whether we should try to optimize for size only.
   /// Currently, this applies a dead simple heuristic: only consider globals
   /// used in minsize functions for merging.
diff --git a/llvm/include/llvm/CodeGen/Passes.h 
b/llvm/include/llvm/CodeGen/Passes.h
index cafb9781698a2..b401a8d9f10fd 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -476,7 +476,9 @@ namespace llvm {
   ///
   Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
   bool OnlyOptimizeForSize = false,
-  bool MergeExternalByDefault = false);
+  bool MergeExternalByDefault = false,
+  bool MergePrivateByDefault = false,
+  bool MergeConstantByDefault = false);
 
   /// This pass splits the stack into a safe stack and an unsafe stack to
   /// protect against stack-based overflow vulnerabilities.
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index e420c2bb7a25e..4056860fddd93 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -196,11 +196,14 @@ class GlobalMerge : public FunctionPass {
   }
 
   explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
-   bool OnlyOptimizeForSize, bool MergeExternalGlobals)
+   bool OnlyOptimizeForSize, bool MergeExternalGlobals,
+   bool MergePrivateGlobals, bool MergeConstantGlobals)
   : FunctionPass(ID), TM(TM) {
 Opt.MaxOffset = MaximalOffset;
 Opt.SizeOnly = OnlyOptimizeForSize;
 Opt.MergeExternal = MergeExternalGlobals;
+Opt.MergePrivateGlobals = MergePrivateGlobals;
+Opt.MergeConstantGlobals = MergeConstantGlobals;
 initializeGlobalMergePass(*PassRegistry::getPassRegistry());
   }
 
@@ -475,7 +478,8 @@ bool GlobalMergeImpl::doMerge(const 
SmallVectorImpl &Globals,
   auto &DL = M.getDataLayout();
 
   LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
-<< GlobalSet.find_first() << "\n");
+<< GlobalSet.find_first() << ", total of " << 
Globals.size()
+<< "\n");
 
   bool Changed = false;
   ssize_t i = GlobalSet.find_first();
@@ -551,6 +555,8 @@ bool GlobalMergeImpl::doMerge(const 
SmallVectorImpl &Globals,
 MergedGV->setAlignment(MaxAlign);
 MergedGV->setSection(Globals[i]->getSection());
 
+LLVM_DEBUG(dbgs() << "MergedGV:  " << *MergedGV << "\n");
+
 const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
 for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
   GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
@@ -700,6 +706,11 @@ bool GlobalMergeImpl::run(Module &M) {
   else
 Globals[{AddressSpace, Section}].push_back(&GV);
 }
+LLVM_DEBUG(dbgs() << "GV "
+  << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
+  ? "to merge: "
+  : "not to merge: ")
+  << GV << "\n");
   }
 
   for (auto &P : Globals)
@@ -710,7 +721,7 @@ bool GlobalMergeImpl::run(Module &M) {
 if (P.second.size() > 1)
   Changed |= doMerge(P.second, M, false, P.first.first);
 
-  if (EnableGlobalMergeOnConst)
+  if (Opt.MergeConstantGlobals)
 for (auto &P : ConstGlobals)
   if (P.second.size() > 1)
 Changed |= doMerge(P.second, M, true, P.first.first);
@@ -720,8 +731,14 @@ bool GlobalMergeImpl::run(Module &M) {
 
 Pass *llvm::createG

[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX (PR #101226)

2024-07-30 Thread Amy Kwan via llvm-branch-commits

https://github.com/amy-kwan created 
https://github.com/llvm/llvm-project/pull/101226

This patch turns on the GlobalMerge pass by default on AIX and updates LIT 
tests accordingly.

>From 2dd0302dbe47b752d0452bb221fbea3a5dcc8b14 Mon Sep 17 00:00:00 2001
From: Amy Kwan 
Date: Tue, 30 Jul 2024 12:55:34 -0500
Subject: [PATCH] [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX

This patch turns on the GlobalMerge pass by default on AIX and updates LIT
tests accordingly.
---
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp| 5 -
 llvm/test/CodeGen/PowerPC/merge-private.ll  | 6 ++
 llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll  | 4 ++--
 llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll | 2 +-
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp 
b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e4045ec304435..f975d1495543b 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -500,7 +500,10 @@ void PPCPassConfig::addIRPasses() {
 }
 
 bool PPCPassConfig::addPreISel() {
-  if (EnableGlobalMerge)
+  if ((EnableGlobalMerge.getNumOccurrences() > 0)
+  ? EnableGlobalMerge
+  : (TM->getTargetTriple().isOSAIX() &&
+ getOptLevel() != CodeGenOptLevel::None))
 addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true,
   true));
 
diff --git a/llvm/test/CodeGen/PowerPC/merge-private.ll 
b/llvm/test/CodeGen/PowerPC/merge-private.ll
index 6ed2d6dfc542b..0ca706abb275f 100644
--- a/llvm/test/CodeGen/PowerPC/merge-private.ll
+++ b/llvm/test/CodeGen/PowerPC/merge-private.ll
@@ -11,6 +11,12 @@
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
 ; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN: --check-prefix=LINUX64BE
+; The below run line is added to ensure that the assembly corresponding to
+; the following check-prefix is generated by default on AIX (without any
+; options).
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=AIX64
 
 @.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1
 @str = internal constant [16 x i8] c"Internal global\00", align 1
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll 
b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
index 81147d10cde6e..833ed9fa65acf 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 
-enable-global-merge=false \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=AIX32,AIXDATA
-; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 
-enable-global-merge=false \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=AIX64,AIXDATA
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=LINUX64BE,LINUXDATA
diff --git a/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll 
b/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
index 5432b59d583ba..1a467ec72a75d 100644
--- a/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
+++ b/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
@@ -5,7 +5,7 @@
 ;; AIX assembly syntax.
 
 ; REQUIRES: powerpc-registered-target
-; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff < %s
+; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff 
-ppc-global-merge=false < %s
 ; RUN: llvm-symbolizer --obj=%t 'DATA 0x60' 'DATA 0x61' 'DATA 0x64' 'DATA 
0X68' \
 ; RUN:   'DATA 0x90' 'DATA 0x94' 'DATA 0X98' | \
 ; RUN:   FileCheck %s

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX (PR #101226)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-debuginfo

@llvm/pr-subscribers-backend-powerpc

Author: Amy Kwan (amy-kwan)


Changes

This patch turns on the GlobalMerge pass by default on AIX and updates LIT 
tests accordingly.

---
Full diff: https://github.com/llvm/llvm-project/pull/101226.diff


4 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+4-1) 
- (modified) llvm/test/CodeGen/PowerPC/merge-private.ll (+6) 
- (modified) llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll (+2-2) 
- (modified) llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll (+1-1) 


``diff
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp 
b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e4045ec304435..f975d1495543b 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -500,7 +500,10 @@ void PPCPassConfig::addIRPasses() {
 }
 
 bool PPCPassConfig::addPreISel() {
-  if (EnableGlobalMerge)
+  if ((EnableGlobalMerge.getNumOccurrences() > 0)
+  ? EnableGlobalMerge
+  : (TM->getTargetTriple().isOSAIX() &&
+ getOptLevel() != CodeGenOptLevel::None))
 addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true,
   true));
 
diff --git a/llvm/test/CodeGen/PowerPC/merge-private.ll 
b/llvm/test/CodeGen/PowerPC/merge-private.ll
index 6ed2d6dfc542b..0ca706abb275f 100644
--- a/llvm/test/CodeGen/PowerPC/merge-private.ll
+++ b/llvm/test/CodeGen/PowerPC/merge-private.ll
@@ -11,6 +11,12 @@
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
 ; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN: --check-prefix=LINUX64BE
+; The below run line is added to ensure that the assembly corresponding to
+; the following check-prefix is generated by default on AIX (without any
+; options).
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN: --check-prefix=AIX64
 
 @.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1
 @str = internal constant [16 x i8] c"Internal global\00", align 1
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll 
b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
index 81147d10cde6e..833ed9fa65acf 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 
-enable-global-merge=false \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=AIX32,AIXDATA
-; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 
-enable-global-merge=false \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=AIX64,AIXDATA
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s 
--check-prefixes=LINUX64BE,LINUXDATA
diff --git a/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll 
b/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
index 5432b59d583ba..1a467ec72a75d 100644
--- a/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
+++ b/llvm/test/DebugInfo/Symbolize/XCOFF/xcoff-symbolize-data.ll
@@ -5,7 +5,7 @@
 ;; AIX assembly syntax.
 
 ; REQUIRES: powerpc-registered-target
-; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff < %s
+; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff 
-ppc-global-merge=false < %s
 ; RUN: llvm-symbolizer --obj=%t 'DATA 0x60' 'DATA 0x61' 'DATA 0x64' 'DATA 
0X68' \
 ; RUN:   'DATA 0x90' 'DATA 0x94' 'DATA 0X98' | \
 ; RUN:   FileCheck %s

``




https://github.com/llvm/llvm-project/pull/101226
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits


@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \

vzakhari wrote:

It might be best to use the `FortranRuntime` `terminator` class to report the
error and terminate. Ideally, we would also pass the source code location from
the compiler to the runtime.

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits

https://github.com/vzakhari edited 
https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits

https://github.com/vzakhari approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

2024-07-30 Thread Eli Friedman via llvm-branch-commits


@@ -28,6 +28,10 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
+  /// Whether we should merge global variables that have private linkage.
+  bool MergePrivateGlobals = false;

efriedma-quic wrote:

MergePrivateGlobals is unused?

https://github.com/llvm/llvm-project/pull/101224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX (PR #101226)

2024-07-30 Thread Kai Nacke via llvm-branch-commits


@@ -500,7 +500,10 @@ void PPCPassConfig::addIRPasses() {
 }
 
 bool PPCPassConfig::addPreISel() {
-  if (EnableGlobalMerge)
+  if ((EnableGlobalMerge.getNumOccurrences() > 0)
+  ? EnableGlobalMerge
+  : (TM->getTargetTriple().isOSAIX() &&
+ getOptLevel() != CodeGenOptLevel::None))

redstar wrote:

I think the condition can be simplified:

```suggestion
  if (EnableGlobalMerge
  || (TM->getTargetTriple().isOSAIX() &&
 getOptLevel() != CodeGenOptLevel::None))
```
(formatting is most likely wrong)

https://github.com/llvm/llvm-project/pull/101226
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

2024-07-30 Thread Kai Nacke via llvm-branch-commits


@@ -720,8 +731,14 @@ bool GlobalMergeImpl::run(Module &M) {
 
 Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
   bool OnlyOptimizeForSize,
-  bool MergeExternalByDefault) {
+  bool MergeExternalByDefault,
+  bool MergePrivateByDefault,
+  bool MergeConstantByDefault) {
   bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
 MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
-  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
+  bool MergeConstant = EnableGlobalMergeOnConst.getNumOccurrences() > 0
+   ? EnableGlobalMergeOnConst
+   : MergeConstantByDefault;

redstar wrote:

```suggestion
  bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
```

https://github.com/llvm/llvm-project/pull/101224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99283

>From f8bb0f44f026540c7ae620e6b5bb7583e1078c67 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] Implement noalloc in CoroSplit

---
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   4 +
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 121 ++
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  27 
 llvm/test/Transforms/Coroutines/ArgAddr.ll|   2 +-
 .../Transforms/Coroutines/coro-alloca-07.ll   |   2 +-
 .../coro-alloca-loop-carried-address.ll   |   2 +-
 .../Coroutines/coro-lifetime-end.ll   |   6 +-
 .../Coroutines/coro-spill-after-phi.ll|   2 +-
 .../Transforms/Coroutines/coro-split-00.ll|   7 +
 9 files changed, 140 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h 
b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5716fd0ea4ab9..d91cccd99a703 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M,
 const std::initializer_list);
 void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
 
+void suppressCoroAllocs(CoroIdInst *CoroId);
+void suppressCoroAllocs(LLVMContext &Context,
+ArrayRef CoroAllocs);
+
 /// Attempts to rewrite the location operand of debug intrinsics in terms of
 /// the coroutine frame pointer, folding pointer offsets into the DIExpression
 /// of the intrinsic.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9e4da5f8ca961..f78c03e9687c9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
@@ -1179,6 +1180,14 @@ static void 
updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
+static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
+  // In the same function all coro.sizes should have the same result type.
+  auto *SizeIntrin = Shape.CoroSizes.back();
+  Module *M = SizeIntrin->getModule();
+  const DataLayout &DL = M->getDataLayout();
+  return DL.getTypeAllocSize(Shape.FrameTy);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
 updateAsyncFuncPointerContextSize(Shape);
@@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape 
&Shape) {
 
   // In the same function all coro.sizes should have the same result type.
   auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+  auto *SizeConstant =
+  ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));
 
   for (CoroSizeInst *CS : Shape.CoroSizes) {
 CS->replaceAllUsesWith(SizeConstant);
@@ -1455,6 +1462,62 @@ struct SwitchCoroutineSplitter {
 setCoroInfo(F, Shape, Clones);
   }
 
+  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
+SmallVectorImpl &Clones) {
+auto *OrigFnTy = F.getFunctionType();
+auto OldParams = OrigFnTy->params();
+
+SmallVector NewParams;
+NewParams.reserve(OldParams.size() + 1);
+NewParams.append(OldParams.begin(), OldParams.end());
+NewParams.push_back(PointerType::getUnqual(Shape.FrameTy));
+
+auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
+  OrigFnTy->isVarArg());
+Function *NoAllocF =
+Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");
+ValueToValueMapTy VMap;
+unsigned int Idx = 0;
+for (const auto &I : F.args()) {
+  VMap[&I] = NoAllocF->getArg(Idx++);
+}
+SmallVector Returns;
+CloneFunctionInto(NoAllocF, &F, VMap,
+  CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+if (Shape.CoroBegin) {
+  auto *NewCoroBegin =
+  cast_if_present(VMap[Shape.CoroBegin]);
+  auto *NewCoroId = cast(NewCoroBegin->getId());
+  coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
+  coro::suppressCoroAllocs(NewCoroId);
+  NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx));
+  NewCoroBegin->eraseFromParent();
+}
+
+Module *M = F.getParent();
+M->getFunctionList().insert(M->end(), NoAllocF);
+
+removeUnreachableBlocks(*NoAllocF);
+auto NewAttrs = NoAllocF->getAttributes();
+// We just appended the

[llvm-branch-commits] [llvm] [LLVM][Coroutines] Transform "coro_must_elide" calls to switch ABI coroutines to the `noalloc` variant (PR #99285)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99285

>From d2fac21c4e9e5302fa2bee5365ab8be01dd024ea Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] add CoroAnnotationElidePass

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D60250514
---
 .../Coroutines/CoroAnnotationElide.h  |  34 +
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |  10 +-
 llvm/lib/Passes/PassRegistry.def  |   1 +
 llvm/lib/Transforms/Coroutines/CMakeLists.txt |   1 +
 .../Coroutines/CoroAnnotationElide.cpp| 136 ++
 llvm/test/Other/new-pm-defaults.ll|   1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |   1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |   1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |   1 +
 .../Coroutines/coro-transform-must-elide.ll   |  76 ++
 11 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 
llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
 create mode 100644 llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
 create mode 100644 llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll

diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h 
b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
new file mode 100644
index 0..dae1cc0c689a3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
@@ -0,0 +1,34 @@
+//===- CoroAnnotationElide.h - Optimizing a coro_must_elide call 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// \file
+// This pass transforms all Call or Invoke instructions that are annotated
+// "coro_must_elide" to call the `.noalloc` variant of coroutine instead.
+//
+//===--===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct CoroAnnotationElidePass : PassInfoMixin {
+  CoroAnnotationElidePass() {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+  static bool isRequired() { return false; }
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5dbb1e2f49871..8349ea728520f 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -135,6 +135,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a6118726945e8..eb26ecdb9c4a7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
@@ -979,8 +980,10 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
   RequireAnalysisPass()));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+MainCGPipeline.addPass(CoroAnnotationElidePass());
+  }
 
   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
@@ -1022,9 +1025,12 @@ 
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
   buildFunctionSimplificationPipeline(Level, Phase),
   PTO.EagerlyInvalidateAnalyses));
 
-  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
+  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
 MPM.addPass(createM

[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101212

>From 77727fdf40e1164d9975378bb6951bc49baaf04a Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 29 Jul 2024 14:50:05 -0700
Subject: [PATCH 1/2] [flang] Add allocator_idx attribute on fir.embox and
 fircg.ext_embox

---
 .../include/flang/Optimizer/CodeGen/CGOps.td  |  4 ++-
 .../include/flang/Optimizer/Dialect/FIROps.td | 10 +--
 .../flang/Runtime}/allocator-registry.h   |  6 +++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp   | 28 ++-
 flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp  |  4 +--
 flang/runtime/allocator-registry.cpp  |  2 +-
 flang/runtime/descriptor.cpp  |  2 +-
 flang/test/Fir/embox.fir  | 24 
 8 files changed, 64 insertions(+), 16 deletions(-)
 rename flang/{runtime => include/flang/Runtime}/allocator-registry.h (90%)

diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td 
b/flang/include/flang/Optimizer/CodeGen/CGOps.td
index f4740a263ffd2..34c5dc07284f0 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -48,6 +48,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
  describe an correspond to the elemental derived type.
+   - allocator_idx: specify special allocator to use.
 
 The memref and shape arguments are mandatory. The rest are optional.
   }];
@@ -60,7 +61,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", 
[AttrSizedOperandSegments]> {
 Variadic:$subcomponent,
 Variadic:$substr,
 Variadic:$lenParams,
-Optional:$sourceBox
+Optional:$sourceBox,
+OptionalAttr:$allocator_idx
   );
   let results = (outs BoxOrClassType);
 
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bee8e8f603ce3..7856fa7d90184 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -783,6 +783,7 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 - slice: an array section can be described with a slice triple,
 - typeparams: for emboxing a derived type with LEN type parameters,
 - accessMap: unused/experimental.
+- allocator_idx: specify special allocator to use.
   }];
 
   let arguments = (ins
@@ -791,7 +792,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
 Optional:$slice,
 Variadic:$typeparams,
 Optional:$sourceBox,
-OptionalAttr:$accessMap
+OptionalAttr:$accessMap,
+OptionalAttr:$allocator_idx
   );
 
   let results = (outs BoxOrClassType);
@@ -801,9 +803,11 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, 
AttrSizedOperandSegments]> {
   "mlir::Value":$memref, CArg<"mlir::Value", "{}">:$shape,
   CArg<"mlir::Value", "{}">:$slice,
   CArg<"mlir::ValueRange", "{}">:$typeparams,
-  CArg<"mlir::Value", "{}">:$sourceBox),
+  CArg<"mlir::Value", "{}">:$sourceBox,
+  CArg<"mlir::IntegerAttr", "{}">:$allocator_idx),
 [{ return build($_builder, $_state, resultTypes, memref, shape, slice,
-typeparams, sourceBox, mlir::AffineMapAttr{}); }]>
+typeparams, sourceBox, mlir::AffineMapAttr{},
+allocator_idx); }]>
   ];
 
   let assemblyFormat = [{
diff --git a/flang/runtime/allocator-registry.h 
b/flang/include/flang/Runtime/allocator-registry.h
similarity index 90%
rename from flang/runtime/allocator-registry.h
rename to flang/include/flang/Runtime/allocator-registry.h
index 3243e1deab630..c481bec8e8e51 100644
--- a/flang/runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+static constexpr unsigned kDefaultAllocator = 0;
+
 #define MAX_ALLOCATOR 5
 
 namespace Fortran::runtime {
@@ -37,7 +39,9 @@ struct AllocatorRegistry {
   RT_API_ATTRS constexpr AllocatorRegistry()
   : allocators{{&MallocWrapper, &FreeWrapper}} {}
 #else
-  constexpr AllocatorRegistry() { allocators[0] = {&std::malloc, &std::free}; 
};
+  constexpr AllocatorRegistry() {
+allocators[kDefaultAllocator] = {&std::malloc, &std::free};
+  };
 #endif
   RT_API_ATTRS void Register(int, Allocator_t);
   RT_API_ATTRS AllocFct GetAllocator(int pos);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a98585c34c7d..412cc4f1a020c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/TypeCode.h"
 #include "flang/Optimizer/Support/Utils.h"
+#include "flang/Runtime/allocator-registry.h"

[llvm-branch-commits] [flang] [flang] Add allocator_idx attribute on fir.embox and fircg.ext_embox (PR #101212)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -103,3 +103,27 @@ func.func @fir_dev_issue_1416(%arg0: 
!fir.ref>, %low: index
 fir.call @do_something(%3) : (!fir.box>) -> ()
 return
 }
+
+// CHECK-LABEL: define void @_QPtest_allocator1()
+func.func @_QPtest_allocator1() {
+  %c20 = arith.constant 20 : index
+  %0 = fir.alloca !fir.array<20xi32> {bindc_name = "x", uniq_name = 
"_QFtest_sliceEx"}
+  %1 = fir.shape %c20 : (index) -> !fir.shape<1>
+  %3 = fir.embox %0(%1) {allocator_idx = 1 : i32} : 
(!fir.ref>, !fir.shape<1>) -> !fir.box>
+  fir.call @_QPtest_callee(%3) : (!fir.box>) -> ()
+  return
+}
+
+// %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { 
ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 
20240719, i8 1, i8 9, i8 0, i8 2, [1 x [3 x i64]] [[3 x i64] [i64 1, i64 20, 
i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }

clementval wrote:

Yeah! Thanks for catching this. 

https://github.com/llvm/llvm-project/pull/101212
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \

clementval wrote:

I will update that to use the terminator. 

We don't have the source location information at the point where we call the
allocator (`Descriptor::Allocate()`). I'll look into adding that in a follow-up
patch, if that's OK with you.

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [sanitizer_common] Fix internal_*stat on Linux/sparc64 (PR #101236)

2024-07-30 Thread Rainer Orth via llvm-branch-commits

https://github.com/rorth milestoned 
https://github.com/llvm/llvm-project/pull/101236
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [sanitizer_common] Fix internal_*stat on Linux/sparc64 (PR #101236)

2024-07-30 Thread Rainer Orth via llvm-branch-commits

https://github.com/rorth created 
https://github.com/llvm/llvm-project/pull/101236

Backport of fcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535 and 
16e9bb9cd7f50ae2ec7f29a80bc3b95f528bfdbf to `release/19.x` branch.

>From 7d97041c217bcb4b04cacb3a5d17285f8b241a88 Mon Sep 17 00:00:00 2001
From: Rainer Orth 
Date: Tue, 30 Jul 2024 09:03:00 +0200
Subject: [PATCH 1/2] [sanitizer_common] Fix internal_*stat on Linux/sparc64
 (#101012)

```
  SanitizerCommon-Unit :: ./Sanitizer-sparcv9-Test/SanitizerCommon/FileOps
```
`FAIL`s on 64-bit Linux/sparc64:
```
projects/compiler-rt/lib/sanitizer_common/tests/./Sanitizer-sparcv9-Test 
--gtest_filter=SanitizerCommon.FileOps
--
compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp:144: Failure
Expected equality of these values:
  len1 + len2
Which is: 10
  fsize
Which is: 1721875535
```
The issue is similar to the mips64 case: the Linux/sparc64 `*stat`
syscalls take a `struct kernel_stat64 *` arg. Also the syscalls actually
used differ.

This patch handles this, adopting the mips64 code to avoid too much
duplication.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit fcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535)
---
 .../lib/sanitizer_common/sanitizer_linux.cpp  | 41 +++
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 76acf591871ab..09f58b7ced2e9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -33,11 +33,15 @@
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
-// sys/stat.h, we use this trick.
-#  if SANITIZER_MIPS64
+// sys/stat.h, we use this trick.  sparc64 is similar, using
+// syscall(__NR_stat64) and struct kernel_stat64.
+#  if SANITIZER_MIPS64 || SANITIZER_SPARC64
 #include 
 #include 
 #define stat kernel_stat
+#if SANITIZER_SPARC64
+#  define stat64 kernel_stat64
+#endif
 #if SANITIZER_GO
 #  undef st_atime
 #  undef st_mtime
@@ -48,6 +52,7 @@
 #endif
 #include 
 #undef stat
+#undef stat64
 #  endif
 
 #  include 
@@ -285,8 +290,7 @@ uptr internal_ftruncate(fd_t fd, uptr size) {
   return res;
 }
 
-#if (!SANITIZER_LINUX_USES_64BIT_SYSCALLS || SANITIZER_SPARC) && \
-SANITIZER_LINUX
+#if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -327,7 +331,12 @@ static void statx_to_stat(struct statx *in, struct stat 
*out) {
 }
 #endif
 
-#if SANITIZER_MIPS64
+#if SANITIZER_MIPS64 || SANITIZER_SPARC64
+#  if SANITIZER_MIPS64
+typedef struct kernel_stat kstat_t;
+#  else
+typedef struct kernel_stat64 kstat_t;
+#  endif
 // Undefine compatibility macros from 
 // so that they would not clash with the kernel_stat
 // st_[a|m|c]time fields
@@ -345,7 +354,7 @@ static void statx_to_stat(struct statx *in, struct stat 
*out) {
 #undef st_mtime_nsec
 #undef st_ctime_nsec
 #  endif
-static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
+static void kernel_stat_to_stat(kstat_t *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
@@ -391,6 +400,12 @@ uptr internal_stat(const char *path, void *buf) {
   !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
   0);
+#  elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+ (uptr)&buf64, 0);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #  else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -423,6 +438,12 @@ uptr internal_lstat(const char *path, void *buf) {
   !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
   AT_SYMLINK_NOFOLLOW);
+#  elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+ (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #  else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -442,10 +463,16 @@ uptr internal_fstat(fd_t fd, void *buf) {
 #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
 #  if SANITIZER_MIPS64
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
-  st

[llvm-branch-commits] [compiler-rt] [sanitizer_common] Fix internal_*stat on Linux/sparc64 (PR #101236)

2024-07-30 Thread Rainer Orth via llvm-branch-commits

https://github.com/rorth edited https://github.com/llvm/llvm-project/pull/101236
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [sanitizer_common] Fix internal_*stat on Linux/sparc64 (PR #101236)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Rainer Orth (rorth)


Changes

Backport of fcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535 and 
16e9bb9cd7f50ae2ec7f29a80bc3b95f528bfdbf to `release/19.x` branch.

---
Full diff: https://github.com/llvm/llvm-project/pull/101236.diff


1 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+34-7) 


``diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 76acf591871ab..d1c50cbda7d7e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -33,11 +33,15 @@
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
-// sys/stat.h, we use this trick.
-#  if SANITIZER_MIPS64
+// sys/stat.h, we use this trick.  sparc64 is similar, using
+// syscall(__NR_stat64) and struct kernel_stat64.
+#  if SANITIZER_LINUX && (SANITIZER_MIPS64 || SANITIZER_SPARC64)
 #include 
 #include 
 #define stat kernel_stat
+#if SANITIZER_SPARC64
+#  define stat64 kernel_stat64
+#endif
 #if SANITIZER_GO
 #  undef st_atime
 #  undef st_mtime
@@ -48,6 +52,7 @@
 #endif
 #include 
 #undef stat
+#undef stat64
 #  endif
 
 #  include 
@@ -285,8 +290,7 @@ uptr internal_ftruncate(fd_t fd, uptr size) {
   return res;
 }
 
-#if (!SANITIZER_LINUX_USES_64BIT_SYSCALLS || SANITIZER_SPARC) && \
-SANITIZER_LINUX
+#if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -327,7 +331,12 @@ static void statx_to_stat(struct statx *in, struct stat 
*out) {
 }
 #endif
 
-#if SANITIZER_MIPS64
+#if SANITIZER_MIPS64 || SANITIZER_SPARC64
+#  if SANITIZER_MIPS64
+typedef struct kernel_stat kstat_t;
+#  else
+typedef struct kernel_stat64 kstat_t;
+#  endif
 // Undefine compatibility macros from 
 // so that they would not clash with the kernel_stat
 // st_[a|m|c]time fields
@@ -345,7 +354,7 @@ static void statx_to_stat(struct statx *in, struct stat 
*out) {
 #undef st_mtime_nsec
 #undef st_ctime_nsec
 #  endif
-static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
+static void kernel_stat_to_stat(kstat_t *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
@@ -391,6 +400,12 @@ uptr internal_stat(const char *path, void *buf) {
   !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
   0);
+#  elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+ (uptr)&buf64, 0);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #  else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -423,6 +438,12 @@ uptr internal_lstat(const char *path, void *buf) {
   !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
   AT_SYMLINK_NOFOLLOW);
+#  elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+ (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #  else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -442,10 +463,16 @@ uptr internal_fstat(fd_t fd, void *buf) {
 #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
 #  if SANITIZER_MIPS64
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
-  struct kernel_stat kbuf;
+  kstat_t kbuf;
   int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
+#  elif SANITIZER_LINUX && SANITIZER_SPARC64
+  // For sparc64, fstat64 syscall fills buffer in the format of kernel_stat64
+  kstat_t kbuf;
+  int res = internal_syscall(SYSCALL(fstat64), fd, &kbuf);
+  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+  return res;
 #  elif SANITIZER_LINUX && defined(__loongarch__)
   struct statx bufx;
   int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH,

``




https://github.com/llvm/llvm-project/pull/101236
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/2] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../a

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/3] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../a

[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Slava Zakharin via llvm-branch-commits


@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}

vzakhari wrote:

Thank you for switching to using `terminator`.  I think you have to add a 
linking dependency on `FortranRuntime` now.

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits


@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}

clementval wrote:

Yeah that would make sense!

https://github.com/llvm/llvm-project/pull/101216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)

2024-07-30 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/101216

>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/4] [flang][cuda] Add CUF allocator

---
 flang/CMakeLists.txt  |  7 ++
 flang/include/flang/Runtime/CUDA/allocator.h  | 43 +
 flang/runtime/CMakeLists.txt  |  3 +
 flang/runtime/CUDA/CMakeLists.txt | 18 
 flang/runtime/CUDA/allocator.cpp  | 62 +
 flang/unittests/Runtime/CMakeLists.txt|  2 +
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++
 flang/unittests/Runtime/CUDA/CMakeLists.txt   | 15 
 8 files changed, 237 insertions(+)
 create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
 create mode 100644 flang/runtime/CUDA/CMakeLists.txt
 create mode 100644 flang/runtime/CUDA/allocator.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
 create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+option(FLANG_CUF_RUNTIME
+  "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h 
b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+if (!result) \
+  return; \
+const char *name = nullptr; \
+cuGetErrorName(result, &name); \
+if (!name) \
+  name = ""; \
+fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt 
b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt 
-===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#======#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS 
${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../a

[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/101249

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/101250

This adjusts the DXILOpBuilder API in a few ways:
1. Remove the need to call `getOverloadTy` before creating Ops
2. Introduce `tryCreateOp` to parallel `createOp` but propagate errors
3. Introduce specialized createOp methods for each DXIL Op

This will simplify usage of the builder in upcoming changes, and also allows us
to propagate errors via DiagnosticInfo rather than using fatal errors.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: Justin Bogner (bogner)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/101249.diff


2 Files Affected:

- (modified) llvm/lib/Target/DirectX/DXILConstants.h (+8-1) 
- (modified) llvm/utils/TableGen/DXILEmitter.cpp (+21-28) 


``diff
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h 
b/llvm/lib/Target/DirectX/DXILConstants.h
index 78a641df8e6ec..0c9c1ac38fdbc 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -15,8 +15,15 @@
 namespace llvm {
 namespace dxil {
 
-#define DXIL_OP_ENUM
+enum class OpCode : unsigned {
+#define DXIL_OPCODE(Op, Name) Name = Op,
 #include "DXILOperation.inc"
+};
+
+enum class OpCodeClass : unsigned {
+#define DXIL_OPCLASS(Name) Name,
+#include "DXILOperation.inc"
+};
 
 } // namespace dxil
 } // namespace llvm
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp 
b/llvm/utils/TableGen/DXILEmitter.cpp
index 74c4fd50f37f0..2361fd286c976 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -429,34 +429,26 @@ static std::string getAttributeMaskString(const 
SmallVector Recs) {
   return MaskString;
 }
 
-/// Emit Enums of DXIL Ops
-/// \param A vector of DXIL Ops
-/// \param Output stream
-static void emitDXILEnums(std::vector &Ops,
-  raw_ostream &OS) {
-  OS << "#ifdef DXIL_OP_ENUM\n\n";
-  OS << "// Enumeration for operations specified by DXIL\n";
-  OS << "enum class OpCode : unsigned {\n";
-
-  for (auto &Op : Ops) {
-// Name = ID, // Doc
-OS << Op.OpName << " = " << Op.OpCode << ", // " << Op.Doc << "\n";
-  }
-
-  OS << "\n};\n\n";
+/// Emit a mapping of DXIL opcode to opname
+static void emitDXILOpCodes(std::vector &Ops,
+raw_ostream &OS) {
+  OS << "#ifdef DXIL_OPCODE\n";
+  for (const DXILOperationDesc &Op : Ops)
+OS << "DXIL_OPCODE(" << Op.OpCode << ", " << Op.OpName << ")\n";
+  OS << "#undef DXIL_OPCODE\n";
+  OS << "\n";
+  OS << "#endif\n\n";
+}
 
-  OS << "// Groups for DXIL operations with equivalent function templates\n";
-  OS << "enum class OpCodeClass : unsigned {\n";
-  // Build an OpClass set to print
-  SmallSet OpClassSet;
-  for (auto &Op : Ops) {
-OpClassSet.insert(Op.OpClass);
-  }
-  for (auto &C : OpClassSet) {
-OS << C << ",\n";
-  }
-  OS << "\n};\n\n";
-  OS << "#undef DXIL_OP_ENUM\n";
+/// Emit a list of DXIL op classes
+static void emitDXILOpClasses(RecordKeeper &Records,
+  raw_ostream &OS) {
+  OS << "#ifdef DXIL_OPCLASS\n";
+  std::vector OpClasses =
+  Records.getAllDerivedDefinitions("DXILOpClass");
+  for (Record *OpClass : OpClasses)
+OS << "DXIL_OPCLASS(" << OpClass->getName() << ")\n";
+  OS << "#undef DXIL_OPCLASS\n";
   OS << "#endif\n\n";
 }
 
@@ -646,7 +638,8 @@ static void EmitDXILOperation(RecordKeeper &Records, 
raw_ostream &OS) {
 return A.OpCode < B.OpCode;
   });
 
-  emitDXILEnums(DXILOps, OS);
+  emitDXILOpCodes(DXILOps, OS);
+  emitDXILOpClasses(Records, OS);
   emitDXILIntrinsicMap(DXILOps, OS);
   OS << "#ifdef DXIL_OP_OPERATION_TABLE\n\n";
   emitDXILOperationTableDataStructs(Records, OS);

``




https://github.com/llvm/llvm-project/pull/101249
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: Justin Bogner (bogner)


Changes

This adjusts the DXILOpBuilder API in a few ways:
1. Remove the need to call `getOverloadTy` before creating Ops
2. Introduce `tryCreateOp` to parallel `createOp` but propagate errors
3. Introduce specialized createOp methods for each DXIL Op

This will simplify usage of the builder in upcoming changes, and also allows us
to propagate errors via DiagnosticInfo rather than using fatal errors.


---

Patch is 43.67 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/101250.diff


31 Files Affected:

- (modified) llvm/lib/Target/DirectX/DXIL.td (+29-29) 
- (modified) llvm/lib/Target/DirectX/DXILOpBuilder.cpp (+62-82) 
- (modified) llvm/lib/Target/DirectX/DXILOpBuilder.h (+27-14) 
- (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+12-7) 
- (modified) llvm/test/CodeGen/DirectX/acos_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/asin_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/atan_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/ceil_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/cos_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/cosh_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/dot2_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/dot3_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/dot4_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/exp2_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/flattened_thread_id_in_group_error.ll 
(+2-1) 
- (modified) llvm/test/CodeGen/DirectX/floor_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/frac_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/group_id_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/isinf_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/log2_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/round_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/rsqrt_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/sin_error.ll (+2-2) 
- (modified) llvm/test/CodeGen/DirectX/sinh_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/sqrt_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/tan_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/tanh_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/thread_id_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/thread_id_in_group_error.ll (+2-1) 
- (modified) llvm/test/CodeGen/DirectX/trunc_error.ll (+2-1) 
- (modified) llvm/utils/TableGen/DXILEmitter.cpp (+7-14) 


``diff
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index a66f5b6470934..67015cff78a79 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -318,7 +318,7 @@ class DXILOp {
 def Abs :  DXILOp<6, unary> {
   let Doc = "Returns the absolute value of the input.";
   let LLVMIntrinsic = int_fabs;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -338,7 +338,7 @@ def IsInf :  DXILOp<9, isSpecialFloat> {
 def Cos :  DXILOp<12, unary> {
   let Doc = "Returns cosine(theta) for theta in radians.";
   let LLVMIntrinsic = int_cos;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -348,7 +348,7 @@ def Cos :  DXILOp<12, unary> {
 def Sin :  DXILOp<13, unary> {
   let Doc = "Returns sine(theta) for theta in radians.";
   let LLVMIntrinsic = int_sin;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -358,7 +358,7 @@ def Sin :  DXILOp<13, unary> {
 def Tan :  DXILOp<14, unary> {
   let Doc = "Returns tangent(theta) for theta in radians.";
   let LLVMIntrinsic = int_tan;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -368,7 +368,7 @@ def Tan :  DXILOp<14, unary> {
 def ACos :  DXILOp<15, unary> {
   let Doc = "Returns the arccosine of the specified value.";
   let LLVMIntrinsic = int_acos;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -378,7 +378,7 @@ def ACos :  DXILOp<15, unary> {
 def ASin :  DXILOp<16, unary> {
   let Doc = "Returns the arcsine of the specified value.";
   let LLVMIntrinsic = int_asin;
-  let arguments = [LLVMMatchType<0>];
+  let arguments = [overloadTy];
   let result = overloadTy;
   let overloads = [Overloads];
   let stages = [Stages];
@@ -388,7 +388,7 @@ def ASin :  DXILOp<16, unary> {
 def ATan :  DXILOp<17, unary> {
   let Doc = "Returns the arctangent of the specified value.";
   let LLVMIntrinsic = int_atan;
-  let arguments =

[llvm-branch-commits] [clang] ReleaseNotes.rst: Fix typo "my" for "may" (PR #101251)

2024-07-30 Thread Hubert Tong via llvm-branch-commits

https://github.com/hubert-reinterpretcast created 
https://github.com/llvm/llvm-project/pull/101251

Replace "my", a typo for "may", with "can".

>From 20320854d647630b1022b6bc4a1671e85c93a287 Mon Sep 17 00:00:00 2001
From: Hubert Tong 
Date: Tue, 30 Jul 2024 17:56:55 -0400
Subject: [PATCH] ReleaseNotes.rst: Fix typo "my" for "may"

Replace "my", a typo for "may", with "can".
---
 clang/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 610061406a1ec..b4ef1e9672a5d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -147,7 +147,7 @@ Clang Frontend Potentially Breaking Changes
   that ``none`` means that there is no operating system. As opposed to an 
unknown
   type of operating system.
 
-  This change my cause clang to not find libraries, or libraries to be built at
+  This change can cause clang to not find libraries, or libraries to be built 
at
   different file system locations. This can be fixed by changing your builds to
   use the new normalized triple. However, we recommend instead getting the
   normalized triple from clang itself, as this will make your builds more

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] ReleaseNotes.rst: Fix typo "my" for "may" (PR #101251)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Hubert Tong (hubert-reinterpretcast)


Changes

Replace "my", a typo for "may", with "can".

---
Full diff: https://github.com/llvm/llvm-project/pull/101251.diff


1 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+1-1) 


``diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 610061406a1ec..b4ef1e9672a5d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -147,7 +147,7 @@ Clang Frontend Potentially Breaking Changes
   that ``none`` means that there is no operating system. As opposed to an 
unknown
   type of operating system.
 
-  This change my cause clang to not find libraries, or libraries to be built at
+  This change can cause clang to not find libraries, or libraries to be built 
at
   different file system locations. This can be fixed by changing your builds to
   use the new normalized triple. However, we recommend instead getting the
   normalized triple from clang itself, as this will make your builds more

``




https://github.com/llvm/llvm-project/pull/101251
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Xiang Li via llvm-branch-commits


@@ -151,7 +151,11 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
 assert(knownType && "Specification of multiple differing overload "
 "parameter types not yet supported");
   } else {
-OverloadParamIndices.push_back(i);
+// Skip the return value - nothing is overloaded on only return, and it

python3kgae wrote:

Things like LoadInput are overloaded on only return.

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[MC] Compute fragment offsets eagerly" (PR #101254)

2024-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/101254

Backport 4eb5450f630849ee0518487de38d857fbe5b1aee

Requested by: @MaskRay

>From e535b345fab504a6f3c94d54d4af98eda8dfb1e8 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Tue, 30 Jul 2024 14:52:29 -0700
Subject: [PATCH] Revert "[MC] Compute fragment offsets eagerly"

This reverts commit 1a47f3f3db66589c11f8ddacfeaecc03fb80c510.

Fix #100283

This commit is actually a trigger of other preexisting problems:

* Size change of fill fragments does not influence the fixed-point iteration.
* The `invalid number of bytes` error is reported too early. Since
  `.zero A-B` might have temporary negative values in the first few
  iterations.

However, the problems appeared at least "benign" (did not affect the
Linux kernel builds) before this commit.

(cherry picked from commit 4eb5450f630849ee0518487de38d857fbe5b1aee)
---
 llvm/include/llvm/MC/MCAsmBackend.h   |  5 +-
 llvm/include/llvm/MC/MCAssembler.h|  4 +-
 llvm/include/llvm/MC/MCSection.h  |  5 ++
 llvm/lib/MC/MCAssembler.cpp   | 77 +--
 llvm/lib/MC/MCSection.cpp |  4 +-
 .../MCTargetDesc/HexagonAsmBackend.cpp|  4 +-
 .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 26 +--
 7 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/MC/MCAsmBackend.h 
b/llvm/include/llvm/MC/MCAsmBackend.h
index d1d1814dd8b52..3f88ac02cd92a 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -217,9 +217,8 @@ class MCAsmBackend {
   virtual bool writeNopData(raw_ostream &OS, uint64_t Count,
 const MCSubtargetInfo *STI) const = 0;
 
-  // Return true if fragment offsets have been adjusted and an extra layout
-  // iteration is needed.
-  virtual bool finishLayout(const MCAssembler &Asm) const { return false; }
+  /// Give backend an opportunity to finish layout after relaxation
+  virtual void finishLayout(MCAssembler const &Asm) const {}
 
   /// Handle any target-specific assembler flags. By default, do nothing.
   virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
diff --git a/llvm/include/llvm/MC/MCAssembler.h 
b/llvm/include/llvm/MC/MCAssembler.h
index d9752912ee66a..c6fa48128d189 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -111,7 +111,6 @@ class MCAssembler {
   /// Check whether the given fragment needs relaxation.
   bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF) const;
 
-  void layoutSection(MCSection &Sec);
   /// Perform one layout iteration and return true if any offsets
   /// were adjusted.
   bool layoutOnce();
@@ -148,9 +147,10 @@ class MCAssembler {
   uint64_t computeFragmentSize(const MCFragment &F) const;
 
   void layoutBundle(MCFragment *Prev, MCFragment *F) const;
+  void ensureValid(MCSection &Sec) const;
 
   // Get the offset of the given fragment inside its containing section.
-  uint64_t getFragmentOffset(const MCFragment &F) const { return F.Offset; }
+  uint64_t getFragmentOffset(const MCFragment &F) const;
 
   uint64_t getSectionAddressSize(const MCSection &Sec) const;
   uint64_t getSectionFileSize(const MCSection &Sec) const;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 1289d6f6f9f65..dcdcd094fa17b 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -99,6 +99,8 @@ class MCSection {
   /// Whether this section has had instructions emitted into it.
   bool HasInstructions : 1;
 
+  bool HasLayout : 1;
+
   bool IsRegistered : 1;
 
   bool IsText : 1;
@@ -167,6 +169,9 @@ class MCSection {
   bool hasInstructions() const { return HasInstructions; }
   void setHasInstructions(bool Value) { HasInstructions = Value; }
 
+  bool hasLayout() const { return HasLayout; }
+  void setHasLayout(bool Value) { HasLayout = Value; }
+
   bool isRegistered() const { return IsRegistered; }
   void setIsRegistered(bool Value) { IsRegistered = Value; }
 
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index ceeb7af0fecc4..c3da4bb5cc363 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -432,6 +432,28 @@ void MCAssembler::layoutBundle(MCFragment *Prev, 
MCFragment *F) const {
   DF->Offset = EF->Offset;
 }
 
+void MCAssembler::ensureValid(MCSection &Sec) const {
+  if (Sec.hasLayout())
+return;
+  Sec.setHasLayout(true);
+  MCFragment *Prev = nullptr;
+  uint64_t Offset = 0;
+  for (MCFragment &F : Sec) {
+F.Offset = Offset;
+if (isBundlingEnabled() && F.hasInstructions()) {
+  layoutBundle(Prev, &F);
+  Offset = F.Offset;
+}
+Offset += computeFragmentSize(F);
+Prev = &F;
+  }
+}
+
+uint64_t MCAssembler::getFragmentOffset(const MCFragment &F) const {
+  ensureValid(*F.getParent());
+  return F.Offset;
+}
+
 // Simple getSymbolOffset helper for the non-variable case.
 stat

[llvm-branch-commits] [llvm] release/19.x: Revert "[MC] Compute fragment offsets eagerly" (PR #101254)

2024-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/101254
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread Xiang Li via llvm-branch-commits

https://github.com/python3kgae approved this pull request.


https://github.com/llvm/llvm-project/pull/101249
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[MC] Compute fragment offsets eagerly" (PR #101254)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-hexagon

Author: None (llvmbot)


Changes

Backport 4eb5450f630849ee0518487de38d857fbe5b1aee

Requested by: @MaskRay

---
Full diff: https://github.com/llvm/llvm-project/pull/101254.diff


7 Files Affected:

- (modified) llvm/include/llvm/MC/MCAsmBackend.h (+2-3) 
- (modified) llvm/include/llvm/MC/MCAssembler.h (+2-2) 
- (modified) llvm/include/llvm/MC/MCSection.h (+5) 
- (modified) llvm/lib/MC/MCAssembler.cpp (+37-40) 
- (modified) llvm/lib/MC/MCSection.cpp (+2-2) 
- (modified) llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp (+2-2) 
- (modified) llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (+21-5) 


``diff
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h 
b/llvm/include/llvm/MC/MCAsmBackend.h
index d1d1814dd8b52..3f88ac02cd92a 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -217,9 +217,8 @@ class MCAsmBackend {
   virtual bool writeNopData(raw_ostream &OS, uint64_t Count,
 const MCSubtargetInfo *STI) const = 0;
 
-  // Return true if fragment offsets have been adjusted and an extra layout
-  // iteration is needed.
-  virtual bool finishLayout(const MCAssembler &Asm) const { return false; }
+  /// Give backend an opportunity to finish layout after relaxation
+  virtual void finishLayout(MCAssembler const &Asm) const {}
 
   /// Handle any target-specific assembler flags. By default, do nothing.
   virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
diff --git a/llvm/include/llvm/MC/MCAssembler.h 
b/llvm/include/llvm/MC/MCAssembler.h
index d9752912ee66a..c6fa48128d189 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -111,7 +111,6 @@ class MCAssembler {
   /// Check whether the given fragment needs relaxation.
   bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF) const;
 
-  void layoutSection(MCSection &Sec);
   /// Perform one layout iteration and return true if any offsets
   /// were adjusted.
   bool layoutOnce();
@@ -148,9 +147,10 @@ class MCAssembler {
   uint64_t computeFragmentSize(const MCFragment &F) const;
 
   void layoutBundle(MCFragment *Prev, MCFragment *F) const;
+  void ensureValid(MCSection &Sec) const;
 
   // Get the offset of the given fragment inside its containing section.
-  uint64_t getFragmentOffset(const MCFragment &F) const { return F.Offset; }
+  uint64_t getFragmentOffset(const MCFragment &F) const;
 
   uint64_t getSectionAddressSize(const MCSection &Sec) const;
   uint64_t getSectionFileSize(const MCSection &Sec) const;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 1289d6f6f9f65..dcdcd094fa17b 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -99,6 +99,8 @@ class MCSection {
   /// Whether this section has had instructions emitted into it.
   bool HasInstructions : 1;
 
+  bool HasLayout : 1;
+
   bool IsRegistered : 1;
 
   bool IsText : 1;
@@ -167,6 +169,9 @@ class MCSection {
   bool hasInstructions() const { return HasInstructions; }
   void setHasInstructions(bool Value) { HasInstructions = Value; }
 
+  bool hasLayout() const { return HasLayout; }
+  void setHasLayout(bool Value) { HasLayout = Value; }
+
   bool isRegistered() const { return IsRegistered; }
   void setIsRegistered(bool Value) { IsRegistered = Value; }
 
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index ceeb7af0fecc4..c3da4bb5cc363 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -432,6 +432,28 @@ void MCAssembler::layoutBundle(MCFragment *Prev, 
MCFragment *F) const {
   DF->Offset = EF->Offset;
 }
 
+void MCAssembler::ensureValid(MCSection &Sec) const {
+  if (Sec.hasLayout())
+return;
+  Sec.setHasLayout(true);
+  MCFragment *Prev = nullptr;
+  uint64_t Offset = 0;
+  for (MCFragment &F : Sec) {
+F.Offset = Offset;
+if (isBundlingEnabled() && F.hasInstructions()) {
+  layoutBundle(Prev, &F);
+  Offset = F.Offset;
+}
+Offset += computeFragmentSize(F);
+Prev = &F;
+  }
+}
+
+uint64_t MCAssembler::getFragmentOffset(const MCFragment &F) const {
+  ensureValid(*F.getParent());
+  return F.Offset;
+}
+
 // Simple getSymbolOffset helper for the non-variable case.
 static bool getLabelOffset(const MCAssembler &Asm, const MCSymbol &S,
bool ReportError, uint64_t &Val) {
@@ -916,20 +938,22 @@ void MCAssembler::layout() {
 
   // Layout until everything fits.
   this->HasLayout = true;
-  for (MCSection &Sec : *this)
-layoutSection(Sec);
   while (layoutOnce()) {
+if (getContext().hadError())
+  return;
+// Size of fragments in one section can depend on the size of fragments in
+// another. If any fragment has changed size, we have to re-layout (and
+// as a result possibly further relax) all.
+for (MCSection &Sec : *this)
+  Sec.setHasLayout(false);
   }
 
   DEBUG_WITH_TYPE("mc-du

[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-30 Thread via llvm-branch-commits


@@ -223,10 +249,31 @@ class StaleMatcher {
 return Hash1.InstrHash == Hash2.InstrHash;
   }
 
+  /// Returns true if a profiled block was matched with its pseudo probe.
+  bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
+return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
+   MatchedWithPseudoProbes.end();
+  }
+
+  /// Returns the number of blocks matched with opcodes.
+  size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; }
+
+  /// Returns the number of blocks matched with pseudo probes.
+  size_t getNumBlocksMatchedWithPseudoProbes() const {
+return MatchedWithPseudoProbes.size();
+  }
+
 private:
   using HashBlockPairType = std::pair;
   std::unordered_map> OpHashToBlocks;
   std::unordered_map> 
CallHashToBlocks;
+  std::unordered_map>

spupyrev wrote:

@dcci @shawbyoung this is due to a limitation of ADT maps, e.g., the following 
code crashes:
```  
  DenseMap TestMap;
  TestMap[uint16_t(-1)] = 123;
```
If not for that, I agree that ADT data structures would be preferred

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[MC] Compute fragment offsets eagerly" (PR #101254)

2024-07-30 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay approved this pull request.


https://github.com/llvm/llvm-project/pull/101254
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Justin Bogner via llvm-branch-commits


@@ -151,7 +151,11 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
 assert(knownType && "Specification of multiple differing overload "
 "parameter types not yet supported");
   } else {
-OverloadParamIndices.push_back(i);
+// Skip the return value - nothing is overloaded on only return, and it

bogner wrote:

That's a good point. This should still happen to work since we treat "not 
overloaded" and "overloaded on return type" the same for the purposes of 
overloadKind in tryCreateOp, and I've updated the comment to say so.

The right thing to do, which I think should be done in a follow up, is to 
generate a table of the function signatures per opcode, which can then be 
queried by a simple switch over those records. I think we'll need that soon 
anyway so I'll do that shortly.

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Boilerplate for DXILResourceAnalysis pass (PR #100700)

2024-07-30 Thread Damyan Pepper via llvm-branch-commits

https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/100700
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libcxx] [clang] Finish implementation of P0522 (PR #96023)

2024-07-30 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/96023

>From 4fb1c50098372d6b2f1bc50948ffdf56e1a9efff Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Mon, 17 Jun 2024 21:39:08 -0300
Subject: [PATCH] [clang] Finish implementation of P0522

This finishes the clang implementation of P0522, getting rid
of the fallback to the old, pre-P0522 rules.

Before this patch, when partial ordering template template parameters,
we would perform, in order:
* If the old rules would match, we would accept it. Otherwise, don't
  generate diagnostics yet.
* If the new rules would match, just accept it. Otherwise, don't
  generate any diagnostics yet again.
* Apply the old rules again, this time with diagnostics.

This situation was far from ideal, as we would sometimes:
* Accept some things we shouldn't.
* Reject some things we shouldn't.
* Only diagnose rejection in terms of the old rules.

With this patch, we apply the P0522 rules throughout.

This needed to extend template argument deduction in order
to accept the historical rule for TTP matching pack parameter to non-pack
arguments.
This change also makes us accept some combinations of historical and P0522
allowances we wouldn't before.

It also fixes a bunch of bugs that were documented in the test suite,
for which I am not sure issues have already been created.

This causes a lot of changes to the way these failures are diagnosed,
with related test suite churn.

The problem here is that the old rules were very simple and
non-recursive, making it easy to provide customized diagnostics,
and to keep them consistent with each other.

The new rules are a lot more complex and rely on template argument
deduction, substitutions, and they are recursive.

The approach taken here is to mostly rely on existing diagnostics,
and create a new instantiation context that keeps track of this context.

So for example when a substitution failure occurs, we use the error
produced there unmodified, and just attach notes to it explaining
that it occurred in the context of partial ordering this template
argument against that template parameter.

This diverges from the old diagnostics, which would lead with an
error pointing to the template argument, explain the problem
in subsequent notes, and produce a final note pointing to the parameter.
---
 clang/docs/ReleaseNotes.rst   |   8 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/include/clang/Sema/Sema.h   |  14 +-
 clang/lib/Frontend/FrontendActions.cpp|   2 +
 clang/lib/Sema/SemaTemplate.cpp   |  94 ++---
 clang/lib/Sema/SemaTemplateDeduction.cpp  | 352 +-
 clang/lib/Sema/SemaTemplateInstantiate.cpp|  15 +
 .../temp/temp.arg/temp.arg.template/p3-0x.cpp |  31 +-
 clang/test/CXX/temp/temp.param/p12.cpp|  21 +-
 clang/test/Modules/cxx-templates.cpp  |  15 +-
 clang/test/SemaCXX/make_integer_seq.cpp   |   5 +-
 clang/test/SemaTemplate/cwg2398.cpp   |  19 +-
 clang/test/SemaTemplate/temp_arg_nontype.cpp  |  46 ++-
 clang/test/SemaTemplate/temp_arg_template.cpp |  38 +-
 .../SemaTemplate/temp_arg_template_p0522.cpp  |  70 ++--
 .../Templight/templight-empty-entries-fix.cpp |  12 +
 .../templight-prior-template-arg.cpp  |  33 +-
 .../type_traits/is_specialization.verify.cpp  |   8 +-
 18 files changed, 516 insertions(+), 274 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 903461532940c..043604b3e5195 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -67,6 +67,8 @@ C++ Language Changes
 
 C++17 Feature Support
 ^
+- The implementation of the relaxed template template argument matching rules 
is
+  more complete and reliable, and should provide more accurate diagnostics.
 
 C++14 Feature Support
 ^
@@ -133,6 +135,10 @@ Improvements to Clang's diagnostics
 
 - Clang now diagnoses undefined behavior in constant expressions more 
consistently. This includes invalid shifts, and signed overflow in arithmetic.
 
+- Clang now properly explains the reason a template template argument failed to
+  match a template template parameter, in terms of the C++17 relaxed matching 
rules
+  instead of the old ones.
+
 Improvements to Clang's time-trace
 --
 
@@ -158,6 +164,8 @@ Bug Fixes to C++ Support
 - Fixed a failed assertion when checking invalid delete operator declaration. 
(#GH96191)
 - When performing partial ordering of function templates, clang now checks that
   the deduction was consistent. Fixes (#GH18291).
+- Fixes to several issues in partial ordering of template template parameters, 
which
+  were documented in the test suite.
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 810abe4f23e31..bb7bba73d1964 100644
--- a/clang/in

[llvm-branch-commits] [llvm] release/19.x: [Support] Silence warnings when retrieving exported functions (#97905) (PR #101266)

2024-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/101266

Backport 39e192b

Requested by: @aganea

>From 91be3dbbd6295cff5ebdfd2ee9ec4d241e709dfb Mon Sep 17 00:00:00 2001
From: Alexandre Ganea 
Date: Tue, 30 Jul 2024 19:06:03 -0400
Subject: [PATCH] [Support] Silence warnings when retrieving exported functions
 (#97905)

Since functions exported from DLLs are type-erased, before this patch I
was seeing the new Clang 19 warning `-Wcast-function-type-mismatch`.

This happens when building LLVM on Windows.

Following discussion in
https://github.com/llvm/llvm-project/commit/593f708118aef792f434185547f74fedeaf51dd4#commitcomment-143905744

(cherry picked from commit 39e192b379362e9e645427631c35450d55ed517d)
---
 llvm/lib/Support/Windows/Process.inc |  3 ++-
 llvm/lib/Support/Windows/Signals.inc | 38 +++-
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Support/Windows/Process.inc 
b/llvm/lib/Support/Windows/Process.inc
index 34d294b232c32b..d525f5b16e862e 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -482,7 +482,8 @@ static RTL_OSVERSIONINFOEXW GetWindowsVer() {
 HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
 assert(hMod);
 
-auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+auto getVer =
+(RtlGetVersionPtr)(void *)::GetProcAddress(hMod, "RtlGetVersion");
 assert(getVer);
 
 RTL_OSVERSIONINFOEXW info{};
diff --git a/llvm/lib/Support/Windows/Signals.inc 
b/llvm/lib/Support/Windows/Signals.inc
index 29ebf7c696e04f..f11ad09f371397 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -171,23 +171,27 @@ static bool load64BitDebugHelp(void) {
   HMODULE hLib =
   ::LoadLibraryExA("Dbghelp.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
   if (hLib) {
-fMiniDumpWriteDump =
-(fpMiniDumpWriteDump)::GetProcAddress(hLib, "MiniDumpWriteDump");
-fStackWalk64 = (fpStackWalk64)::GetProcAddress(hLib, "StackWalk64");
-fSymGetModuleBase64 =
-(fpSymGetModuleBase64)::GetProcAddress(hLib, "SymGetModuleBase64");
-fSymGetSymFromAddr64 =
-(fpSymGetSymFromAddr64)::GetProcAddress(hLib, "SymGetSymFromAddr64");
-fSymGetLineFromAddr64 =
-(fpSymGetLineFromAddr64)::GetProcAddress(hLib, "SymGetLineFromAddr64");
-fSymGetModuleInfo64 =
-(fpSymGetModuleInfo64)::GetProcAddress(hLib, "SymGetModuleInfo64");
-fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)::GetProcAddress(
-hLib, "SymFunctionTableAccess64");
-fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions");
-fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize");
-fEnumerateLoadedModules = (fpEnumerateLoadedModules)::GetProcAddress(
-hLib, "EnumerateLoadedModules64");
+fMiniDumpWriteDump = (fpMiniDumpWriteDump)(void *)::GetProcAddress(
+hLib, "MiniDumpWriteDump");
+fStackWalk64 = (fpStackWalk64)(void *)::GetProcAddress(hLib, 
"StackWalk64");
+fSymGetModuleBase64 = (fpSymGetModuleBase64)(void *)::GetProcAddress(
+hLib, "SymGetModuleBase64");
+fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64)(void *)::GetProcAddress(
+hLib, "SymGetSymFromAddr64");
+fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)(void *)::GetProcAddress(
+hLib, "SymGetLineFromAddr64");
+fSymGetModuleInfo64 = (fpSymGetModuleInfo64)(void *)::GetProcAddress(
+hLib, "SymGetModuleInfo64");
+fSymFunctionTableAccess64 =
+(fpSymFunctionTableAccess64)(void *)::GetProcAddress(
+hLib, "SymFunctionTableAccess64");
+fSymSetOptions =
+(fpSymSetOptions)(void *)::GetProcAddress(hLib, "SymSetOptions");
+fSymInitialize =
+(fpSymInitialize)(void *)::GetProcAddress(hLib, "SymInitialize");
+fEnumerateLoadedModules =
+(fpEnumerateLoadedModules)(void *)::GetProcAddress(
+hLib, "EnumerateLoadedModules64");
   }
   return isDebugHelpInitialized();
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Support] Silence warnings when retrieving exported functions (#97905) (PR #101266)

2024-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/101266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Support] Silence warnings when retrieving exported functions (#97905) (PR #101266)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:

@compnerd What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/101266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Support] Silence warnings when retrieving exported functions (#97905) (PR #101266)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-support

Author: None (llvmbot)


Changes

Backport 39e192b

Requested by: @aganea

---
Full diff: https://github.com/llvm/llvm-project/pull/101266.diff


2 Files Affected:

- (modified) llvm/lib/Support/Windows/Process.inc (+2-1) 
- (modified) llvm/lib/Support/Windows/Signals.inc (+21-17) 


``diff
diff --git a/llvm/lib/Support/Windows/Process.inc 
b/llvm/lib/Support/Windows/Process.inc
index 34d294b232c32b..d525f5b16e862e 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -482,7 +482,8 @@ static RTL_OSVERSIONINFOEXW GetWindowsVer() {
 HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
 assert(hMod);
 
-auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+auto getVer =
+(RtlGetVersionPtr)(void *)::GetProcAddress(hMod, "RtlGetVersion");
 assert(getVer);
 
 RTL_OSVERSIONINFOEXW info{};
diff --git a/llvm/lib/Support/Windows/Signals.inc 
b/llvm/lib/Support/Windows/Signals.inc
index 29ebf7c696e04f..f11ad09f371397 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -171,23 +171,27 @@ static bool load64BitDebugHelp(void) {
   HMODULE hLib =
   ::LoadLibraryExA("Dbghelp.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
   if (hLib) {
-fMiniDumpWriteDump =
-(fpMiniDumpWriteDump)::GetProcAddress(hLib, "MiniDumpWriteDump");
-fStackWalk64 = (fpStackWalk64)::GetProcAddress(hLib, "StackWalk64");
-fSymGetModuleBase64 =
-(fpSymGetModuleBase64)::GetProcAddress(hLib, "SymGetModuleBase64");
-fSymGetSymFromAddr64 =
-(fpSymGetSymFromAddr64)::GetProcAddress(hLib, "SymGetSymFromAddr64");
-fSymGetLineFromAddr64 =
-(fpSymGetLineFromAddr64)::GetProcAddress(hLib, "SymGetLineFromAddr64");
-fSymGetModuleInfo64 =
-(fpSymGetModuleInfo64)::GetProcAddress(hLib, "SymGetModuleInfo64");
-fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)::GetProcAddress(
-hLib, "SymFunctionTableAccess64");
-fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions");
-fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize");
-fEnumerateLoadedModules = (fpEnumerateLoadedModules)::GetProcAddress(
-hLib, "EnumerateLoadedModules64");
+fMiniDumpWriteDump = (fpMiniDumpWriteDump)(void *)::GetProcAddress(
+hLib, "MiniDumpWriteDump");
+fStackWalk64 = (fpStackWalk64)(void *)::GetProcAddress(hLib, 
"StackWalk64");
+fSymGetModuleBase64 = (fpSymGetModuleBase64)(void *)::GetProcAddress(
+hLib, "SymGetModuleBase64");
+fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64)(void *)::GetProcAddress(
+hLib, "SymGetSymFromAddr64");
+fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)(void *)::GetProcAddress(
+hLib, "SymGetLineFromAddr64");
+fSymGetModuleInfo64 = (fpSymGetModuleInfo64)(void *)::GetProcAddress(
+hLib, "SymGetModuleInfo64");
+fSymFunctionTableAccess64 =
+(fpSymFunctionTableAccess64)(void *)::GetProcAddress(
+hLib, "SymFunctionTableAccess64");
+fSymSetOptions =
+(fpSymSetOptions)(void *)::GetProcAddress(hLib, "SymSetOptions");
+fSymInitialize =
+(fpSymInitialize)(void *)::GetProcAddress(hLib, "SymInitialize");
+fEnumerateLoadedModules =
+(fpEnumerateLoadedModules)(void *)::GetProcAddress(
+hLib, "EnumerateLoadedModules64");
   }
   return isDebugHelpInitialized();
 }

``




https://github.com/llvm/llvm-project/pull/101266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96523


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm][misexpect] Enable diagnostics for profitable llvm.expect annotations (PR #96523)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96523


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96524


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [misexpect] Support diagnostics from frontend profile data (PR #96524)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96524


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96525


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang][misexpect] Add support to clang for profitable annotation diagnostics (PR #96525)

2024-07-30 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/96525


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Adrian Vogelsgesang via llvm-branch-commits


@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl 
&Clones,
   for (DbgVariableRecord *DVR : DbgVariableRecords)
 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
false /*UseEntryValue*/);
-  return Shape;
-}
 
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
-  if (Shape.ABI != coro::ABI::Switch) {
-for (auto *End : Shape.CoroEnds) {
-  replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
-}
-  } else {
-for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
-  auto &Context = End->getContext();
-  End->replaceAllUsesWith(ConstantInt::getFalse(Context));
-  End->eraseFromParent();
-}
+  removeCoroEndsFromRampFunction(Shape);
+
+  if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) {

vogelsgesang wrote:

fine by me. Doesn't need to be addressed as part of this commit, can also be 
done in a follow-up commit.
Maybe add a `FIXME` to the source code, such that other people reading the 
source code in the meantime are aware of this future work?

https://github.com/llvm/llvm-project/pull/99283
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT][NFC] Print timers in perf2bolt invocation (PR #101270)

2024-07-30 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov created 
https://github.com/llvm/llvm-project/pull/101270

When BOLT is run in AggregateOnly mode (perf2bolt), it exits with code
zero, so destructors are not run and thus TimerGroup never prints the timers.

Add explicit printing just before the exit to honor options requesting
timers (`--time-rewrite`, `--time-aggr`).

Test Plan: updated bolt/test/timers.c



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT][NFC] Print timers in perf2bolt invocation (PR #101270)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)


Changes

When BOLT is run in AggregateOnly mode (perf2bolt), it exits with code
zero, so destructors are not run and thus TimerGroup never prints the timers.

Add explicit printing just before the exit to honor options requesting
timers (`--time-rewrite`, `--time-aggr`).

Test Plan: updated bolt/test/timers.c


---
Full diff: https://github.com/llvm/llvm-project/pull/101270.diff


2 Files Affected:

- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+1) 
- (modified) bolt/test/timers.c (+4) 


``diff
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp 
b/bolt/lib/Rewrite/RewriteInstance.cpp
index b7e361c35088a..9077869fe4955 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3200,6 +3200,7 @@ void RewriteInstance::processProfileData() {
   if (opts::AggregateOnly) {
 PrintProgramStats PPS(&*BAT);
 BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*BC));
+TimerGroup::printAll(outs());
 exit(0);
   }
 }
diff --git a/bolt/test/timers.c b/bolt/test/timers.c
index 97dc2506adecd..8d146c321d7f9 100644
--- a/bolt/test/timers.c
+++ b/bolt/test/timers.c
@@ -3,6 +3,9 @@
 # RUN: link_fdata %s %t.exe %t.fdata
 # RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \
 # RUN:   | FileCheck %s
+# RUN: link_fdata %s %t.exe %t.preagg PREAGG
+# RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \
+# RUN:   | FileCheck %s
 
 # CHECK-DAG: update metadata post-emit
 # CHECK-DAG: process section metadata
@@ -11,5 +14,6 @@
 # CHECK-DAG: finalize metadata pre-emit
 
 # FDATA: 0 [unknown] 0 1 main 0 1 0
+# PREAGG: B X:0 #main# 1 0
 */
 int main() { return 0; }

``




https://github.com/llvm/llvm-project/pull/101270
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread Damyan Pepper via llvm-branch-commits

https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/101249
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Justin Bogner via llvm-branch-commits


@@ -151,7 +151,11 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
 assert(knownType && "Specification of multiple differing overload "
 "parameter types not yet supported");
   } else {
-OverloadParamIndices.push_back(i);
+// Skip the return value - nothing is overloaded on only return, and it

bogner wrote:

Thinking about this some more I don't think we can get away with this - if an 
op is overloaded on return type we simply need to provide the one we want. I 
think I'll add a `Type *` argument that's defaulted to null for "infer it" and 
return an error if we can't do so.

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Damyan Pepper via llvm-branch-commits

https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Damyan Pepper via llvm-branch-commits


@@ -289,24 +289,25 @@ static ShaderKind 
getShaderKindEnum(Triple::EnvironmentType EnvType) {
 ///   its specification in DXIL.td.
 /// \param OverloadTy Return type to be used to construct DXIL function type.

damyanp wrote:

The doxygen comment about getDxilOpFunctionType is now out of date.

I'm not sure that the list of parameters in the comment is really adding much, 
but I think the mismatch would generate a Doxygen warning so it'd be worth 
resolving this one way or another.

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

2024-07-30 Thread Chen Zheng via llvm-branch-commits


@@ -28,6 +28,10 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
+  /// Whether we should merge global variables that have private linkage.
+  bool MergePrivateGlobals = false;

chenzheng1030 wrote:

+1

Like the internal global variables, private global variables should always be 
merged and should not be guarded under an option. You've already done this in 
https://github.com/llvm/llvm-project/pull/101222 :)

https://github.com/llvm/llvm-project/pull/101224
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX (PR #101226)

2024-07-30 Thread Chen Zheng via llvm-branch-commits

https://github.com/chenzheng1030 approved this pull request.

LGTM once the formatting comment from Kai is resolved

https://github.com/llvm/llvm-project/pull/101226
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-30 Thread Yuxuan Chen via llvm-branch-commits


@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl 
&Clones,
   for (DbgVariableRecord *DVR : DbgVariableRecords)
 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
false /*UseEntryValue*/);
-  return Shape;
-}
 
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
-  if (Shape.ABI != coro::ABI::Switch) {
-for (auto *End : Shape.CoroEnds) {
-  replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
-}
-  } else {
-for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
-  auto &Context = End->getContext();
-  End->replaceAllUsesWith(ConstantInt::getFalse(Context));
-  End->eraseFromParent();
-}
+  removeCoroEndsFromRampFunction(Shape);
+
+  if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) {

yuxuanchen1997 wrote:

For sure!


https://github.com/llvm/llvm-project/pull/99283
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Justin Bogner via llvm-branch-commits


@@ -151,7 +151,11 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
 assert(knownType && "Specification of multiple differing overload "
 "parameter types not yet supported");
   } else {
-OverloadParamIndices.push_back(i);
+// Skip the return value - nothing is overloaded on only return, and it

bogner wrote:

Fixed in latest.

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Make DXILOpBuilder's API more useable (PR #101250)

2024-07-30 Thread Justin Bogner via llvm-branch-commits


@@ -289,24 +289,25 @@ static ShaderKind 
getShaderKindEnum(Triple::EnvironmentType EnvType) {
 ///   its specification in DXIL.td.
 /// \param OverloadTy Return type to be used to construct DXIL function type.

bogner wrote:

Removed the parameters from the comment

https://github.com/llvm/llvm-project/pull/101250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/101249


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DirectX] Simplify tablegen'd OpCode and OpClass enums (PR #101249)

2024-07-30 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/101249


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 63cf3d4 - [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Stefan Pintilie
Date: 2024-07-31T08:51:00+02:00
New Revision: 63cf3d4fb07a4e2c484ae44cec5df2c273fc7fff

URL: 
https://github.com/llvm/llvm-project/commit/63cf3d4fb07a4e2c484ae44cec5df2c273fc7fff
DIFF: 
https://github.com/llvm/llvm-project/commit/63cf3d4fb07a4e2c484ae44cec5df2c273fc7fff.diff

LOG: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. 
(#96839)

The issue with the handling of the SUBREG_TO_REG is that we don't join
the subranges correctly when we join live ranges across the
SUBREG_TO_REG. For example when joining across this:
```
32B   %2:gr64_nosp = SUBREG_TO_REG 0, %0:gr32, %subreg.sub_32bit
```
we want to join these live ranges:
```
%0 [16r,32r:0) 0@16r  weight:0.00e+00
%2 [32r,112r:0) 0@32r  weight:0.00e+00
```
Before the fix the range for the resulting merged `%2` is:
```
%2 [16r,112r:0) 0@16r  weight:0.00e+00
```
After the fix it is now this:
```
%2 [16r,112r:0) 0@16r  L000F [16r,112r:0) 0@16r  weight:0.00e+00
```

Two tests are added to this fix. The X86 test fails without the patch.
The PowerPC test passes with and without the patch but is added as a way
to track possible future failures when register classes are changed in a
future patch.

(cherry picked from commit 26fa399012da00fbf806f50ad72a3b5f0ee63eab)

Added: 
llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
llvm/test/CodeGen/X86/subreg-fail.mir

Modified: 
llvm/lib/CodeGen/RegisterCoalescer.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp 
b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 1c35a88b4dc4a..043ea20191487 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
+LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
+   CP.getNewRC()->getLaneMask(), LHS);
+mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), 
CP,
+  CP.getDstIdx());
+LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting

diff  --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir 
b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
new file mode 100644
index 0..39eab1f562e71
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s \
+# RUN:   -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on PowerPC. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name: check_subregs
+alignment:   16
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $x3
+
+; CHECK-LABEL: name: check_subregs
+; CHECK: liveins: $x3
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead 
[[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
+; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], 
implicit $rm
+; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
+; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
+; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
+%0:g8rc_and_g8rc_nox0 = COPY $x3
+%1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+%3:f4rc = FRSP killed %1, implicit $rm
+%4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
+%5:vrrc = XVCVDPSP killed %4, implicit $rm
+$v2 = COPY %5
+BLR8 implicit $lr8, implicit $rm, implicit $v2
+...
+

diff  --git a/llvm/test/CodeGen/X86/subreg-fail.mir 
b/llvm/test/CodeGen/X86/subreg-fail.mir
new file mode 100644
index 0..c8146f099b814
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple x86_64-unknown-unknown %s \
+# RUN:   -verify-coalescing -enable-subreg-liveness \
+# RUN:   --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on X86. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name:test1
+alignment:   16
+tracksRegLiveness: true
+body: |
+  bb.0:
+; CHECK-LABEL: name: test1
+; CHECK: undef [[MOV32rm:%[0-9]+]].sub_

[llvm-branch-commits] [llvm] release/19.x: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839) (PR #101071)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

I had to manually merge this since there was a merge commit in this PR. It was 
fine but please don't merge in from the release branch in the future since we 
don't use squashing for the release branch.

https://github.com/llvm/llvm-project/pull/101071
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839) (PR #101071)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101071
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] release/19.x: [libunwind][AIX] Fix the wrong traceback from signal handler (#101069) (PR #101182)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101182

>From 64699d328a39d3a2cc7c043768111794782ef9f0 Mon Sep 17 00:00:00 2001
From: Xing Xue 
Date: Tue, 30 Jul 2024 06:28:59 -0400
Subject: [PATCH] [libunwind][AIX] Fix the wrong traceback from signal handler
 (#101069)

Patch [llvm#92291](https://github.com/llvm/llvm-project/pull/92291)
causes a wrong traceback from a signal handler on AIX because the AIX
unwinder uses the traceback table at the end of each function instead of
FDE/CIE for unwinding. This patch adds a condition to exclude traceback
table based unwinding from the code added by the patch.

(cherry picked from commit d90fa612604b49dfc81c3f42c106fab7401322ec)
---
 libunwind/src/UnwindCursor.hpp | 3 ++-
 libunwind/test/aix_signal_unwind.pass.sh.S | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 2ec60e4c123d5..758557337899e 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -2589,7 +2589,8 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool 
isReturnAddress) {
 --pc;
 #endif
 
-#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32))
+#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) &&
\
+!defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND)
   // In case of this is frame of signal handler, the IP saved in the signal
   // handler points to first non-executed instruction, while FDE/CIE expects IP
   // to be after the first non-executed instruction.
diff --git a/libunwind/test/aix_signal_unwind.pass.sh.S 
b/libunwind/test/aix_signal_unwind.pass.sh.S
index 9ca18e9481f4f..a666577d095b1 100644
--- a/libunwind/test/aix_signal_unwind.pass.sh.S
+++ b/libunwind/test/aix_signal_unwind.pass.sh.S
@@ -10,7 +10,7 @@
 // a correct traceback when the function raising the signal does not save
 // the link register or does not store the stack back chain.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target=powerpc{{(64)?}}-ibm-aix{{.*}}
 
 // Test when the function raising the signal does not save the link register
 // RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] 64699d3 - [libunwind][AIX] Fix the wrong traceback from signal handler (#101069)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

Author: Xing Xue
Date: 2024-07-31T08:55:15+02:00
New Revision: 64699d328a39d3a2cc7c043768111794782ef9f0

URL: 
https://github.com/llvm/llvm-project/commit/64699d328a39d3a2cc7c043768111794782ef9f0
DIFF: 
https://github.com/llvm/llvm-project/commit/64699d328a39d3a2cc7c043768111794782ef9f0.diff

LOG: [libunwind][AIX] Fix the wrong traceback from signal handler (#101069)

Patch [llvm#92291](https://github.com/llvm/llvm-project/pull/92291)
causes a wrong traceback from a signal handler on AIX because the AIX
unwinder uses the traceback table at the end of each function instead of
FDE/CIE for unwinding. This patch adds a condition to exclude traceback
table based unwinding from the code added by the patch.

(cherry picked from commit d90fa612604b49dfc81c3f42c106fab7401322ec)

Added: 


Modified: 
libunwind/src/UnwindCursor.hpp
libunwind/test/aix_signal_unwind.pass.sh.S

Removed: 




diff  --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 2ec60e4c123d5..758557337899e 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -2589,7 +2589,8 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool 
isReturnAddress) {
 --pc;
 #endif
 
-#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32))
+#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) &&
\
+!defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND)
   // In case of this is frame of signal handler, the IP saved in the signal
   // handler points to first non-executed instruction, while FDE/CIE expects IP
   // to be after the first non-executed instruction.

diff  --git a/libunwind/test/aix_signal_unwind.pass.sh.S 
b/libunwind/test/aix_signal_unwind.pass.sh.S
index 9ca18e9481f4f..a666577d095b1 100644
--- a/libunwind/test/aix_signal_unwind.pass.sh.S
+++ b/libunwind/test/aix_signal_unwind.pass.sh.S
@@ -10,7 +10,7 @@
 // a correct traceback when the function raising the signal does not save
 // the link register or does not store the stack back chain.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target=powerpc{{(64)?}}-ibm-aix{{.*}}
 
 // Test when the function raising the signal does not save the link register
 // RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] release/19.x: [libunwind][AIX] Fix the wrong traceback from signal handler (#101069) (PR #101182)

2024-07-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101182
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] release/19.x: [libunwind][AIX] Fix the wrong traceback from signal handler (#101069) (PR #101182)

2024-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libunwind

Author: None (llvmbot)


Changes

Backport d90fa61

Requested by: @daltenty

---
Full diff: https://github.com/llvm/llvm-project/pull/101182.diff


2 Files Affected:

- (modified) libunwind/src/UnwindCursor.hpp (+2-1) 
- (modified) libunwind/test/aix_signal_unwind.pass.sh.S (+1-1) 


``diff
diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 2ec60e4c123d5..758557337899e 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -2589,7 +2589,8 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool 
isReturnAddress) {
 --pc;
 #endif
 
-#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32))
+#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) &&
\
+!defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND)
   // In case of this is frame of signal handler, the IP saved in the signal
   // handler points to first non-executed instruction, while FDE/CIE expects IP
   // to be after the first non-executed instruction.
diff --git a/libunwind/test/aix_signal_unwind.pass.sh.S 
b/libunwind/test/aix_signal_unwind.pass.sh.S
index 9ca18e9481f4f..a666577d095b1 100644
--- a/libunwind/test/aix_signal_unwind.pass.sh.S
+++ b/libunwind/test/aix_signal_unwind.pass.sh.S
@@ -10,7 +10,7 @@
 // a correct traceback when the function raising the signal does not save
 // the link register or does not store the stack back chain.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target=powerpc{{(64)?}}-ibm-aix{{.*}}
 
 // Test when the function raising the signal does not save the link register
 // RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags}

``




https://github.com/llvm/llvm-project/pull/101182
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >