[llvm-branch-commits] [clang] release/19.x: [clang-format] Fix a regression in parsing `switch` in macro call (#114506) (PR #114640)

2024-11-02 Thread Björn Schäpers via llvm-branch-commits

https://github.com/HazardyKnusperkeks approved this pull request.


https://github.com/llvm/llvm-project/pull/114640
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)

2024-11-02 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114438

>From 1aaf29fc290cca84843f0ed1d4b9b9258b8daa36 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 31 Oct 2024 12:49:07 -0400
Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor
 existing attribute

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 79 +++
 .../annotate-kernel-features-hsa-call.ll  | 46 +--
 .../AMDGPU/attributor-loop-issue-58639.ll |  3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|  3 +-
 .../CodeGen/AMDGPU/propagate-waves-per-eu.ll  | 59 +++---
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |  9 ++-
 .../AMDGPU/uniform-work-group-multistep.ll|  3 +-
 .../uniform-work-group-recursion-test.ll  |  2 +-
 8 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7878e13cfd9bf3..5a84a874fcb811 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -198,6 +198,17 @@ class AMDGPUInformationCache : public InformationCache {
 return ST.getWavesPerEU(F, FlatWorkGroupSize);
   }
 
+  std::optional>
+  getWavesPerEUAttr(const Function &F) {
+auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
+   /*OnlyFirstRequired=*/true);
+if (Val && Val->second == 0) {
+  const GCNSubtarget &ST = TM.getSubtarget(F);
+  Val->second = ST.getMaxWavesPerEU();
+}
+return Val;
+  }
+
   std::pair
   getEffectiveWavesPerEU(const Function &F,
  std::pair WavesPerEU,
@@ -768,22 +779,6 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
-  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
- unsigned Max) {
-// Don't add the attribute if it's the implied default.
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
-  return ChangeStatus::UNCHANGED;
-
-Function *F = getAssociatedFunction();
-LLVMContext &Ctx = F->getContext();
-SmallString<10> Buffer;
-raw_svector_ostream OS(Buffer);
-OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
-return A.manifestAttrs(getIRPosition(),
-   {Attribute::get(Ctx, AttrName, OS.str())},
-   /*ForceReplace=*/true);
-  }
-
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -880,29 +875,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
   : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
 
-  bool isValidState() const override {
-return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
-  }
-
   void initialize(Attributor &A) override {
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast(A.getInfoCache());
 
-if (const auto *AssumedGroupSize = A.getAAFor(
-*this, IRPosition::function(*F), DepClassTy::REQUIRED);
-AssumedGroupSize->isValidState()) {
+auto TakeRange = [&](std::pair R) {
+  auto [Min, Max] = R;
+  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState RangeState(Range);
+  clampStateAndIndicateChange(this->getState(), RangeState);
+  indicateOptimisticFixpoint();
+};
 
-  unsigned Min, Max;
-  std::tie(Min, Max) = InfoCache.getWavesPerEU(
-  *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-   AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+std::pair MaxWavesPerEURange{
+1U, InfoCache.getMaxWavesPerEU(*F)};
 
-  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
-  intersectKnown(Range);
+// If the attribute exists, we will honor it if it is not the default.
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
+  if (*Attr != MaxWavesPerEURange) {
+TakeRange(*Attr);
+return;
+  }
 }
 
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
-  indicatePessimisticFixpoint();
+// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
+// calculation of waves per EU involves flat work group size, we can't
+// simply use an assumed flat work group size as a start point, because the
+// update of flat work group size is in an inverse direction of waves per
+// EU. However, we can still do something if it is an entry function. Since
+// an entry function is a terminal node, and flat work group size either
+// from attribute or default will be used anyway, we can take that value 
and
+// calculate the waves per EU based on it. This result can't be updated by
+// no means, but that could still allow us 

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)

2024-11-02 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114438

>From 013d7aeb4698b74af0b48ce74512d770d46cd81e Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 31 Oct 2024 12:49:07 -0400
Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor
 existing attribute

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 81 +++
 .../annotate-kernel-features-hsa-call.ll  | 46 ++-
 .../AMDGPU/attributor-loop-issue-58639.ll |  3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|  3 +-
 .../CodeGen/AMDGPU/propagate-waves-per-eu.ll  | 59 +++---
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |  9 ++-
 .../AMDGPU/uniform-work-group-multistep.ll|  3 +-
 .../uniform-work-group-recursion-test.ll  |  2 +-
 8 files changed, 113 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 376f2067582f32..7d51412730d4d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -201,6 +201,19 @@ class AMDGPUInformationCache : public InformationCache {
 return ST.getWavesPerEU(F, FlatWorkGroupSize);
   }
 
+  std::optional>
+  getWavesPerEUAttr(const Function &F) {
+auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
+   /*OnlyFirstRequired=*/true);
+if (!Val)
+  return std::nullopt;
+if (!Val->second) {
+  const GCNSubtarget &ST = TM.getSubtarget(F);
+  Val->second = ST.getMaxWavesPerEU();
+}
+return std::make_pair(Val->first, *(Val->second));
+  }
+
   std::pair
   getEffectiveWavesPerEU(const Function &F,
  std::pair WavesPerEU,
@@ -771,22 +784,6 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
-  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
- unsigned Max) {
-// Don't add the attribute if it's the implied default.
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
-  return ChangeStatus::UNCHANGED;
-
-Function *F = getAssociatedFunction();
-LLVMContext &Ctx = F->getContext();
-SmallString<10> Buffer;
-raw_svector_ostream OS(Buffer);
-OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
-return A.manifestAttrs(getIRPosition(),
-   {Attribute::get(Ctx, AttrName, OS.str())},
-   /*ForceReplace=*/true);
-  }
-
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -883,29 +880,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
   : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
 
-  bool isValidState() const override {
-return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
-  }
-
   void initialize(Attributor &A) override {
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast(A.getInfoCache());
 
-if (const auto *AssumedGroupSize = A.getAAFor(
-*this, IRPosition::function(*F), DepClassTy::REQUIRED);
-AssumedGroupSize->isValidState()) {
+auto TakeRange = [&](std::pair R) {
+  auto [Min, Max] = R;
+  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState RangeState(Range);
+  clampStateAndIndicateChange(this->getState(), RangeState);
+  indicateOptimisticFixpoint();
+};
 
-  unsigned Min, Max;
-  std::tie(Min, Max) = InfoCache.getWavesPerEU(
-  *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-   AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+std::pair MaxWavesPerEURange{
+1U, InfoCache.getMaxWavesPerEU(*F)};
 
-  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
-  intersectKnown(Range);
+// If the attribute exists, we will honor it if it is not the default.
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
+  if (*Attr != MaxWavesPerEURange) {
+TakeRange(*Attr);
+return;
+  }
 }
 
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
-  indicatePessimisticFixpoint();
+// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
+// calculation of waves per EU involves flat work group size, we can't
+// simply use an assumed flat work group size as a start point, because the
+// update of flat work group size is in an inverse direction of waves per
+// EU. However, we can still do something if it is an entry function. Since
+// an entry function is a terminal node, and flat work group size either
+// from attribute or default will be used anyway, we can take that value 
and
+// calculate the waves per EU based on it. This resu

[llvm-branch-commits] [llvm] [CodeExtractor] Optionally keep code in original function. (PR #114669)

2024-11-02 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur created 
https://github.com/llvm/llvm-project/pull/114669

When extracting a region into a new function, optionally allow cloning basic 
blocks and instructions into the extracted function instead of moving them. This 
keeps the original code in the original function such that it can still be 
referenced -- and branched to -- in the original function.

The motivation is the use of CodeExtractor in the OpenMPIRBuilder. The 
implementation of createParallel first emits the parallel region into the 
lexical function, then uses CodeExtractor to outline that region into a new 
function. The problem here is that Clang's code generator will reference some 
basic blocks for code inside as well as outside the region. This includes some 
special-purpose blocks (EHResumeBlock, TerminateLandingPad, TerminateHandler, 
UnreachableBlock, ...) and cleanup/dtor code that is re-used from multiple 
scopes (see test case extract-block-cleanup.ll). Moving these blocks into a 
different function will result in malformed IR. The KeepOldBlocks option will 
instead clone the outlined code into a new function keeping the auxiliary code 
intact, relying on later DCE to remove code that indeed has become unreachable. 
Additionally, this code could also be used as a fallback when 
threading/offloading is disabled via environment option.

Use of KeepOldBlocks by OpenMPIRBuilder is not part of this patch. For testing, 
we extend llvm-extract allowing the use of this option and thus making it more 
powerful.

Originally submitted as https://reviews.llvm.org/D115216

>From eb8c6a3ba6e3bc9ddd1ac9579ff665f74034051f Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Mon, 6 Dec 2021 22:36:52 -0600
Subject: [PATCH 1/5] KeepOldBlocks

---
 llvm/include/llvm/Transforms/IPO.h|   4 +-
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  14 +-
 .../llvm/Transforms/Utils/CodeExtractor.h |  15 +-
 llvm/lib/Transforms/IPO/BlockExtractor.cpp|  34 +-
 llvm/lib/Transforms/Utils/CloneFunction.cpp   |  12 +-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 326 ++
 .../llvm-extract/extract-block-cleanup.ll | 116 +++
 .../extract-block-multiple-exits.ll   | 200 +++
 .../tools/llvm-extract/extract-block-sink.ll  |  67 
 llvm/test/tools/llvm-extract/extract-block.ll |  10 +-
 .../extract-blocks-with-groups.ll |  24 +-
 llvm/tools/llvm-extract/llvm-extract.cpp  |  20 +-
 12 files changed, 737 insertions(+), 105 deletions(-)
 create mode 100644 llvm/test/tools/llvm-extract/extract-block-cleanup.ll
 create mode 100644 llvm/test/tools/llvm-extract/extract-block-multiple-exits.ll
 create mode 100644 llvm/test/tools/llvm-extract/extract-block-sink.ll

diff --git a/llvm/include/llvm/Transforms/IPO.h 
b/llvm/include/llvm/Transforms/IPO.h
index 67b9a93c47b215..d11c27304815db 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -192,11 +192,11 @@ Pass *createSingleLoopExtractorPass();
 ModulePass *createBlockExtractorPass();
 ModulePass *
 createBlockExtractorPass(const SmallVectorImpl &BlocksToExtract,
- bool EraseFunctions);
+ bool EraseFunctions, bool KeepOldBlocks = false);
 ModulePass *
 createBlockExtractorPass(const SmallVectorImpl>
  &GroupsOfBlocksToExtract,
- bool EraseFunctions);
+ bool EraseFunctions, bool KeepOldBlocks = false);
 
 /// createStripDeadPrototypesPass - This pass removes any function declarations
 /// (prototypes) that are not used.
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 5a1f322b20544c..cff5e6bc8daeaf 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -114,10 +114,16 @@ struct ClonedCodeInfo {
 /// If you would like to collect additional information about the cloned
 /// function, you can specify a ClonedCodeInfo object with the optional fifth
 /// parameter.
-BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
-const Twine &NameSuffix = "", Function *F = 
nullptr,
-ClonedCodeInfo *CodeInfo = nullptr,
-DebugInfoFinder *DIFinder = nullptr);
+///
+/// If you would like to clone only a subset of instructions in the basic 
block,
+/// you can specify a callback returning true only for those instructions that
+/// are to be cloned.
+BasicBlock *
+CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
+const Twine &NameSuffix = "", Function *F = nullptr,
+ClonedCodeInfo *CodeInfo = nullptr,
+DebugInfoFinder *DIFinder = nullptr,
+function_ref InstSelect = {});
 
 /// Return a copy of the specified function and add it to that
 /// function's module.  Also, any references speci