date:20241209

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-arm

Author: Pengcheng Wang (wangpc-pp)


Changes

To reduce compile time.

This is a follow-up of #118787.


---
Full diff: https://github.com/llvm/llvm-project/pull/119194.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/MachineLICM.cpp (+3-1) 
- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/MachineSink.cpp (+1-1) 
- (modified) llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (+1-2) 
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+2-2) 


``diff
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index d21059189b1844..8aaa5605f28b70 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -123,6 +123,7 @@ namespace {
 const TargetRegisterInfo *TRI = nullptr;
 const MachineFrameInfo *MFI = nullptr;
 MachineRegisterInfo *MRI = nullptr;
+RegisterClassInfo RegClassInfo;
 TargetSchedModel SchedModel;
 bool PreRegAlloc = false;
 bool HasProfileData = false;
@@ -389,6 +390,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
   MFI = &MF.getFrameInfo();
   MRI = &MF.getRegInfo();
   SchedModel.init(&ST);
+  RegClassInfo.runOnMachineFunction(MF);
 
   HasProfileData = MF.getFunction().hasProfileData();
 
@@ -405,7 +407,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
 std::fill(RegPressure.begin(), RegPressure.end(), 0);
 RegLimit.resize(NumRPS);
 for (unsigned i = 0, e = NumRPS; i != e; ++i)
-  RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
+  RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
   }
 
   if (HoistConstLoads)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp 
b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 3ee0ba1fea5079..e2bbebfc5f5462 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1326,7 +1326,7 @@ class HighRegisterPressureDetector {
   // Calculate the upper limit of each pressure set
   void computePressureSetLimit(const RegisterClassInfo &RCI) {
 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
-  PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);
+  PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet);
   }
 
   // There are two patterns of last-use.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 7d0bedab7cdabc..d407d8a965ea67 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1094,7 +1094,7 @@ bool MachineSinking::registerPressureSetExceedsLimit(
   std::vector BBRegisterPressure = getBBRegisterPressure(MBB);
   for (; *PS != -1; PS++)
 if (Weight + BBRegisterPressure[*PS] >=
-TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
+RegClassInfo.getRegPressureSetLimit(*PS))
   return true;
   return false;
 }
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 
b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6b37dd9161685..8673deddb7057f 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6936,7 +6936,6 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
   RegClassInfo.runOnMachineFunction(*MF);
   RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
  EndLoop->getParent()->end(), false, false);
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
 
@@ -6979,7 +6978,7 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
 
   auto &P = RPTracker.getPressure().MaxSetPressure;
   for (unsigned I = 0, E = P.size(); I < E; ++I)
-if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
+if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
   return true;
 }
   return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 44f6db5061e21a..fa45a7fb7fabe6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure(
   };
 
   // For now we only care about float and double type fma.
-  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
-  *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+  unsigned VSSRCLimit =
+  RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
 
   // Only reduce register pressure when pressure is high.
   return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >

``




https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Nikolas Klauser via llvm-branch-commits



@@ -587,42 +587,48 @@ template 
 
 */
 
-#include <__config>
-
-#include <__atomic/aliases.h>
-#include <__atomic/atomic.h>
-#include <__atomic/atomic_base.h>
-#include <__atomic/atomic_flag.h>
-#include <__atomic/atomic_init.h>
-#include <__atomic/atomic_lock_free.h>
-#include <__atomic/atomic_sync.h>
-#include <__atomic/check_memory_order.h>
-#include <__atomic/contention_t.h>
-#include <__atomic/cxx_atomic_impl.h>
-#include <__atomic/fence.h>
-#include <__atomic/is_always_lock_free.h>
-#include <__atomic/kill_dependency.h>
-#include <__atomic/memory_order.h>
-#include 
-
-#if _LIBCPP_STD_VER >= 20
-#  include <__atomic/atomic_ref.h>
-#endif
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#  pragma GCC system_header
-#endif
-
-#if !_LIBCPP_HAS_ATOMIC_HEADER
-#  error <atomic> is not implemented
-#endif
-
-#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
-#  include 
-#  include 
-#  include 
-#  include 
-#  include 
-#endif
+#include <__configuration/cxx03.h>
+
+#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_USE_CXX03_HEADERS)
+#  include <__cxx03/algorithm>

philnik777 wrote:

I've checked the top level headers manually and fixed a few more.

https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Pengcheng Wang via llvm-branch-commits


wangpc-pp wrote:

> Why do we need #118787 if we can just update the passes to use 
> RegisterClassInfo?

Because the APIs are messy and confusing, we don't know if there will be some 
future users that use the raw limit directly.

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-powerpc

Author: Pengcheng Wang (wangpc-pp)


Changes

To reduce compile time.

This is a follow-up of #118787.


---
Full diff: https://github.com/llvm/llvm-project/pull/119194.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/MachineLICM.cpp (+3-1) 
- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/MachineSink.cpp (+1-1) 
- (modified) llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (+1-2) 
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+2-2) 


``diff
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index d21059189b1844..8aaa5605f28b70 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -123,6 +123,7 @@ namespace {
 const TargetRegisterInfo *TRI = nullptr;
 const MachineFrameInfo *MFI = nullptr;
 MachineRegisterInfo *MRI = nullptr;
+RegisterClassInfo RegClassInfo;
 TargetSchedModel SchedModel;
 bool PreRegAlloc = false;
 bool HasProfileData = false;
@@ -389,6 +390,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
   MFI = &MF.getFrameInfo();
   MRI = &MF.getRegInfo();
   SchedModel.init(&ST);
+  RegClassInfo.runOnMachineFunction(MF);
 
   HasProfileData = MF.getFunction().hasProfileData();
 
@@ -405,7 +407,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
 std::fill(RegPressure.begin(), RegPressure.end(), 0);
 RegLimit.resize(NumRPS);
 for (unsigned i = 0, e = NumRPS; i != e; ++i)
-  RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
+  RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
   }
 
   if (HoistConstLoads)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp 
b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 3ee0ba1fea5079..e2bbebfc5f5462 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1326,7 +1326,7 @@ class HighRegisterPressureDetector {
   // Calculate the upper limit of each pressure set
   void computePressureSetLimit(const RegisterClassInfo &RCI) {
 for (unsigned PSet = 0; PSet < PSetNum; PSet++)
-  PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet);
+  PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet);
   }
 
   // There are two patterns of last-use.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 7d0bedab7cdabc..d407d8a965ea67 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1094,7 +1094,7 @@ bool MachineSinking::registerPressureSetExceedsLimit(
   std::vector BBRegisterPressure = getBBRegisterPressure(MBB);
   for (; *PS != -1; PS++)
 if (Weight + BBRegisterPressure[*PS] >=
-TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
+RegClassInfo.getRegPressureSetLimit(*PS))
   return true;
   return false;
 }
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 
b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6b37dd9161685..8673deddb7057f 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6936,7 +6936,6 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
   RegClassInfo.runOnMachineFunction(*MF);
   RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
  EndLoop->getParent()->end(), false, false);
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
 
@@ -6979,7 +6978,7 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
 
   auto &P = RPTracker.getPressure().MaxSetPressure;
   for (unsigned I = 0, E = P.size(); I < E; ++I)
-if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
+if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
   return true;
 }
   return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 44f6db5061e21a..fa45a7fb7fabe6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure(
   };
 
   // For now we only care about float and double type fma.
-  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
-  *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+  unsigned VSSRCLimit =
+  RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
 
   // Only reduce register pressure when pressure is high.
   return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >

``




https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Nikolas Klauser via llvm-branch-commits



@@ -1827,232 +1827,147 @@ template 
 
 */
 
-#include <__config>
-
-#include <__algorithm/adjacent_find.h>
-#include <__algorithm/all_of.h>
-#include <__algorithm/any_of.h>
-#include <__algorithm/binary_search.h>
-#include <__algorithm/copy.h>
-#include <__algorithm/copy_backward.h>
-#include <__algorithm/copy_if.h>
-#include <__algorithm/copy_n.h>
-#include <__algorithm/count.h>
-#include <__algorithm/count_if.h>
-#include <__algorithm/equal.h>
-#include <__algorithm/equal_range.h>
-#include <__algorithm/fill.h>
-#include <__algorithm/fill_n.h>
-#include <__algorithm/find.h>
-#include <__algorithm/find_end.h>
-#include <__algorithm/find_first_of.h>
-#include <__algorithm/find_if.h>
-#include <__algorithm/find_if_not.h>
-#include <__algorithm/for_each.h>
-#include <__algorithm/generate.h>
-#include <__algorithm/generate_n.h>
-#include <__algorithm/includes.h>
-#include <__algorithm/inplace_merge.h>
-#include <__algorithm/is_heap.h>
-#include <__algorithm/is_heap_until.h>
-#include <__algorithm/is_partitioned.h>
-#include <__algorithm/is_permutation.h>
-#include <__algorithm/is_sorted.h>
-#include <__algorithm/is_sorted_until.h>
-#include <__algorithm/iter_swap.h>
-#include <__algorithm/lexicographical_compare.h>
-#include <__algorithm/lower_bound.h>
-#include <__algorithm/make_heap.h>
-#include <__algorithm/max.h>
-#include <__algorithm/max_element.h>
-#include <__algorithm/merge.h>
-#include <__algorithm/min.h>
-#include <__algorithm/min_element.h>
-#include <__algorithm/minmax.h>
-#include <__algorithm/minmax_element.h>
-#include <__algorithm/mismatch.h>
-#include <__algorithm/move.h>
-#include <__algorithm/move_backward.h>
-#include <__algorithm/next_permutation.h>
-#include <__algorithm/none_of.h>
-#include <__algorithm/nth_element.h>
-#include <__algorithm/partial_sort.h>
-#include <__algorithm/partial_sort_copy.h>
-#include <__algorithm/partition.h>
-#include <__algorithm/partition_copy.h>
-#include <__algorithm/partition_point.h>
-#include <__algorithm/pop_heap.h>
-#include <__algorithm/prev_permutation.h>
-#include <__algorithm/push_heap.h>
-#include <__algorithm/remove.h>
-#include <__algorithm/remove_copy.h>
-#include <__algorithm/remove_copy_if.h>
-#include <__algorithm/remove_if.h>
-#include <__algorithm/replace.h>
-#include <__algorithm/replace_copy.h>
-#include <__algorithm/replace_copy_if.h>
-#include <__algorithm/replace_if.h>
-#include <__algorithm/reverse.h>
-#include <__algorithm/reverse_copy.h>
-#include <__algorithm/rotate.h>
-#include <__algorithm/rotate_copy.h>
-#include <__algorithm/search.h>
-#include <__algorithm/search_n.h>
-#include <__algorithm/set_difference.h>
-#include <__algorithm/set_intersection.h>
-#include <__algorithm/set_symmetric_difference.h>
-#include <__algorithm/set_union.h>
-#include <__algorithm/shuffle.h>
-#include <__algorithm/sort.h>
-#include <__algorithm/sort_heap.h>
-#include <__algorithm/stable_partition.h>
-#include <__algorithm/stable_sort.h>
-#include <__algorithm/swap_ranges.h>
-#include <__algorithm/transform.h>
-#include <__algorithm/unique.h>
-#include <__algorithm/unique_copy.h>
-#include <__algorithm/upper_bound.h>
-
-#if _LIBCPP_STD_VER >= 17
-#  include <__algorithm/clamp.h>
-#  include <__algorithm/for_each_n.h>
-#  include <__algorithm/pstl.h>
-#  include <__algorithm/sample.h>
-#endif // _LIBCPP_STD_VER >= 17
-
-#if _LIBCPP_STD_VER >= 20
-#  include <__algorithm/in_found_result.h>
-#  include <__algorithm/in_fun_result.h>
-#  include <__algorithm/in_in_out_result.h>
-#  include <__algorithm/in_in_result.h>
-#  include <__algorithm/in_out_out_result.h>
-#  include <__algorithm/in_out_result.h>
-#  include <__algorithm/lexicographical_compare_three_way.h>
-#  include <__algorithm/min_max_result.h>
-#  include <__algorithm/ranges_adjacent_find.h>
-#  include <__algorithm/ranges_all_of.h>
-#  include <__algorithm/ranges_any_of.h>
-#  include <__algorithm/ranges_binary_search.h>
-#  include <__algorithm/ranges_clamp.h>
-#  include <__algorithm/ranges_contains.h>
-#  include <__algorithm/ranges_copy.h>
-#  include <__algorithm/ranges_copy_backward.h>
-#  include <__algorithm/ranges_copy_if.h>
-#  include <__algorithm/ranges_copy_n.h>
-#  include <__algorithm/ranges_count.h>
-#  include <__algorithm/ranges_count_if.h>
-#  include <__algorithm/ranges_equal.h>
-#  include <__algorithm/ranges_equal_range.h>
-#  include <__algorithm/ranges_fill.h>
-#  include <__algorithm/ranges_fill_n.h>
-#  include <__algorithm/ranges_find.h>
-#  include <__algorithm/ranges_find_end.h>
-#  include <__algorithm/ranges_find_first_of.h>
-#  include <__algorithm/ranges_find_if.h>
-#  include <__algorithm/ranges_find_if_not.h>
-#  include <__algorithm/ranges_for_each.h>
-#  include <__algorithm/ranges_for_each_n.h>
-#  include <__algorithm/ranges_generate.h>
-#  include <__algorithm/ranges_generate_n.h>
-#  include <__algorithm/ranges_includes.h>
-#  include <__algorithm/ranges_inplace_merge.h>
-#  include <__algorithm/ranges_is_heap.h>
-#  include <__a

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -1416,14 +1466,14 @@ void VPlanTransforms::addActiveLaneMask(
   auto *FoundWidenCanonicalIVUser =
   find_if(Plan.getCanonicalIV()->users(),
   [](VPUser *U) { return isa(U); });
-  assert(FoundWidenCanonicalIVUser &&
+  assert(FoundWidenCanonicalIVUser && *FoundWidenCanonicalIVUser &&

MacDue wrote:


This looks a little odd. Doesn't `find_if` return an iterator? 
```suggestion
  auto IVUsers = Plan.getCanonicalIV()->users();
  /// ...
  assert(FoundWidenCanonicalIVUser != IVUsers.end() && "Must have widened 
canonical IV when tail folding!"); 
```

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -77,9 +77,13 @@ struct VPlanTransforms {
   /// creation) and instead it is handled using active-lane-mask. \p
   /// DataAndControlFlowWithoutRuntimeCheck implies \p
   /// UseActiveLaneMaskForControlFlow.
+  /// RTChecks refers to the pointer pairs that need aliasing elements to be
+  /// masked off each loop iteration.

MacDue wrote:

No docs for PSE?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -3073,6 +3075,56 @@ struct VPWidenStoreEVLRecipe final : public 
VPWidenMemoryRecipe {
   }
 };
 
+// Given a pointer A that is being stored to, and pointer B that is being
+// read from, both with unknown lengths, create a mask that disables
+// elements which could overlap across a loop iteration. For example, if A
+// is X and B is X + 2 with VF being 4, only the final two elements of the
+// loaded vector can be stored since they don't overlap with the stored
+// vector. %b.vec = load %b ; = [s, t, u, v]
+// [...]
+// store %a, %b.vec ; only u and v can be stored as their addresses don't
+// overlap with %a + (VF - 1)

MacDue wrote:

This is specifically RAW? Of something like:
```
store A[x]
load A[x + 2] 
```
Perhaps I'm muddled on what "final two elements" means, but isn't it the store 
of the first two elements that is valid (so it won't overwrite the elements for the load)?



https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -427,6 +428,29 @@ Value *VPInstruction::generate(VPTransformState &State) {
{PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, Name);
   }
+  // Count the number of bits set in each lane and reduce the result to a 
scalar
+  case VPInstruction::PopCount: {
+Value *Op = State.get(getOperand(0));
+auto *VT = Op->getType();

MacDue wrote:

nit: Spell out type if it's not present on the RHS.
```suggestion
Type *VT = Op->getType();
```

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -1300,14 +1301,38 @@ static VPActiveLaneMaskPHIRecipe 
*addVPLaneMaskPhiAndUpdateExitBranch(
   cast(CanonicalIVPHI->getBackedgeValue());
   // TODO: Check if dropping the flags is needed if
   // !DataAndControlFlowWithoutRuntimeCheck.
+  VPValue *IncVal = CanonicalIVIncrement->getOperand(1);
+  assert(IncVal != CanonicalIVPHI && "Unexpected operand order");
+
   CanonicalIVIncrement->dropPoisonGeneratingFlags();
   DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
+
   // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
   // we have to take unrolling into account. Each part needs to start at
   //   Part * VF
   auto *VecPreheader = Plan.getVectorPreheader();
   VPBuilder Builder(VecPreheader);
 
+  // Create an alias mask for each possibly-aliasing pointer pair. If there
+  // are multiple they are combined together with ANDs.
+  VPValue *AliasMask = nullptr;
+
+  for (auto C : RTChecks) {
+// FIXME: How to pass this info back?
+//HasAliasMask = true;

MacDue wrote:

This FIXME is a little unclear. Does it mean `HasAliasMask` should be set here 
but it's not?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -195,6 +195,13 @@ enum class TailFoldingStyle {
   DataWithEVL,
 };
 
+enum class RTCheckStyle {
+  /// Branch to scalar loop if checks fails at runtime.
+  ScalarFallback,
+  /// Form a mask based on elements which won't be a WAR or RAW hazard

MacDue wrote:

ultra nit: One of these comments ends with a full-stop and the other does not.
```suggestion
  /// Branch to scalar loop if checks fails at runtime.
  ScalarFallback,
  /// Form a mask based on elements which won't be a WAR or RAW hazard.
```

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits


https://github.com/MacDue edited 
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -1331,14 +1356,37 @@ static VPActiveLaneMaskPHIRecipe 
*addVPLaneMaskPhiAndUpdateExitBranch(
   "index.part.next");
 
   // Create the active lane mask instruction in the VPlan preheader.
-  auto *EntryALM =
+  VPValue *Mask =
   Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
DL, "active.lane.mask.entry");
 
   // Now create the ActiveLaneMaskPhi recipe in the main loop using the
   // preheader ActiveLaneMask instruction.
-  auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
+  auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(Mask, DebugLoc());
   LaneMaskPhi->insertAfter(CanonicalIVPHI);
+  VPValue *LaneMask = LaneMaskPhi;
+  if (AliasMask) {
+// Increment phi by correct amount.
+Builder.setInsertPoint(CanonicalIVIncrement);
+
+VPValue *IncrementBy = Builder.createNaryOp(VPInstruction::PopCount,
+{AliasMask}, DL, "popcount");
+Type *IVType = CanonicalIVPHI->getScalarType();
+
+if (IVType->getScalarSizeInBits() < 64) {
+  auto *Cast =
+  new VPScalarCastRecipe(Instruction::Trunc, IncrementBy, IVType);
+  Cast->insertAfter(IncrementBy->getDefiningRecipe());
+  IncrementBy = Cast;
+}
+CanonicalIVIncrement->setOperand(1, IncrementBy);
+
+// And the alias mask so the iteration only processes non-aliasing lanes
+Builder.setInsertPoint(CanonicalIVPHI->getParent(),
+   CanonicalIVPHI->getParent()->getFirstNonPhi());
+LaneMask = Builder.createNaryOp(Instruction::BinaryOps::And,
+{LaneMaskPhi, AliasMask}, DL);

MacDue wrote:

Do we know this AND won't be all-false?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream 
&O, const Twine &Indent,
 }
 #endif
 
+void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
+  IRBuilderBase Builder = State.Builder;
+  Value *SinkValue = State.get(getSinkValue(), true);
+  Value *SourceValue = State.get(getSourceValue(), true);
+
+  auto *Type = SinkValue->getType();
+  Value *AliasMask = Builder.CreateIntrinsic(
+  Intrinsic::experimental_get_alias_lane_mask,
+  {VectorType::get(Builder.getInt1Ty(), State.VF), Type,
+   Builder.getInt64Ty()},
+  {SourceValue, SinkValue, Builder.getInt64(getAccessedElementSize()),
+   Builder.getInt1(WriteAfterRead)},
+  nullptr, "alias.lane.mask");
+  State.set(this, AliasMask, /*IsScalar=*/false);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPAliasLaneMaskRecipe::print(raw_ostream &O, const Twine &Indent,
+  VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  getVPSingleValue()->printAsOperand(O, SlotTracker);
+  O << " = alias lane mask ";

MacDue wrote:

nit: These seem more commonly printed in all caps with hyphens. 
```suggestion
  O << " = ALIAS-LANE-MASK ";
```

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -952,7 +952,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value 
*VectorTripCountV,
 
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   // FIXME: Model VF * UF computation completely in VPlan.
-  assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");

MacDue wrote:

How does removing this assert relate to these changes? 

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Eliminate DISubprogram set from BuildDebugInfoMDMap (PR #118625)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118625

>From 160c6fe1ef922f9edf9ff1f5ac6610444a4f6711 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Sat, 14 Sep 2024 16:02:51 -0700
Subject: [PATCH] [NFC][Utils] Eliminate DISubprogram set from
 BuildDebugInfoMDMap

Summary:
Previously, we'd add all SPs distinct from the cloned one into a set.
Then when cloning a local scope we'd check if it's from one of those
'distinct' SPs by checking if it's in the set. We don't need to do that.
We can just check against the cloned SP directly and drop the set.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118625, branch: 
users/artempyanykh/fast-coro-upstream/6
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index cf4b1c7a045e05..34400d45aa6e72 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -172,18 +172,15 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD,
 };
 
 // Avoid cloning types, compile units, and (other) subprograms.
-SmallPtrSet MappedToSelfSPs;
 for (DISubprogram *ISP : DIFinder.subprograms()) {
-  if (ISP != SPClonedWithinModule) {
+  if (ISP != SPClonedWithinModule)
 mapToSelfIfNew(ISP);
-MappedToSelfSPs.insert(ISP);
-  }
 }
 
 // If a subprogram isn't going to be cloned skip its lexical blocks as 
well.
 for (DIScope *S : DIFinder.scopes()) {
   auto *LScope = dyn_cast(S);
-  if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+  if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
 mapToSelfIfNew(S);
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Utils] Identity map global debug info on first use in CloneFunction* (PR #118627)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118627

>From 6ee8c03c37cc47c2ae21532708982d94a9c71530 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Sun, 15 Sep 2024 04:39:20 -0700
Subject: [PATCH] [Utils] Identity map global debug info on first use in
 CloneFunction*

Summary:
To avoid cloning 'global' debug info, CloneFunction implementation used to 
eagerly identity map a known
subset of global debug info into ValueMap's MD map. In larger modules with 
meaningful volume of
debug info this gets very expensive.

By passing such global metadata via an IdentityMD set for the ValueMapper to 
map on first use, we
get several benefits:

1. Mapping metadata is not cheap, particularly because of tracking. When 
cloning a Function we
identity map lots of global module-level metadata to avoid cloning it, while 
only a fraction of it
is actually used by the function. Mapping on first use is a lot faster for 
modules with meaningful
amount of debug info.

2. Eagerly identity mapping metadata makes it harder to cache module-level data 
(e.g. a set of
metadata nodes in a \a DICompileUnit). With this patch we can cache certain 
module-level metadata
calculations to speed things up further.

Anecdata from compiling a sample cpp file with full debug info shows that this 
moderately speeds up
CoroSplitPass which is one of the heavier users of cloning:

|                 | Baseline | IdentityMD set |
|-----------------+----------+----------------|
| CoroSplitPass   | 306ms    | 221ms          |
| CoroCloner      | 101ms    | 72ms           |
|-----------------+----------+----------------|
| Speed up        | 1x       | 1.4x           |

Test Plan:
ninja check-llvm-unit
ninja check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118627, branch: 
users/artempyanykh/fast-coro-upstream/8
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  | 19 +++---
 .../llvm/Transforms/Utils/ValueMapper.h   | 67 ++-
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 59 
 llvm/lib/Transforms/Utils/ValueMapper.cpp | 19 --
 4 files changed, 103 insertions(+), 61 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 50518c746d11ca..9b256f9b4d6890 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -192,7 +192,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const 
Function *OldFunc,
 void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
ValueMapTypeRemapper *TypeMapper = nullptr,
-   ValueMaterializer *Materializer = nullptr);
+   ValueMaterializer *Materializer = nullptr,
+   const MetadataSetTy *IdentityMD = nullptr);
 
 /// Clone OldFunc's body into NewFunc.
 void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
@@ -201,7 +202,8 @@ void CloneFunctionBodyInto(Function *NewFunc, const 
Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr,
ValueMapTypeRemapper *TypeMapper = nullptr,
-   ValueMaterializer *Materializer = nullptr);
+   ValueMaterializer *Materializer = nullptr,
+   const MetadataSetTy *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
@@ -241,13 +243,12 @@ DISubprogram *CollectDebugInfoForCloning(const Function 
&F,
  CloneFunctionChangeType Changes,
  DebugInfoFinder &DIFinder);
 
-/// Build a map of debug info to use during Metadata cloning.
-/// Returns true if cloning would need module level changes and false if there
-/// would only be local changes.
-bool BuildDebugInfoMDMap(DenseMap &MD,
- CloneFunctionChangeType Changes,
- DebugInfoFinder &DIFinder,
- DISubprogram *SPClonedWithinModule);
+/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
+/// needs to be identity mapped during Metadata cloning.
+void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
+CloneFunctionChangeType Changes,
+DebugInfoFinder &DIFinder,
+DISubprogram *SPClonedWithinModule);
 
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h 
b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 743cfeb7ef3

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118624

>From d5ed40564f2259c84b657997afbec238f81a2312 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Thu, 12 Sep 2024 15:50:25 -0700
Subject: [PATCH] [NFC][Utils] Extract CloneFunctionBodyInto from
 CloneFunctionInto

Summary:
This and previously extracted `CloneFunction*Into` functions will be used in 
later diffs.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118624, branch: 
users/artempyanykh/fast-coro-upstream/5
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 34 ---
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 96 +++-
 2 files changed, 76 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 9a574fc4e4c08e..50518c746d11ca 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -194,6 +194,15 @@ void CloneFunctionMetadataInto(Function *NewFunc, const 
Function *OldFunc,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's body into NewFunc.
+void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
+   ValueToValueMapTy &VMap, RemapFlags RemapFlag,
+   SmallVectorImpl &Returns,
+   const char *NameSuffix = "",
+   ClonedCodeInfo *CodeInfo = nullptr,
+   ValueMapTypeRemapper *TypeMapper = nullptr,
+   ValueMaterializer *Materializer = nullptr);
+
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool 
ModuleLevelChanges,
@@ -214,7 +223,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const 
Function *OldFunc,
 ///
 void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap, bool 
ModuleLevelChanges,
-   SmallVectorImpl &Returns,
+   SmallVectorImpl &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
 
@@ -361,32 +370,31 @@ void updateProfileCallee(
 /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
 /// basic blocks and extract their scope. These are candidates for duplication
 /// when cloning.
-void identifyNoAliasScopesToClone(
-ArrayRef BBs, SmallVectorImpl &NoAliasDeclScopes);
+void identifyNoAliasScopesToClone(ArrayRef BBs,
+  SmallVectorImpl 
&NoAliasDeclScopes);
 
 /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
 /// instruction range and extract their scope. These are candidates for
 /// duplication when cloning.
-void identifyNoAliasScopesToClone(
-BasicBlock::iterator Start, BasicBlock::iterator End,
-SmallVectorImpl &NoAliasDeclScopes);
+void identifyNoAliasScopesToClone(BasicBlock::iterator Start,
+  BasicBlock::iterator End,
+  SmallVectorImpl 
&NoAliasDeclScopes);
 
 /// Duplicate the specified list of noalias decl scopes.
 /// The 'Ext' string is added as an extension to the name.
 /// Afterwards, the ClonedScopes contains the mapping of the original scope
 /// MDNode onto the cloned scope.
 /// Be aware that the cloned scopes are still part of the original scope 
domain.
-void cloneNoAliasScopes(
-ArrayRef NoAliasDeclScopes,
-DenseMap &ClonedScopes,
-StringRef Ext, LLVMContext &Context);
+void cloneNoAliasScopes(ArrayRef NoAliasDeclScopes,
+DenseMap &ClonedScopes,
+StringRef Ext, LLVMContext &Context);
 
 /// Adapt the metadata for the specified instruction according to the
 /// provided mapping. This is normally used after cloning an instruction, when
 /// some noalias scopes needed to be cloned.
-void adaptNoAliasScopes(
-llvm::Instruction *I, const DenseMap &ClonedScopes,
-LLVMContext &Context);
+void adaptNoAliasScopes(llvm::Instruction *I,
+const DenseMap &ClonedScopes,
+LLVMContext &Context);
 
 /// Clone the specified noalias decl scopes. Then adapt all instructions in the
 /// NewBlocks basicblocks to the cloned versions.
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index c967e78123af1f..cf4b1c7a045e05 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -216,6 +216,59 @@ void llvm::CloneFunctionMetadataInto(Function *NewFunc, 
const

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118623

>From 8ce25caffaeb4cd3e361184eeea8d9ae5675a6be Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Thu, 12 Sep 2024 15:35:38 -0700
Subject: [PATCH] [NFC][Utils] Extract CloneFunctionMetadataInto from
 CloneFunctionInto

Summary:
The new API expects the caller to populate the VMap. We need it this way
for a subsequent change around coroutine cloning.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118623, branch: 
users/artempyanykh/fast-coro-upstream/4
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 12 +
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 28 +---
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h 
b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 7858c9d9def0da..9a574fc4e4c08e 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -182,6 +182,18 @@ void CloneFunctionAttributesInto(Function *NewFunc, const 
Function *OldFunc,
  ValueMapTypeRemapper *TypeMapper = nullptr,
  ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's metadata into NewFunc.
+///
+/// The caller is expected to populate \p VMap beforehand and set an 
appropriate
+/// \p RemapFlag.
+///
+/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a 
different
+/// module. Use CloneFunctionInto for that behavior.
+void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
+   ValueToValueMapTy &VMap, RemapFlags RemapFlag,
+   ValueMapTypeRemapper *TypeMapper = nullptr,
+   ValueMaterializer *Materializer = nullptr);
+
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool 
ModuleLevelChanges,
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp 
b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 6dc5f601b7fcaa..c967e78123af1f 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -200,6 +200,22 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD,
   return ModuleLevelChanges;
 }
 
+void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function 
*OldFunc,
+ ValueToValueMapTy &VMap,
+ RemapFlags RemapFlag,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+  // Duplicate the metadata that is attached to the cloned function.
+  // Subprograms/CUs/types that were already mapped to themselves won't be
+  // duplicated.
+  SmallVector, 1> MDs;
+  OldFunc->getAllMetadata(MDs);
+  for (auto MD : MDs) {
+NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
+TypeMapper, Materializer));
+  }
+}
+
 // Clone OldFunc into NewFunc, transforming the old arguments into references 
to
 // VMap values.
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
@@ -262,15 +278,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const 
Function *OldFunc,
   BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
 
   const auto RemapFlag = ModuleLevelChanges ? RF_None : 
RF_NoModuleLevelChanges;
-  // Duplicate the metadata that is attached to the cloned function.
-  // Subprograms/CUs/types that were already mapped to themselves won't be
-  // duplicated.
-  SmallVector, 1> MDs;
-  OldFunc->getAllMetadata(MDs);
-  for (auto MD : MDs) {
-NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
-TypeMapper, Materializer));
-  }
+
+  CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
+Materializer);
 
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream 
&O, const Twine &Indent,
 }
 #endif
 
+void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
+  IRBuilderBase Builder = State.Builder;
+  Value *SinkValue = State.get(getSinkValue(), true);
+  Value *SourceValue = State.get(getSourceValue(), true);
+
+  auto *Type = SinkValue->getType();

MacDue wrote:

nit:
```suggestion
  Type *PtrType = SinkValue->getType();
```

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Prebuild a global debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118628

>From 77892ea9851cee330ac55eded4758b5c9ad4dbf8 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Tue, 19 Nov 2024 17:19:27 -0700
Subject: [PATCH] [Coro] Prebuild a global debug info set and share it between
 all coroutine clones

Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work 
priming DIFinder and building
a list of global debug info metadata. For programs compiled with full debug 
info this gets very
expensive.

This diff builds the data once and shares it between all clones.

Anecdata for a sample cpp source file compiled with full debug info:

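|                 | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------+----------+----------------+--------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms                     |
| CoroCloner      | 101ms    | 72ms           | 0.5ms                    |
| CollectGlobalDI | -        | -              | 63ms                     |
|-----------------+----------+----------------+--------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x                     |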

Note that CollectGlobalDI happens once *per coroutine* rather than per clone.

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample internal source file, checked time trace output for scope 
timings.

stack-info: PR: https://github.com/llvm/llvm-project/pull/118628, branch: 
users/artempyanykh/fast-coro-upstream/9
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 29 ++-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 51 +---
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h 
b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index d1887980fb3bcb..e7121d26bd08f3 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,6 +48,7 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
+  const MetadataSetTy &GlobalDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -60,12 +61,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
  Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI)
+ TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
 FKind(Shape.ABI == ABI::Async ? CloneKind::Async
   : CloneKind::Continuation),
-Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
-ActiveSuspend(ActiveSuspend) {
+Builder(OrigF.getContext()), TTI(TTI), 
GlobalDebugInfo(GlobalDebugInfo),
+NewF(NewF), ActiveSuspend(ActiveSuspend) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 assert(NewF && "need existing function for continuation");
@@ -74,9 +75,11 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI)
+ CloneKind FKind, TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo)
   : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-Builder(OrigF.getContext()), TTI(TTI) {}
+Builder(OrigF.getContext()), TTI(TTI),
+GlobalDebugInfo(GlobalDebugInfo) {}
 
   virtual ~BaseCloner() {}
 
@@ -84,12 +87,14 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
-   TargetTransformInfo &TTI) {
+   TargetTransformInfo &TTI,
+   const MetadataSetTy &GlobalDebugInfo) {
 assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
 TimeTraceScope FunctionScope("BaseCloner");
 
-BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
+  GlobalDebugInfo);
 Cloner.create();
 return Cloner.getFunction();
   }
@@ -129,8 +134,9 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-   CloneKind FKind, TargetTransformInfo &TTI)
-  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
+   CloneKind FKind, TargetTransformInfo &TTI,
+   const MetadataSetTy &GlobalDebugInfo)
+  : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo) {}
 
   void

[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118629

>From c32803f9d01e7c2b733d38db57805fcc398bb54a Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Sun, 15 Sep 2024 10:51:38 -0700
Subject: [PATCH] [Analysis] Add DebugInfoCache analysis

Summary:
The analysis simply primes and caches DebugInfoFinders for each DICompileUnit 
in a module. This
allows (future) callers like CoroSplitPass to compute global debug info 
metadata (required for
coroutine function cloning) much faster. Specifically, pay the price of 
DICompileUnit processing
only once per compile unit, rather than once per coroutine.

Test Plan:
Added a smoke test for the new analysis
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118629, branch: 
users/artempyanykh/fast-coro-upstream/10
---
 llvm/include/llvm/Analysis/DebugInfoCache.h   |  50 +
 llvm/include/llvm/IR/DebugInfo.h  |   4 +-
 llvm/lib/Analysis/CMakeLists.txt  |   1 +
 llvm/lib/Analysis/DebugInfoCache.cpp  |  47 
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 llvm/lib/Passes/PassRegistry.def  |   1 +
 llvm/unittests/Analysis/CMakeLists.txt|   1 +
 .../unittests/Analysis/DebugInfoCacheTest.cpp | 211 ++
 8 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 llvm/include/llvm/Analysis/DebugInfoCache.h
 create mode 100644 llvm/lib/Analysis/DebugInfoCache.cpp
 create mode 100644 llvm/unittests/Analysis/DebugInfoCacheTest.cpp

diff --git a/llvm/include/llvm/Analysis/DebugInfoCache.h 
b/llvm/include/llvm/Analysis/DebugInfoCache.h
new file mode 100644
index 00..dbd6802c99ea01
--- /dev/null
+++ b/llvm/include/llvm/Analysis/DebugInfoCache.h
@@ -0,0 +1,50 @@
+//===- llvm/Analysis/DebugInfoCache.h - debug info cache *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file contains an analysis that builds a cache of debug info for each
+// DICompileUnit in a module.
+//
+//===--===//
+
+#ifndef LLVM_ANALYSIS_DEBUGINFOCACHE_H
+#define LLVM_ANALYSIS_DEBUGINFOCACHE_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Processes and caches debug info for each DICompileUnit in a module.
+///
+/// The result of the analysis is a set of DebugInfoFinders primed on their
+/// respective DICompileUnit. Such DebugInfoFinders can be used to speed up
+/// function cloning which otherwise requires an expensive traversal of
+/// DICompileUnit-level debug info. See an example usage in CoroSplit.
+class DebugInfoCache {
+public:
+  using DIFinderCache = SmallDenseMap;
+  DIFinderCache Result;
+
+  DebugInfoCache(const Module &M);
+
+  bool invalidate(Module &, const PreservedAnalyses &,
+  ModuleAnalysisManager::Invalidator &);
+};
+
+class DebugInfoCacheAnalysis
+: public AnalysisInfoMixin {
+  friend AnalysisInfoMixin;
+  static AnalysisKey Key;
+
+public:
+  using Result = DebugInfoCache;
+  Result run(Module &M, ModuleAnalysisManager &);
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index 73f45c3769be44..11907fbb7f20b3 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -120,11 +120,13 @@ class DebugInfoFinder {
   /// Process subprogram.
   void processSubprogram(DISubprogram *SP);
 
+  /// Process a compile unit.
+  void processCompileUnit(DICompileUnit *CU);
+
   /// Clear all lists.
   void reset();
 
 private:
-  void processCompileUnit(DICompileUnit *CU);
   void processScope(DIScope *Scope);
   void processType(DIType *DT);
   bool addCompileUnit(DICompileUnit *CU);
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 0db5b80f336cb5..db9a569e301563 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_component_library(LLVMAnalysis
   DDGPrinter.cpp
   ConstraintSystem.cpp
   Delinearization.cpp
+  DebugInfoCache.cpp
   DemandedBits.cpp
   DependenceAnalysis.cpp
   DependenceGraphBuilder.cpp
diff --git a/llvm/lib/Analysis/DebugInfoCache.cpp 
b/llvm/lib/Analysis/DebugInfoCache.cpp
new file mode 100644
index 00..c1a3e89f0a6ccf
--- /dev/null
+++ b/llvm/lib/Analysis/DebugInfoCache.cpp
@@ -0,0 +1,47 @@
+//===- llvm/Analysis/DebugInfoCache.cpp - debug info cache 
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-

[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh updated 
https://github.com/llvm/llvm-project/pull/118630

>From 33b2a6aafbaec530f1dc31ad99d5fb6192849386 Mon Sep 17 00:00:00 2001
From: Artem Pianykh 
Date: Sun, 15 Sep 2024 11:00:00 -0700
Subject: [PATCH] [Coro] Use DebugInfoCache to speed up cloning in
 CoroSplitPass

Summary:
We can use a DebugInfoFinder from DebugInfoCache which is already primed on a 
compile unit to speed
up collection of global debug info.

The pass could likely be another 2x+ faster if we avoid rebuilding the set of 
global debug
info. This needs further massaging of CloneFunction and ValueMapper, though, 
and can be done
incrementally on top of this.

Comparing performance of CoroSplitPass at various points in this stack, this is 
anecdata from a sample
cpp file compiled with full debug info:
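|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) |
|-----------------+----------+----------------+-------------------+---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
|-----------------+----------+----------------+-------------------+---------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |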

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample cpp file with time trace to get the avg. duration of the pass 
and inner scopes.

stack-info: PR: https://github.com/llvm/llvm-project/pull/118630, branch: 
users/artempyanykh/fast-coro-upstream/11
---
 llvm/include/llvm/Transforms/Coroutines/ABI.h | 13 +++--
 llvm/lib/Analysis/CGSCCPassManager.cpp|  7 +++
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 56 +++
 llvm/test/Other/new-pass-manager.ll   |  1 +
 llvm/test/Other/new-pm-defaults.ll|  1 +
 llvm/test/Other/new-pm-lto-defaults.ll|  1 +
 llvm/test/Other/new-pm-pgo-preinline.ll   |  1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |  1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |  1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |  1 +
 .../Other/new-pm-thinlto-prelink-defaults.ll  |  1 +
 .../new-pm-thinlto-prelink-pgo-defaults.ll|  1 +
 ...w-pm-thinlto-prelink-samplepgo-defaults.ll |  1 +
 .../Analysis/CGSCCPassManagerTest.cpp |  4 +-
 14 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Coroutines/ABI.h 
b/llvm/include/llvm/Transforms/Coroutines/ABI.h
index 0b2d405f3caec4..2cf614b6bb1e2a 100644
--- a/llvm/include/llvm/Transforms/Coroutines/ABI.h
+++ b/llvm/include/llvm/Transforms/Coroutines/ABI.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H
 #define LLVM_TRANSFORMS_COROUTINES_ABI_H
 
+#include "llvm/Analysis/DebugInfoCache.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Coroutines/CoroShape.h"
 #include "llvm/Transforms/Coroutines/MaterializationUtils.h"
@@ -53,7 +54,8 @@ class BaseABI {
   // Perform the function splitting according to the ABI.
   virtual void splitCoroutine(Function &F, coro::Shape &Shape,
   SmallVectorImpl &Clones,
-  TargetTransformInfo &TTI) = 0;
+  TargetTransformInfo &TTI,
+  const DebugInfoCache *DICache) = 0;
 
   Function &F;
   coro::Shape &Shape;
@@ -73,7 +75,8 @@ class SwitchABI : public BaseABI {
 
   void splitCoroutine(Function &F, coro::Shape &Shape,
   SmallVectorImpl &Clones,
-  TargetTransformInfo &TTI) override;
+  TargetTransformInfo &TTI,
+  const DebugInfoCache *DICache) override;
 };
 
 class AsyncABI : public BaseABI {
@@ -86,7 +89,8 @@ class AsyncABI : public BaseABI {
 
   void splitCoroutine(Function &F, coro::Shape &Shape,
   SmallVectorImpl &Clones,
-  TargetTransformInfo &TTI) override;
+  TargetTransformInfo &TTI,
+  const DebugInfoCache *DICache) override;
 };
 
 class AnyRetconABI : public BaseABI {
@@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI {
 
   void splitCoroutine(Function &F, coro::Shape &Shape,
   SmallVectorImpl &Clones,
-  TargetTransformInfo &TTI) override;
+  TargetTransformInfo &TTI,
+  const DebugInfoCache *DICache) override;
 };
 
 } // end namespace coro
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp 
b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 948bc2435ab275..3ba085cdb0be8b 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #in

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Pengcheng Wang via llvm-branch-commits


https://github.com/wangpc-pp updated 
https://github.com/llvm/llvm-project/pull/119194

>From b0d87f2a2e0ab0a13bdd85d5406451534e79ba8d Mon Sep 17 00:00:00 2001
From: Wang Pengcheng 
Date: Mon, 9 Dec 2024 19:18:06 +0800
Subject: [PATCH] Rewrite uses in ARM/PPC targets

Created using spr 1.3.6-beta.1
---
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 
b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6b37dd9161685..8673deddb7057f 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6936,7 +6936,6 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
   RegClassInfo.runOnMachineFunction(*MF);
   RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
  EndLoop->getParent()->end(), false, false);
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
 
@@ -6979,7 +6978,7 @@ bool 
ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
 
   auto &P = RPTracker.getPressure().MaxSetPressure;
   for (unsigned I = 0, E = P.size(); I < E; ++I)
-if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
+if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
   return true;
 }
   return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 44f6db5061e21a..fa45a7fb7fabe6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure(
   };
 
   // For now we only care about float and double type fma.
-  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
-  *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
+  unsigned VSSRCLimit =
+  RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
 
   // Only reduce register pressure when pressure is high.
   return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-regalloc

Author: Akshat Oke (optimisan)


Changes

Makes Inline Spiller amenable for the new PM.

---
Full diff: https://github.com/llvm/llvm-project/pull/119181.diff


5 Files Affected:

- (modified) llvm/include/llvm/CodeGen/Spiller.h (+14-2) 
- (modified) llvm/lib/CodeGen/InlineSpiller.cpp (+14-22) 
- (modified) llvm/lib/CodeGen/RegAllocBasic.cpp (+11-5) 
- (modified) llvm/lib/CodeGen/RegAllocGreedy.cpp (+3-1) 
- (modified) llvm/lib/CodeGen/RegAllocPBQP.cpp (+4-1) 


``diff
diff --git a/llvm/include/llvm/CodeGen/Spiller.h 
b/llvm/include/llvm/CodeGen/Spiller.h
index 51ad36bc6b1f8b..3132cefeb6c68a 100644
--- a/llvm/include/llvm/CodeGen/Spiller.h
+++ b/llvm/include/llvm/CodeGen/Spiller.h
@@ -19,6 +19,10 @@ class MachineFunction;
 class MachineFunctionPass;
 class VirtRegMap;
 class VirtRegAuxInfo;
+class LiveIntervals;
+class LiveStacks;
+class MachineDominatorTree;
+class MachineBlockFrequencyInfo;
 
 /// Spiller interface.
 ///
@@ -41,12 +45,20 @@ class Spiller {
   virtual ArrayRef getReplacedRegs() = 0;
 
   virtual void postOptimization() {}
+
+  struct RequiredAnalyses {
+LiveIntervals &LIS;
+LiveStacks &LSS;
+MachineDominatorTree &MDT;
+const MachineBlockFrequencyInfo &MBFI;
+  };
 };
 
 /// Create and return a spiller that will insert spill code directly instead
 /// of deferring though VirtRegMap.
-Spiller *createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF,
- VirtRegMap &VRM, VirtRegAuxInfo &VRAI);
+Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses,
+ MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp 
b/llvm/lib/CodeGen/InlineSpiller.cpp
index 64f290f5930a1b..b9768d5c63a5d1 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -75,7 +75,6 @@ RestrictStatepointRemat("restrict-statepoint-remat",
cl::desc("Restrict remat for statepoint operands"));
 
 namespace {
-
 class HoistSpillHelper : private LiveRangeEdit::Delegate {
   MachineFunction &MF;
   LiveIntervals &LIS;
@@ -128,15 +127,11 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
   DenseMap &SpillsToIns);
 
 public:
-  HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
-   VirtRegMap &vrm)
-  : MF(mf), LIS(pass.getAnalysis().getLIS()),
-LSS(pass.getAnalysis().getLS()),
-MDT(pass.getAnalysis().getDomTree()),
+  HoistSpillHelper(const Spiller::RequiredAnalyses &Analyses,
+   MachineFunction &mf, VirtRegMap &vrm)
+  : MF(mf), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT),
 VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
-TRI(*mf.getSubtarget().getRegisterInfo()),
-MBFI(
-
pass.getAnalysis().getMBFI()),
+TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI),
 IPA(LIS, mf.getNumBlockIDs()) {}
 
   void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
@@ -190,16 +185,12 @@ class InlineSpiller : public Spiller {
   ~InlineSpiller() override = default;
 
 public:
-  InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap 
&VRM,
-VirtRegAuxInfo &VRAI)
-  : MF(MF), LIS(Pass.getAnalysis().getLIS()),
-LSS(Pass.getAnalysis().getLS()),
-MDT(Pass.getAnalysis().getDomTree()),
+  InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF,
+VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
+  : MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT),
 VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
-TRI(*MF.getSubtarget().getRegisterInfo()),
-MBFI(
-
Pass.getAnalysis().getMBFI()),
-HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
+TRI(*MF.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI),
+HSpiller(Analyses, MF, VRM), VRAI(VRAI) {}
 
   void spill(LiveRangeEdit &) override;
   ArrayRef getSpilledRegs() override { return RegsToSpill; }
@@ -237,10 +228,11 @@ Spiller::~Spiller() = default;
 
 void Spiller::anchor() {}
 
-Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
-   MachineFunction &MF, VirtRegMap &VRM,
-   VirtRegAuxInfo &VRAI) {
-  return new InlineSpiller(Pass, MF, VRM, VRAI);
+Spiller *
+llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses,
+  MachineFunction &MF, VirtRegMap &VRM,
+  VirtRegAuxInfo &VRAI) {
+  return new InlineSpiller(Analyses, MF, VRM, VRAI);
 }
 
 
//===--===//
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp 
b/llvm/li

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#119181** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#118462** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/118462?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#117309** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/117309?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`



This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/119181

None

>From 5b4e72ffece91f7ee370cb484667aa13742b9dae Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 9 Dec 2024 07:58:48 +
Subject: [PATCH] Spiller: Deatach legacy pass and supply analyses instead

---
 llvm/include/llvm/CodeGen/Spiller.h | 16 +++--
 llvm/lib/CodeGen/InlineSpiller.cpp  | 36 +++--
 llvm/lib/CodeGen/RegAllocBasic.cpp  | 16 +
 llvm/lib/CodeGen/RegAllocGreedy.cpp |  4 +++-
 llvm/lib/CodeGen/RegAllocPBQP.cpp   |  5 +++-
 5 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/Spiller.h 
b/llvm/include/llvm/CodeGen/Spiller.h
index 51ad36bc6b1f8b..3132cefeb6c68a 100644
--- a/llvm/include/llvm/CodeGen/Spiller.h
+++ b/llvm/include/llvm/CodeGen/Spiller.h
@@ -19,6 +19,10 @@ class MachineFunction;
 class MachineFunctionPass;
 class VirtRegMap;
 class VirtRegAuxInfo;
+class LiveIntervals;
+class LiveStacks;
+class MachineDominatorTree;
+class MachineBlockFrequencyInfo;
 
 /// Spiller interface.
 ///
@@ -41,12 +45,20 @@ class Spiller {
   virtual ArrayRef getReplacedRegs() = 0;
 
   virtual void postOptimization() {}
+
+  struct RequiredAnalyses {
+LiveIntervals &LIS;
+LiveStacks &LSS;
+MachineDominatorTree &MDT;
+const MachineBlockFrequencyInfo &MBFI;
+  };
 };
 
 /// Create and return a spiller that will insert spill code directly instead
 /// of deferring though VirtRegMap.
-Spiller *createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF,
- VirtRegMap &VRM, VirtRegAuxInfo &VRAI);
+Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses,
+ MachineFunction &MF, VirtRegMap &VRM,
+ VirtRegAuxInfo &VRAI);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp 
b/llvm/lib/CodeGen/InlineSpiller.cpp
index 64f290f5930a1b..b9768d5c63a5d1 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -75,7 +75,6 @@ RestrictStatepointRemat("restrict-statepoint-remat",
cl::desc("Restrict remat for statepoint operands"));
 
 namespace {
-
 class HoistSpillHelper : private LiveRangeEdit::Delegate {
   MachineFunction &MF;
   LiveIntervals &LIS;
@@ -128,15 +127,11 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
   DenseMap &SpillsToIns);
 
 public:
-  HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
-   VirtRegMap &vrm)
-  : MF(mf), LIS(pass.getAnalysis().getLIS()),
-LSS(pass.getAnalysis().getLS()),
-MDT(pass.getAnalysis().getDomTree()),
+  HoistSpillHelper(const Spiller::RequiredAnalyses &Analyses,
+   MachineFunction &mf, VirtRegMap &vrm)
+  : MF(mf), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT),
 VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
-TRI(*mf.getSubtarget().getRegisterInfo()),
-MBFI(
-
pass.getAnalysis().getMBFI()),
+TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI),
 IPA(LIS, mf.getNumBlockIDs()) {}
 
   void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
@@ -190,16 +185,12 @@ class InlineSpiller : public Spiller {
   ~InlineSpiller() override = default;
 
 public:
-  InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap 
&VRM,
-VirtRegAuxInfo &VRAI)
-  : MF(MF), LIS(Pass.getAnalysis().getLIS()),
-LSS(Pass.getAnalysis().getLS()),
-MDT(Pass.getAnalysis().getDomTree()),
+  InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF,
+VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
+  : MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT),
 VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
-TRI(*MF.getSubtarget().getRegisterInfo()),
-MBFI(
-
Pass.getAnalysis().getMBFI()),
-HSpiller(Pass, MF, VRM), VRAI(VRAI) {}
+TRI(*MF.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI),
+HSpiller(Analyses, MF, VRM), VRAI(VRAI) {}
 
   void spill(LiveRangeEdit &) override;
   ArrayRef getSpilledRegs() override { return RegsToSpill; }
@@ -237,10 +228,11 @@ Spiller::~Spiller() = default;
 
 void Spiller::anchor() {}
 
-Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
-   MachineFunction &MF, VirtRegMap &VRM,
-   VirtRegAuxInfo &VRAI) {
-  return new InlineSpiller(Pass, MF, VRM, VRAI);
+Spiller *
+llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses,
+  MachineFunction &MF, VirtRegMap &VRM,
+  VirtRegAuxInfo &VRAI) {
+  return new InlineSpiller(Analyses, MF, VRM, VRAI);
 }
 
 
//===---

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan edited 
https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][cuda] Handle gpu.return in AbstractResult pass (PR #119035)

2024-12-09 Thread via llvm-branch-commits


https://github.com/jeanPerier approved this pull request.


https://github.com/llvm/llvm-project/pull/119035
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Pengcheng Wang via llvm-branch-commits


https://github.com/wangpc-pp created 
https://github.com/llvm/llvm-project/pull/119194

To reduce compile time.

This is a follow-up of #118787.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)

2024-12-09 Thread Akshat Oke via llvm-branch-commits


https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/118462

>From ea0cf8d1805dd4ef093d30dd1f4538c9747be851 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 3 Dec 2024 10:12:36 +
Subject: [PATCH] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM

---
 .../llvm}/CodeGen/RegAllocPriorityAdvisor.h   |  79 +++-
 llvm/include/llvm/InitializePasses.h  |   2 +-
 .../llvm/Passes/MachinePassRegistry.def   |   1 +
 llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp   |   6 +-
 .../lib/CodeGen/MLRegAllocPriorityAdvisor.cpp | 184 +++---
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  |   2 +-
 llvm/lib/CodeGen/RegAllocGreedy.cpp   |   9 +-
 llvm/lib/CodeGen/RegAllocGreedy.h |   2 +-
 llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp  | 120 +---
 llvm/lib/Passes/PassBuilder.cpp   |   1 +
 10 files changed, 294 insertions(+), 112 deletions(-)
 rename llvm/{lib => include/llvm}/CodeGen/RegAllocPriorityAdvisor.h (53%)

diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h 
b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
similarity index 53%
rename from llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
rename to llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
index 2d42a43c4c6372..bddfe15bf17751 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
+++ b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
@@ -9,8 +9,10 @@
 #ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
 #define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
 
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
@@ -56,12 +58,73 @@ class DefaultPriorityAdvisor : public 
RegAllocPriorityAdvisor {
   unsigned getPriority(const LiveInterval &LI) const override;
 };
 
-class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+/// Common provider for getting the priority advisor and logging rewards.
+/// Legacy analysis forwards all calls to this provider.
+/// New analysis serves the provider as the analysis result.
+/// Expensive setup is done in the constructor, so that the advisor can be
+/// created quickly for every machine function.
+/// TODO: Remove once legacy PM support is dropped.
+class RegAllocPriorityAdvisorProvider {
 public:
   enum class AdvisorMode : int { Default, Release, Development };
 
-  RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
-  : ImmutablePass(ID), Mode(Mode){};
+  RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {}
+
+  virtual ~RegAllocPriorityAdvisorProvider() = default;
+
+  virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref GetReward) {};
+
+  virtual std::unique_ptr
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+
+  void setAnalyses(SlotIndexes *SI) { this->SI = SI; }
+
+  AdvisorMode getAdvisorMode() const { return Mode; }
+
+protected:
+  SlotIndexes *SI;
+
+private:
+  const AdvisorMode Mode;
+};
+
+RegAllocPriorityAdvisorProvider *createReleaseModePriorityAdvisorProvider();
+
+RegAllocPriorityAdvisorProvider *
+createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx);
+
+class RegAllocPriorityAdvisorAnalysis
+: public AnalysisInfoMixin {
+  static AnalysisKey Key;
+  friend AnalysisInfoMixin;
+
+public:
+  struct Result {
+// Owned by this analysis.
+RegAllocPriorityAdvisorProvider *Provider;
+
+bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA,
+MachineFunctionAnalysisManager::Invalidator &Inv) {
+  auto PAC = PA.getChecker();
+  return !PAC.preservedWhenStateless() ||
+ Inv.invalidate(MF, PA);
+}
+  };
+
+  Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+
+private:
+  void initializeProvider(LLVMContext &Ctx);
+  std::unique_ptr Provider;
+};
+
+class RegAllocPriorityAdvisorAnalysisLegacy : public ImmutablePass {
+public:
+  enum class AdvisorMode : int { Default, Release, Development };
+
+  RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode Mode)
+  : ImmutablePass(ID), Mode(Mode) {};
   static char ID;
 
   /// Get an advisor for the given context (i.e. machine function, etc)
@@ -69,7 +132,7 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass 
{
   getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
   AdvisorMode getAdvisorMode() const { return Mode; }
   virtual void logRewardIfNeeded(const MachineFunction &MF,
- llvm::function_ref GetReward){};
+ llvm::function_ref GetReward) {};
 
 protected:
   // This analysis preserves everything, and subclasses may have additional
@@ -85,11 +148,13 @@ class RegAllocPriorityAdvisorAnalysis : public 
ImmutablePass {
 
 /// Specialization for the API used by the analysis infrastructure to create
 /// an instance of the pri

[llvm-branch-commits] [lldb][Process] Introduce LoongArch64 hw break/watchpoint support (PR #118770)

2024-12-09 Thread via llvm-branch-commits


https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/118770


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [lldb][Process] Introduce LoongArch64 hw break/watchpoint support (PR #118770)

2024-12-09 Thread via llvm-branch-commits


https://github.com/wangleiat updated 
https://github.com/llvm/llvm-project/pull/118770


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Craig Topper via llvm-branch-commits


topperc wrote:

Why do we need #118787 if we can just update the passes to use 
RegisterClassInfo?

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits


https://github.com/MacDue commented:

A bunch of little comments (mostly just nitpicks from a pass over the PR) 
:slightly_smiling_face: 

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] b619626 - Revert "[Fuchsia][cmake] Allow using FatLTO when building runtimes (#112277)"

2024-12-09 Thread via llvm-branch-commits


Author: Paul Kirth
Date: 2024-12-09T11:23:29-08:00
New Revision: b6196267463a356df89e922bb72c93a35b73d29c

URL: 
https://github.com/llvm/llvm-project/commit/b6196267463a356df89e922bb72c93a35b73d29c
DIFF: 
https://github.com/llvm/llvm-project/commit/b6196267463a356df89e922bb72c93a35b73d29c.diff

LOG: Revert "[Fuchsia][cmake] Allow using FatLTO when building runtimes 
(#112277)"

This reverts commit 57545dbbdbafc51d63873800a45cfd48a283d981.

Added: 


Modified: 
clang/cmake/caches/Fuchsia-stage2.cmake
llvm/cmake/modules/HandleLLVMOptions.cmake

Removed: 




diff  --git a/clang/cmake/caches/Fuchsia-stage2.cmake 
b/clang/cmake/caches/Fuchsia-stage2.cmake
index 9eb295548500ee..784a883a3bf916 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -190,10 +190,6 @@ foreach(target 
aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn
 set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL 
"")
 set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES 
"compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
 
-# Enable FatLTO for Linux and baremetal runtimes
-set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
-set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
-
 # Use .build-id link.
 list(APPEND RUNTIME_BUILD_ID_LINK "${target}")
   endif()
@@ -276,10 +272,6 @@ if(FUCHSIA_SDK)
 set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE 
BOOL "")
 set(RUNTIMES_${target}+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE 
BOOL "")
 
-# Enable FatLTO for Fuchsia runtimes
-set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
-set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
-
 # Use .build-id link.
 list(APPEND RUNTIME_BUILD_ID_LINK "${target}")
   endforeach()
@@ -371,10 +363,6 @@ foreach(target 
armv6m-none-eabi;armv7m-none-eabi;armv8m.main-none-eabi;armv8.1m.
   set(RUNTIMES_${target}_LLVM_INCLUDE_TESTS OFF CACHE BOOL "")
   set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "")
   set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "libc;libcxx" CACHE STRING "")
-
-  # Enable FatLTO for baremetal runtimes
-  set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
-  set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
 endforeach()
 
 foreach(target riscv32-unknown-elf)
@@ -426,10 +414,6 @@ foreach(target riscv32-unknown-elf)
   set(RUNTIMES_${target}_LLVM_INCLUDE_TESTS OFF CACHE BOOL "")
   set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "")
   set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "libc;libcxx" CACHE STRING "")
-
-  # Enable FatLTO for baremetal runtimes
-  set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
-  set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
 endforeach()
 
 set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "")

diff  --git a/llvm/cmake/modules/HandleLLVMOptions.cmake 
b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 55a87f5fdbb138..f19125eb6bf273 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -1285,7 +1285,7 @@ elseif(LLVM_ENABLE_LTO)
   endif()
 endif()
 
-if(LLVM_ENABLE_FATLTO AND ((UNIX AND NOT APPLE) OR FUCHSIA))
+if(LLVM_ENABLE_FATLTO AND UNIX AND NOT APPLE)
   append("-ffat-lto-objects" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
   if(NOT LINKER_IS_LLD_LINK)
 append("-ffat-lto-objects" CMAKE_EXE_LINKER_FLAGS 
CMAKE_SHARED_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Tyler Nowicki via llvm-branch-commits


TylerNowicki wrote:

LGTM

https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Tyler Nowicki via llvm-branch-commits


https://github.com/TylerNowicki approved this pull request.


https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Sam Elliott via llvm-branch-commits


https://github.com/lenary approved this pull request.

LGTM for the target-independent changes.

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] Update maintainers (PR #119166)

2024-12-09 Thread Kostya Kortchinsky via llvm-branch-commits


https://github.com/cryptoad approved this pull request.


https://github.com/llvm/llvm-project/pull/119166
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)

2024-12-09 Thread Felipe de Azevedo Piovezan via llvm-branch-commits



@@ -200,6 +200,22 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD,
   return ModuleLevelChanges;
 }
 
+void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function 
*OldFunc,
+ ValueToValueMapTy &VMap,
+ RemapFlags RemapFlag,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+  // Duplicate the metadata that is attached to the cloned function.

felipepiovezan wrote:

Note that, in the context of this function, there is no "cloned function"; it's 
either `NewFunc` or `OldFunc`. 

That said, I think this comment fits better in the documentation of the header, 
as there is important information here that callers should be aware of.

https://github.com/llvm/llvm-project/pull/118623
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)

2024-12-09 Thread Felipe de Azevedo Piovezan via llvm-branch-commits



@@ -182,6 +182,18 @@ void CloneFunctionAttributesInto(Function *NewFunc, const 
Function *OldFunc,
  ValueMapTypeRemapper *TypeMapper = nullptr,
  ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's metadata into NewFunc.
+///
+/// The caller is expected to populate \p VMap beforehand and set an 
appropriate
+/// \p RemapFlag.
+///
+/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a 
different
+/// module. Use CloneFunctionInto for that behavior.
+void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,

felipepiovezan wrote:

Can `NewFunc` or `OldFunc` be null? If not, we should make them references

https://github.com/llvm/llvm-project/pull/118623
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)

2024-12-09 Thread Felipe de Azevedo Piovezan via llvm-branch-commits



@@ -194,6 +194,15 @@ void CloneFunctionMetadataInto(Function *NewFunc, const 
Function *OldFunc,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's body into NewFunc.
+void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,

felipepiovezan wrote:

Can `NewFunc` / `OldFunc` be nullptr? If not, they should be references

https://github.com/llvm/llvm-project/pull/118624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)

2024-12-09 Thread Felipe de Azevedo Piovezan via llvm-branch-commits



@@ -361,32 +370,31 @@ void updateProfileCallee(
 /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
 /// basic blocks and extract their scope. These are candidates for duplication
 /// when cloning.
-void identifyNoAliasScopesToClone(
-ArrayRef BBs, SmallVectorImpl &NoAliasDeclScopes);
+void identifyNoAliasScopesToClone(ArrayRef BBs,
+  SmallVectorImpl 
&NoAliasDeclScopes);
 
 /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
 /// instruction range and extract their scope. These are candidates for
 /// duplication when cloning.
-void identifyNoAliasScopesToClone(
-BasicBlock::iterator Start, BasicBlock::iterator End,
-SmallVectorImpl &NoAliasDeclScopes);
+void identifyNoAliasScopesToClone(BasicBlock::iterator Start,
+  BasicBlock::iterator End,
+  SmallVectorImpl 
&NoAliasDeclScopes);
 
 /// Duplicate the specified list of noalias decl scopes.
 /// The 'Ext' string is added as an extension to the name.
 /// Afterwards, the ClonedScopes contains the mapping of the original scope
 /// MDNode onto the cloned scope.
 /// Be aware that the cloned scopes are still part of the original scope 
domain.
-void cloneNoAliasScopes(
-ArrayRef NoAliasDeclScopes,
-DenseMap &ClonedScopes,
-StringRef Ext, LLVMContext &Context);
+void cloneNoAliasScopes(ArrayRef NoAliasDeclScopes,
+DenseMap &ClonedScopes,
+StringRef Ext, LLVMContext &Context);
 
 /// Adapt the metadata for the specified instruction according to the
 /// provided mapping. This is normally used after cloning an instruction, when
 /// some noalias scopes needed to be cloned.
-void adaptNoAliasScopes(
-llvm::Instruction *I, const DenseMap &ClonedScopes,
-LLVMContext &Context);

felipepiovezan wrote:

Are any of these changes part of the PR? If not, they should not be part of the 
diff.

If you run into this as a result of running `clang-format` on the entire patch, 
a good way to avoid this is by running `clang-format` on the _diff_ only. For 
example, (assuming all the changes are unstaged) `git add` the changes you 
want to stage, and then `git clang-format --staged`.

https://github.com/llvm/llvm-project/pull/118624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)

2024-12-09 Thread via llvm-branch-commits



@@ -15,6 +15,7 @@
 # used for building this FortranFloat128Math library.
 
 include(CheckLibraryExists)
+include(CheckIncludeFile)

jeanPerier wrote:

When using `-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath`, I am seeing a failure 
in my latest test: the cmake `check_include_file` check for quadmath.h below 
fails even though the system has it, and a similar cmake command works with the 
current llvm main.

I will investigate more; I suspect that some system header paths that are set 
up in the llvm/flang build are no longer set up here.

I am surprised I did not see that in my earlier testing (maybe that is 
because I mostly tested out-of-tree and that worked there; I need to check 
that).

https://github.com/llvm/llvm-project/pull/110217
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] Update maintainers (PR #119166)

2024-12-09 Thread Christopher Ferris via llvm-branch-commits


https://github.com/cferris1000 approved this pull request.


https://github.com/llvm/llvm-project/pull/119166
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)

2024-12-09 Thread Shilei Tian via llvm-branch-commits


https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114438

>From de91e1754826028c3abdabd074bbe8ec1b17eb5f Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 31 Oct 2024 12:49:07 -0400
Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor
 existing attribute

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 81 +++
 .../annotate-kernel-features-hsa-call.ll  | 46 ++-
 ...ttr-amdgpu-max-num-workgroups-propagate.ll | 48 +--
 .../AMDGPU/attributor-loop-issue-58639.ll |  3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|  3 +-
 .../CodeGen/AMDGPU/propagate-waves-per-eu.ll  | 59 +++---
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |  9 ++-
 .../AMDGPU/uniform-work-group-multistep.ll|  3 +-
 .../uniform-work-group-recursion-test.ll  |  2 +-
 9 files changed, 138 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 1d16cf2fe95c37..a8f448227b18c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -206,6 +206,19 @@ class AMDGPUInformationCache : public InformationCache {
 return ST.getWavesPerEU(F, FlatWorkGroupSize);
   }
 
+  std::optional>
+  getWavesPerEUAttr(const Function &F) {
+auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
+   /*OnlyFirstRequired=*/true);
+if (!Val)
+  return std::nullopt;
+if (!Val->second) {
+  const GCNSubtarget &ST = TM.getSubtarget(F);
+  Val->second = ST.getMaxWavesPerEU();
+}
+return std::make_pair(Val->first, *(Val->second));
+  }
+
   std::pair
   getEffectiveWavesPerEU(const Function &F,
  std::pair WavesPerEU,
@@ -776,22 +789,6 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
-  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
- unsigned Max) {
-// Don't add the attribute if it's the implied default.
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
-  return ChangeStatus::UNCHANGED;
-
-Function *F = getAssociatedFunction();
-LLVMContext &Ctx = F->getContext();
-SmallString<10> Buffer;
-raw_svector_ostream OS(Buffer);
-OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
-return A.manifestAttrs(getIRPosition(),
-   {Attribute::get(Ctx, AttrName, OS.str())},
-   /*ForceReplace=*/true);
-  }
-
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -1027,29 +1024,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute 
{
   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
   : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
 
-  bool isValidState() const override {
-return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
-  }
-
   void initialize(Attributor &A) override {
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast(A.getInfoCache());
 
-if (const auto *AssumedGroupSize = A.getAAFor(
-*this, IRPosition::function(*F), DepClassTy::REQUIRED);
-AssumedGroupSize->isValidState()) {
+auto TakeRange = [&](std::pair R) {
+  auto [Min, Max] = R;
+  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState RangeState(Range);
+  clampStateAndIndicateChange(this->getState(), RangeState);
+  indicateOptimisticFixpoint();
+};
 
-  unsigned Min, Max;
-  std::tie(Min, Max) = InfoCache.getWavesPerEU(
-  *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-   AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+std::pair MaxWavesPerEURange{
+1U, InfoCache.getMaxWavesPerEU(*F)};
 
-  ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
-  intersectKnown(Range);
+// If the attribute exists, we will honor it if it is not the default.
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
+  if (*Attr != MaxWavesPerEURange) {
+TakeRange(*Attr);
+return;
+  }
 }
 
-if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
-  indicatePessimisticFixpoint();
+// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
+// calculation of waves per EU involves flat work group size, we can't
+// simply use an assumed flat work group size as a start point, because the
+// update of flat work group size is in an inverse direction of waves per
+// EU. However, we can still do something if it is an entry function. Since
+// an entry function is a terminal node, and flat work group size either
+// from attribute or default will be used anyway, we can take that

[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)

2024-12-09 Thread Shilei Tian via llvm-branch-commits


https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/114726

>From 47f7697d5a278640d7ccb59d512112b13b07871f Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 3 Nov 2024 19:35:26 -0500
Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial
 state

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 105 +++---
 .../annotate-kernel-features-hsa-call.ll  |  46 
 ...ttr-amdgpu-max-num-workgroups-propagate.ll |  48 
 .../AMDGPU/attributor-loop-issue-58639.ll |   3 +-
 .../CodeGen/AMDGPU/direct-indirect-call.ll|   3 +-
 .../AMDGPU/remove-no-kernel-id-attribute.ll   |   9 +-
 .../AMDGPU/uniform-work-group-multistep.ll|   3 +-
 .../uniform-work-group-recursion-test.ll  |   2 +-
 8 files changed, 140 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index a8f448227b18c9..3ca0677d980b9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -745,6 +745,16 @@ struct AAAMDSizeRangeAttribute
   if (!CallerInfo || !CallerInfo->isValidState())
 return false;
 
+  /// When the caller AA is in its initial state, the state remains valid
+  /// but awaits propagation. We skip processing in this case. Note that we
+  /// must return true since the state is still considered valid.
+  if (CallerInfo->isAtInitialState()) {
+LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+  << Caller->getName()
+  << " is still at initial state. Skip the update.\n");
+return true;
+  }
+
   Change |=
   clampStateAndIndicateChange(this->getState(), 
CallerInfo->getState());
 
@@ -789,6 +799,15 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
   }
 
+  /// The initial state of `IntegerRangeState` represents an empty set, which
+  /// does not constitute a valid range. This empty state complicates
+  /// propagation, particularly for arithmetic operations like
+  /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
+  /// initial state during processing.
+  bool isAtInitialState() const {
+return isValidState() && getAssumed().isEmptySet();
+  }
+
   const std::string getAsStr(Attributor *) const override {
 std::string Str;
 raw_string_ostream OS(Str);
@@ -845,6 +864,11 @@ struct AAAMDFlatWorkGroupSize : public 
AAAMDSizeRangeAttribute {
Attributor &A);
 
   ChangeStatus manifest(Attributor &A) override {
+if (isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] Still at initial state. No manifest.\n";);
+  return ChangeStatus::UNCHANGED;
+}
 Function *F = getAssociatedFunction();
 auto &InfoCache = static_cast(A.getInfoCache());
 return emitAttributeIfNotDefaultAfterClamp(
@@ -1071,31 +1095,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute 
{
 auto &InfoCache = static_cast(A.getInfoCache());
 ChangeStatus Change = ChangeStatus::UNCHANGED;
 
+Function *F = getAssociatedFunction();
+
+const auto *AAFlatWorkGroupSize = A.getAAFor(
+*this, IRPosition::function(*F), DepClassTy::REQUIRED);
+if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
+  LLVM_DEBUG(
+  dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+if (AAFlatWorkGroupSize->isAtInitialState()) {
+  LLVM_DEBUG(dbgs() << '[' << getName()
+<< "] AAAMDFlatWorkGroupSize is still at initial "
+   "state. Skip the update.\n");
+  return ChangeStatus::UNCHANGED;
+}
+
+auto CurrentWorkGroupSize = std::make_pair(
+AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
+AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
+
+auto DoUpdate = [&](std::pair WavesPerEU,
+std::pair FlatWorkGroupSize) {
+  auto [Min, Max] =
+  InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
+  ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
+  IntegerRangeState IRS(CR);
+  Change |= clampStateAndIndicateChange(this->getState(), IRS);
+};
+
+// // We need to clamp once if we are not at initial state, because
+// // AAAMDFlatWorkGroupSize could be updated in last iteration.
+if (!isAtInitialState()) {
+  auto CurrentWavesPerEU =
+  std::make_pair(getAssumed().getLower().getZExtValue(),
+ getAssumed().getUpper().getZExtValue() - 1);
+  DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
+}
+
 auto CheckCallSite = [&](AbstractCallSite CS) {
   Function *Caller = CS.getInstructi

[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits

artempyanykh wrote:

> nit: Perhaps 'Common' is a better word than 'Global'. Global made me think of 
> global variables but I realized in the patch that is not what you are doing. 
> Seems you are just creating debug info based on the original function and 
> sharing that with the continuations / splits? Or perhaps just 'CoroDebugInfo'?

Good call @TylerNowicki! I like 'common', updated the stack accordingly. 

It was originally "global" in the sense that it was owned by the module (global) 
rather than by a function (local), and so didn't have to be cloned as part of 
function cloning. But I agree that the name can be confusing.

https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)

2024-12-09 Thread Paul Kirth via llvm-branch-commits


https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/112277

>From 1dafa521d5a1e10e3f79f63a661b2e14acff5a4a Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Mon, 14 Oct 2024 15:06:38 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 libcxx/CMakeLists.txt|  4 
 libcxx/src/CMakeLists.txt| 10 ++
 libcxxabi/src/CMakeLists.txt | 10 ++
 3 files changed, 24 insertions(+)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index f1942e963ccc31..5a68237f7336c5 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -102,6 +102,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS
support the C functionality for wide characters. When wide characters are
not supported, several parts of the library will be disabled, notably the
wide character specializations of std::basic_string." ON)
+ option(LIBCXX_ENABLE_FATLTO
+   "Whether to compile libc++ with FatLTO enabled." ON)
+ option(LIBCXX_ENABLE_LTO
+   "Whether to compile libc++ with LTO enabled." ON)
 
 # To use time zone support in libc++ the platform needs to have the IANA
 # database installed. Libc++ will fail to build if this is enabled on a
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index b187677ff2db52..670db758f53173 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -173,6 +173,16 @@ if (APPLE AND LLVM_USE_SANITIZER)
   endif()
 endif()
 
+
+if(LIBCXX_ENABLE_LTO)
+  list(APPEND LIBCXX_COMPILE_FLAGS "-flto")
+  list(APPEND LIBCXX_LINK_FLAGS "-flto")
+endif()
+if(LIBCXX_ENABLE_FATLTO)
+  list(APPEND LIBCXX_COMPILE_FLAGS "-ffat-lto-objects")
+  list(APPEND LIBCXX_LINK_FLAGS "-ffat-lto-objects")
+endif()
+
 split_list(LIBCXX_COMPILE_FLAGS)
 split_list(LIBCXX_LINK_FLAGS)
 
diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index 480e528b819bb9..822ede39c6a525 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -143,6 +143,15 @@ if ( APPLE )
   endif()
 endif()
 
+if(LIBCXX_ENABLE_LTO)
+  list(APPEND LIBCXXABI_COMPILE_FLAGS "-flto")
+  list(APPEND LIBCXXABI_LINK_FLAGS "-flto")
+endif()
+if(LIBCXX_ENABLE_FATLTO)
+  list(APPEND LIBCXXABI_COMPILE_FLAGS "-ffat-lto-objects")
+  list(APPEND LIBCXXABI_LINK_FLAGS "-ffat-lto-objects")
+endif()
+
 split_list(LIBCXXABI_COMPILE_FLAGS)
 split_list(LIBCXXABI_LINK_FLAGS)
 
@@ -154,6 +163,7 @@ endif()
 
 include(WarningFlags)
 
+
 # Build the shared library.
 add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} 
${LIBCXXABI_HEADERS})
 cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} 
${LIBCXXABI_ENABLE_PEDANTIC})

>From 38851d29d9eaf5e3c597be3f9f57179f308ba335 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Mon, 14 Oct 2024 15:27:36 -0700
Subject: [PATCH 2/4] Remove newline from diff

Created using spr 1.3.4
---
 libcxxabi/src/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index 1a1e57aa0077b4..783f17583c62e0 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -163,7 +163,6 @@ endif()
 
 include(WarningFlags)
 
-
 # Build the shared library.
 add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} 
${LIBCXXABI_HEADERS})
 cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} 
${LIBCXXABI_ENABLE_PEDANTIC})

>From 535f2f2c17a3c80aa12c0106a468a8f2127241fc Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Wed, 16 Oct 2024 11:20:51 -0700
Subject: [PATCH 3/4] Avoid unecessary changes to libc++ cmake

Created using spr 1.3.4
---
 clang/cmake/caches/Fuchsia-stage2.cmake |  8 
 libcxx/CMakeLists.txt   |  4 
 libcxx/src/CMakeLists.txt   | 10 --
 libcxxabi/src/CMakeLists.txt|  9 -
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake 
b/clang/cmake/caches/Fuchsia-stage2.cmake
index 5af98c7b3b3fba..e62f29ecbe6f45 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -192,6 +192,10 @@ foreach(target 
aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn
 set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL 
"")
 set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES 
"compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
 
+# Enable FatLTO for Linux and baremetal runtimes
+set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
+set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
+
 # Use .build-id link.
 list(APPEND RUNTIME_BUILD_ID_LINK "${target}")
   endif()
@@ -274,6 +278,10 @@ if(FUCHSIA_SDK)
 set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE 
B

[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)

2024-12-09 Thread Paul Kirth via llvm-branch-commits


https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/112277

>From 1dafa521d5a1e10e3f79f63a661b2e14acff5a4a Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Mon, 14 Oct 2024 15:06:38 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 libcxx/CMakeLists.txt|  4 
 libcxx/src/CMakeLists.txt| 10 ++
 libcxxabi/src/CMakeLists.txt | 10 ++
 3 files changed, 24 insertions(+)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index f1942e963ccc31..5a68237f7336c5 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -102,6 +102,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS
support the C functionality for wide characters. When wide characters are
not supported, several parts of the library will be disabled, notably the
wide character specializations of std::basic_string." ON)
+ option(LIBCXX_ENABLE_FATLTO
+   "Whether to compile libc++ with FatLTO enabled." ON)
+ option(LIBCXX_ENABLE_LTO
+   "Whether to compile libc++ with LTO enabled." ON)
 
 # To use time zone support in libc++ the platform needs to have the IANA
 # database installed. Libc++ will fail to build if this is enabled on a
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index b187677ff2db52..670db758f53173 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -173,6 +173,16 @@ if (APPLE AND LLVM_USE_SANITIZER)
   endif()
 endif()
 
+
+if(LIBCXX_ENABLE_LTO)
+  list(APPEND LIBCXX_COMPILE_FLAGS "-flto")
+  list(APPEND LIBCXX_LINK_FLAGS "-flto")
+endif()
+if(LIBCXX_ENABLE_FATLTO)
+  list(APPEND LIBCXX_COMPILE_FLAGS "-ffat-lto-objects")
+  list(APPEND LIBCXX_LINK_FLAGS "-ffat-lto-objects")
+endif()
+
 split_list(LIBCXX_COMPILE_FLAGS)
 split_list(LIBCXX_LINK_FLAGS)
 
diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index 480e528b819bb9..822ede39c6a525 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -143,6 +143,15 @@ if ( APPLE )
   endif()
 endif()
 
+if(LIBCXX_ENABLE_LTO)
+  list(APPEND LIBCXXABI_COMPILE_FLAGS "-flto")
+  list(APPEND LIBCXXABI_LINK_FLAGS "-flto")
+endif()
+if(LIBCXX_ENABLE_FATLTO)
+  list(APPEND LIBCXXABI_COMPILE_FLAGS "-ffat-lto-objects")
+  list(APPEND LIBCXXABI_LINK_FLAGS "-ffat-lto-objects")
+endif()
+
 split_list(LIBCXXABI_COMPILE_FLAGS)
 split_list(LIBCXXABI_LINK_FLAGS)
 
@@ -154,6 +163,7 @@ endif()
 
 include(WarningFlags)
 
+
 # Build the shared library.
 add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} 
${LIBCXXABI_HEADERS})
 cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} 
${LIBCXXABI_ENABLE_PEDANTIC})

>From 38851d29d9eaf5e3c597be3f9f57179f308ba335 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Mon, 14 Oct 2024 15:27:36 -0700
Subject: [PATCH 2/4] Remove newline from diff

Created using spr 1.3.4
---
 libcxxabi/src/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index 1a1e57aa0077b4..783f17583c62e0 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -163,7 +163,6 @@ endif()
 
 include(WarningFlags)
 
-
 # Build the shared library.
 add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} 
${LIBCXXABI_HEADERS})
 cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} 
${LIBCXXABI_ENABLE_PEDANTIC})

>From 535f2f2c17a3c80aa12c0106a468a8f2127241fc Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Wed, 16 Oct 2024 11:20:51 -0700
Subject: [PATCH 3/4] Avoid unecessary changes to libc++ cmake

Created using spr 1.3.4
---
 clang/cmake/caches/Fuchsia-stage2.cmake |  8 
 libcxx/CMakeLists.txt   |  4 
 libcxx/src/CMakeLists.txt   | 10 --
 libcxxabi/src/CMakeLists.txt|  9 -
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake 
b/clang/cmake/caches/Fuchsia-stage2.cmake
index 5af98c7b3b3fba..e62f29ecbe6f45 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -192,6 +192,10 @@ foreach(target 
aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn
 set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL 
"")
 set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES 
"compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
 
+# Enable FatLTO for Linux and baremetal runtimes
+set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "")
+set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "")
+
 # Use .build-id link.
 list(APPEND RUNTIME_BUILD_ID_LINK "${target}")
   endif()
@@ -274,6 +278,10 @@ if(FUCHSIA_SDK)
 set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE 
B

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)

2024-12-09 Thread Michael Kruse via llvm-branch-commits


Meinersbur wrote:

> I built things as of 
> [ac1f46f](https://github.com/llvm/llvm-project/commit/ac1f46faeef2507bb680f14b9256ac38817824df)
>  in 
> [conda-forge/flang-feedstock#80](https://github.com/conda-forge/flang-feedstock/pull/80),
>  and getting the following failure on linux:
> 
> ```
> [228/375] Linking CXX shared library lib/libFlangOpenMPTransforms.so.20.0git
> FAILED: lib/libFlangOpenMPTransforms.so.20.0git 
> : && $BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++ -fPIC 
> -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell 
> -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 
> -ffunction-sections -pipe -isystem $PREFIX/include 
> -fdebug-prefix-map=$SRC_DIR=/usr/local/src/conda/flang-split-20.0.0.dev1 
> -fdebug-prefix-map=$PREFIX=/usr/local/src/conda-prefix -fPIC 
> -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time 
> -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings 
> -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long 
> -Wimplicit-fallthrough -Wno-maybe-uninitialized -Wno-nonnull 
> -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move 
> -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment 
> -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color 
> -ffunction-sections -fdata-sections -Wno-deprecated-copy 
> -Wno-ctad-maybe-unsupported -fno-strict-aliasing -fno-semantic-interposition 
> -O3 -DNDEBUG -fno-semantic-interposition  -Wl,-O2 -Wl,--sort-common 
> -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags 
> -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,$PREFIX/lib 
> -Wl,-rpath-link,$PREFIX/lib -L$PREFIX/lib -Wl,-z,defs -Wl,-z,nodelete   
> -Wl,-rpath-link,$SRC_DIR/build/lib  -Wl,--gc-sections  
> -Wl,--dependency-file=lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/link.d
>  -shared -Wl,-soname,libFlangOpenMPTransforms.so.20.0git -o 
> lib/libFlangOpenMPTransforms.so.20.0git 
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/FunctionFiltering.cpp.o
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/GenericLoopConversion.cpp.o
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MapsForPrivatizedSymbols.cpp.o
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MapInfoFinalization.cpp.o
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MarkDeclareTarget.cpp.o
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/LowerWorkshare.cpp.o
>   -Wl,-rpath,"\$ORIGIN/../lib:$SRC_DIR/build/lib:"  
> lib/libFIRCodeGen.so.20.0git  lib/libFortranCommon.so.20.0git  
> $PREFIX/lib/libMLIRFuncDialect.a  $PREFIX/lib/libMLIROpenMPDialect.a  
> $PREFIX/lib/libMLIRIR.a  $PREFIX/lib/libMLIRPass.a  
> $PREFIX/lib/libMLIRTransformUtils.a  lib/libFIRAnalysis.so.20.0git  
> lib/libFIRBuilder.so.20.0git  lib/libHLFIRDialect.so.20.0git  
> lib/libFIRSupport.so.20.0git  lib/libFIRDialect.so.20.0git  
> lib/libFIRDialectSupport.so.20.0git  $PREFIX/lib/libclang-cpp.so.20.0git  
> $PREFIX/lib/libMLIRFuncDialect.a  $PREFIX/lib/libMLIRLLVMDialect.a  
> $PREFIX/lib/libMLIRMemorySlotInterfaces.a  
> $PREFIX/lib/libMLIROpenACCMPCommon.a  $PREFIX/lib/libMLIRSubsetOpInterface.a  
> $PREFIX/lib/libMLIRValueBoundsOpInterface.a  
> $PREFIX/lib/libMLIRDestinationStyleOpInterface.a  
> $PREFIX/lib/libMLIRRewrite.a  $PREFIX/lib/libMLIRRewritePDL.a  
> $PREFIX/lib/libMLIRPDLToPDLInterp.a  $PREFIX/lib/libMLIRPass.a  
> $PREFIX/lib/libMLIRAnalysis.a  $PREFIX/lib/libMLIRCallInterfaces.a  
> $PREFIX/lib/libMLIRControlFlowInterfaces.a  
> $PREFIX/lib/libMLIRLoopLikeInterface.a  
> $PREFIX/lib/libMLIRDataLayoutInterfaces.a  
> $PREFIX/lib/libMLIRInferIntRangeInterface.a  $PREFIX/lib/libMLIRPresburger.a  
> $PREFIX/lib/libMLIRViewLikeInterface.a  $PREFIX/lib/libMLIRPDLInterpDialect.a 
>  $PREFIX/lib/libMLIRFunctionInterfaces.a  $PREFIX/lib/libMLIRPDLDialect.a  
> $PREFIX/lib/libMLIRInferTypeOpInterface.a  
> $PREFIX/lib/libMLIRSideEffectInterfaces.a  $PREFIX/lib/libMLIRIR.a  
> $PREFIX/lib/libMLIRSupport.a  -lpthread  $PREFIX/lib/libLLVM.so.20.0git  
> -Wl,-rpath-link,$SRC_DIR/build/lib && :
> $BUILD_PREFIX/bin/../lib/gcc/x86_64-conda-linux-gnu/13.3.0/../../../../x86_64-conda-linux-gnu/bin/ld:
>  
> lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/GenericLoopConversion.cpp.o:
>  in function `(anonymous 
> namespace)::GenericLoopConversionPattern::rewriteToDistributeParallelDo(mlir::omp::LoopOp,
>  mlir::ConversionPatternRewriter&) const [clone .isra.0]':
> GenericLoopConversion.cpp:(.text._ZNK12_GLOBAL__N_128GenericLoopConversionPattern29rewriteToDistributeParallelDoEN4mlir3omp6LoopOpERNS1_25ConversionPatternRewriterE.isra.0+0x442):
>  undefined reference to 
> `Fortran::common::openmp::genEntryBlock(mlir::OpBuilder&, 
> Fortran::common::openmp::EntryBlockArgs const&, mlir::Region&)'
> collect2: error: ld returned 1 exit status
> ```

I don't see this in any configuration that I am te

[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)

2024-12-09 Thread Louis Dionne via llvm-branch-commits



@@ -1285,7 +1285,7 @@ elseif(LLVM_ENABLE_LTO)
   endif()
 endif()
 
-if(LLVM_ENABLE_FATLTO AND UNIX AND NOT APPLE)
+if(LLVM_ENABLE_FATLTO AND ((UNIX AND NOT APPLE) OR FUCHSIA))

ldionne wrote:

Sorry, I think I initially read `if (LLVM_ENABLE_FATLTO OR ((UNIX AND NOT 
APPLE) OR FUCHSIA))`.

https://github.com/llvm/llvm-project/pull/112277
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)

2024-12-09 Thread Louis Dionne via llvm-branch-commits


https://github.com/ldionne approved this pull request.


https://github.com/llvm/llvm-project/pull/112277
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Matt Arsenault via llvm-branch-commits



@@ -123,6 +123,7 @@ namespace {
 const TargetRegisterInfo *TRI = nullptr;
 const MachineFrameInfo *MFI = nullptr;
 MachineRegisterInfo *MRI = nullptr;
+RegisterClassInfo RegClassInfo;

arsenm wrote:

The way RegisterClassInfo is currently used, it gets recomputed for every pass 
that uses it. It should probably be turned into a normal analysis.

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Sam Elliott via llvm-branch-commits



@@ -123,6 +123,7 @@ namespace {
 const TargetRegisterInfo *TRI = nullptr;
 const MachineFrameInfo *MFI = nullptr;
 MachineRegisterInfo *MRI = nullptr;
+RegisterClassInfo RegClassInfo;

lenary wrote:

I was thinking this, especially as it has some saved state to work out when it 
needs to recompute. I think that's probably a good follow-up?

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Louis Dionne via llvm-branch-commits



@@ -79,42 +79,46 @@ namespace std {
 } // namespace std
 
 */
-
-#include <__config>
-#include <__memory/allocator.h>
-#include <__memory/allocator_destructor.h>
-#include <__memory/allocator_traits.h>
-#include <__memory/unique_ptr.h>
-#include <__type_traits/add_cv_quals.h>
-#include <__type_traits/add_pointer.h>
-#include <__type_traits/aligned_storage.h>
-#include <__type_traits/conditional.h>
-#include <__type_traits/decay.h>
-#include <__type_traits/enable_if.h>
-#include <__type_traits/is_constructible.h>
-#include <__type_traits/is_function.h>
-#include <__type_traits/is_nothrow_constructible.h>
-#include <__type_traits/is_reference.h>
-#include <__type_traits/is_same.h>
-#include <__type_traits/is_void.h>
-#include <__type_traits/remove_cv.h>
-#include <__type_traits/remove_cvref.h>
-#include <__type_traits/remove_reference.h>
-#include <__utility/forward.h>
-#include <__utility/in_place.h>
-#include <__utility/move.h>
-#include <__utility/unreachable.h>
-#include <__verbose_abort>
-#include 
-#include 
-#include 
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#  pragma GCC system_header
-#endif
+#include <__configuration/language.h>
+
+#if defined(_LIBCPP_CXX03_LANG) && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)

ldionne wrote:

It would be nice for this patch to be really straightforward to review, and 
that would be the case if not for the formatting changes. I would suggest 
frontloading a patch that does

```c++
#if 0
// nothing for now
#else
// existing code
#endif
```

That will cause all the formatting stuff to happen in that patch. We can then 
follow up with a patch that turns these `#if 0` into something real.

https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Louis Dionne via llvm-branch-commits



@@ -152,11 +152,6 @@ _LIBCPP_HARDENING_MODE_DEBUG
 #  define _LIBCPP_TOSTRING2(x) #x
 #  define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
 
-// NOLINTNEXTLINE(libcpp-cpp-version-check)
-#  if __cplusplus < 201103L
-#define _LIBCPP_CXX03_LANG
-#  endif

ldionne wrote:

In the current state of this patch, the "intersection" between the 03 headers 
and the normal headers is basically the definition of `_LIBCPP_CXX03_LANG`, but 
we're including all of `__configuration/language.h` for that. I think that we 
should strive to make that intersection empty, because anything in that 
intersection can cause incompatibilities and confusion.

Since `_LIBCPP_CXX03_LANG` is so simple, I would instead just check the value 
of `__cplusplus` directly at the top-level, and not share anything between the 
03 headers and the normal headers.

Note that `__config_site` is also something that technically falls inside that 
intersection, and that's tricky to handle; we can discuss that separately.

https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Louis Dionne via llvm-branch-commits



@@ -11,10 +11,6 @@
 
 #include <__config>
 
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)

ldionne wrote:

Merge conflict?

https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Louis Dionne via llvm-branch-commits


https://github.com/ldionne commented:

Let's write down the order for landing this in smaller pieces:

1. Land the `#if 0` change which addresses the formatting issues.
2. Land the CMake changes that start installing the C++03 headers.
3. Switch to `#if __cplusplus < C++11 && USE_FROZEN_HEADERS` (or whatever), and 
also add the CI with XFAILs. If there are too many XFAILs, the CI bit should 
probably be left to a separate patch.

We can treat even C++23-only headers the same for now, by including 
`__cxx03/__config` in them. That's the status quo and we should start with 
that. However, as a follow-up, we should go through all of the >= 11 headers 
and make them "not care" about the C++03 frozen headers. We could in addition 
perhaps diagnose the use of `-std=c++03` in the normal `__config` file with an 
error.

https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)

2024-12-09 Thread Louis Dionne via llvm-branch-commits


https://github.com/ldionne edited 
https://github.com/llvm/llvm-project/pull/109002
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)

2024-12-09 Thread Matt Arsenault via llvm-branch-commits



@@ -123,6 +123,7 @@ namespace {
 const TargetRegisterInfo *TRI = nullptr;
 const MachineFrameInfo *MFI = nullptr;
 MachineRegisterInfo *MRI = nullptr;
+RegisterClassInfo RegClassInfo;

arsenm wrote:

Relatedly, we also do not serialize the set of reserved registers in MIR.

https://github.com/llvm/llvm-project/pull/119194
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118629
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2024-12-09 Thread Benjamin Maxwell via llvm-branch-commits



@@ -418,7 +418,13 @@ class LoopVectorizationPlanner {
   /// Build VPlans for the specified \p UserVF and \p UserIC if they are
   /// non-zero or all applicable candidate VFs otherwise. If vectorization and
   /// interleaving should be avoided up-front, no plans are generated.
-  void plan(ElementCount UserVF, unsigned UserIC);
+  /// RTChecks is a list of pointer pairs that should be checked for aliasing,
+  /// setting HasAliasMask to true in the case that an alias mask is generated

MacDue wrote:

Outdated comment? Is this `DiffChecks` now?

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118630
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Utils] Identity map module-level debug info on first use in CloneFunction* (PR #118627)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118627
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118623
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118628
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Utils] Identity map module-level debug info on first use in CloneFunction* (PR #118627)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118627
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [NFC][Utils] Eliminate DISubprogram set from BuildDebugInfoMDMap (PR #118625)

2024-12-09 Thread Artem Pianykh via llvm-branch-commits


https://github.com/artempyanykh edited 
https://github.com/llvm/llvm-project/pull/118625
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)

2024-12-09 Thread Arthur Eubanks via llvm-branch-commits


https://github.com/aeubanks approved this pull request.


https://github.com/llvm/llvm-project/pull/119181
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libc] ba6d4d9 - Revert "[libc] Add unistd overlay (#118882)"

2024-12-09 Thread via llvm-branch-commits


Author: Michael Jones
Date: 2024-12-09T16:08:35-08:00
New Revision: ba6d4d9c66231a1684ddfca619dbd91d9872f06d

URL: 
https://github.com/llvm/llvm-project/commit/ba6d4d9c66231a1684ddfca619dbd91d9872f06d
DIFF: 
https://github.com/llvm/llvm-project/commit/ba6d4d9c66231a1684ddfca619dbd91d9872f06d.diff

LOG: Revert "[libc] Add unistd overlay (#118882)"

This reverts commit 7db970fe4a0249234712ae6899d34b94260b09cd.

Added: 


Modified: 
libc/hdr/CMakeLists.txt
libc/hdr/unistd_macros.h
libc/src/unistd/dup.h
libc/src/unistd/dup2.h
libc/src/unistd/dup3.h
libc/src/unistd/fork.h
libc/src/unistd/ftruncate.h
libc/src/unistd/getcwd.h
libc/src/unistd/geteuid.h
libc/src/unistd/getopt.h
libc/src/unistd/getpid.h
libc/src/unistd/getppid.h
libc/src/unistd/getuid.h
libc/src/unistd/isatty.h
libc/src/unistd/link.h
libc/src/unistd/linux/ftruncate.cpp
libc/src/unistd/linux/lseek.cpp
libc/src/unistd/linux/sysconf.cpp
libc/src/unistd/linux/truncate.cpp
libc/src/unistd/lseek.h
libc/src/unistd/pread.h
libc/src/unistd/pwrite.h
libc/src/unistd/read.h
libc/src/unistd/readlink.h
libc/src/unistd/readlinkat.h
libc/src/unistd/swab.h
libc/src/unistd/symlink.h
libc/src/unistd/symlinkat.h
libc/src/unistd/syscall.h
libc/src/unistd/sysconf.h
libc/src/unistd/truncate.h
libc/src/unistd/write.h

Removed: 
libc/hdr/unistd_overlay.h



diff  --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index 7f523c50e86943..5eb311f4bb2298 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -126,13 +126,10 @@ add_proxy_header_library(
 libc.include.llvm-libc-macros.sys_stat_macros
 )
 
-add_header_library(unistd_overlay HDRS unistd_overlay.h)
 add_proxy_header_library(
   unistd_macros
   HDRS
 unistd_macros.h
-  DEPENDS
-.unistd_overlay
   FULL_BUILD_DEPENDS
 libc.include.unistd
 libc.include.llvm-libc-macros.unistd_macros

diff  --git a/libc/hdr/unistd_macros.h b/libc/hdr/unistd_macros.h
index 5c2b24354dd3ee..132e123280139f 100644
--- a/libc/hdr/unistd_macros.h
+++ b/libc/hdr/unistd_macros.h
@@ -15,7 +15,7 @@
 
 #else // Overlay mode
 
-#include "unistd_overlay.h"
+#include <unistd.h>
 
 #endif // LLVM_LIBC_FULL_BUILD
 

diff  --git a/libc/hdr/unistd_overlay.h b/libc/hdr/unistd_overlay.h
deleted file mode 100644
index e3001e0cda08f5..00
--- a/libc/hdr/unistd_overlay.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- Including unistd.h in overlay mode ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-
-#ifndef LLVM_LIBC_HDR_UNISTD_OVERLAY_H
-#define LLVM_LIBC_HDR_UNISTD_OVERLAY_H
-
-#ifdef LIBC_FULL_BUILD
-#error "This header should only be included in overlay mode"
-#endif
-
-// Overlay mode
-
-// glibc <unistd.h> header might provide extern inline definitions for few
-// functions, causing external alias errors.  They are guarded by
-// `__USE_EXTERN_INLINES` macro.  We temporarily disable `__USE_EXTERN_INLINES`
-// macro by defining `__NO_INLINE__` before including <unistd.h>.
-// And the same with `__USE_FORTIFY_LEVEL`, which will be temporarily disabled
-// with `_FORTIFY_SOURCE`.
-
-#ifdef _FORTIFY_SOURCE
-#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE
-#undef _FORTIFY_SOURCE
-#endif
-
-#ifdef __USE_EXTERN_INLINES
-#define LIBC_OLD_USE_EXTERN_INLINES
-#undef __USE_EXTERN_INLINES
-#endif
-
-#ifdef __USE_FORTIFY_LEVEL
-#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL
-#undef __USE_FORTIFY_LEVEL
-#define __USE_FORTIFY_LEVEL 0
-#endif
-
-#ifndef __NO_INLINE__
-#define __NO_INLINE__ 1
-#define LIBC_SET_NO_INLINE
-#endif
-
-#include <unistd.h>
-
-#ifdef LIBC_OLD_FORTIFY_SOURCE
-#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE
-#undef LIBC_OLD_FORTIFY_SOURCE
-#endif
-
-#ifdef LIBC_SET_NO_INLINE
-#undef __NO_INLINE__
-#undef LIBC_SET_NO_INLINE
-#endif
-
-#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
-#undef __USE_FORTIFY_LEVEL
-#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL
-#undef LIBC_OLD_USE_FORTIFY_LEVEL
-#endif
-
-#ifdef LIBC_OLD_USE_EXTERN_INLINES
-#define __USE_EXTERN_INLINES
-#undef LIBC_OLD_USE_EXTERN_INLINES
-#endif
-
-#endif // LLVM_LIBC_HDR_UNISTD_OVERLAY_H

diff  --git a/libc/src/unistd/dup.h b/libc/src/unistd/dup.h
index 57601455acc61c..63f093c0ee4365 100644
--- a/libc/src/unistd/dup.h
+++ b/libc/src/unistd/dup.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_UNISTD_DUP_H
 #define LLVM_LIBC_SRC_UNISTD_DUP_H
 
-#include "hdr/unistd_macros.h"
 #include "src/__support/macros/config.h"
+#include <unistd.h>
 
 namespace LIBC_NAMESPACE_DECL {
 

diff  --git a/libc/src/unistd/dup2.h b/libc/src/unistd/dup2.h
index e2cf62389bca87..060c112daf08fb 100644
--- a/libc/src/unistd/dup

[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)

2024-12-09 Thread via llvm-branch-commits


h-vetinari wrote:

Thanks for your inputs @Meinersbur! 
https://github.com/llvm/llvm-project/pull/110217/commits/a03606822b0eaef8efdeb00c4f2c33e1a029f79f
 fixed the compilation issues on linux.

I have another question - when building the runtimes (separately as you 
described), it seems that `-DBUILD_SHARED_LIBS=ON` gets ignored also on linux, 
in the sense that the resulting library is `libflang_rt.a` and not 
`libflang_rt.so`. You had commented that this is a known limitation on windows, 
but AFAIU this should still work on linux (as previously)?

https://github.com/llvm/llvm-project/pull/110217
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)

2024-12-09 Thread Michael Kruse via llvm-branch-commits


https://github.com/Meinersbur edited 
https://github.com/llvm/llvm-project/pull/110217
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)

2024-12-09 Thread Chuanqi Xu via llvm-branch-commits


ChuanqiXu9 wrote:

Sent https://github.com/llvm/llvm-project/pull/119333

It looks like the lldb failure is because we forgot to update the 
ExternalASTConsumer (this is the second time I have hit this; I am wondering 
if we can make it more automatic). The other Windows failure is a pattern match failure.

https://github.com/llvm/llvm-project/pull/83237
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] 4fe98aa - Revert "[PAC][ELF][AArch64] Support signed personality function pointer (#113…"

2024-12-09 Thread via llvm-branch-commits


Author: Daniil Kovalev
Date: 2024-12-10T09:10:23+03:00
New Revision: 4fe98aa2253925a6bc24bd30f2c654756c77cc28

URL: 
https://github.com/llvm/llvm-project/commit/4fe98aa2253925a6bc24bd30f2c654756c77cc28
DIFF: 
https://github.com/llvm/llvm-project/commit/4fe98aa2253925a6bc24bd30f2c654756c77cc28.diff

LOG: Revert "[PAC][ELF][AArch64] Support signed personality function pointer 
(#113…"

This reverts commit 4fb1cda6606ba75782aa1964835abf1a69e2adae.

Added: 


Modified: 
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGen/ptrauth-module-flags.c
llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
llvm/include/llvm/Target/TargetLoweringObjectFile.h
llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
llvm/lib/Target/TargetLoweringObjectFile.cpp
llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn

Removed: 
llvm/lib/Target/AArch64/AArch64MachineModuleInfo.cpp
llvm/lib/Target/AArch64/AArch64MachineModuleInfo.h
llvm/test/CodeGen/AArch64/ptrauth-sign-personality.ll



diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 841fb1ced9a02b..d3d5c0743a520b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1218,9 +1218,6 @@ void CodeGenModule::Release() {
   getModule().addModuleFlag(llvm::Module::Min, "ptrauth-elf-got", 1);
 
 if (getTriple().isOSLinux()) {
-  if (LangOpts.PointerAuthCalls)
-getModule().addModuleFlag(llvm::Module::Min, 
"ptrauth-sign-personality",
-  1);
   assert(getTriple().isOSBinFormatELF());
   using namespace llvm::ELF;
   uint64_t PAuthABIVersion =

diff  --git a/clang/test/CodeGen/ptrauth-module-flags.c 
b/clang/test/CodeGen/ptrauth-module-flags.c
index e441d52cb7c62b..5a7e9a7c2a36fe 100644
--- a/clang/test/CodeGen/ptrauth-module-flags.c
+++ b/clang/test/CodeGen/ptrauth-module-flags.c
@@ -1,13 +1,8 @@
 // RUN: %clang_cc1 -triple aarch64-linux-gnu   -emit-llvm %s  
-o - | FileCheck %s --check-prefix=OFF
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-elf-got -emit-llvm %s  
-o - | FileCheck %s --check-prefix=ELFGOT
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls   -emit-llvm %s  
-o - | FileCheck %s --check-prefix=PERSONALITY
 
 // ELFGOT:  !llvm.module.flags = !{
 // ELFGOT-SAME: !1
 // ELFGOT:  !1 = !{i32 8, !"ptrauth-elf-got", i32 1}
 
-// PERSONALITY:  !llvm.module.flags = !{
-// PERSONALITY-SAME: !1
-// PERSONALITY:  !1 = !{i32 8, !"ptrauth-sign-personality", i32 1}
-
 // OFF-NOT: "ptrauth-

diff  --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h 
b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index a2a9e5d499e527..8eef45ce565deb 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -52,13 +52,7 @@ class TargetLoweringObjectFileELF : public 
TargetLoweringObjectFile {
   void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override;
 
   void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL,
-const MCSymbol *Sym,
-const MachineModuleInfo *MMI) const override;
-
-  virtual void emitPersonalityValueImpl(MCStreamer &Streamer,
-const DataLayout &DL,
-const MCSymbol *Sym,
-const MachineModuleInfo *MMI) const;
+const MCSymbol *Sym) const override;
 
   /// Given a constant with the SectionKind, return a section that it should be
   /// placed in.

diff  --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h 
b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 4864ba843f4886..0c09cfe684783b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -82,8 +82,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
   virtual void Initialize(MCContext &ctx, const TargetMachine &TM);
 
   virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM,
-const MCSymbol *Sym,
-const MachineModuleInfo *MMI) const;
+const MCSymbol *Sym) const;
 
   /// Emit the module-level metadata that the platform cares about.
   virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {}

diff  --git a/ll

82 matches

Mail list logo