[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
llvmbot wrote: @llvm/pr-subscribers-backend-arm Author: Pengcheng Wang (wangpc-pp) Changes To reduce compile time. This is a follow-up of #118787. --- Full diff: https://github.com/llvm/llvm-project/pull/119194.diff 5 Files Affected: - (modified) llvm/lib/CodeGen/MachineLICM.cpp (+3-1) - (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+1-1) - (modified) llvm/lib/CodeGen/MachineSink.cpp (+1-1) - (modified) llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (+1-2) - (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+2-2) ``diff diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index d21059189b1844..8aaa5605f28b70 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -123,6 +123,7 @@ namespace { const TargetRegisterInfo *TRI = nullptr; const MachineFrameInfo *MFI = nullptr; MachineRegisterInfo *MRI = nullptr; +RegisterClassInfo RegClassInfo; TargetSchedModel SchedModel; bool PreRegAlloc = false; bool HasProfileData = false; @@ -389,6 +390,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) { MFI = &MF.getFrameInfo(); MRI = &MF.getRegInfo(); SchedModel.init(&ST); + RegClassInfo.runOnMachineFunction(MF); HasProfileData = MF.getFunction().hasProfileData(); @@ -405,7 +407,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) { std::fill(RegPressure.begin(), RegPressure.end(), 0); RegLimit.resize(NumRPS); for (unsigned i = 0, e = NumRPS; i != e; ++i) - RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); + RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i); } if (HoistConstLoads) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 3ee0ba1fea5079..e2bbebfc5f5462 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1326,7 +1326,7 @@ class HighRegisterPressureDetector { // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - 
PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); + PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); } // There are two patterns of last-use. diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 7d0bedab7cdabc..d407d8a965ea67 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1094,7 +1094,7 @@ bool MachineSinking::registerPressureSetExceedsLimit( std::vector BBRegisterPressure = getBBRegisterPressure(MBB); for (; *PS != -1; PS++) if (Weight + BBRegisterPressure[*PS] >= -TRI->getRegPressureSetLimit(*MBB.getParent(), *PS)) +RegClassInfo.getRegPressureSetLimit(*PS)) return true; return false; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index e6b37dd9161685..8673deddb7057f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6936,7 +6936,6 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, RegClassInfo.runOnMachineFunction(*MF); RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(), EndLoop->getParent()->end(), false, false); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); bumpCrossIterationPressure(RPTracker, CrossIterationNeeds); @@ -6979,7 +6978,7 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, auto &P = RPTracker.getPressure().MaxSetPressure; for (unsigned I = 0, E = P.size(); I < E; ++I) -if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) { +if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) { return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 44f6db5061e21a..fa45a7fb7fabe6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( }; // For now we only care about float and double type fma. 
- unsigned VSSRCLimit = TRI->getRegPressureSetLimit( - *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); + unsigned VSSRCLimit = + RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC); // Only reduce register pressure when pressure is high. return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > `` https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
@@ -587,42 +587,48 @@ template */ -#include <__config> - -#include <__atomic/aliases.h> -#include <__atomic/atomic.h> -#include <__atomic/atomic_base.h> -#include <__atomic/atomic_flag.h> -#include <__atomic/atomic_init.h> -#include <__atomic/atomic_lock_free.h> -#include <__atomic/atomic_sync.h> -#include <__atomic/check_memory_order.h> -#include <__atomic/contention_t.h> -#include <__atomic/cxx_atomic_impl.h> -#include <__atomic/fence.h> -#include <__atomic/is_always_lock_free.h> -#include <__atomic/kill_dependency.h> -#include <__atomic/memory_order.h> -#include - -#if _LIBCPP_STD_VER >= 20 -# include <__atomic/atomic_ref.h> -#endif - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#if !_LIBCPP_HAS_ATOMIC_HEADER -# error is not implemented -#endif - -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# include -# include -# include -#endif +#include <__configuration/cxx03.h> + +#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_USE_CXX03_HEADERS) +# include <__cxx03/algorithm> philnik777 wrote: I've checked the top level headers manually and fixed a few more. https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
wangpc-pp wrote: > Why do we need #118787 if we can just update the passes to use > RegisterClassInfo? Because the APIs are messy and confusing, we don't know if there will be some future users that use the raw limit directly. https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
llvmbot wrote: @llvm/pr-subscribers-backend-powerpc Author: Pengcheng Wang (wangpc-pp) Changes To reduce compile time. This is a follow-up of #118787. --- Full diff: https://github.com/llvm/llvm-project/pull/119194.diff 5 Files Affected: - (modified) llvm/lib/CodeGen/MachineLICM.cpp (+3-1) - (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+1-1) - (modified) llvm/lib/CodeGen/MachineSink.cpp (+1-1) - (modified) llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp (+1-2) - (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+2-2) ``diff diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index d21059189b1844..8aaa5605f28b70 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -123,6 +123,7 @@ namespace { const TargetRegisterInfo *TRI = nullptr; const MachineFrameInfo *MFI = nullptr; MachineRegisterInfo *MRI = nullptr; +RegisterClassInfo RegClassInfo; TargetSchedModel SchedModel; bool PreRegAlloc = false; bool HasProfileData = false; @@ -389,6 +390,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) { MFI = &MF.getFrameInfo(); MRI = &MF.getRegInfo(); SchedModel.init(&ST); + RegClassInfo.runOnMachineFunction(MF); HasProfileData = MF.getFunction().hasProfileData(); @@ -405,7 +407,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) { std::fill(RegPressure.begin(), RegPressure.end(), 0); RegLimit.resize(NumRPS); for (unsigned i = 0, e = NumRPS; i != e; ++i) - RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); + RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i); } if (HoistConstLoads) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 3ee0ba1fea5079..e2bbebfc5f5462 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1326,7 +1326,7 @@ class HighRegisterPressureDetector { // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) 
- PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); + PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); } // There are two patterns of last-use. diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 7d0bedab7cdabc..d407d8a965ea67 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1094,7 +1094,7 @@ bool MachineSinking::registerPressureSetExceedsLimit( std::vector BBRegisterPressure = getBBRegisterPressure(MBB); for (; *PS != -1; PS++) if (Weight + BBRegisterPressure[*PS] >= -TRI->getRegPressureSetLimit(*MBB.getParent(), *PS)) +RegClassInfo.getRegPressureSetLimit(*PS)) return true; return false; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index e6b37dd9161685..8673deddb7057f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6936,7 +6936,6 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, RegClassInfo.runOnMachineFunction(*MF); RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(), EndLoop->getParent()->end(), false, false); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); bumpCrossIterationPressure(RPTracker, CrossIterationNeeds); @@ -6979,7 +6978,7 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, auto &P = RPTracker.getPressure().MaxSetPressure; for (unsigned I = 0, E = P.size(); I < E; ++I) -if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) { +if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) { return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 44f6db5061e21a..fa45a7fb7fabe6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( }; // For now we only care about float and double type 
fma. - unsigned VSSRCLimit = TRI->getRegPressureSetLimit( - *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); + unsigned VSSRCLimit = + RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC); // Only reduce register pressure when pressure is high. return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > `` https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
@@ -1827,232 +1827,147 @@ template */ -#include <__config> - -#include <__algorithm/adjacent_find.h> -#include <__algorithm/all_of.h> -#include <__algorithm/any_of.h> -#include <__algorithm/binary_search.h> -#include <__algorithm/copy.h> -#include <__algorithm/copy_backward.h> -#include <__algorithm/copy_if.h> -#include <__algorithm/copy_n.h> -#include <__algorithm/count.h> -#include <__algorithm/count_if.h> -#include <__algorithm/equal.h> -#include <__algorithm/equal_range.h> -#include <__algorithm/fill.h> -#include <__algorithm/fill_n.h> -#include <__algorithm/find.h> -#include <__algorithm/find_end.h> -#include <__algorithm/find_first_of.h> -#include <__algorithm/find_if.h> -#include <__algorithm/find_if_not.h> -#include <__algorithm/for_each.h> -#include <__algorithm/generate.h> -#include <__algorithm/generate_n.h> -#include <__algorithm/includes.h> -#include <__algorithm/inplace_merge.h> -#include <__algorithm/is_heap.h> -#include <__algorithm/is_heap_until.h> -#include <__algorithm/is_partitioned.h> -#include <__algorithm/is_permutation.h> -#include <__algorithm/is_sorted.h> -#include <__algorithm/is_sorted_until.h> -#include <__algorithm/iter_swap.h> -#include <__algorithm/lexicographical_compare.h> -#include <__algorithm/lower_bound.h> -#include <__algorithm/make_heap.h> -#include <__algorithm/max.h> -#include <__algorithm/max_element.h> -#include <__algorithm/merge.h> -#include <__algorithm/min.h> -#include <__algorithm/min_element.h> -#include <__algorithm/minmax.h> -#include <__algorithm/minmax_element.h> -#include <__algorithm/mismatch.h> -#include <__algorithm/move.h> -#include <__algorithm/move_backward.h> -#include <__algorithm/next_permutation.h> -#include <__algorithm/none_of.h> -#include <__algorithm/nth_element.h> -#include <__algorithm/partial_sort.h> -#include <__algorithm/partial_sort_copy.h> -#include <__algorithm/partition.h> -#include <__algorithm/partition_copy.h> -#include <__algorithm/partition_point.h> -#include <__algorithm/pop_heap.h> 
-#include <__algorithm/prev_permutation.h> -#include <__algorithm/push_heap.h> -#include <__algorithm/remove.h> -#include <__algorithm/remove_copy.h> -#include <__algorithm/remove_copy_if.h> -#include <__algorithm/remove_if.h> -#include <__algorithm/replace.h> -#include <__algorithm/replace_copy.h> -#include <__algorithm/replace_copy_if.h> -#include <__algorithm/replace_if.h> -#include <__algorithm/reverse.h> -#include <__algorithm/reverse_copy.h> -#include <__algorithm/rotate.h> -#include <__algorithm/rotate_copy.h> -#include <__algorithm/search.h> -#include <__algorithm/search_n.h> -#include <__algorithm/set_difference.h> -#include <__algorithm/set_intersection.h> -#include <__algorithm/set_symmetric_difference.h> -#include <__algorithm/set_union.h> -#include <__algorithm/shuffle.h> -#include <__algorithm/sort.h> -#include <__algorithm/sort_heap.h> -#include <__algorithm/stable_partition.h> -#include <__algorithm/stable_sort.h> -#include <__algorithm/swap_ranges.h> -#include <__algorithm/transform.h> -#include <__algorithm/unique.h> -#include <__algorithm/unique_copy.h> -#include <__algorithm/upper_bound.h> - -#if _LIBCPP_STD_VER >= 17 -# include <__algorithm/clamp.h> -# include <__algorithm/for_each_n.h> -# include <__algorithm/pstl.h> -# include <__algorithm/sample.h> -#endif // _LIBCPP_STD_VER >= 17 - -#if _LIBCPP_STD_VER >= 20 -# include <__algorithm/in_found_result.h> -# include <__algorithm/in_fun_result.h> -# include <__algorithm/in_in_out_result.h> -# include <__algorithm/in_in_result.h> -# include <__algorithm/in_out_out_result.h> -# include <__algorithm/in_out_result.h> -# include <__algorithm/lexicographical_compare_three_way.h> -# include <__algorithm/min_max_result.h> -# include <__algorithm/ranges_adjacent_find.h> -# include <__algorithm/ranges_all_of.h> -# include <__algorithm/ranges_any_of.h> -# include <__algorithm/ranges_binary_search.h> -# include <__algorithm/ranges_clamp.h> -# include <__algorithm/ranges_contains.h> -# include 
<__algorithm/ranges_copy.h> -# include <__algorithm/ranges_copy_backward.h> -# include <__algorithm/ranges_copy_if.h> -# include <__algorithm/ranges_copy_n.h> -# include <__algorithm/ranges_count.h> -# include <__algorithm/ranges_count_if.h> -# include <__algorithm/ranges_equal.h> -# include <__algorithm/ranges_equal_range.h> -# include <__algorithm/ranges_fill.h> -# include <__algorithm/ranges_fill_n.h> -# include <__algorithm/ranges_find.h> -# include <__algorithm/ranges_find_end.h> -# include <__algorithm/ranges_find_first_of.h> -# include <__algorithm/ranges_find_if.h> -# include <__algorithm/ranges_find_if_not.h> -# include <__algorithm/ranges_for_each.h> -# include <__algorithm/ranges_for_each_n.h> -# include <__algorithm/ranges_generate.h> -# include <__algorithm/ranges_generate_n.h> -# include <__algorithm/ranges_includes.h> -# include <__algorithm/ranges_inplace_merge.h> -# include <__algorithm/ranges_is_heap.h> -# include <__a
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1416,14 +1466,14 @@ void VPlanTransforms::addActiveLaneMask( auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), [](VPUser *U) { return isa(U); }); - assert(FoundWidenCanonicalIVUser && + assert(FoundWidenCanonicalIVUser && *FoundWidenCanonicalIVUser && MacDue wrote: This looks a little odd. Doesn't `find_if` return an iterator? ```suggestion auto IVUsers = Plan.getCanonicalIV()->users(); /// ... assert(FoundWidenCanonicalIVUser != IVUsers.end() && "Must have widened canonical IV when tail folding!"); ``` https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -77,9 +77,13 @@ struct VPlanTransforms { /// creation) and instead it is handled using active-lane-mask. \p /// DataAndControlFlowWithoutRuntimeCheck implies \p /// UseActiveLaneMaskForControlFlow. + /// RTChecks refers to the pointer pairs that need aliasing elements to be + /// masked off each loop iteration. MacDue wrote: No docs for PSE? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -3073,6 +3075,56 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { } }; +// Given a pointer A that is being stored to, and pointer B that is being +// read from, both with unknown lengths, create a mask that disables +// elements which could overlap across a loop iteration. For example, if A +// is X and B is X + 2 with VF being 4, only the final two elements of the +// loaded vector can be stored since they don't overlap with the stored +// vector. %b.vec = load %b ; = [s, t, u, v] +// [...] +// store %a, %b.vec ; only u and v can be stored as their addresses don't +// overlap with %a + (VF - 1) MacDue wrote: This is specifically RAW? Or something like: ``` store A[x] load A[x + 2] ``` Perhaps I'm muddled on what "final two elements" means, but isn't it the store of the first two elements that is valid (so it won't overwrite the elements for the load)? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -427,6 +428,29 @@ Value *VPInstruction::generate(VPTransformState &State) { {PredTy, ScalarTC->getType()}, {VIVElem0, ScalarTC}, nullptr, Name); } + // Count the number of bits set in each lane and reduce the result to a scalar + case VPInstruction::PopCount: { +Value *Op = State.get(getOperand(0)); +auto *VT = Op->getType(); MacDue wrote: nit: Spell out type if it's not present on the RHS. ```suggestion Type *VT = Op->getType(); ``` https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1300,14 +1301,38 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( cast(CanonicalIVPHI->getBackedgeValue()); // TODO: Check if dropping the flags is needed if // !DataAndControlFlowWithoutRuntimeCheck. + VPValue *IncVal = CanonicalIVIncrement->getOperand(1); + assert(IncVal != CanonicalIVPHI && "Unexpected operand order"); + CanonicalIVIncrement->dropPoisonGeneratingFlags(); DebugLoc DL = CanonicalIVIncrement->getDebugLoc(); + // We can't use StartV directly in the ActiveLaneMask VPInstruction, since // we have to take unrolling into account. Each part needs to start at // Part * VF auto *VecPreheader = Plan.getVectorPreheader(); VPBuilder Builder(VecPreheader); + // Create an alias mask for each possibly-aliasing pointer pair. If there + // are multiple they are combined together with ANDs. + VPValue *AliasMask = nullptr; + + for (auto C : RTChecks) { +// FIXME: How to pass this info back? +//HasAliasMask = true; MacDue wrote: This FIXME is a little unclear. Does it mean `HasAliasMask` should be set here but it's not? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -195,6 +195,13 @@ enum class TailFoldingStyle { DataWithEVL, }; +enum class RTCheckStyle { + /// Branch to scalar loop if checks fails at runtime. + ScalarFallback, + /// Form a mask based on elements which won't be a WAR or RAW hazard MacDue wrote: ultra nit: One of these comments ends with a full-stop and the other does not. ```suggestion /// Branch to scalar loop if checks fails at runtime. ScalarFallback, /// Form a mask based on elements which won't be a WAR or RAW hazard. ``` https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1331,14 +1356,37 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( "index.part.next"); // Create the active lane mask instruction in the VPlan preheader. - auto *EntryALM = + VPValue *Mask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC}, DL, "active.lane.mask.entry"); // Now create the ActiveLaneMaskPhi recipe in the main loop using the // preheader ActiveLaneMask instruction. - auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc()); + auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(Mask, DebugLoc()); LaneMaskPhi->insertAfter(CanonicalIVPHI); + VPValue *LaneMask = LaneMaskPhi; + if (AliasMask) { +// Increment phi by correct amount. +Builder.setInsertPoint(CanonicalIVIncrement); + +VPValue *IncrementBy = Builder.createNaryOp(VPInstruction::PopCount, +{AliasMask}, DL, "popcount"); +Type *IVType = CanonicalIVPHI->getScalarType(); + +if (IVType->getScalarSizeInBits() < 64) { + auto *Cast = + new VPScalarCastRecipe(Instruction::Trunc, IncrementBy, IVType); + Cast->insertAfter(IncrementBy->getDefiningRecipe()); + IncrementBy = Cast; +} +CanonicalIVIncrement->setOperand(1, IncrementBy); + +// And the alias mask so the iteration only processes non-aliasing lanes +Builder.setInsertPoint(CanonicalIVPHI->getParent(), + CanonicalIVPHI->getParent()->getFirstNonPhi()); +LaneMask = Builder.createNaryOp(Instruction::BinaryOps::And, +{LaneMaskPhi, AliasMask}, DL); MacDue wrote: Do we know this AND won't be all-false? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPAliasLaneMaskRecipe::execute(VPTransformState &State) { + IRBuilderBase Builder = State.Builder; + Value *SinkValue = State.get(getSinkValue(), true); + Value *SourceValue = State.get(getSourceValue(), true); + + auto *Type = SinkValue->getType(); + Value *AliasMask = Builder.CreateIntrinsic( + Intrinsic::experimental_get_alias_lane_mask, + {VectorType::get(Builder.getInt1Ty(), State.VF), Type, + Builder.getInt64Ty()}, + {SourceValue, SinkValue, Builder.getInt64(getAccessedElementSize()), + Builder.getInt1(WriteAfterRead)}, + nullptr, "alias.lane.mask"); + State.set(this, AliasMask, /*IsScalar=*/false); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPAliasLaneMaskRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = alias lane mask "; MacDue wrote: nit: These seem more commonly printed in all caps with hyphens. ```suggestion O << " = ALIAS-LANE-MASK "; ``` https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -952,7 +952,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); // FIXME: Model VF * UF computation completely in VPlan. - assert(VFxUF.getNumUsers() && "VFxUF expected to always have users"); MacDue wrote: How does removing this assert relate to these changes? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Eliminate DISubprogram set from BuildDebugInfoMDMap (PR #118625)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118625 >From 160c6fe1ef922f9edf9ff1f5ac6610444a4f6711 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Sat, 14 Sep 2024 16:02:51 -0700 Subject: [PATCH] [NFC][Utils] Eliminate DISubprogram set from BuildDebugInfoMDMap Summary: Previously, we'd add all SPs distinct from the cloned one into a set. Then when cloning a local scope we'd check if it's from one of those 'distinct' SPs by checking if it's in the set. We don't need to do that. We can just check against the cloned SP directly and drop the set. Test Plan: ninja check-llvm-unit check-llvm stack-info: PR: https://github.com/llvm/llvm-project/pull/118625, branch: users/artempyanykh/fast-coro-upstream/6 --- llvm/lib/Transforms/Utils/CloneFunction.cpp | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index cf4b1c7a045e05..34400d45aa6e72 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -172,18 +172,15 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD, }; // Avoid cloning types, compile units, and (other) subprograms. -SmallPtrSet MappedToSelfSPs; for (DISubprogram *ISP : DIFinder.subprograms()) { - if (ISP != SPClonedWithinModule) { + if (ISP != SPClonedWithinModule) mapToSelfIfNew(ISP); -MappedToSelfSPs.insert(ISP); - } } // If a subprogram isn't going to be cloned skip its lexical blocks as well. for (DIScope *S : DIFinder.scopes()) { auto *LScope = dyn_cast(S); - if (LScope && MappedToSelfSPs.count(LScope->getSubprogram())) + if (LScope && LScope->getSubprogram() != SPClonedWithinModule) mapToSelfIfNew(S); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Utils] Identity map global debug info on first use in CloneFunction* (PR #118627)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118627 >From 6ee8c03c37cc47c2ae21532708982d94a9c71530 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Sun, 15 Sep 2024 04:39:20 -0700 Subject: [PATCH] [Utils] Identity map global debug info on first use in CloneFunction* Summary: To avoid cloning 'global' debug info, CloneFunction implementation used to eagerly identity map a known subset of global debug info into ValueMap's MD map. In larger modules with meaningful volume of debug info this gets very expensive. By passing such global metadata via an IdentityMD set for the ValueMapper to map on first use, we get several benefits: 1. Mapping metadata is not cheap, particularly because of tracking. When cloning a Function we identity map lots of global module-level metadata to avoid cloning it, while only a fraction of it is actually used by the function. Mapping on first use is a lot faster for modules with meaningful amount of debug info. 2. Eagerly identity mapping metadata makes it harder to cache module-level data (e.g. a set of metadata nodes in a \a DICompileUnit). With this patch we can cache certain module-level metadata calculations to speed things up further. 
Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up CoroSplitPass which is one of the heavier users of cloning: | | Baseline | IdentityMD set | |-+--+| | CoroSplitPass | 306ms| 221ms | | CoroCloner | 101ms| 72ms | |-+--+| | Speed up| 1x | 1.4x | Test Plan: ninja check-llvm-unit ninja check-llvm stack-info: PR: https://github.com/llvm/llvm-project/pull/118627, branch: users/artempyanykh/fast-coro-upstream/8 --- llvm/include/llvm/Transforms/Utils/Cloning.h | 19 +++--- .../llvm/Transforms/Utils/ValueMapper.h | 67 ++- llvm/lib/Transforms/Utils/CloneFunction.cpp | 59 llvm/lib/Transforms/Utils/ValueMapper.cpp | 19 -- 4 files changed, 103 insertions(+), 61 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 50518c746d11ca..9b256f9b4d6890 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -192,7 +192,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, RemapFlags RemapFlag, ValueMapTypeRemapper *TypeMapper = nullptr, - ValueMaterializer *Materializer = nullptr); + ValueMaterializer *Materializer = nullptr, + const MetadataSetTy *IdentityMD = nullptr); /// Clone OldFunc's body into NewFunc. 
void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc, @@ -201,7 +202,8 @@ void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr, ValueMapTypeRemapper *TypeMapper = nullptr, - ValueMaterializer *Materializer = nullptr); + ValueMaterializer *Materializer = nullptr, + const MetadataSetTy *IdentityMD = nullptr); void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, @@ -241,13 +243,12 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F, CloneFunctionChangeType Changes, DebugInfoFinder &DIFinder); -/// Build a map of debug info to use during Metadata cloning. -/// Returns true if cloning would need module level changes and false if there -/// would only be local changes. -bool BuildDebugInfoMDMap(DenseMap &MD, - CloneFunctionChangeType Changes, - DebugInfoFinder &DIFinder, - DISubprogram *SPClonedWithinModule); +/// Based on \p Changes and \p DIFinder populate \p MD with debug info that +/// needs to be identity mapped during Metadata cloning. +void FindDebugInfoToIdentityMap(MetadataSetTy &MD, +CloneFunctionChangeType Changes, +DebugInfoFinder &DIFinder, +DISubprogram *SPClonedWithinModule); /// This class captures the data input to the InlineFunction call, and records /// the auxiliary results produced by it. diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h index 743cfeb7ef3
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118624 >From d5ed40564f2259c84b657997afbec238f81a2312 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Thu, 12 Sep 2024 15:50:25 -0700 Subject: [PATCH] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto Summary: This and previously extracted `CloneFunction*Into` functions will be used in later diffs. Test Plan: ninja check-llvm-unit check-llvm stack-info: PR: https://github.com/llvm/llvm-project/pull/118624, branch: users/artempyanykh/fast-coro-upstream/5 --- llvm/include/llvm/Transforms/Utils/Cloning.h | 34 --- llvm/lib/Transforms/Utils/CloneFunction.cpp | 96 +++- 2 files changed, 76 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 9a574fc4e4c08e..50518c746d11ca 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -194,6 +194,15 @@ void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's body into NewFunc. 
+void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, RemapFlags RemapFlag, + SmallVectorImpl &Returns, + const char *NameSuffix = "", + ClonedCodeInfo *CodeInfo = nullptr, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, @@ -214,7 +223,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, /// void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl &Returns, + SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr); @@ -361,32 +370,31 @@ void updateProfileCallee( /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified /// basic blocks and extract their scope. These are candidates for duplication /// when cloning. -void identifyNoAliasScopesToClone( -ArrayRef BBs, SmallVectorImpl &NoAliasDeclScopes); +void identifyNoAliasScopesToClone(ArrayRef BBs, + SmallVectorImpl &NoAliasDeclScopes); /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified /// instruction range and extract their scope. These are candidates for /// duplication when cloning. -void identifyNoAliasScopesToClone( -BasicBlock::iterator Start, BasicBlock::iterator End, -SmallVectorImpl &NoAliasDeclScopes); +void identifyNoAliasScopesToClone(BasicBlock::iterator Start, + BasicBlock::iterator End, + SmallVectorImpl &NoAliasDeclScopes); /// Duplicate the specified list of noalias decl scopes. /// The 'Ext' string is added as an extension to the name. /// Afterwards, the ClonedScopes contains the mapping of the original scope /// MDNode onto the cloned scope. /// Be aware that the cloned scopes are still part of the original scope domain. 
-void cloneNoAliasScopes( -ArrayRef NoAliasDeclScopes, -DenseMap &ClonedScopes, -StringRef Ext, LLVMContext &Context); +void cloneNoAliasScopes(ArrayRef NoAliasDeclScopes, +DenseMap &ClonedScopes, +StringRef Ext, LLVMContext &Context); /// Adapt the metadata for the specified instruction according to the /// provided mapping. This is normally used after cloning an instruction, when /// some noalias scopes needed to be cloned. -void adaptNoAliasScopes( -llvm::Instruction *I, const DenseMap &ClonedScopes, -LLVMContext &Context); +void adaptNoAliasScopes(llvm::Instruction *I, +const DenseMap &ClonedScopes, +LLVMContext &Context); /// Clone the specified noalias decl scopes. Then adapt all instructions in the /// NewBlocks basicblocks to the cloned versions. diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index c967e78123af1f..cf4b1c7a045e05 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -216,6 +216,59 @@ void llvm::CloneFunctionMetadataInto(Function *NewFunc, const
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118623 >From 8ce25caffaeb4cd3e361184eeea8d9ae5675a6be Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Thu, 12 Sep 2024 15:35:38 -0700 Subject: [PATCH] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto Summary: The new API expects the caller to populate the VMap. We need it this way for a subsequent change around coroutine cloning. Test Plan: ninja check-llvm-unit check-llvm stack-info: PR: https://github.com/llvm/llvm-project/pull/118623, branch: users/artempyanykh/fast-coro-upstream/4 --- llvm/include/llvm/Transforms/Utils/Cloning.h | 12 + llvm/lib/Transforms/Utils/CloneFunction.cpp | 28 +--- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 7858c9d9def0da..9a574fc4e4c08e 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -182,6 +182,18 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's metadata into NewFunc. +/// +/// The caller is expected to populate \p VMap beforehand and set an appropriate +/// \p RemapFlag. +/// +/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a different +/// module. Use CloneFunctionInto for that behavior. 
+void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, RemapFlags RemapFlag, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 6dc5f601b7fcaa..c967e78123af1f 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -200,6 +200,22 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD, return ModuleLevelChanges; } +void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + RemapFlags RemapFlag, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + // Duplicate the metadata that is attached to the cloned function. + // Subprograms/CUs/types that were already mapped to themselves won't be + // duplicated. + SmallVector, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) { +NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag, +TypeMapper, Materializer)); + } +} + // Clone OldFunc into NewFunc, transforming the old arguments into references to // VMap values. void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, @@ -262,15 +278,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule); const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges; - // Duplicate the metadata that is attached to the cloned function. - // Subprograms/CUs/types that were already mapped to themselves won't be - // duplicated. 
- SmallVector, 1> MDs; - OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) { -NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag, -TypeMapper, Materializer)); - } + + CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper, +Materializer); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPAliasLaneMaskRecipe::execute(VPTransformState &State) { + IRBuilderBase Builder = State.Builder; + Value *SinkValue = State.get(getSinkValue(), true); + Value *SourceValue = State.get(getSourceValue(), true); + + auto *Type = SinkValue->getType(); MacDue wrote: nit: ```suggestion Type *PtrType = SinkValue->getType(); ``` https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a global debug info set and share it between all coroutine clones (PR #118628)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118628 >From 77892ea9851cee330ac55eded4758b5c9ad4dbf8 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Tue, 19 Nov 2024 17:19:27 -0700 Subject: [PATCH] [Coro] Prebuild a global debug info set and share it between all coroutine clones Summary: CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building a list of global debug info metadata. For programs compiled with full debug info this gets very expensive. This diff builds the data once and shares it between all clones. Anecdata for a sample cpp source file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) | |-+--++--| | CoroSplitPass | 306ms| 221ms | 68ms | | CoroCloner | 101ms| 72ms | 0.5ms| | CollectGlobalDI | -| - | 63ms | |-+--++--| | Speed up| 1x | 1.4x | 4.5x | Note that CollectGlobalDI happens once *per coroutine* rather than per clone. Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample internal source file, checked time trace output for scope timings. stack-info: PR: https://github.com/llvm/llvm-project/pull/118628, branch: users/artempyanykh/fast-coro-upstream/9 --- llvm/lib/Transforms/Coroutines/CoroCloner.h | 29 ++- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 51 +--- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index d1887980fb3bcb..e7121d26bd08f3 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -48,6 +48,7 @@ class BaseCloner { CloneKind FKind; IRBuilder<> Builder; TargetTransformInfo &TTI; + const MetadataSetTy &GlobalDebugInfo; ValueToValueMapTy VMap; Function *NewF = nullptr; @@ -60,12 +61,12 @@ class BaseCloner { /// Create a cloner for a continuation lowering. 
BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, Function *NewF, AnyCoroSuspendInst *ActiveSuspend, - TargetTransformInfo &TTI) + TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo) : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(Shape.ABI == ABI::Async ? CloneKind::Async : CloneKind::Continuation), -Builder(OrigF.getContext()), TTI(TTI), NewF(NewF), -ActiveSuspend(ActiveSuspend) { +Builder(OrigF.getContext()), TTI(TTI), GlobalDebugInfo(GlobalDebugInfo), +NewF(NewF), ActiveSuspend(ActiveSuspend) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || Shape.ABI == ABI::Async); assert(NewF && "need existing function for continuation"); @@ -74,9 +75,11 @@ class BaseCloner { public: BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, - CloneKind FKind, TargetTransformInfo &TTI) + CloneKind FKind, TargetTransformInfo &TTI, + const MetadataSetTy &GlobalDebugInfo) : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind), -Builder(OrigF.getContext()), TTI(TTI) {} +Builder(OrigF.getContext()), TTI(TTI), +GlobalDebugInfo(GlobalDebugInfo) {} virtual ~BaseCloner() {} @@ -84,12 +87,14 @@ class BaseCloner { static Function *createClone(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, Function *NewF, AnyCoroSuspendInst *ActiveSuspend, - TargetTransformInfo &TTI) { + TargetTransformInfo &TTI, + const MetadataSetTy &GlobalDebugInfo) { assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce || Shape.ABI == ABI::Async); TimeTraceScope FunctionScope("BaseCloner"); -BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI); +BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI, + GlobalDebugInfo); Cloner.create(); return Cloner.getFunction(); } @@ -129,8 +134,9 @@ class SwitchCloner : public BaseCloner { protected: /// Create a cloner for a switch lowering. 
SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, - CloneKind FKind, TargetTransformInfo &TTI) - : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {} + CloneKind FKind, TargetTransformInfo &TTI, + const MetadataSetTy &GlobalDebugInfo) + : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo) {} void
[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118629 >From c32803f9d01e7c2b733d38db57805fcc398bb54a Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Sun, 15 Sep 2024 10:51:38 -0700 Subject: [PATCH] [Analysis] Add DebugInfoCache analysis Summary: The analysis simply primes and caches DebugInfoFinders for each DICompileUnit in a module. This allows (future) callers like CoroSplitPass to compute global debug info metadata (required for coroutine function cloning) much faster. Specifically, pay the price of DICompileUnit processing only once per compile unit, rather than once per coroutine. Test Plan: Added a smoke test for the new analysis ninja check-llvm-unit check-llvm stack-info: PR: https://github.com/llvm/llvm-project/pull/118629, branch: users/artempyanykh/fast-coro-upstream/10 --- llvm/include/llvm/Analysis/DebugInfoCache.h | 50 + llvm/include/llvm/IR/DebugInfo.h | 4 +- llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/DebugInfoCache.cpp | 47 llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/unittests/Analysis/CMakeLists.txt| 1 + .../unittests/Analysis/DebugInfoCacheTest.cpp | 211 ++ 8 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/Analysis/DebugInfoCache.h create mode 100644 llvm/lib/Analysis/DebugInfoCache.cpp create mode 100644 llvm/unittests/Analysis/DebugInfoCacheTest.cpp diff --git a/llvm/include/llvm/Analysis/DebugInfoCache.h b/llvm/include/llvm/Analysis/DebugInfoCache.h new file mode 100644 index 00..dbd6802c99ea01 --- /dev/null +++ b/llvm/include/llvm/Analysis/DebugInfoCache.h @@ -0,0 +1,50 @@ +//===- llvm/Analysis/DebugInfoCache.h - debug info cache *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains an analysis that builds a cache of debug info for each +// DICompileUnit in a module. +// +//===--===// + +#ifndef LLVM_ANALYSIS_DEBUGINFOCACHE_H +#define LLVM_ANALYSIS_DEBUGINFOCACHE_H + +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Processes and caches debug info for each DICompileUnit in a module. +/// +/// The result of the analysis is a set of DebugInfoFinders primed on their +/// respective DICompileUnit. Such DebugInfoFinders can be used to speed up +/// function cloning which otherwise requires an expensive traversal of +/// DICompileUnit-level debug info. See an example usage in CoroSplit. +class DebugInfoCache { +public: + using DIFinderCache = SmallDenseMap; + DIFinderCache Result; + + DebugInfoCache(const Module &M); + + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &); +}; + +class DebugInfoCacheAnalysis +: public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = DebugInfoCache; + Result run(Module &M, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 73f45c3769be44..11907fbb7f20b3 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -120,11 +120,13 @@ class DebugInfoFinder { /// Process subprogram. void processSubprogram(DISubprogram *SP); + /// Process a compile unit. + void processCompileUnit(DICompileUnit *CU); + /// Clear all lists. 
void reset(); private: - void processCompileUnit(DICompileUnit *CU); void processScope(DIScope *Scope); void processType(DIType *DT); bool addCompileUnit(DICompileUnit *CU); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 0db5b80f336cb5..db9a569e301563 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -52,6 +52,7 @@ add_llvm_component_library(LLVMAnalysis DDGPrinter.cpp ConstraintSystem.cpp Delinearization.cpp + DebugInfoCache.cpp DemandedBits.cpp DependenceAnalysis.cpp DependenceGraphBuilder.cpp diff --git a/llvm/lib/Analysis/DebugInfoCache.cpp b/llvm/lib/Analysis/DebugInfoCache.cpp new file mode 100644 index 00..c1a3e89f0a6ccf --- /dev/null +++ b/llvm/lib/Analysis/DebugInfoCache.cpp @@ -0,0 +1,47 @@ +//===- llvm/Analysis/DebugInfoCache.cpp - debug info cache ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-
[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118630 >From 33b2a6aafbaec530f1dc31ad99d5fb6192849386 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Sun, 15 Sep 2024 11:00:00 -0700 Subject: [PATCH] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass Summary: We can use a DebugInfoFinder from DebugInfoCache which is already primed on a compile unit to speed up collection of global debug info. The pass could likely be another 2x+ faster if we avoid rebuilding the set of global debug info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done incrementally on top of this. Comparing performance of CoroSplitPass at various points in this stack, this is anecdata from a sample cpp file compiled with full debug info: | | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) | |-+--++---+---| | CoroSplitPass | 306ms| 221ms | 68ms | 17ms | | CoroCloner | 101ms| 72ms | 0.5ms | 0.5ms | | CollectGlobalDI | -| - | 63ms | 13ms | |-+--++---+---| | Speed up| 1x | 1.4x | 4.5x | 18x | Test Plan: ninja check-llvm-unit ninja check-llvm Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes. 
stack-info: PR: https://github.com/llvm/llvm-project/pull/118630, branch: users/artempyanykh/fast-coro-upstream/11 --- llvm/include/llvm/Transforms/Coroutines/ABI.h | 13 +++-- llvm/lib/Analysis/CGSCCPassManager.cpp| 7 +++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 56 +++ llvm/test/Other/new-pass-manager.ll | 1 + llvm/test/Other/new-pm-defaults.ll| 1 + llvm/test/Other/new-pm-lto-defaults.ll| 1 + llvm/test/Other/new-pm-pgo-preinline.ll | 1 + .../Other/new-pm-thinlto-postlink-defaults.ll | 1 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 1 + ...-pm-thinlto-postlink-samplepgo-defaults.ll | 1 + .../Other/new-pm-thinlto-prelink-defaults.ll | 1 + .../new-pm-thinlto-prelink-pgo-defaults.ll| 1 + ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 1 + .../Analysis/CGSCCPassManagerTest.cpp | 4 +- 14 files changed, 73 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Transforms/Coroutines/ABI.h b/llvm/include/llvm/Transforms/Coroutines/ABI.h index 0b2d405f3caec4..2cf614b6bb1e2a 100644 --- a/llvm/include/llvm/Transforms/Coroutines/ABI.h +++ b/llvm/include/llvm/Transforms/Coroutines/ABI.h @@ -15,6 +15,7 @@ #ifndef LLVM_TRANSFORMS_COROUTINES_ABI_H #define LLVM_TRANSFORMS_COROUTINES_ABI_H +#include "llvm/Analysis/DebugInfoCache.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Coroutines/CoroShape.h" #include "llvm/Transforms/Coroutines/MaterializationUtils.h" @@ -53,7 +54,8 @@ class BaseABI { // Perform the function splitting according to the ABI. 
virtual void splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, - TargetTransformInfo &TTI) = 0; + TargetTransformInfo &TTI, + const DebugInfoCache *DICache) = 0; Function &F; coro::Shape &Shape; @@ -73,7 +75,8 @@ class SwitchABI : public BaseABI { void splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, - TargetTransformInfo &TTI) override; + TargetTransformInfo &TTI, + const DebugInfoCache *DICache) override; }; class AsyncABI : public BaseABI { @@ -86,7 +89,8 @@ class AsyncABI : public BaseABI { void splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, - TargetTransformInfo &TTI) override; + TargetTransformInfo &TTI, + const DebugInfoCache *DICache) override; }; class AnyRetconABI : public BaseABI { @@ -99,7 +103,8 @@ class AnyRetconABI : public BaseABI { void splitCoroutine(Function &F, coro::Shape &Shape, SmallVectorImpl &Clones, - TargetTransformInfo &TTI) override; + TargetTransformInfo &TTI, + const DebugInfoCache *DICache) override; }; } // end namespace coro diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index 948bc2435ab275..3ba085cdb0be8b 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallPtrSet.h" #in
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/119194 >From b0d87f2a2e0ab0a13bdd85d5406451534e79ba8d Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Mon, 9 Dec 2024 19:18:06 +0800 Subject: [PATCH] Rewrite uses in ARM/PPC targets Created using spr 1.3.6-beta.1 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 +-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index e6b37dd9161685..8673deddb7057f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6936,7 +6936,6 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, RegClassInfo.runOnMachineFunction(*MF); RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(), EndLoop->getParent()->end(), false, false); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); bumpCrossIterationPressure(RPTracker, CrossIterationNeeds); @@ -6979,7 +6978,7 @@ bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, auto &P = RPTracker.getPressure().MaxSetPressure; for (unsigned I = 0, E = P.size(); I < E; ++I) -if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) { +if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) { return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 44f6db5061e21a..fa45a7fb7fabe6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -643,8 +643,8 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( }; // For now we only care about float and double type fma. 
- unsigned VSSRCLimit = TRI->getRegPressureSetLimit( - *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); + unsigned VSSRCLimit = + RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC); // Only reduce register pressure when pressure is high. return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
llvmbot wrote: @llvm/pr-subscribers-llvm-regalloc Author: Akshat Oke (optimisan) Changes Makes Inline Spiller amenable for the new PM. --- Full diff: https://github.com/llvm/llvm-project/pull/119181.diff 5 Files Affected: - (modified) llvm/include/llvm/CodeGen/Spiller.h (+14-2) - (modified) llvm/lib/CodeGen/InlineSpiller.cpp (+14-22) - (modified) llvm/lib/CodeGen/RegAllocBasic.cpp (+11-5) - (modified) llvm/lib/CodeGen/RegAllocGreedy.cpp (+3-1) - (modified) llvm/lib/CodeGen/RegAllocPBQP.cpp (+4-1) ``diff diff --git a/llvm/include/llvm/CodeGen/Spiller.h b/llvm/include/llvm/CodeGen/Spiller.h index 51ad36bc6b1f8b..3132cefeb6c68a 100644 --- a/llvm/include/llvm/CodeGen/Spiller.h +++ b/llvm/include/llvm/CodeGen/Spiller.h @@ -19,6 +19,10 @@ class MachineFunction; class MachineFunctionPass; class VirtRegMap; class VirtRegAuxInfo; +class LiveIntervals; +class LiveStacks; +class MachineDominatorTree; +class MachineBlockFrequencyInfo; /// Spiller interface. /// @@ -41,12 +45,20 @@ class Spiller { virtual ArrayRef getReplacedRegs() = 0; virtual void postOptimization() {} + + struct RequiredAnalyses { +LiveIntervals &LIS; +LiveStacks &LSS; +MachineDominatorTree &MDT; +const MachineBlockFrequencyInfo &MBFI; + }; }; /// Create and return a spiller that will insert spill code directly instead /// of deferring though VirtRegMap. 
-Spiller *createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, - VirtRegMap &VRM, VirtRegAuxInfo &VRAI); +Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, + MachineFunction &MF, VirtRegMap &VRM, + VirtRegAuxInfo &VRAI); } // end namespace llvm diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 64f290f5930a1b..b9768d5c63a5d1 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -75,7 +75,6 @@ RestrictStatepointRemat("restrict-statepoint-remat", cl::desc("Restrict remat for statepoint operands")); namespace { - class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineFunction &MF; LiveIntervals &LIS; @@ -128,15 +127,11 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { DenseMap &SpillsToIns); public: - HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : MF(mf), LIS(pass.getAnalysis().getLIS()), -LSS(pass.getAnalysis().getLS()), -MDT(pass.getAnalysis().getDomTree()), + HoistSpillHelper(const Spiller::RequiredAnalyses &Analyses, + MachineFunction &mf, VirtRegMap &vrm) + : MF(mf), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), -TRI(*mf.getSubtarget().getRegisterInfo()), -MBFI( - pass.getAnalysis().getMBFI()), +TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI), IPA(LIS, mf.getNumBlockIDs()) {} void addToMergeableSpills(MachineInstr &Spill, int StackSlot, @@ -190,16 +185,12 @@ class InlineSpiller : public Spiller { ~InlineSpiller() override = default; public: - InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, -VirtRegAuxInfo &VRAI) - : MF(MF), LIS(Pass.getAnalysis().getLIS()), -LSS(Pass.getAnalysis().getLS()), -MDT(Pass.getAnalysis().getDomTree()), + InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, +VirtRegMap &VRM, VirtRegAuxInfo &VRAI) + : MF(MF), 
LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), -TRI(*MF.getSubtarget().getRegisterInfo()), -MBFI( - Pass.getAnalysis().getMBFI()), -HSpiller(Pass, MF, VRM), VRAI(VRAI) {} +TRI(*MF.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI), +HSpiller(Analyses, MF, VRM), VRAI(VRAI) {} void spill(LiveRangeEdit &) override; ArrayRef getSpilledRegs() override { return RegsToSpill; } @@ -237,10 +228,11 @@ Spiller::~Spiller() = default; void Spiller::anchor() {} -Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass, - MachineFunction &MF, VirtRegMap &VRM, - VirtRegAuxInfo &VRAI) { - return new InlineSpiller(Pass, MF, VRM, VRAI); +Spiller * +llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses, + MachineFunction &MF, VirtRegMap &VRM, + VirtRegAuxInfo &VRAI) { + return new InlineSpiller(Analyses, MF, VRM, VRAI); } //===--===// diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/li
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#119181** (https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/119181?utm_source=stack-comment-view-in-graphite) * **#118462** (https://app.graphite.dev/github/pr/llvm/llvm-project/118462?utm_source=stack-comment-icon) * **#117309** (https://app.graphite.dev/github/pr/llvm/llvm-project/117309?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/119181 None >From 5b4e72ffece91f7ee370cb484667aa13742b9dae Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 9 Dec 2024 07:58:48 + Subject: [PATCH] Spiller: Deatach legacy pass and supply analyses instead --- llvm/include/llvm/CodeGen/Spiller.h | 16 +++-- llvm/lib/CodeGen/InlineSpiller.cpp | 36 +++-- llvm/lib/CodeGen/RegAllocBasic.cpp | 16 + llvm/lib/CodeGen/RegAllocGreedy.cpp | 4 +++- llvm/lib/CodeGen/RegAllocPBQP.cpp | 5 +++- 5 files changed, 46 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/CodeGen/Spiller.h b/llvm/include/llvm/CodeGen/Spiller.h index 51ad36bc6b1f8b..3132cefeb6c68a 100644 --- a/llvm/include/llvm/CodeGen/Spiller.h +++ b/llvm/include/llvm/CodeGen/Spiller.h @@ -19,6 +19,10 @@ class MachineFunction; class MachineFunctionPass; class VirtRegMap; class VirtRegAuxInfo; +class LiveIntervals; +class LiveStacks; +class MachineDominatorTree; +class MachineBlockFrequencyInfo; /// Spiller interface. /// @@ -41,12 +45,20 @@ class Spiller { virtual ArrayRef getReplacedRegs() = 0; virtual void postOptimization() {} + + struct RequiredAnalyses { +LiveIntervals &LIS; +LiveStacks &LSS; +MachineDominatorTree &MDT; +const MachineBlockFrequencyInfo &MBFI; + }; }; /// Create and return a spiller that will insert spill code directly instead /// of deferring though VirtRegMap. 
-Spiller *createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, - VirtRegMap &VRM, VirtRegAuxInfo &VRAI); +Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, + MachineFunction &MF, VirtRegMap &VRM, + VirtRegAuxInfo &VRAI); } // end namespace llvm diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 64f290f5930a1b..b9768d5c63a5d1 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -75,7 +75,6 @@ RestrictStatepointRemat("restrict-statepoint-remat", cl::desc("Restrict remat for statepoint operands")); namespace { - class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineFunction &MF; LiveIntervals &LIS; @@ -128,15 +127,11 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { DenseMap &SpillsToIns); public: - HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : MF(mf), LIS(pass.getAnalysis().getLIS()), -LSS(pass.getAnalysis().getLS()), -MDT(pass.getAnalysis().getDomTree()), + HoistSpillHelper(const Spiller::RequiredAnalyses &Analyses, + MachineFunction &mf, VirtRegMap &vrm) + : MF(mf), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), -TRI(*mf.getSubtarget().getRegisterInfo()), -MBFI( - pass.getAnalysis().getMBFI()), +TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI), IPA(LIS, mf.getNumBlockIDs()) {} void addToMergeableSpills(MachineInstr &Spill, int StackSlot, @@ -190,16 +185,12 @@ class InlineSpiller : public Spiller { ~InlineSpiller() override = default; public: - InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, -VirtRegAuxInfo &VRAI) - : MF(MF), LIS(Pass.getAnalysis().getLIS()), -LSS(Pass.getAnalysis().getLS()), -MDT(Pass.getAnalysis().getDomTree()), + InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, +VirtRegMap &VRM, VirtRegAuxInfo &VRAI) + : MF(MF), 
LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), -TRI(*MF.getSubtarget().getRegisterInfo()), -MBFI( - Pass.getAnalysis().getMBFI()), -HSpiller(Pass, MF, VRM), VRAI(VRAI) {} +TRI(*MF.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI), +HSpiller(Analyses, MF, VRM), VRAI(VRAI) {} void spill(LiveRangeEdit &) override; ArrayRef getSpilledRegs() override { return RegsToSpill; } @@ -237,10 +228,11 @@ Spiller::~Spiller() = default; void Spiller::anchor() {} -Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass, - MachineFunction &MF, VirtRegMap &VRM, - VirtRegAuxInfo &VRAI) { - return new InlineSpiller(Pass, MF, VRM, VRAI); +Spiller * +llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses, + MachineFunction &MF, VirtRegMap &VRM, + VirtRegAuxInfo &VRAI) { + return new InlineSpiller(Analyses, MF, VRM, VRAI); } //===---
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Handle gpu.return in AbstractResult pass (PR #119035)
https://github.com/jeanPerier approved this pull request. https://github.com/llvm/llvm-project/pull/119035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/119194 To reduce compile time. This is a follow-up of #118787. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM (PR #118462)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/118462 >From ea0cf8d1805dd4ef093d30dd1f4538c9747be851 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Tue, 3 Dec 2024 10:12:36 + Subject: [PATCH] [CodeGen][NewPM] Port RegAllocPriorityAdvisor analysis to NPM --- .../llvm}/CodeGen/RegAllocPriorityAdvisor.h | 79 +++- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Passes/MachinePassRegistry.def | 1 + llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp | 6 +- .../lib/CodeGen/MLRegAllocPriorityAdvisor.cpp | 184 +++--- llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 9 +- llvm/lib/CodeGen/RegAllocGreedy.h | 2 +- llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp | 120 +--- llvm/lib/Passes/PassBuilder.cpp | 1 + 10 files changed, 294 insertions(+), 112 deletions(-) rename llvm/{lib => include/llvm}/CodeGen/RegAllocPriorityAdvisor.h (53%) diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h similarity index 53% rename from llvm/lib/CodeGen/RegAllocPriorityAdvisor.h rename to llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h index 2d42a43c4c6372..bddfe15bf17751 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h +++ b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h @@ -9,8 +9,10 @@ #ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H #define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { @@ -56,12 +58,73 @@ class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor { unsigned getPriority(const LiveInterval &LI) const override; }; -class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { +/// Common provider for getting the priority advisor and logging rewards. +/// Legacy analysis forwards all calls to this provider. 
+/// New analysis serves the provider as the analysis result. +/// Expensive setup is done in the constructor, so that the advisor can be +/// created quickly for every machine function. +/// TODO: Remove once legacy PM support is dropped. +class RegAllocPriorityAdvisorProvider { public: enum class AdvisorMode : int { Default, Release, Development }; - RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) - : ImmutablePass(ID), Mode(Mode){}; + RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {} + + virtual ~RegAllocPriorityAdvisorProvider() = default; + + virtual void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref GetReward) {}; + + virtual std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; + + void setAnalyses(SlotIndexes *SI) { this->SI = SI; } + + AdvisorMode getAdvisorMode() const { return Mode; } + +protected: + SlotIndexes *SI; + +private: + const AdvisorMode Mode; +}; + +RegAllocPriorityAdvisorProvider *createReleaseModePriorityAdvisorProvider(); + +RegAllocPriorityAdvisorProvider * +createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx); + +class RegAllocPriorityAdvisorAnalysis +: public AnalysisInfoMixin { + static AnalysisKey Key; + friend AnalysisInfoMixin; + +public: + struct Result { +// Owned by this analysis. 
+RegAllocPriorityAdvisorProvider *Provider; + +bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA, +MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless() || + Inv.invalidate(MF, PA); +} + }; + + Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + +private: + void initializeProvider(LLVMContext &Ctx); + std::unique_ptr Provider; +}; + +class RegAllocPriorityAdvisorAnalysisLegacy : public ImmutablePass { +public: + enum class AdvisorMode : int { Default, Release, Development }; + + RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode Mode) + : ImmutablePass(ID), Mode(Mode) {}; static char ID; /// Get an advisor for the given context (i.e. machine function, etc) @@ -69,7 +132,7 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } virtual void logRewardIfNeeded(const MachineFunction &MF, - llvm::function_ref GetReward){}; + llvm::function_ref GetReward) {}; protected: // This analysis preserves everything, and subclasses may have additional @@ -85,11 +148,13 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { /// Specialization for the API used by the analysis infrastructure to create /// an instance of the pri
[llvm-branch-commits] [lldb][Process] Introduce LoongArch64 hw break/watchpoint support (PR #118770)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/118770 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb][Process] Introduce LoongArch64 hw break/watchpoint support (PR #118770)
https://github.com/wangleiat updated https://github.com/llvm/llvm-project/pull/118770 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
topperc wrote: Why do we need #118787 if we can just update the passes to use RegisterClassInfo? https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
https://github.com/MacDue commented: A bunch of little comments (mostly just nitpicks from a pass over the PR) :slightly_smiling_face: https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] b619626 - Revert "[Fuchsia][cmake] Allow using FatLTO when building runtimes (#112277)"
Author: Paul Kirth Date: 2024-12-09T11:23:29-08:00 New Revision: b6196267463a356df89e922bb72c93a35b73d29c URL: https://github.com/llvm/llvm-project/commit/b6196267463a356df89e922bb72c93a35b73d29c DIFF: https://github.com/llvm/llvm-project/commit/b6196267463a356df89e922bb72c93a35b73d29c.diff LOG: Revert "[Fuchsia][cmake] Allow using FatLTO when building runtimes (#112277)" This reverts commit 57545dbbdbafc51d63873800a45cfd48a283d981. Added: Modified: clang/cmake/caches/Fuchsia-stage2.cmake llvm/cmake/modules/HandleLLVMOptions.cmake Removed: diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 9eb295548500ee..784a883a3bf916 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -190,10 +190,6 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") -# Enable FatLTO for Linux and baremetal runtimes -set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") -set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") - # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endif() @@ -276,10 +272,6 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") -# Enable FatLTO for Fuchsia runtimes -set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") -set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") - # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endforeach() @@ -371,10 +363,6 @@ foreach(target armv6m-none-eabi;armv7m-none-eabi;armv8m.main-none-eabi;armv8.1m. 
set(RUNTIMES_${target}_LLVM_INCLUDE_TESTS OFF CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "libc;libcxx" CACHE STRING "") - - # Enable FatLTO for baremetal runtimes - set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") - set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") endforeach() foreach(target riscv32-unknown-elf) @@ -426,10 +414,6 @@ foreach(target riscv32-unknown-elf) set(RUNTIMES_${target}_LLVM_INCLUDE_TESTS OFF CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "libc;libcxx" CACHE STRING "") - - # Enable FatLTO for baremetal runtimes - set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") - set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") endforeach() set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "") diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 55a87f5fdbb138..f19125eb6bf273 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -1285,7 +1285,7 @@ elseif(LLVM_ENABLE_LTO) endif() endif() -if(LLVM_ENABLE_FATLTO AND ((UNIX AND NOT APPLE) OR FUCHSIA)) +if(LLVM_ENABLE_FATLTO AND UNIX AND NOT APPLE) append("-ffat-lto-objects" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) if(NOT LINKER_IS_LLD_LINK) append("-ffat-lto-objects" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
TylerNowicki wrote: LGTM https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
https://github.com/TylerNowicki approved this pull request. https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
https://github.com/lenary approved this pull request. LGTM for the target-independent changes. https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] Update maintainers (PR #119166)
https://github.com/cryptoad approved this pull request. https://github.com/llvm/llvm-project/pull/119166 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)
@@ -200,6 +200,22 @@ bool llvm::BuildDebugInfoMDMap(DenseMap &MD, return ModuleLevelChanges; } +void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + RemapFlags RemapFlag, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + // Duplicate the metadata that is attached to the cloned function. felipepiovezan wrote: Note that, in the context of this function, there is no "cloned function", it's either `NewFunc` or `OldFunc`. That said, I think this comment fits better in the documentation of the header, as there is important information here that callers should be aware of. https://github.com/llvm/llvm-project/pull/118623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)
@@ -182,6 +182,18 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's metadata into NewFunc. +/// +/// The caller is expected to populate \p VMap beforehand and set an appropriate +/// \p RemapFlag. +/// +/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a different +/// module. Use CloneFunctionInto for that behavior. +void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, felipepiovezan wrote: Can `NewFunc` or `OldFunc` be null? If not, we should make them references https://github.com/llvm/llvm-project/pull/118623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)
@@ -194,6 +194,15 @@ void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's body into NewFunc. +void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc, felipepiovezan wrote: Can `NewFunc` / `OldFunc` be nullptr? If not, they should be references https://github.com/llvm/llvm-project/pull/118624 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)
@@ -361,32 +370,31 @@ void updateProfileCallee( /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified /// basic blocks and extract their scope. These are candidates for duplication /// when cloning. -void identifyNoAliasScopesToClone( -ArrayRef BBs, SmallVectorImpl &NoAliasDeclScopes); +void identifyNoAliasScopesToClone(ArrayRef BBs, + SmallVectorImpl &NoAliasDeclScopes); /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified /// instruction range and extract their scope. These are candidates for /// duplication when cloning. -void identifyNoAliasScopesToClone( -BasicBlock::iterator Start, BasicBlock::iterator End, -SmallVectorImpl &NoAliasDeclScopes); +void identifyNoAliasScopesToClone(BasicBlock::iterator Start, + BasicBlock::iterator End, + SmallVectorImpl &NoAliasDeclScopes); /// Duplicate the specified list of noalias decl scopes. /// The 'Ext' string is added as an extension to the name. /// Afterwards, the ClonedScopes contains the mapping of the original scope /// MDNode onto the cloned scope. /// Be aware that the cloned scopes are still part of the original scope domain. -void cloneNoAliasScopes( -ArrayRef NoAliasDeclScopes, -DenseMap &ClonedScopes, -StringRef Ext, LLVMContext &Context); +void cloneNoAliasScopes(ArrayRef NoAliasDeclScopes, +DenseMap &ClonedScopes, +StringRef Ext, LLVMContext &Context); /// Adapt the metadata for the specified instruction according to the /// provided mapping. This is normally used after cloning an instruction, when /// some noalias scopes needed to be cloned. -void adaptNoAliasScopes( -llvm::Instruction *I, const DenseMap &ClonedScopes, -LLVMContext &Context); felipepiovezan wrote: Are any of these changes part of the PR? If not, they should not be part of the diff. If you run into this as a result of running `clang-format` on the entire patch, a good way to avoid this is by running `clang-format` on the _diff_ only. 
For example, (assuming all the changes are unstaged) `git add` the changes you want to stage, and then `git clang-format --staged` https://github.com/llvm/llvm-project/pull/118624 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
@@ -15,6 +15,7 @@ # used for building this FortranFloat128Math library. include(CheckLibraryExists) +include(CheckIncludeFile) jeanPerier wrote: When using `-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath`, I am seeing a failure in my latest test where cmake `check_include_file` for quadmath.h fails below while the system has it and a similar cmake command works with the current llvm main. I will investigate more; I suspect that some system header paths are set up in the llvm/flang build that are not set up anymore here. I am surprised I did not see that in my earlier testing (maybe that is because I mostly tested out-of-tree and that worked there, I need to check that). https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] Update maintainers (PR #119166)
https://github.com/cferris1000 approved this pull request. https://github.com/llvm/llvm-project/pull/119166 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute (PR #114438)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114438 >From de91e1754826028c3abdabd074bbe8ec1b17eb5f Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 31 Oct 2024 12:49:07 -0400 Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDWavesPerEU` honor existing attribute --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 81 +++ .../annotate-kernel-features-hsa-call.ll | 46 ++- ...ttr-amdgpu-max-num-workgroups-propagate.ll | 48 +-- .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 59 +++--- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 ++- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 9 files changed, 138 insertions(+), 116 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 1d16cf2fe95c37..a8f448227b18c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -206,6 +206,19 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { +auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); +if (!Val) + return std::nullopt; +if (!Val->second) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); +} +return std::make_pair(Val->first, *(Val->second)); + } + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -776,22 +789,6 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { -// Don't add the attribute if it's the implied default. 
-if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) - return ChangeStatus::UNCHANGED; - -Function *F = getAssociatedFunction(); -LLVMContext &Ctx = F->getContext(); -SmallString<10> Buffer; -raw_svector_ostream OS(Buffer); -OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; -return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, AttrName, OS.str())}, - /*ForceReplace=*/true); - } - const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -1027,29 +1024,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { -return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); -if (const auto *AssumedGroupSize = A.getAAFor( -*this, IRPosition::function(*F), DepClassTy::REQUIRED); -AssumedGroupSize->isValidState()) { +auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); +}; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); +std::pair MaxWavesPerEURange{ +1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); +// If the attribute exists, we will honor it if it is not the default. 
+if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { +TakeRange(*Attr); +return; + } } -if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); +// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the +// calculation of waves per EU involves flat work group size, we can't +// simply use an assumed flat work group size as a start point, because the +// update of flat work group size is in an inverse direction of waves per +// EU. However, we can still do something if it is an entry function. Since +// an entry function is a terminal node, and flat work group size either +// from attribute or default will be used anyway, we can take that
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114726 >From 47f7697d5a278640d7ccb59d512112b13b07871f Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 3 Nov 2024 19:35:26 -0500 Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial state --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 105 +++--- .../annotate-kernel-features-hsa-call.ll | 46 ...ttr-amdgpu-max-num-workgroups-propagate.ll | 48 .../AMDGPU/attributor-loop-issue-58639.ll | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll| 3 +- .../AMDGPU/remove-no-kernel-id-attribute.ll | 9 +- .../AMDGPU/uniform-work-group-multistep.ll| 3 +- .../uniform-work-group-recursion-test.ll | 2 +- 8 files changed, 140 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index a8f448227b18c9..3ca0677d980b9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -745,6 +745,16 @@ struct AAAMDSizeRangeAttribute if (!CallerInfo || !CallerInfo->isValidState()) return false; + /// When the caller AA is in its initial state, the state remains valid + /// but awaits propagation. We skip processing in this case. Note that we + /// must return true since the state is still considered valid. + if (CallerInfo->isAtInitialState()) { +LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller " + << Caller->getName() + << " is still at initial state. Skip the update.\n"); +return true; + } + Change |= clampStateAndIndicateChange(this->getState(), CallerInfo->getState()); @@ -789,6 +799,15 @@ struct AAAMDSizeRangeAttribute /*ForceReplace=*/true); } + /// The initial state of `IntegerRangeState` represents an empty set, which + /// does not constitute a valid range. This empty state complicates + /// propagation, particularly for arithmetic operations like + /// `getAssumed().getUpper() - 1`. 
Therefore, it is recommended to skip the + /// initial state during processing. + bool isAtInitialState() const { +return isValidState() && getAssumed().isEmptySet(); + } + const std::string getAsStr(Attributor *) const override { std::string Str; raw_string_ostream OS(Str); @@ -845,6 +864,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { Attributor &A); ChangeStatus manifest(Attributor &A) override { +if (isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] Still at initial state. No manifest.\n";); + return ChangeStatus::UNCHANGED; +} Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); return emitAttributeIfNotDefaultAfterClamp( @@ -1071,31 +1095,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); ChangeStatus Change = ChangeStatus::UNCHANGED; +Function *F = getAssociatedFunction(); + +const auto *AAFlatWorkGroupSize = A.getAAFor( +*this, IRPosition::function(*F), DepClassTy::REQUIRED); +if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) { + LLVM_DEBUG( + dbgs() << '[' << getName() + << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n"); + return ChangeStatus::UNCHANGED; +} + +if (AAFlatWorkGroupSize->isAtInitialState()) { + LLVM_DEBUG(dbgs() << '[' << getName() +<< "] AAAMDFlatWorkGroupSize is still at initial " + "state. 
Skip the update.\n"); + return ChangeStatus::UNCHANGED; +} + +auto CurrentWorkGroupSize = std::make_pair( +AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(), +AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1); + +auto DoUpdate = [&](std::pair WavesPerEU, +std::pair FlatWorkGroupSize) { + auto [Min, Max] = + InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize); + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + Change |= clampStateAndIndicateChange(this->getState(), IRS); +}; + +// // We need to clamp once if we are not at initial state, because +// // AAAMDFlatWorkGroupSize could be updated in last iteration. +if (!isAtInitialState()) { + auto CurrentWavesPerEU = + std::make_pair(getAssumed().getLower().getZExtValue(), + getAssumed().getUpper().getZExtValue() - 1); + DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize); +} + auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstructi
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
artempyanykh wrote: > nit: Perhaps 'Common' is a better word than 'Global'. Global made me think of > global variables but I realized in the patch that is not what you are doing. > Seems you are just creating debug info based on the original function and > sharing that with the continuations / splits? Or perhaps just 'CoroDebugInfo'? Good call @TylerNowicki! I like 'common', updated the stack accordingly. It was originally "global" in the sense that it was owned by a module (global) rather than the function (local) and so didn't have to be cloned as part of function cloning. But I agree that the name can be confusing. https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/112277 >From 1dafa521d5a1e10e3f79f63a661b2e14acff5a4a Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:06:38 -0700 Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- libcxx/CMakeLists.txt| 4 libcxx/src/CMakeLists.txt| 10 ++ libcxxabi/src/CMakeLists.txt | 10 ++ 3 files changed, 24 insertions(+) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index f1942e963ccc31..5a68237f7336c5 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -102,6 +102,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS support the C functionality for wide characters. When wide characters are not supported, several parts of the library will be disabled, notably the wide character specializations of std::basic_string." ON) + option(LIBCXX_ENABLE_FATLTO + "Whether to compile libc++ with FatLTO enabled." ON) + option(LIBCXX_ENABLE_LTO + "Whether to compile libc++ with LTO enabled." ON) # To use time zone support in libc++ the platform needs to have the IANA # database installed. 
Libc++ will fail to build if this is enabled on a diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index b187677ff2db52..670db758f53173 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -173,6 +173,16 @@ if (APPLE AND LLVM_USE_SANITIZER) endif() endif() + +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-flto") + list(APPEND LIBCXX_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXX_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXX_COMPILE_FLAGS) split_list(LIBCXX_LINK_FLAGS) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 480e528b819bb9..822ede39c6a525 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -143,6 +143,15 @@ if ( APPLE ) endif() endif() +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-flto") + list(APPEND LIBCXXABI_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXXABI_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXXABI_COMPILE_FLAGS) split_list(LIBCXXABI_LINK_FLAGS) @@ -154,6 +163,7 @@ endif() include(WarningFlags) + # Build the shared library. 
add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 38851d29d9eaf5e3c597be3f9f57179f308ba335 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:27:36 -0700 Subject: [PATCH 2/4] Remove newline from diff Created using spr 1.3.4 --- libcxxabi/src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 1a1e57aa0077b4..783f17583c62e0 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -163,7 +163,6 @@ endif() include(WarningFlags) - # Build the shared library. add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 535f2f2c17a3c80aa12c0106a468a8f2127241fc Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 16 Oct 2024 11:20:51 -0700 Subject: [PATCH 3/4] Avoid unecessary changes to libc++ cmake Created using spr 1.3.4 --- clang/cmake/caches/Fuchsia-stage2.cmake | 8 libcxx/CMakeLists.txt | 4 libcxx/src/CMakeLists.txt | 10 -- libcxxabi/src/CMakeLists.txt| 9 - 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 5af98c7b3b3fba..e62f29ecbe6f45 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -192,6 +192,10 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +# Enable FatLTO for Linux and baremetal runtimes +set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") 
+set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") + # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endif() @@ -274,6 +278,10 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE B
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/112277 >From 1dafa521d5a1e10e3f79f63a661b2e14acff5a4a Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:06:38 -0700 Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- libcxx/CMakeLists.txt| 4 libcxx/src/CMakeLists.txt| 10 ++ libcxxabi/src/CMakeLists.txt | 10 ++ 3 files changed, 24 insertions(+) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index f1942e963ccc31..5a68237f7336c5 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -102,6 +102,10 @@ option(LIBCXX_ENABLE_WIDE_CHARACTERS support the C functionality for wide characters. When wide characters are not supported, several parts of the library will be disabled, notably the wide character specializations of std::basic_string." ON) + option(LIBCXX_ENABLE_FATLTO + "Whether to compile libc++ with FatLTO enabled." ON) + option(LIBCXX_ENABLE_LTO + "Whether to compile libc++ with LTO enabled." ON) # To use time zone support in libc++ the platform needs to have the IANA # database installed. 
Libc++ will fail to build if this is enabled on a diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index b187677ff2db52..670db758f53173 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -173,6 +173,16 @@ if (APPLE AND LLVM_USE_SANITIZER) endif() endif() + +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-flto") + list(APPEND LIBCXX_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXX_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXX_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXX_COMPILE_FLAGS) split_list(LIBCXX_LINK_FLAGS) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 480e528b819bb9..822ede39c6a525 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -143,6 +143,15 @@ if ( APPLE ) endif() endif() +if(LIBCXX_ENABLE_LTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-flto") + list(APPEND LIBCXXABI_LINK_FLAGS "-flto") +endif() +if(LIBCXX_ENABLE_FATLTO) + list(APPEND LIBCXXABI_COMPILE_FLAGS "-ffat-lto-objects") + list(APPEND LIBCXXABI_LINK_FLAGS "-ffat-lto-objects") +endif() + split_list(LIBCXXABI_COMPILE_FLAGS) split_list(LIBCXXABI_LINK_FLAGS) @@ -154,6 +163,7 @@ endif() include(WarningFlags) + # Build the shared library. 
add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 38851d29d9eaf5e3c597be3f9f57179f308ba335 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Mon, 14 Oct 2024 15:27:36 -0700 Subject: [PATCH 2/4] Remove newline from diff Created using spr 1.3.4 --- libcxxabi/src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 1a1e57aa0077b4..783f17583c62e0 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -163,7 +163,6 @@ endif() include(WarningFlags) - # Build the shared library. add_library(cxxabi_shared_objects OBJECT EXCLUDE_FROM_ALL ${LIBCXXABI_SOURCES} ${LIBCXXABI_HEADERS}) cxx_add_warning_flags(cxxabi_shared_objects ${LIBCXXABI_ENABLE_WERROR} ${LIBCXXABI_ENABLE_PEDANTIC}) >From 535f2f2c17a3c80aa12c0106a468a8f2127241fc Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 16 Oct 2024 11:20:51 -0700 Subject: [PATCH 3/4] Avoid unecessary changes to libc++ cmake Created using spr 1.3.4 --- clang/cmake/caches/Fuchsia-stage2.cmake | 8 libcxx/CMakeLists.txt | 4 libcxx/src/CMakeLists.txt | 10 -- libcxxabi/src/CMakeLists.txt| 9 - 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 5af98c7b3b3fba..e62f29ecbe6f45 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -192,6 +192,10 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LLVM_TOOLS_DIR "${CMAKE_BINARY_DIR}/bin" CACHE BOOL "") set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +# Enable FatLTO for Linux and baremetal runtimes +set(RUNTIMES_${target}_LLVM_ENABLE_LTO ON CACHE BOOL "") 
+set(RUNTIMES_${target}_LLVM_ENABLE_FATLTO ON CACHE BOOL "") + # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endif() @@ -274,6 +278,10 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE B
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
Meinersbur wrote: > I built things as of > [ac1f46f](https://github.com/llvm/llvm-project/commit/ac1f46faeef2507bb680f14b9256ac38817824df) > in > [conda-forge/flang-feedstock#80](https://github.com/conda-forge/flang-feedstock/pull/80), > and getting the following failure on linux: > > ``` > [228/375] Linking CXX shared library lib/libFlangOpenMPTransforms.so.20.0git > FAILED: lib/libFlangOpenMPTransforms.so.20.0git > : && $BUILD_PREFIX/bin/x86_64-conda-linux-gnu-c++ -fPIC > -fvisibility-inlines-hidden -fmessage-length=0 -march=nocona -mtune=haswell > -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 > -ffunction-sections -pipe -isystem $PREFIX/include > -fdebug-prefix-map=$SRC_DIR=/usr/local/src/conda/flang-split-20.0.0.dev1 > -fdebug-prefix-map=$PREFIX=/usr/local/src/conda-prefix -fPIC > -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time > -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings > -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long > -Wimplicit-fallthrough -Wno-maybe-uninitialized -Wno-nonnull > -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move > -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment > -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color > -ffunction-sections -fdata-sections -Wno-deprecated-copy > -Wno-ctad-maybe-unsupported -fno-strict-aliasing -fno-semantic-interposition > -O3 -DNDEBUG -fno-semantic-interposition -Wl,-O2 -Wl,--sort-common > -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags > -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,$PREFIX/lib > -Wl,-rpath-link,$PREFIX/lib -L$PREFIX/lib -Wl,-z,defs -Wl,-z,nodelete > -Wl,-rpath-link,$SRC_DIR/build/lib -Wl,--gc-sections > -Wl,--dependency-file=lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/link.d > -shared -Wl,-soname,libFlangOpenMPTransforms.so.20.0git -o > lib/libFlangOpenMPTransforms.so.20.0git > 
lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/FunctionFiltering.cpp.o > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/GenericLoopConversion.cpp.o > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MapsForPrivatizedSymbols.cpp.o > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MapInfoFinalization.cpp.o > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/MarkDeclareTarget.cpp.o > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/LowerWorkshare.cpp.o > -Wl,-rpath,"\$ORIGIN/../lib:$SRC_DIR/build/lib:" > lib/libFIRCodeGen.so.20.0git lib/libFortranCommon.so.20.0git > $PREFIX/lib/libMLIRFuncDialect.a $PREFIX/lib/libMLIROpenMPDialect.a > $PREFIX/lib/libMLIRIR.a $PREFIX/lib/libMLIRPass.a > $PREFIX/lib/libMLIRTransformUtils.a lib/libFIRAnalysis.so.20.0git > lib/libFIRBuilder.so.20.0git lib/libHLFIRDialect.so.20.0git > lib/libFIRSupport.so.20.0git lib/libFIRDialect.so.20.0git > lib/libFIRDialectSupport.so.20.0git $PREFIX/lib/libclang-cpp.so.20.0git > $PREFIX/lib/libMLIRFuncDialect.a $PREFIX/lib/libMLIRLLVMDialect.a > $PREFIX/lib/libMLIRMemorySlotInterfaces.a > $PREFIX/lib/libMLIROpenACCMPCommon.a $PREFIX/lib/libMLIRSubsetOpInterface.a > $PREFIX/lib/libMLIRValueBoundsOpInterface.a > $PREFIX/lib/libMLIRDestinationStyleOpInterface.a > $PREFIX/lib/libMLIRRewrite.a $PREFIX/lib/libMLIRRewritePDL.a > $PREFIX/lib/libMLIRPDLToPDLInterp.a $PREFIX/lib/libMLIRPass.a > $PREFIX/lib/libMLIRAnalysis.a $PREFIX/lib/libMLIRCallInterfaces.a > $PREFIX/lib/libMLIRControlFlowInterfaces.a > $PREFIX/lib/libMLIRLoopLikeInterface.a > $PREFIX/lib/libMLIRDataLayoutInterfaces.a > $PREFIX/lib/libMLIRInferIntRangeInterface.a $PREFIX/lib/libMLIRPresburger.a > $PREFIX/lib/libMLIRViewLikeInterface.a $PREFIX/lib/libMLIRPDLInterpDialect.a > $PREFIX/lib/libMLIRFunctionInterfaces.a $PREFIX/lib/libMLIRPDLDialect.a > $PREFIX/lib/libMLIRInferTypeOpInterface.a > $PREFIX/lib/libMLIRSideEffectInterfaces.a $PREFIX/lib/libMLIRIR.a > 
$PREFIX/lib/libMLIRSupport.a -lpthread $PREFIX/lib/libLLVM.so.20.0git > -Wl,-rpath-link,$SRC_DIR/build/lib && : > $BUILD_PREFIX/bin/../lib/gcc/x86_64-conda-linux-gnu/13.3.0/../../../../x86_64-conda-linux-gnu/bin/ld: > > lib/Optimizer/OpenMP/CMakeFiles/FlangOpenMPTransforms.dir/GenericLoopConversion.cpp.o: > in function `(anonymous > namespace)::GenericLoopConversionPattern::rewriteToDistributeParallelDo(mlir::omp::LoopOp, > mlir::ConversionPatternRewriter&) const [clone .isra.0]': > GenericLoopConversion.cpp:(.text._ZNK12_GLOBAL__N_128GenericLoopConversionPattern29rewriteToDistributeParallelDoEN4mlir3omp6LoopOpERNS1_25ConversionPatternRewriterE.isra.0+0x442): > undefined reference to > `Fortran::common::openmp::genEntryBlock(mlir::OpBuilder&, > Fortran::common::openmp::EntryBlockArgs const&, mlir::Region&)' > collect2: error: ld returned 1 exit status > ``` I don't see this in any configuration that I am te
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
@@ -1285,7 +1285,7 @@ elseif(LLVM_ENABLE_LTO) endif() endif() -if(LLVM_ENABLE_FATLTO AND UNIX AND NOT APPLE) +if(LLVM_ENABLE_FATLTO AND ((UNIX AND NOT APPLE) OR FUCHSIA)) ldionne wrote: Sorry, I think I initially read `if (LLVM_ENABLE_FATLTO OR ((UNIX AND NOT APPLE) OR FUCHSIA))`. https://github.com/llvm/llvm-project/pull/112277 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [libcxx] [libcxxabi] [Fuchsia][cmake] Allow using FatLTO when building runtimes (PR #112277)
https://github.com/ldionne approved this pull request. https://github.com/llvm/llvm-project/pull/112277 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
@@ -123,6 +123,7 @@ namespace { const TargetRegisterInfo *TRI = nullptr; const MachineFrameInfo *MFI = nullptr; MachineRegisterInfo *MRI = nullptr; +RegisterClassInfo RegClassInfo; arsenm wrote: The way RegisterClassInfo is currently used, it gets recomputed for every pass that uses it. It should probably be turned into a normal analysis. https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
@@ -123,6 +123,7 @@ namespace { const TargetRegisterInfo *TRI = nullptr; const MachineFrameInfo *MFI = nullptr; MachineRegisterInfo *MRI = nullptr; +RegisterClassInfo RegClassInfo; lenary wrote: I was thinking this, especially as it has some saved state to work out when it needs to recompute. I think that's probably a good follow-up? https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
@@ -79,42 +79,46 @@ namespace std { } // namespace std */ - -#include <__config> -#include <__memory/allocator.h> -#include <__memory/allocator_destructor.h> -#include <__memory/allocator_traits.h> -#include <__memory/unique_ptr.h> -#include <__type_traits/add_cv_quals.h> -#include <__type_traits/add_pointer.h> -#include <__type_traits/aligned_storage.h> -#include <__type_traits/conditional.h> -#include <__type_traits/decay.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_constructible.h> -#include <__type_traits/is_function.h> -#include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_reference.h> -#include <__type_traits/is_same.h> -#include <__type_traits/is_void.h> -#include <__type_traits/remove_cv.h> -#include <__type_traits/remove_cvref.h> -#include <__type_traits/remove_reference.h> -#include <__utility/forward.h> -#include <__utility/in_place.h> -#include <__utility/move.h> -#include <__utility/unreachable.h> -#include <__verbose_abort> -#include -#include -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#include <__configuration/language.h> + +#if defined(_LIBCPP_CXX03_LANG) && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) ldionne wrote: It would be nice for this patch to be really straightforward to review, and that would be the case if not for the formatting changes. I would suggest frontloading a patch that does ```c++ #if 0 // nothing for now #else // existing code #endif ``` That will cause all the formatting stuff to happen in that patch. We can then follow up with a patch that turns these `#if 0` into something real. https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
@@ -152,11 +152,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_TOSTRING2(x) #x # define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) -// NOLINTNEXTLINE(libcpp-cpp-version-check) -# if __cplusplus < 201103L -#define _LIBCPP_CXX03_LANG -# endif ldionne wrote: In the current state of this patch, the "intersection" between the 03 headers and the normal headers is basically the definition of `_LIBCPP_CXX03_LANG`, but we're including all of `__configuration/language.h` for that. I think that we should strive to make that intersection empty, because anything in that intersection can cause incompatibilities and confusion. Since `_LIBCPP_CXX03_LANG` is so simple, I would instead just check the value of `__cplusplus` directly at the top-level, and not share anything between the 03 headers and the normal headers. Note that `__config_site` is also something that technically falls inside that intersection and that's tricky to handle, we can discuss that separately. https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
@@ -11,10 +11,6 @@ #include <__config> -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) ldionne wrote: Merge conflict? https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
https://github.com/ldionne commented: Let's write down the order for landing this in smaller pieces: 1. Land the `#if 0` change which addresses the formatting issues. 2. Land the CMake changes that start installing the C++03 headers. 3. Switch to `#if __cplusplus < C++11 && USE_FROZEN_HEADERS` (or whatever), and also add the CI with XFAILs. If there are too many XFAILs, the CI bit should probably be left to a separate patch. We can treat even C++23-only headers the same for now, by including `__cxx03/__config` in them. That's the status quo and we should start with that. However, as a follow-up, we should go through all of the >= 11 headers and make them "not care" about the C++03 frozen headers. We could in addition perhaps diagnose the use of `-std=c++03` in the normal `__config` file with an error. https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [llvm] [libc++][C++03] Use `__cxx03/` headers in C++03 mode (PR #109002)
https://github.com/ldionne edited https://github.com/llvm/llvm-project/pull/109002 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Use cached version of getRegPressureSetLimit (PR #119194)
@@ -123,6 +123,7 @@ namespace { const TargetRegisterInfo *TRI = nullptr; const MachineFrameInfo *MFI = nullptr; MachineRegisterInfo *MRI = nullptr; +RegisterClassInfo RegClassInfo; arsenm wrote: Relatedly, we also do not serialize the set of reserved registers in MIR https://github.com/llvm/llvm-project/pull/119194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionBodyInto from CloneFunctionInto (PR #118624)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118624 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Analysis] Add DebugInfoCache analysis (PR #118629)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118629 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -418,7 +418,13 @@ class LoopVectorizationPlanner { /// Build VPlans for the specified \p UserVF and \p UserIC if they are /// non-zero or all applicable candidate VFs otherwise. If vectorization and /// interleaving should be avoided up-front, no plans are generated. - void plan(ElementCount UserVF, unsigned UserIC); + /// RTChecks is a list of pointer pairs that should be checked for aliasing, + /// setting HasAliasMask to true in the case that an alias mask is generated MacDue wrote: Outdated comment? Is this `DiffChecks` now? https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Use DebugInfoCache to speed up cloning in CoroSplitPass (PR #118630)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118630 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Utils] Identity map module-level debug info on first use in CloneFunction* (PR #118627)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118627 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Extract CloneFunctionMetadataInto from CloneFunctionInto (PR #118623)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Coro] Prebuild a module-level debug info set and share it between all coroutine clones (PR #118628)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118628 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Utils] Identity map module-level debug info on first use in CloneFunction* (PR #118627)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118627 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][Utils] Eliminate DISubprogram set from BuildDebugInfoMDMap (PR #118625)
https://github.com/artempyanykh edited https://github.com/llvm/llvm-project/pull/118625 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Spiller: Detach legacy pass and supply analyses instead (PR #119181)
https://github.com/aeubanks approved this pull request. https://github.com/llvm/llvm-project/pull/119181 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] ba6d4d9 - Revert "[libc] Add unistd overlay (#118882)"
Author: Michael Jones Date: 2024-12-09T16:08:35-08:00 New Revision: ba6d4d9c66231a1684ddfca619dbd91d9872f06d URL: https://github.com/llvm/llvm-project/commit/ba6d4d9c66231a1684ddfca619dbd91d9872f06d DIFF: https://github.com/llvm/llvm-project/commit/ba6d4d9c66231a1684ddfca619dbd91d9872f06d.diff LOG: Revert "[libc] Add unistd overlay (#118882)" This reverts commit 7db970fe4a0249234712ae6899d34b94260b09cd. Added: Modified: libc/hdr/CMakeLists.txt libc/hdr/unistd_macros.h libc/src/unistd/dup.h libc/src/unistd/dup2.h libc/src/unistd/dup3.h libc/src/unistd/fork.h libc/src/unistd/ftruncate.h libc/src/unistd/getcwd.h libc/src/unistd/geteuid.h libc/src/unistd/getopt.h libc/src/unistd/getpid.h libc/src/unistd/getppid.h libc/src/unistd/getuid.h libc/src/unistd/isatty.h libc/src/unistd/link.h libc/src/unistd/linux/ftruncate.cpp libc/src/unistd/linux/lseek.cpp libc/src/unistd/linux/sysconf.cpp libc/src/unistd/linux/truncate.cpp libc/src/unistd/lseek.h libc/src/unistd/pread.h libc/src/unistd/pwrite.h libc/src/unistd/read.h libc/src/unistd/readlink.h libc/src/unistd/readlinkat.h libc/src/unistd/swab.h libc/src/unistd/symlink.h libc/src/unistd/symlinkat.h libc/src/unistd/syscall.h libc/src/unistd/sysconf.h libc/src/unistd/truncate.h libc/src/unistd/write.h Removed: libc/hdr/unistd_overlay.h diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 7f523c50e86943..5eb311f4bb2298 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -126,13 +126,10 @@ add_proxy_header_library( libc.include.llvm-libc-macros.sys_stat_macros ) -add_header_library(unistd_overlay HDRS unistd_overlay.h) add_proxy_header_library( unistd_macros HDRS unistd_macros.h - DEPENDS -.unistd_overlay FULL_BUILD_DEPENDS libc.include.unistd libc.include.llvm-libc-macros.unistd_macros diff --git a/libc/hdr/unistd_macros.h b/libc/hdr/unistd_macros.h index 5c2b24354dd3ee..132e123280139f 100644 --- a/libc/hdr/unistd_macros.h +++ b/libc/hdr/unistd_macros.h @@ -15,7 +15,7 @@ #else // Overlay mode 
-#include "unistd_overlay.h" +#include #endif // LLVM_LIBC_FULL_BUILD diff --git a/libc/hdr/unistd_overlay.h b/libc/hdr/unistd_overlay.h deleted file mode 100644 index e3001e0cda08f5..00 --- a/libc/hdr/unistd_overlay.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- Including unistd.h in overlay mode ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// - -#ifndef LLVM_LIBC_HDR_UNISTD_OVERLAY_H -#define LLVM_LIBC_HDR_UNISTD_OVERLAY_H - -#ifdef LIBC_FULL_BUILD -#error "This header should only be included in overlay mode" -#endif - -// Overlay mode - -// glibc header might provide extern inline definitions for few -// functions, causing external alias errors. They are guarded by -// `__USE_EXTERN_INLINES` macro. We temporarily disable `__USE_EXTERN_INLINES` -// macro by defining `__NO_INLINE__` before including . -// And the same with `__USE_FORTIFY_LEVEL`, which will be temporarily disabled -// with `_FORTIFY_SOURCE`. 
- -#ifdef _FORTIFY_SOURCE -#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE -#undef _FORTIFY_SOURCE -#endif - -#ifdef __USE_EXTERN_INLINES -#define LIBC_OLD_USE_EXTERN_INLINES -#undef __USE_EXTERN_INLINES -#endif - -#ifdef __USE_FORTIFY_LEVEL -#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL -#undef __USE_FORTIFY_LEVEL -#define __USE_FORTIFY_LEVEL 0 -#endif - -#ifndef __NO_INLINE__ -#define __NO_INLINE__ 1 -#define LIBC_SET_NO_INLINE -#endif - -#include - -#ifdef LIBC_OLD_FORTIFY_SOURCE -#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE -#undef LIBC_OLD_FORTIFY_SOURCE -#endif - -#ifdef LIBC_SET_NO_INLINE -#undef __NO_INLINE__ -#undef LIBC_SET_NO_INLINE -#endif - -#ifdef LIBC_OLD_USE_FORTIFY_LEVEL -#undef __USE_FORTIFY_LEVEL -#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL -#undef LIBC_OLD_USE_FORTIFY_LEVEL -#endif - -#ifdef LIBC_OLD_USE_EXTERN_INLINES -#define __USE_EXTERN_INLINES -#undef LIBC_OLD_USE_EXTERN_INLINES -#endif - -#endif // LLVM_LIBC_HDR_UNISTD_OVERLAY_H diff --git a/libc/src/unistd/dup.h b/libc/src/unistd/dup.h index 57601455acc61c..63f093c0ee4365 100644 --- a/libc/src/unistd/dup.h +++ b/libc/src/unistd/dup.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_DUP_H #define LLVM_LIBC_SRC_UNISTD_DUP_H -#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" +#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/dup2.h b/libc/src/unistd/dup2.h index e2cf62389bca87..060c112daf08fb 100644 --- a/libc/src/unistd/dup
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
h-vetinari wrote: Thanks for your input @Meinersbur! https://github.com/llvm/llvm-project/pull/110217/commits/a03606822b0eaef8efdeb00c4f2c33e1a029f79f fixed the compilation issues on Linux. I have another question - when building the runtimes (separately as you described), it seems that `-DBUILD_SHARED_LIBS=ON` gets ignored on Linux as well, in the sense that the resulting library is `libflang_rt.a` and not `libflang_rt.so`. You had commented that this is a known limitation on Windows, but AFAIU this should still work on Linux (as previously)? https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=flang-rt (PR #110217)
https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/110217 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ChuanqiXu9 wrote: Sent https://github.com/llvm/llvm-project/pull/119333 It looks like the lldb failure occurred because we forgot to update the ExternalASTConsumer (this is the second time I have run into this; I wonder if we can make the update more automatic). The other Windows failure is a pattern-match failure. https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 4fe98aa - Revert "[PAC][ELF][AArch64] Support signed personality function pointer (#113…"
Author: Daniil Kovalev Date: 2024-12-10T09:10:23+03:00 New Revision: 4fe98aa2253925a6bc24bd30f2c654756c77cc28 URL: https://github.com/llvm/llvm-project/commit/4fe98aa2253925a6bc24bd30f2c654756c77cc28 DIFF: https://github.com/llvm/llvm-project/commit/4fe98aa2253925a6bc24bd30f2c654756c77cc28.diff LOG: Revert "[PAC][ELF][AArch64] Support signed personality function pointer (#113…" This reverts commit 4fb1cda6606ba75782aa1964835abf1a69e2adae. Added: Modified: clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGen/ptrauth-module-flags.c llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h llvm/include/llvm/Target/TargetLoweringObjectFile.h llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp llvm/lib/Target/AArch64/AArch64TargetObjectFile.h llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h llvm/lib/Target/TargetLoweringObjectFile.cpp llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn Removed: llvm/lib/Target/AArch64/AArch64MachineModuleInfo.cpp llvm/lib/Target/AArch64/AArch64MachineModuleInfo.h llvm/test/CodeGen/AArch64/ptrauth-sign-personality.ll diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 841fb1ced9a02b..d3d5c0743a520b 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1218,9 +1218,6 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Min, "ptrauth-elf-got", 1); if (getTriple().isOSLinux()) { - if (LangOpts.PointerAuthCalls) -getModule().addModuleFlag(llvm::Module::Min, "ptrauth-sign-personality", - 1); assert(getTriple().isOSBinFormatELF()); using namespace llvm::ELF; uint64_t PAuthABIVersion = diff --git a/clang/test/CodeGen/ptrauth-module-flags.c b/clang/test/CodeGen/ptrauth-module-flags.c index e441d52cb7c62b..5a7e9a7c2a36fe 100644 
--- a/clang/test/CodeGen/ptrauth-module-flags.c +++ b/clang/test/CodeGen/ptrauth-module-flags.c @@ -1,13 +1,8 @@ // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=OFF // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-elf-got -emit-llvm %s -o - | FileCheck %s --check-prefix=ELFGOT -// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefix=PERSONALITY // ELFGOT: !llvm.module.flags = !{ // ELFGOT-SAME: !1 // ELFGOT: !1 = !{i32 8, !"ptrauth-elf-got", i32 1} -// PERSONALITY: !llvm.module.flags = !{ -// PERSONALITY-SAME: !1 -// PERSONALITY: !1 = !{i32 8, !"ptrauth-sign-personality", i32 1} - // OFF-NOT: "ptrauth- diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index a2a9e5d499e527..8eef45ce565deb 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -52,13 +52,7 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL, -const MCSymbol *Sym, -const MachineModuleInfo *MMI) const override; - - virtual void emitPersonalityValueImpl(MCStreamer &Streamer, -const DataLayout &DL, -const MCSymbol *Sym, -const MachineModuleInfo *MMI) const; +const MCSymbol *Sym) const override; /// Given a constant with the SectionKind, return a section that it should be /// placed in. 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 4864ba843f4886..0c09cfe684783b 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -82,8 +82,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { virtual void Initialize(MCContext &ctx, const TargetMachine &TM); virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, -const MCSymbol *Sym, -const MachineModuleInfo *MMI) const; +const MCSymbol *Sym) const; /// Emit the module-level metadata that the platform cares about. virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {} diff --git a/ll