[llvm-branch-commits] [llvm] [ADT] Use range-based helper functions in SmallSet (PR #108585)
@@ -234,19 +225,12 @@ class SmallSet { /// Check if the SmallSet contains the given element. bool contains(const T &V) const { if (isSmall()) - return vfind(V) != Vector.end(); -return Set.find(V) != Set.end(); + return llvm::is_contained(Vector, V); +return llvm::is_contained(Set, V); dwblaikie wrote: FWIW, I think is_contained is more legible than find!=end and seems fine to use here & I agree with the comment on is_contained suggesting it for uses like this. https://github.com/llvm/llvm-project/pull/108585 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713 >From fb2ed73b44facf865312d7efe32053718fcd6458 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 12 Sep 2024 15:25:43 -0400 Subject: [PATCH] [Attributor] Use more appropriate approach to check flat address space --- llvm/include/llvm/Transforms/IPO/Attributor.h | 7 ++--- .../Transforms/IPO/AttributorAttributes.cpp | 26 ++- .../CodeGen/AMDGPU/simple-indirect-call.ll| 5 ++-- .../Attributor/address_space_info.ll | 4 ++- .../Attributor/memory_locations_gpu.ll| 8 +++--- .../test/Transforms/Attributor/nocapture-1.ll | 4 +-- .../reduced/openmp_opt_constant_type_crash.ll | 1 - .../Transforms/Attributor/value-simplify.ll | 3 +-- .../Transforms/OpenMP/nested_parallelism.ll | 4 +-- .../OpenMP/spmdization_kernel_env_dep.ll | 25 +- 10 files changed, 51 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 921fe945539510..59bae547522ea7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6267,11 +6267,12 @@ struct AAAddressSpace : public StateWrapper { return (AA->getIdAddr() == &ID); } - // No address space which indicates the associated value is dead. - static const uint32_t NoAddressSpace = ~0U; - /// Unique ID (due to the unique address) static const char ID; + +protected: + // Invalid address space which indicates the associated value is dead. 
+ static const uint32_t InvalidAddressSpace = ~0U; }; struct AAAllocationInfo : public StateWrapper { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 217c7cccb5775a..b2888f556d7d0d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); -if (getAssociatedType()->getPointerAddressSpace()) + +if (!A.getInfoCache().getDL().getFlatAddressSpace().has_value()) { + indicatePessimisticFixpoint(); + return; +} + +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value(); +unsigned AS = getAssociatedType()->getPointerAddressSpace(); +if (AS != FlatAS) { + [[maybe_unused]] bool R = takeAddressSpace(AS); + assert(R && "The take should happen"); indicateOptimisticFixpoint(); +} } ChangeStatus updateImpl(Attributor &A) override { @@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { -Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); -if (getAddressSpace() == NoAddressSpace || +if (getAddressSpace() == InvalidAddressSpace || getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +Value *AssociatedValue = &getAssociatedValue(); +Value *OriginalValue = peelAddrspacecast(AssociatedValue); + PointerType *NewPtrTy = PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); bool UseOriginalValue = @@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace { if (!isValidState()) return "addrspace()"; return "addrspace(" + - (AssumedAddressSpace == NoAddressSpace + (AssumedAddressSpace == InvalidAddressSpace ? "none" : std::to_string(AssumedAddressSpace)) + ")"; } private: - uint32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = InvalidAddressSpace; bool takeAddressSpace(uint32_t AS) { -if (AssumedAddressSpace == NoAddressSpace) { +if (AssumedAddressSpace == InvalidAddressSpace) { AssumedAddressSpace = AS; return true; } diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index cca7b49996ff3b..971161a1c59855 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -34,8 +34,9 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { ; ATTRIBUTOR_GCN-NEXT:[[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT:store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT:[[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT:[[FPTR_CAST:%.*]] = a
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/109203 None >From 886b8947ae2dfa496a56ff0251f6fe22dca5060e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/108923 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/108921 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/108923 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108787 >From e6fb6d58ce5cd8d02d769e1a4eb9664449fb785b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 15 Sep 2024 23:06:14 -0400 Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace` This has been moved to `DataLayout`. --- .../llvm/Analysis/TargetTransformInfo.h | 21 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 -- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 - llvm/lib/Analysis/TargetTransformInfo.cpp | 4 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 8 --- .../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 .../Transforms/Scalar/InferAddressSpaces.cpp | 5 +++-- .../AMDGPU/noop-ptrint-pair.ll| 2 +- .../old-pass-regressions-inseltpoison.ll | 2 +- .../AMDGPU/old-pass-regressions.ll| 2 +- .../InferAddressSpaces/AMDGPU/ptrmask.ll | 2 +- 11 files changed, 7 insertions(+), 50 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 3411163549de2f..42d3ee328fc1a5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -451,24 +451,6 @@ class TargetTransformInfo { /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; - /// Returns the address space ID for a target's 'flat' address space. Note - /// this is not necessarily the same as addrspace(0), which LLVM sometimes - /// refers to as the generic address space. The flat address space is a - /// generic address space that can be used access multiple segments of memory - /// with different address spaces. Access of a memory location through a - /// pointer with this address space is expected to be legal but slower - /// compared to the same memory location accessed through a pointer with a - /// different address space. 
- // - /// This is for targets with different pointer representations which can - /// be converted with the addrspacecast instruction. If a pointer is converted - /// to this address space, optimizations should attempt to replace the access - /// with the source address space. - /// - /// \returns ~0u if the target does not have such a flat address space to - /// optimize away. - unsigned getFlatAddressSpace() const; - /// Return any intrinsic address operand indexes which may be rewritten if /// they use a flat address space pointer. /// @@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept { virtual bool isAlwaysUniform(const Value *V) = 0; virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0; - virtual unsigned getFlatAddressSpace() = 0; virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; @@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.addrspacesMayAlias(AS0, AS1); } - unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const override { return Impl.collectFlatAddressOperands(OpIndexes, IID); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2819af30cd1704..3902a19a90755a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -115,8 +115,6 @@ class TargetTransformInfoImplBase { return true; } - unsigned getFlatAddressSpace() const { return -1; } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2f2a6a09ffc44d..ce585dfc7a2f39 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -292,11 +292,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return true; } - unsigned getFlatAddressSpace() { -// Return an invalid address space. -return -1; - } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a101..42e3f277c1e291 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -305,10 +30
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258 >From 4f56200b16374ae69b717847872d53a80f505869 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 11 Sep 2024 12:23:32 -0400 Subject: [PATCH] [Attributor] Take the address space from addrspacecast directly If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal. --- .../Transforms/IPO/AttributorAttributes.cpp | 60 ++- llvm/test/CodeGen/AMDGPU/aa-as-infer.ll | 33 ++ 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b2888f556d7d0d..22f983af85af3e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12587,16 +12587,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } ChangeStatus updateImpl(Attributor &A) override { +assert(A.getInfoCache().getDL().getFlatAddressSpace().has_value()); +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value(); uint32_t OldAddressSpace = AssumedAddressSpace; -auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, -DepClassTy::REQUIRED); -auto Pred = [&](Value &Obj) { + +auto CheckAddressSpace = [&](Value &Obj) { if (isa(&Obj)) return true; + // If an argument in flat address space only has addrspace cast uses, and + // those casts are same, then we take the dst addrspace. 
+ if (auto *Arg = dyn_cast(&Obj)) { +if (Arg->getType()->getPointerAddressSpace() == FlatAS) { + unsigned CastAddrSpace = FlatAS; + for (auto *U : Arg->users()) { +auto *ASCI = dyn_cast(U); +if (!ASCI) + return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); +if (CastAddrSpace != FlatAS && +CastAddrSpace != ASCI->getDestAddressSpace()) + return false; +CastAddrSpace = ASCI->getDestAddressSpace(); + } + if (CastAddrSpace != FlatAS) +return takeAddressSpace(CastAddrSpace); +} + } return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); }; -if (!AUO->forallUnderlyingObjects(Pred)) +auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, +DepClassTy::REQUIRED); +if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) return indicatePessimisticFixpoint(); return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED @@ -12605,17 +12626,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { -if (getAddressSpace() == InvalidAddressSpace || -getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) +unsigned NewAS = getAddressSpace(); + +if (NewAS == InvalidAddressSpace || +NewAS == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value(); + Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); +Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS); PointerType *NewPtrTy = -PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); +PointerType::get(getAssociatedType()->getContext(), NewAS); bool UseOriginalValue = -OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace(); +OriginalValue->getType()->getPointerAddressSpace() == NewAS; bool Changed = false; @@ -12675,12 +12700,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { return AssumedAddressSpace 
== AS; } - static Value *peelAddrspacecast(Value *V) { -if (auto *I = dyn_cast(V)) - return peelAddrspacecast(I->getPointerOperand()); + static Value *peelAddrspacecast(Value *V, unsigned FlatAS) { +if (auto *I = dyn_cast(V)) { + assert(I->getSrcAddressSpace() != FlatAS && + "there should not be flat AS -> non-flat AS"); + return I->getPointerOperand(); +} if (auto *C = dyn_cast(V)) - if (C->getOpcode() == Instruction::AddrSpaceCast) -return peelAddrspacecast(C->getOperand(0)); + if (C->getOpcode() == Instruction::AddrSpaceCast) { +assert(C->getOperand(0)->getType()
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/109184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/109214 Convert `cuf.allocate` and `cuf.deallocate` to the runtime entry points added in #109213 >From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 18 Sep 2024 15:42:19 -0700 Subject: [PATCH] [flang][cuda] Convert module allocation/deallocation to runtime calls --- .../Optimizer/Transforms/CufOpConversion.cpp | 63 --- flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp index 2dc37f4df3aeec..e61105491ca69f 100644 --- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp @@ -7,6 +7,7 @@ //===--===// #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" @@ -14,6 +15,10 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +<<< HEAD +=== +#include "flang/Runtime/CUDA/allocatable.h" +>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate device module variable) #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" @@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda; namespace { template -static bool needDoubleDescriptor(OpTy op) { +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) +return true; + return false; +} + +template +static bool hasDoubleDescriptors(OpTy op) { if (auto declareOp = mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if 
(declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) { op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -108,17 +118,22 @@ struct CufAllocateOpConversion if (op.getPinned()) return mlir::failure(); -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} // Allocation for local descriptor falls back on the standard runtime // AllocatableAllocate as the dedicated allocator is set in the descriptor // before the call. -auto mod = op->template getParentOfType(); -fir::FirOpBuilder builder(rewriter, mod); -mlir::Location loc = op.getLoc(); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -133,17 +148,23 @@ struct CufDeallocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::DeallocateOp op, mlir::PatternRewriter &rewriter) const override { -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. 
-if (needDoubleDescriptor(op)) - return mlir::failure(); -// Deallocation for local descriptor falls back on the standard runtime -// AllocatableDeallocate as the dedicated deallocator is set in the -// descriptor before the call. auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builde
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes Convert `cuf.allocate` and `cuf.deallocate` to the runtime entry points added in #109213 --- Full diff: https://github.com/llvm/llvm-project/pull/109214.diff 2 Files Affected: - (modified) flang/lib/Optimizer/Transforms/CufOpConversion.cpp (+40-23) - (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+38-2) ``diff diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp index 2dc37f4df3aeec..e61105491ca69f 100644 --- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp @@ -7,6 +7,7 @@ //===--===// #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" @@ -14,6 +15,10 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +<<< HEAD +=== +#include "flang/Runtime/CUDA/allocatable.h" +>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate device module variable) #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" @@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda; namespace { template -static bool needDoubleDescriptor(OpTy op) { +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) +return true; + return false; +} + +template +static bool hasDoubleDescriptors(OpTy op) { if (auto declareOp = mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; 
return true; } @@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) { op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -108,17 +118,22 @@ struct CufAllocateOpConversion if (op.getPinned()) return mlir::failure(); -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} // Allocation for local descriptor falls back on the standard runtime // AllocatableAllocate as the dedicated allocator is set in the descriptor // before the call. -auto mod = op->template getParentOfType(); -fir::FirOpBuilder builder(rewriter, mod); -mlir::Location loc = op.getLoc(); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -133,17 +148,23 @@ struct CufDeallocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::DeallocateOp op, mlir::PatternRewriter &rewriter) const override { -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); -// Deallocation for local descriptor falls back on the standard runtime -// AllocatableDeallocate as the dedicated deallocator is set in the -// descriptor before the call. 
auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} + +// Deallocation for local descriptor falls back on the standard runtime +
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109214 >From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 18 Sep 2024 15:42:19 -0700 Subject: [PATCH 1/3] [flang][cuda] Convert module allocation/deallocation to runtime calls --- .../Optimizer/Transforms/CufOpConversion.cpp | 63 --- flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp index 2dc37f4df3aeec..e61105491ca69f 100644 --- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp @@ -7,6 +7,7 @@ //===--===// #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" @@ -14,6 +15,10 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +<<< HEAD +=== +#include "flang/Runtime/CUDA/allocatable.h" +>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate device module variable) #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" @@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda; namespace { template -static bool needDoubleDescriptor(OpTy op) { +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) +return true; + return false; +} + +template +static bool hasDoubleDescriptors(OpTy op) { if (auto declareOp = mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) 
+ if (isPinned(declareOp)) return false; return true; } @@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) { op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -108,17 +118,22 @@ struct CufAllocateOpConversion if (op.getPinned()) return mlir::failure(); -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} // Allocation for local descriptor falls back on the standard runtime // AllocatableAllocate as the dedicated allocator is set in the descriptor // before the call. -auto mod = op->template getParentOfType(); -fir::FirOpBuilder builder(rewriter, mod); -mlir::Location loc = op.getLoc(); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -133,17 +148,23 @@ struct CufDeallocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::DeallocateOp op, mlir::PatternRewriter &rewriter) const override { -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); -// Deallocation for local descriptor falls back on the standard runtime -// AllocatableDeallocate as the dedicated deallocator is set in the -// descriptor before the call. 
auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} + +// Deallocation fo
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109214 >From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 18 Sep 2024 15:42:19 -0700 Subject: [PATCH 1/2] [flang][cuda] Convert module allocation/deallocation to runtime calls --- .../Optimizer/Transforms/CufOpConversion.cpp | 63 --- flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp index 2dc37f4df3aeec..e61105491ca69f 100644 --- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp @@ -7,6 +7,7 @@ //===--===// #include "flang/Common/Fortran.h" +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" @@ -14,6 +15,10 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +<<< HEAD +=== +#include "flang/Runtime/CUDA/allocatable.h" +>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate device module variable) #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" @@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda; namespace { template -static bool needDoubleDescriptor(OpTy op) { +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) +return true; + return false; +} + +template +static bool hasDoubleDescriptors(OpTy op) { if (auto declareOp = mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) 
+ if (isPinned(declareOp)) return false; return true; } @@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) { op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -108,17 +118,22 @@ struct CufAllocateOpConversion if (op.getPinned()) return mlir::failure(); -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} // Allocation for local descriptor falls back on the standard runtime // AllocatableAllocate as the dedicated allocator is set in the descriptor // before the call. -auto mod = op->template getParentOfType(); -fir::FirOpBuilder builder(rewriter, mod); -mlir::Location loc = op.getLoc(); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -133,17 +148,23 @@ struct CufDeallocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::DeallocateOp op, mlir::PatternRewriter &rewriter) const override { -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); -// Deallocation for local descriptor falls back on the standard runtime -// AllocatableDeallocate as the dedicated deallocator is set in the -// descriptor before the call. 
auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} + +// Deallocation fo
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/109203 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) Changes For modules containing context roots, the way IPO happens can result in imported functions being specialized differently from their originals (even if they are not themselves inlined). We therefore want to convert them to local rather than elide them. Eventually we'd perform this as a ThinLTO directive. --- Full diff: https://github.com/llvm/llvm-project/pull/109203.diff 2 Files Affected: - (modified) llvm/lib/Transforms/IPO/ElimAvailExtern.cpp (+8-5) - (modified) llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll (+11-2) ``diff diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file `` https://github.com/llvm/llvm-project/pull/109203 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin ready_for_review https://github.com/llvm/llvm-project/pull/109203 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/vzakhari approved this pull request. Great! https://github.com/llvm/llvm-project/pull/109214 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109214 >From be4731f339d6fd9b45cd7cc93e3dd8ff83e80576 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 18 Sep 2024 15:42:19 -0700 Subject: [PATCH] [flang][cuda] Convert module allocation/deallocation to runtime calls --- .../Optimizer/Transforms/CufOpConversion.cpp | 59 +++ flang/test/Fir/CUDA/cuda-allocate.fir | 40 - 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp index 2dc37f4df3aeec..ac796e83b07078 100644 --- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp @@ -14,6 +14,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Runtime/CUDA/allocatable.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" @@ -35,13 +36,19 @@ using namespace Fortran::runtime::cuda; namespace { template -static bool needDoubleDescriptor(OpTy op) { +static bool isPinned(OpTy op) { + if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned) +return true; + return false; +} + +template +static bool hasDoubleDescriptors(OpTy op) { if (auto declareOp = mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -49,8 +56,7 @@ static bool needDoubleDescriptor(OpTy op) { op.getBox().getDefiningOp())) { if (mlir::isa_and_nonnull( declareOp.getMemref().getDefiningOp())) { - if (declareOp.getDataAttr() && - *declareOp.getDataAttr() == cuf::DataAttribute::Pinned) + if (isPinned(declareOp)) return false; return true; } @@ -108,17 
+114,22 @@ struct CufAllocateOpConversion if (op.getPinned()) return mlir::failure(); -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); +auto mod = op->getParentOfType(); +fir::FirOpBuilder builder(rewriter, mod); +mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Allocation for module variable are done with custom runtime entry point + // so the descriptors can be synchronized. + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} // Allocation for local descriptor falls back on the standard runtime // AllocatableAllocate as the dedicated allocator is set in the descriptor // before the call. -auto mod = op->template getParentOfType(); -fir::FirOpBuilder builder(rewriter, mod); -mlir::Location loc = op.getLoc(); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -133,17 +144,23 @@ struct CufDeallocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::DeallocateOp op, mlir::PatternRewriter &rewriter) const override { -// TODO: Allocation of module variable will need more work as the descriptor -// will be duplicated and needs to be synced after allocation. -if (needDoubleDescriptor(op)) - return mlir::failure(); -// Deallocation for local descriptor falls back on the standard runtime -// AllocatableDeallocate as the dedicated deallocator is set in the -// descriptor before the call. auto mod = op->getParentOfType(); fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + +if (hasDoubleDescriptors(op)) { + // Deallocation for module variable are done with custom runtime entry + // point so the descriptors can be synchronized. 
+ mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); +} + +// Deallocation for local descriptor falls back on the standard runtime +// AllocatableDeallocate as the dedicated deallocator is set in the +// descriptor before the call. mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); @@ -448,10 +465,6 @@ class CufOpConversion : public fir::impl::CufOpConversionBase { } return true; }); -target.addDynamicallyLe
[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/108921 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][sanitizer] Move InitTlsSize (PR #108922)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/108922 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/108923 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/108921 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : aaupov wrote: Sure. But actually can we rely on the fact that children are ordered by their call site probe id? That would make binary search possible. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : + Parent->getChildren()) { +if (Child.Guid == GUID) { + if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) +Cur = &Child; + break; +} + } +} +if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash) aaupov wrote: `Hash` comes from YAML; we compare it against the binary's checksum. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/109185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); aaupov wrote: I'll drop these assertions as they are done by operator[] anyway. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` (PR #109185)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/109185 None >From 09642a4889da1d0e10f54b17b84e32dae5c8557e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:00:42 -0700 Subject: [PATCH] [ctx_prof] Handle `select` --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 3 + llvm/lib/Analysis/CtxProfAnalysis.cpp | 9 +++ .../Instrumentation/PGOCtxProfFlattening.cpp | 45 ++- llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++- .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++ 5 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index b3e64b26ee543c..0a5beb92fcbcc0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the instruction instrumenting a BB, or nullptr if not present. 
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB); + + /// Get the step instrumentation associated with a `select` + static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 3df72983862d98..7517011395a7d6 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { return nullptr; } +InstrProfIncrementInstStep * +CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { + Instruction *Prev = &SI; + while ((Prev = Prev->getPrevNode())) +if (auto *Step = dyn_cast(Prev)) + return Step; + return nullptr; +} + template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor, diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 91f950e2ba4c3e..30bb251364fdef 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,6 +154,8 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } +uint64_t getCount() const { return *Count;} + bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); @@ -266,6 +268,21 @@ class ProfileAnnotator final { return HitExit; } + bool allNonColdSelectsHaveProfile() const { +for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { +for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { +if (!SI->getMetadata(LLVMContext::MD_prof)) { + return false; +} + } +} + } +} +return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl &Counters, InstrProfSummaryBuilder &PB) @@ -324,12 +341,33 @@ class ProfileAnnotator final { 
PB.addEntryCount(Counters[0]); for (auto &BB : F) { + const auto &BBInfo = getBBInfo(BB); + if (BBInfo.getCount() > 0) { +for (auto &I : BB) { + if (auto *SI = dyn_cast(&I)) { +if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { + auto Index = Step->getIndex()->getZExtValue(); + assert(Index < Counters.size() && +"The index of the step instruction must be inside the " +"counters vector by " +"construction - tripping this assertion indicates a bug in " +"how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + auto TrueCount = Counters[Index]; + auto FalseCount = + (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); +} + } +} + } if (succ_size(&BB) < 2) continue; auto *Term = BB.getTerminator(); SmallVector EdgeCounts(Term->getNumSuccessors(), 0); uint64_t MaxCount = 0; - const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; ++SuccIdx) { uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); @@ -343,12 +381,15 @@ class ProfileAnnotator final { setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); } assert(allCountersAreAssigned() && - "Expected all counters have been assigned.
[llvm-branch-commits] [llvm] [ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/109184 None >From 987562aab1c409b62b1a4c5d6d8566ad812b8313 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:03:30 -0700 Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index c29709b613410e..3df72983862d98 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, } InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { - for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) + if (!InstrProfCallsite::canInstrumentCallsite(CB)) +return nullptr; + for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) { if (auto *IPC = dyn_cast(Prev)) return IPC; +assert(!isa(Prev) && + "didn't expect to find another call, that's not the callsite " + "instrumentation, before an instrumentable callsite"); + } return nullptr; } InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { for (auto &I : BB) if (auto *Incr = dyn_cast(&I)) - return Incr; + if (!isa(&I)) +return Incr; return nullptr; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)
https://github.com/clementval closed https://github.com/llvm/llvm-project/pull/109214 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/109170 Backport 7153a4bbf6d46e58ce32d59220515c5ab9f35691 Requested by: @owenca >From 2cc6c7bde964931be2c9a6691da944678bcb31a8 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 17 Sep 2024 21:16:20 -0700 Subject: [PATCH] [clang-format] Reimplement InsertNewlineAtEOF (#108513) Fixes #108333. (cherry picked from commit 7153a4bbf6d46e58ce32d59220515c5ab9f35691) --- clang/lib/Format/FormatTokenLexer.cpp | 7 +++ clang/lib/Format/TokenAnnotator.cpp | 5 - clang/unittests/Format/FormatTest.cpp | 6 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index e21b5a882b7773..63949b2e26bdc1 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -100,6 +100,13 @@ ArrayRef FormatTokenLexer::lex() { if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { +auto &TokEOF = *Tokens.back(); +if (TokEOF.NewlinesBefore == 0) { + TokEOF.NewlinesBefore = 1; + TokEOF.OriginalColumn = 0; +} + } return Tokens; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3f00a28e62988a..4512e539cc7947 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { auto *First = Line.First; First->SpacesRequiredBefore = 1; First->CanBreakBefore = First->MustBreakBefore; - - if (First->is(tok::eof) && First->NewlinesBefore == 0 && - Style.InsertNewlineAtEOF) { -First->NewlinesBefore = 1; - } } // This function heuristically determines whether 'Current' starts the name of a diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 29200b72d3d008..b7d8fc8ea72c6c 100644 --- 
a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) { verifyNoChange("int i;\n", Style); verifyFormat("int i;\n", "int i;", Style); + + constexpr StringRef Code{"namespace {\n" + "int i;\n" + "} // namespace"}; + verifyFormat(Code.str() + '\n', Code, Style, + {tooling::Range(19, 13)}); // line 3 } TEST_F(FormatTest, KeepEmptyLinesAtEOF) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/109170 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)
llvmbot wrote: @llvm/pr-subscribers-clang-format Author: None (llvmbot) Changes Backport 7153a4bbf6d46e58ce32d59220515c5ab9f35691 Requested by: @owenca --- Full diff: https://github.com/llvm/llvm-project/pull/109170.diff 3 Files Affected: - (modified) clang/lib/Format/FormatTokenLexer.cpp (+7) - (modified) clang/lib/Format/TokenAnnotator.cpp (-5) - (modified) clang/unittests/Format/FormatTest.cpp (+6) ``diff diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index e21b5a882b7773..63949b2e26bdc1 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -100,6 +100,13 @@ ArrayRef FormatTokenLexer::lex() { if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { +auto &TokEOF = *Tokens.back(); +if (TokEOF.NewlinesBefore == 0) { + TokEOF.NewlinesBefore = 1; + TokEOF.OriginalColumn = 0; +} + } return Tokens; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3f00a28e62988a..4512e539cc7947 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { auto *First = Line.First; First->SpacesRequiredBefore = 1; First->CanBreakBefore = First->MustBreakBefore; - - if (First->is(tok::eof) && First->NewlinesBefore == 0 && - Style.InsertNewlineAtEOF) { -First->NewlinesBefore = 1; - } } // This function heuristically determines whether 'Current' starts the name of a diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 29200b72d3d008..b7d8fc8ea72c6c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) { verifyNoChange("int i;\n", Style); verifyFormat("int i;\n", "int i;", Style); + + 
constexpr StringRef Code{"namespace {\n" + "int i;\n" + "} // namespace"}; + verifyFormat(Code.str() + '\n', Code, Style, + {tooling::Range(19, 13)}); // line 3 } TEST_F(FormatTest, KeepEmptyLinesAtEOF) { `` https://github.com/llvm/llvm-project/pull/109170 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)
llvmbot wrote: @mydeveloperday What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/109170 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109185 >From dfde0036f6dd98f859c7c3984c4e44d6224d17f0 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:00:42 -0700 Subject: [PATCH] [ctx_prof] Handle `select` --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 3 + llvm/lib/Analysis/CtxProfAnalysis.cpp | 9 +++ .../Instrumentation/PGOCtxProfFlattening.cpp | 46 ++- llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++- .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++ 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index b3e64b26ee543c..0a5beb92fcbcc0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the instruction instrumenting a BB, or nullptr if not present. 
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB); + + /// Get the step instrumentation associated with a `select` + static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 3df72983862d98..7517011395a7d6 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { return nullptr; } +InstrProfIncrementInstStep * +CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { + Instruction *Prev = &SI; + while ((Prev = Prev->getPrevNode())) +if (auto *Step = dyn_cast(Prev)) + return Step; + return nullptr; +} + template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor, diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 91f950e2ba4c3e..3a3c47e90a168a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,6 +154,8 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } +uint64_t getCount() const { return *Count; } + bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); @@ -266,6 +268,21 @@ class ProfileAnnotator final { return HitExit; } + bool allNonColdSelectsHaveProfile() const { +for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { +for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { +if (!SI->getMetadata(LLVMContext::MD_prof)) { + return false; +} + } +} + } +} +return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl &Counters, InstrProfSummaryBuilder &PB) @@ -324,12 +341,34 @@ class ProfileAnnotator final { 
PB.addEntryCount(Counters[0]); for (auto &BB : F) { + const auto &BBInfo = getBBInfo(BB); + if (BBInfo.getCount() > 0) { +for (auto &I : BB) { + if (auto *SI = dyn_cast(&I)) { +if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { + auto Index = Step->getIndex()->getZExtValue(); + assert( + Index < Counters.size() && + "The index of the step instruction must be inside the " + "counters vector by " + "construction - tripping this assertion indicates a bug in " + "how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + auto TrueCount = Counters[Index]; + auto FalseCount = + (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); +} + } +} + } if (succ_size(&BB) < 2) continue; auto *Term = BB.getTerminator(); SmallVector EdgeCounts(Term->getNumSuccessors(), 0); uint64_t MaxCount = 0; - const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; ++SuccIdx) { uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); @@ -343,12 +382,15 @@ class ProfileAnnotator final { setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); } assert(allCountersAreAssigned() && - "Expected all counters have been ass
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109203 >From 49a3d00864cf1850a8f5f1aff71b66603e2a8d8c Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109203 >From ccc89f9225f33508098c9a0c457f3f8d02a6a8e8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109203 >From 3cafcfb2e786e48c53214c2767b3e72b415aa3bb Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109203 >From 24c376930f98887c88476fd6a41af0b5a452acc1 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/109184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/109185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/109203 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109185 >From b68a12f999df971ca42c8c68e4f3ac091034c47a Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:00:42 -0700 Subject: [PATCH] [ctx_prof] Handle `select` --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 3 + llvm/lib/Analysis/CtxProfAnalysis.cpp | 9 +++ .../Instrumentation/PGOCtxProfFlattening.cpp | 46 ++- llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++- .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++ 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index b3e64b26ee543c..0a5beb92fcbcc0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the instruction instrumenting a BB, or nullptr if not present. 
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB); + + /// Get the step instrumentation associated with a `select` + static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 3df72983862d98..7517011395a7d6 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { return nullptr; } +InstrProfIncrementInstStep * +CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { + Instruction *Prev = &SI; + while ((Prev = Prev->getPrevNode())) +if (auto *Step = dyn_cast(Prev)) + return Step; + return nullptr; +} + template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor, diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 91f950e2ba4c3e..3a3c47e90a168a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,6 +154,8 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } +uint64_t getCount() const { return *Count; } + bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); @@ -266,6 +268,21 @@ class ProfileAnnotator final { return HitExit; } + bool allNonColdSelectsHaveProfile() const { +for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { +for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { +if (!SI->getMetadata(LLVMContext::MD_prof)) { + return false; +} + } +} + } +} +return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl &Counters, InstrProfSummaryBuilder &PB) @@ -324,12 +341,34 @@ class ProfileAnnotator final { 
PB.addEntryCount(Counters[0]); for (auto &BB : F) { + const auto &BBInfo = getBBInfo(BB); + if (BBInfo.getCount() > 0) { +for (auto &I : BB) { + if (auto *SI = dyn_cast(&I)) { +if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { + auto Index = Step->getIndex()->getZExtValue(); + assert( + Index < Counters.size() && + "The index of the step instruction must be inside the " + "counters vector by " + "construction - tripping this assertion indicates a bug in " + "how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + auto TrueCount = Counters[Index]; + auto FalseCount = + (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); +} + } +} + } if (succ_size(&BB) < 2) continue; auto *Term = BB.getTerminator(); SmallVector EdgeCounts(Term->getNumSuccessors(), 0); uint64_t MaxCount = 0; - const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; ++SuccIdx) { uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); @@ -343,12 +382,15 @@ class ProfileAnnotator final { setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); } assert(allCountersAreAssigned() && - "Expected all counters have been ass
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109184 >From f654c77a3f1902b8dc7d9674d89f08f7fca0c85f Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:03:30 -0700 Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index c29709b613410e..3df72983862d98 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, } InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { - for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) + if (!InstrProfCallsite::canInstrumentCallsite(CB)) +return nullptr; + for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) { if (auto *IPC = dyn_cast(Prev)) return IPC; +assert(!isa(Prev) && + "didn't expect to find another call, that's not the callsite " + "instrumentation, before an instrumentable callsite"); + } return nullptr; } InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { for (auto &I : BB) if (auto *Incr = dyn_cast(&I)) - return Incr; + if (!isa(&I)) +return Incr; return nullptr; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/109093 Backport 13280d99aec5b4f383a2f3d5c10ecb148a07384e Requested by: @nikic >From 3539b82c0ba5b412ed51e2031880c1999a401b3d Mon Sep 17 00:00:00 2001 From: YANG Xudong Date: Fri, 13 Sep 2024 08:49:54 +0800 Subject: [PATCH] [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) For zig with LLVM 19.1.0rc4, we are seeing the following error when bootstrapping a `loongarch64-linux-musl` target. https://github.com/ziglang/zig-bootstrap/issues/164#issuecomment-2332357069 It seems that this issue is caused by `PromoteFloatResult` is not handling FREEZE OP on loongarch. Here is the reproduction of the error: https://godbolt.org/z/PPfvWjjG5 ~~This patch adds the FREEZE OP handling with `PromoteFloatRes_UnaryOp` and adds a test case.~~ This patch changes loongarch's way of floating point promotion to soft promotion to avoid this problem. See: loongarch's handling of `half`: - https://github.com/llvm/llvm-project/issues/93894 - https://github.com/llvm/llvm-project/pull/94456 Also see: other float promotion FREEZE handling - https://github.com/llvm/llvm-project/commit/0019c2f194a5e1f4cd65c5284e204328cc40ab3d (cherry picked from commit 13280d99aec5b4f383a2f3d5c10ecb148a07384e) --- .../Target/LoongArch/LoongArchISelLowering.h | 2 + llvm/test/CodeGen/LoongArch/fp16-promote.ll | 198 +++--- 2 files changed, 128 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index fc5b36c2124e01..267837add575dc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering { bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVectorImpl &ArgLocs) const; + + bool softPromoteHalfType() const override { return 
true; } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll index 75f920b43a06ce..03965ac81f3763 100644 --- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll +++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll @@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind { define half @test_fadd_reg(half %a, half %b) nounwind { ; LA32-LABEL: test_fadd_reg: ; LA32: # %bb.0: -; LA32-NEXT:addi.w $sp, $sp, -32 -; LA32-NEXT:st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT:addi.w $sp, $sp, -16 +; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT:st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT:fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT:move $fp, $a0 +; LA32-NEXT:move $a0, $a1 +; LA32-NEXT:bl %plt(__gnu_h2f_ieee) ; LA32-NEXT:fmov.s $fs0, $fa0 -; LA32-NEXT:fmov.s $fa0, $fa1 -; LA32-NEXT:bl %plt(__gnu_f2h_ieee) +; LA32-NEXT:move $a0, $fp ; LA32-NEXT:bl %plt(__gnu_h2f_ieee) -; LA32-NEXT:fmov.s $fs1, $fa0 -; LA32-NEXT:fmov.s $fa0, $fs0 +; LA32-NEXT:fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee) -; LA32-NEXT:bl %plt(__gnu_h2f_ieee) -; LA32-NEXT:fadd.s $fa0, $fa0, $fs1 -; LA32-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT:ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT:addi.w $sp, $sp, 32 +; LA32-NEXT:fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT:ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT:addi.w $sp, $sp, 16 ; LA32-NEXT:ret ; ; LA64-LABEL: test_fadd_reg: ; LA64: # %bb.0: ; LA64-NEXT:addi.d $sp, $sp, -32 ; LA64-NEXT:st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT:st.d $fp, $sp, 
16 # 8-byte Folded Spill +; LA64-NEXT:fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT:move $fp, $a0 +; LA64-NEXT:move $a0, $a1 +; LA64-NEXT:bl %plt(__gnu_h2f_ieee) ; LA64-NEXT:fmov.s $fs0, $fa0 -; LA64-NEXT:fmov.s $fa0, $fa1 -; LA64-NEXT:bl %plt(__gnu_f2h_ieee) +; LA64-NEXT:move $a0, $fp ; LA64-NEXT:bl %plt(__gnu_h2f_ieee) -; LA64-NEXT:fmov.s $fs1, $fa0 -; LA64-NEXT:fmov.s $fa0, $fs0 +; LA64-NEXT:fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT:bl %plt(__gnu_f2h_ieee) -; LA64-NEXT:bl %plt(__gnu_h2f_ieee) -; LA64-NEXT:fadd.s $fa0, $fa0, $fs1 -; LA64-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folde
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
llvmbot wrote: @llvm/pr-subscribers-backend-loongarch Author: None (llvmbot) Changes Backport 13280d99aec5b4f383a2f3d5c10ecb148a07384e Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/109093.diff 2 Files Affected: - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2) - (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+126-72) ``diff diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index fc5b36c2124e01..267837add575dc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering { bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVectorImpl &ArgLocs) const; + + bool softPromoteHalfType() const override { return true; } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll index 75f920b43a06ce..03965ac81f3763 100644 --- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll +++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll @@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind { define half @test_fadd_reg(half %a, half %b) nounwind { ; LA32-LABEL: test_fadd_reg: ; LA32: # %bb.0: -; LA32-NEXT:addi.w $sp, $sp, -32 -; LA32-NEXT:st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT:addi.w $sp, $sp, -16 +; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT:st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT:fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT:move $fp, $a0 +; LA32-NEXT:move $a0, $a1 +; LA32-NEXT:bl %plt(__gnu_h2f_ieee) ; LA32-NEXT:fmov.s $fs0, $fa0 -; LA32-NEXT:fmov.s $fa0, $fa1 -; LA32-NEXT:bl %plt(__gnu_f2h_ieee) +; LA32-NEXT:move $a0, 
$fp ; LA32-NEXT:bl %plt(__gnu_h2f_ieee) -; LA32-NEXT:fmov.s $fs1, $fa0 -; LA32-NEXT:fmov.s $fa0, $fs0 +; LA32-NEXT:fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee) -; LA32-NEXT:bl %plt(__gnu_h2f_ieee) -; LA32-NEXT:fadd.s $fa0, $fa0, $fs1 -; LA32-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT:ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT:addi.w $sp, $sp, 32 +; LA32-NEXT:fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT:ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT:addi.w $sp, $sp, 16 ; LA32-NEXT:ret ; ; LA64-LABEL: test_fadd_reg: ; LA64: # %bb.0: ; LA64-NEXT:addi.d $sp, $sp, -32 ; LA64-NEXT:st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT:st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT:fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT:move $fp, $a0 +; LA64-NEXT:move $a0, $a1 +; LA64-NEXT:bl %plt(__gnu_h2f_ieee) ; LA64-NEXT:fmov.s $fs0, $fa0 -; LA64-NEXT:fmov.s $fa0, $fa1 -; LA64-NEXT:bl %plt(__gnu_f2h_ieee) +; LA64-NEXT:move $a0, $fp ; LA64-NEXT:bl %plt(__gnu_h2f_ieee) -; LA64-NEXT:fmov.s $fs1, $fa0 -; LA64-NEXT:fmov.s $fa0, $fs0 +; LA64-NEXT:fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT:bl %plt(__gnu_f2h_ieee) -; LA64-NEXT:bl %plt(__gnu_h2f_ieee) -; LA64-NEXT:fadd.s $fa0, $fa0, $fs1 -; LA64-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT:ld.d $fp, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT:ld.d $ra, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT:addi.d $sp, $sp, 32 ; LA64-NEXT:ret @@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { ; LA32-NEXT:st.w $fp, $sp, 24 # 4-byte Folded Spill ; LA32-NEXT:st.w $s0, $sp, 20 # 4-byte Folded Spill ; LA32-NEXT:fst.d $fs0, $sp, 8 
# 8-byte Folded Spill -; LA32-NEXT:move $fp, $a1 -; LA32-NEXT:move $s0, $a0 -; LA32-NEXT:ld.hu $a0, $a0, 0 +; LA32-NEXT:move $fp, $a0 +; LA32-NEXT:ld.hu $s0, $a0, 0 +; LA32-NEXT:ld.hu $a0, $a1, 0 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee) ; LA32-NEXT:fmov.s $fs0, $fa0 -; LA32-NEXT:ld.hu $a0, $fp, 0 +; LA32-NEXT:move $a0, $s0 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee) -; LA32-NEXT:fadd.s $fa0, $fs0, $fa0 +; LA32-NEXT:fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee) -; LA32-NEXT:st.h $a0, $s0, 0 +; LA32-NEXT:st.h $a0, $fp, 0 ; LA32-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT:ld.w
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
llvmbot wrote: @arsenm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108787 >From ad6ff10e0612cce572b0d950b76b9d3c80c6dee3 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 15 Sep 2024 23:06:14 -0400 Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace` This has been moved to `DataLayout`. --- .../llvm/Analysis/TargetTransformInfo.h | 21 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 -- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 - llvm/lib/Analysis/TargetTransformInfo.cpp | 4 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 8 --- .../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 .../Transforms/Scalar/InferAddressSpaces.cpp | 2 +- 7 files changed, 1 insertion(+), 45 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 3411163549de2f..42d3ee328fc1a5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -451,24 +451,6 @@ class TargetTransformInfo { /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; - /// Returns the address space ID for a target's 'flat' address space. Note - /// this is not necessarily the same as addrspace(0), which LLVM sometimes - /// refers to as the generic address space. The flat address space is a - /// generic address space that can be used access multiple segments of memory - /// with different address spaces. Access of a memory location through a - /// pointer with this address space is expected to be legal but slower - /// compared to the same memory location accessed through a pointer with a - /// different address space. - // - /// This is for targets with different pointer representations which can - /// be converted with the addrspacecast instruction. 
If a pointer is converted - /// to this address space, optimizations should attempt to replace the access - /// with the source address space. - /// - /// \returns ~0u if the target does not have such a flat address space to - /// optimize away. - unsigned getFlatAddressSpace() const; - /// Return any intrinsic address operand indexes which may be rewritten if /// they use a flat address space pointer. /// @@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept { virtual bool isAlwaysUniform(const Value *V) = 0; virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0; - virtual unsigned getFlatAddressSpace() = 0; virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; @@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.addrspacesMayAlias(AS0, AS1); } - unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const override { return Impl.collectFlatAddressOperands(OpIndexes, IID); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2819af30cd1704..3902a19a90755a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -115,8 +115,6 @@ class TargetTransformInfoImplBase { return true; } - unsigned getFlatAddressSpace() const { return -1; } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2f2a6a09ffc44d..ce585dfc7a2f39 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -292,11 +292,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return true; } - unsigned getFlatAddressSpace() { -// Return an invalid address space. -return -1; - } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a101..42e3f277c1e291 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -305,10 +305,6 @@ bool llvm::TargetTransformInfo::addrspacesMayAlias(unsigned FromAS, return TTIImpl->addrspacesMayAlias(FromAS, ToAS); } -unsigned TargetTransformInfo::getFlatAddressSpace() const { - return TTIImpl->getFlatAddr
[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713 >From 02b52beaefab0b63c3c09fd1f84b907b89c0cf7b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 12 Sep 2024 15:25:43 -0400 Subject: [PATCH] [Attributor] Use more appropriate approach to check flat address space --- llvm/include/llvm/Transforms/IPO/Attributor.h | 3 --- .../Transforms/IPO/AttributorAttributes.cpp | 26 ++- .../Attributor/address_space_info.ll | 4 ++- .../test/Transforms/Attributor/nocapture-1.ll | 4 +-- .../Transforms/Attributor/value-simplify.ll | 3 +-- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 921fe945539510..ee7c930f8a26ac 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6267,9 +6267,6 @@ struct AAAddressSpace : public StateWrapper { return (AA->getIdAddr() == &ID); } - // No address space which indicates the associated value is dead. 
- static const uint32_t NoAddressSpace = ~0U; - /// Unique ID (due to the unique address) static const char ID; }; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 217c7cccb5775a..642aeae3c12783 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); -if (getAssociatedType()->getPointerAddressSpace()) + +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +if (FlatAS == DataLayout::AS_INVALID) { + indicatePessimisticFixpoint(); + return; +} + +unsigned AS = getAssociatedType()->getPointerAddressSpace(); +if (AS != FlatAS) { + [[maybe_unused]] bool R = takeAddressSpace(AS); + assert(R && "The take should happen"); indicateOptimisticFixpoint(); +} } ChangeStatus updateImpl(Attributor &A) override { @@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { -Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); -if (getAddressSpace() == NoAddressSpace || +if (getAddressSpace() == DataLayout::AS_INVALID || getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +Value *AssociatedValue = &getAssociatedValue(); +Value *OriginalValue = peelAddrspacecast(AssociatedValue); + PointerType *NewPtrTy = PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); bool UseOriginalValue = @@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace { if (!isValidState()) return "addrspace()"; return "addrspace(" + - (AssumedAddressSpace == NoAddressSpace + (AssumedAddressSpace == DataLayout::AS_INVALID ? 
"none" : std::to_string(AssumedAddressSpace)) + ")"; } private: - uint32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = DataLayout::AS_INVALID; bool takeAddressSpace(uint32_t AS) { -if (AssumedAddressSpace == NoAddressSpace) { +if (AssumedAddressSpace == DataLayout::AS_INVALID) { AssumedAddressSpace = AS; return true; } diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll index 73dd93c55b819b..0c8b06aca4 100644 --- a/llvm/test/Transforms/Attributor/address_space_info.ll +++ b/llvm/test/Transforms/Attributor/address_space_info.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK + +; REQUIRES: amdgpu-registered-target @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4 diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 3401ddfdd7d758..de5f31e470edfc 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocap
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258 >From f85ea4140cb95e46e0c2341cb93090f755e44cef Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 11 Sep 2024 12:23:32 -0400 Subject: [PATCH] [Attributor] Take the address space from addrspacecast directly If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal. --- .../Transforms/IPO/AttributorAttributes.cpp | 62 ++- llvm/test/CodeGen/AMDGPU/aa-as-infer.ll | 33 ++ 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 642aeae3c12783..b67d95bc31110d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12587,16 +12587,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } ChangeStatus updateImpl(Attributor &A) override { +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +assert(FlatAS != DataLayout::AS_INVALID); uint32_t OldAddressSpace = AssumedAddressSpace; -auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, -DepClassTy::REQUIRED); -auto Pred = [&](Value &Obj) { + +auto CheckAddressSpace = [&](Value &Obj) { if (isa(&Obj)) return true; + // If an argument in flat address space only has addrspace cast uses, and + // those casts are same, then we take the dst addrspace. 
+ if (auto *Arg = dyn_cast(&Obj)) { +if (FlatAS != DataLayout::AS_INVALID && +Arg->getType()->getPointerAddressSpace() == FlatAS) { + unsigned CastAddrSpace = FlatAS; + for (auto *U : Arg->users()) { +auto *ASCI = dyn_cast(U); +if (!ASCI) + return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); +if (CastAddrSpace != FlatAS && +CastAddrSpace != ASCI->getDestAddressSpace()) + return false; +CastAddrSpace = ASCI->getDestAddressSpace(); + } + if (CastAddrSpace != FlatAS) +return takeAddressSpace(CastAddrSpace); +} + } return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); }; -if (!AUO->forallUnderlyingObjects(Pred)) +auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, +DepClassTy::REQUIRED); +if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) return indicatePessimisticFixpoint(); return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED @@ -12605,17 +12627,22 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { -if (getAddressSpace() == DataLayout::AS_INVALID || -getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) +unsigned NewAS = getAddressSpace(); + +if (NewAS == DataLayout::AS_INVALID || +NewAS == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +assert(FlatAS != DataLayout::AS_INVALID); + Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); +Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS); PointerType *NewPtrTy = -PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); +PointerType::get(getAssociatedType()->getContext(), NewAS); bool UseOriginalValue = -OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace(); +OriginalValue->getType()->getPointerAddressSpace() == NewAS; bool Changed = false; @@ -12675,12 +12702,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { return AssumedAddressSpace == AS; } - static Value *peelAddrspacecast(Value *V) { -if (auto *I = dyn_cast(V)) - return peelAddrspacecast(I->getPointerOperand()); + static Value *peelAddrspacecast(Value *V, unsigned FlatAS) { +if (auto *I = dyn_cast(V)) { + assert(I->getSrcAddressSpace() != FlatAS && + "there should not be flat AS -> non-flat AS"); + return I->getPointerOperand(); +} if (auto *C = dyn_cast(V)) - if (C->getOpcode() == Instruction::AddrSpaceCast) -return peelAddrspacecast(C->getOperand(0)); + if (C->getOpcode() == Instruction::
[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/109110 Backport 1825cf28dc83113200b623ebcf063eea35ade79a Requested by: @heiher >From 4d8867e154dfd99ae6d64de1d97f895c4a44d317 Mon Sep 17 00:00:00 2001 From: hev Date: Sat, 14 Sep 2024 11:19:34 +0800 Subject: [PATCH] [LoongArch][sanitizer] Fix SC_ADDRERR_{RD,WR} missing in the musl environment (#108557) Fixes #108550 (cherry picked from commit 1825cf28dc83113200b623ebcf063eea35ade79a) --- compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 12 1 file changed, 12 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 648df0c4e5a760..b9b1f496df7c98 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { return Unknown; return esr & ESR_ELx_WNR ? Write : Read; # elif defined(__loongarch__) + // In the musl environment, the Linux kernel uapi sigcontext.h is not + // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros, + // copy them here. The LoongArch Linux kernel uapi is already stable, + // so there's no need to worry about the value changing. +#ifndef SC_ADDRERR_RD + // Address error was due to memory load +# define SC_ADDRERR_RD (1 << 30) +#endif +#ifndef SC_ADDRERR_WR + // Address error was due to memory store +# define SC_ADDRERR_WR (1 << 31) +#endif u32 flags = ucontext->uc_mcontext.__flags; if (flags & SC_ADDRERR_RD) return SignalContext::Read; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/109110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (llvmbot) Changes Backport 1825cf28dc83113200b623ebcf063eea35ade79a Requested by: @heiher --- Full diff: https://github.com/llvm/llvm-project/pull/109110.diff 1 Files Affected: - (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+12) ```diff diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 648df0c4e5a760..b9b1f496df7c98 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { return Unknown; return esr & ESR_ELx_WNR ? Write : Read; # elif defined(__loongarch__) + // In the musl environment, the Linux kernel uapi sigcontext.h is not + // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros, + // copy them here. The LoongArch Linux kernel uapi is already stable, + // so there's no need to worry about the value changing. +#ifndef SC_ADDRERR_RD + // Address error was due to memory load +# define SC_ADDRERR_RD (1 << 30) +#endif +#ifndef SC_ADDRERR_WR + // Address error was due to memory store +# define SC_ADDRERR_WR (1 << 31) +#endif u32 flags = ucontext->uc_mcontext.__flags; if (flags & SC_ADDRERR_RD) return SignalContext::Read; ``` https://github.com/llvm/llvm-project/pull/109110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)
Martin =?utf-8?q?Storsjö?= , Martin =?utf-8?q?Storsjö?= Message-ID: In-Reply-To: https://github.com/tru updated https://github.com/llvm/llvm-project/pull/107146 >From a82122d63db137b7210e54b127cc2e45fc31fd69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 3 Sep 2024 22:45:54 +0300 Subject: [PATCH 1/3] [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca. That commit changed the clang-cl options /Zi and /Z7 to be implemented as aliases of -g rather than having separate handling. This had the unintended effect, that when assembling .s files with clang-cl, the /Z7 option (which implies using CodeView debug info) was treated as a -g option, which causes `ClangAs::ConstructJob` to pick up the option as part of `Args.getLastArg(options::OPT_g_Group)`, which sets the `WantDebug` variable. Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or `-gcodeview` options have been set, and if not, we pick the default debug format for the current toolchain. However, in `ClangAs`, if debug info has been enabled, it always adds DWARF debug info. Add similar logic in `ClangAs` - check if the user has explicitly requested either DWARF or CodeView, otherwise look up the toolchain default. If we (either implicitly or explicitly) should be producing CodeView, don't enable the default `ClangAs` DWARF generation. This fixes the issue, where assembling a single `.s` file with clang-cl, with the /Z7 option, causes the file to contain some DWARF sections. This causes the output executable to contain DWARF, in addition to the separate intended main PDB file. By having the output executable contain DWARF sections, LLDB only looks at the (very little) DWARF info in the executable, rather than looking for a separate standalone PDB file. This caused an issue with LLDB's tests, https://github.com/llvm/llvm-project/issues/101710. 
(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10) --- clang/lib/Driver/ToolChains/Clang.cpp | 26 ++ clang/test/Driver/debug-options-as.c | 17 - 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 366b147a052bf2..8858c318aba7a1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); + // If a -gdwarf argument appeared, remember it. + bool EmitDwarf = false; + if (const Arg *A = getDwarfNArg(Args)) +EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain()); + + bool EmitCodeView = false; + if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) +EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain()); + + // If the user asked for debug info but did not explicitly specify -gcodeview + // or -gdwarf, ask the toolchain for the default format. + if (!EmitCodeView && !EmitDwarf && WantDebug) { +switch (getToolChain().getDefaultDebugFormat()) { +case llvm::codegenoptions::DIF_CodeView: + EmitCodeView = true; + break; +case llvm::codegenoptions::DIF_DWARF: + EmitDwarf = true; + break; +} + } + + // If the arguments don't imply DWARF, don't emit any debug info here. + if (!EmitDwarf) +WantDebug = false; + llvm::codegenoptions::DebugInfoKind DebugInfoKind = llvm::codegenoptions::NoDebugInfo; diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c index c83c0cb90431d3..3e1ae109711003 100644 --- a/clang/test/Driver/debug-options-as.c +++ b/clang/test/Driver/debug-options-as.c @@ -19,12 +19,27 @@ // GGDB0-NOT: -debug-info-kind= // Check to make sure clang with -g on a .s file gets passed. 
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ +// This requires a target that defaults to DWARF. +// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck %s // // CHECK: "-cc1as" // CHECK: "-debug-info-kind=constructor" +// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't +// trigger producing DWARF output. +// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x assembler %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s +// +// MSVC: "-cc1as" +// MSVC-NOT: "-debug-info-kind=constructor" + +// Check that clang-cl with the -Z7 option works the same, not triggering +// any DWARF output. +// +// RUN: %clang_cl -### -c -Z7 -x assembler %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s + // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer. // RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
[llvm-branch-commits] [lldb] release/19.x: [lldb] Fix some tests that fail with system libstdc++ (#106885) (PR #107938)
https://github.com/felipepiovezan approved this pull request. This LGTM! https://github.com/llvm/llvm-project/pull/107938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108787 >From 3541dca143c46f0c775bb0c51c8d562dda322cb2 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 15 Sep 2024 23:06:14 -0400 Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace` This has been moved to `DataLayout`. --- .../llvm/Analysis/TargetTransformInfo.h | 21 --- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 -- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 - llvm/lib/Analysis/TargetTransformInfo.cpp | 4 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 8 --- .../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 .../Transforms/Scalar/InferAddressSpaces.cpp | 2 +- .../AMDGPU/noop-ptrint-pair.ll| 2 +- .../old-pass-regressions-inseltpoison.ll | 2 +- .../AMDGPU/old-pass-regressions.ll| 2 +- .../InferAddressSpaces/AMDGPU/ptrmask.ll | 2 +- 11 files changed, 5 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 3411163549de2f..42d3ee328fc1a5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -451,24 +451,6 @@ class TargetTransformInfo { /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; - /// Returns the address space ID for a target's 'flat' address space. Note - /// this is not necessarily the same as addrspace(0), which LLVM sometimes - /// refers to as the generic address space. The flat address space is a - /// generic address space that can be used access multiple segments of memory - /// with different address spaces. Access of a memory location through a - /// pointer with this address space is expected to be legal but slower - /// compared to the same memory location accessed through a pointer with a - /// different address space. 
- // - /// This is for targets with different pointer representations which can - /// be converted with the addrspacecast instruction. If a pointer is converted - /// to this address space, optimizations should attempt to replace the access - /// with the source address space. - /// - /// \returns ~0u if the target does not have such a flat address space to - /// optimize away. - unsigned getFlatAddressSpace() const; - /// Return any intrinsic address operand indexes which may be rewritten if /// they use a flat address space pointer. /// @@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept { virtual bool isAlwaysUniform(const Value *V) = 0; virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0; - virtual unsigned getFlatAddressSpace() = 0; virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; @@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.addrspacesMayAlias(AS0, AS1); } - unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const override { return Impl.collectFlatAddressOperands(OpIndexes, IID); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2819af30cd1704..3902a19a90755a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -115,8 +115,6 @@ class TargetTransformInfoImplBase { return true; } - unsigned getFlatAddressSpace() const { return -1; } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 2f2a6a09ffc44d..ce585dfc7a2f39 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -292,11 +292,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return true; } - unsigned getFlatAddressSpace() { -// Return an invalid address space. -return -1; - } - bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a101..42e3f277c1e291 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -305,10 +305,6
[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713 >From cfce39f2434a0a9b2cab278c909380edef9ce896 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 12 Sep 2024 15:25:43 -0400 Subject: [PATCH] [Attributor] Use more appropriate approach to check flat address space --- llvm/include/llvm/Transforms/IPO/Attributor.h | 3 --- .../Transforms/IPO/AttributorAttributes.cpp | 26 ++- .../Attributor/address_space_info.ll | 4 ++- .../test/Transforms/Attributor/nocapture-1.ll | 4 +-- .../Transforms/Attributor/value-simplify.ll | 3 +-- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 921fe945539510..ee7c930f8a26ac 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6267,9 +6267,6 @@ struct AAAddressSpace : public StateWrapper { return (AA->getIdAddr() == &ID); } - // No address space which indicates the associated value is dead. 
- static const uint32_t NoAddressSpace = ~0U; - /// Unique ID (due to the unique address) static const char ID; }; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 217c7cccb5775a..642aeae3c12783 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); -if (getAssociatedType()->getPointerAddressSpace()) + +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +if (FlatAS == DataLayout::AS_INVALID) { + indicatePessimisticFixpoint(); + return; +} + +unsigned AS = getAssociatedType()->getPointerAddressSpace(); +if (AS != FlatAS) { + [[maybe_unused]] bool R = takeAddressSpace(AS); + assert(R && "The take should happen"); indicateOptimisticFixpoint(); +} } ChangeStatus updateImpl(Attributor &A) override { @@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { -Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); -if (getAddressSpace() == NoAddressSpace || +if (getAddressSpace() == DataLayout::AS_INVALID || getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +Value *AssociatedValue = &getAssociatedValue(); +Value *OriginalValue = peelAddrspacecast(AssociatedValue); + PointerType *NewPtrTy = PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); bool UseOriginalValue = @@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace { if (!isValidState()) return "addrspace()"; return "addrspace(" + - (AssumedAddressSpace == NoAddressSpace + (AssumedAddressSpace == DataLayout::AS_INVALID ? 
"none" : std::to_string(AssumedAddressSpace)) + ")"; } private: - uint32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = DataLayout::AS_INVALID; bool takeAddressSpace(uint32_t AS) { -if (AssumedAddressSpace == NoAddressSpace) { +if (AssumedAddressSpace == DataLayout::AS_INVALID) { AssumedAddressSpace = AS; return true; } diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll index 73dd93c55b819b..0c8b06aca4 100644 --- a/llvm/test/Transforms/Attributor/address_space_info.ll +++ b/llvm/test/Transforms/Attributor/address_space_info.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK + +; REQUIRES: amdgpu-registered-target @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4 diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 3401ddfdd7d758..de5f31e470edfc 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocap
[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258 >From 081015ee9e9988d7340eff720af47e9292fb3d1c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 11 Sep 2024 12:23:32 -0400 Subject: [PATCH] [Attributor] Take the address space from addrspacecast directly If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal. --- .../Transforms/IPO/AttributorAttributes.cpp | 62 ++- llvm/test/CodeGen/AMDGPU/aa-as-infer.ll | 33 ++ 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 642aeae3c12783..b67d95bc31110d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12587,16 +12587,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } ChangeStatus updateImpl(Attributor &A) override { +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +assert(FlatAS != DataLayout::AS_INVALID); uint32_t OldAddressSpace = AssumedAddressSpace; -auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, -DepClassTy::REQUIRED); -auto Pred = [&](Value &Obj) { + +auto CheckAddressSpace = [&](Value &Obj) { if (isa(&Obj)) return true; + // If an argument in flat address space only has addrspace cast uses, and + // those casts are same, then we take the dst addrspace. 
+ if (auto *Arg = dyn_cast(&Obj)) { +if (FlatAS != DataLayout::AS_INVALID && +Arg->getType()->getPointerAddressSpace() == FlatAS) { + unsigned CastAddrSpace = FlatAS; + for (auto *U : Arg->users()) { +auto *ASCI = dyn_cast(U); +if (!ASCI) + return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); +if (CastAddrSpace != FlatAS && +CastAddrSpace != ASCI->getDestAddressSpace()) + return false; +CastAddrSpace = ASCI->getDestAddressSpace(); + } + if (CastAddrSpace != FlatAS) +return takeAddressSpace(CastAddrSpace); +} + } return takeAddressSpace(Obj.getType()->getPointerAddressSpace()); }; -if (!AUO->forallUnderlyingObjects(Pred)) +auto *AUO = A.getOrCreateAAFor(getIRPosition(), this, +DepClassTy::REQUIRED); +if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) return indicatePessimisticFixpoint(); return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED @@ -12605,17 +12627,22 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { -if (getAddressSpace() == DataLayout::AS_INVALID || -getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) +unsigned NewAS = getAddressSpace(); + +if (NewAS == DataLayout::AS_INVALID || +NewAS == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; +unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); +assert(FlatAS != DataLayout::AS_INVALID); + Value *AssociatedValue = &getAssociatedValue(); -Value *OriginalValue = peelAddrspacecast(AssociatedValue); +Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS); PointerType *NewPtrTy = -PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); +PointerType::get(getAssociatedType()->getContext(), NewAS); bool UseOriginalValue = -OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace(); +OriginalValue->getType()->getPointerAddressSpace() == NewAS; bool Changed = false; @@ -12675,12 +12702,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { return AssumedAddressSpace == AS; } - static Value *peelAddrspacecast(Value *V) { -if (auto *I = dyn_cast(V)) - return peelAddrspacecast(I->getPointerOperand()); + static Value *peelAddrspacecast(Value *V, unsigned FlatAS) { +if (auto *I = dyn_cast(V)) { + assert(I->getSrcAddressSpace() != FlatAS && + "there should not be flat AS -> non-flat AS"); + return I->getPointerOperand(); +} if (auto *C = dyn_cast(V)) - if (C->getOpcode() == Instruction::AddrSpaceCast) -return peelAddrspacecast(C->getOperand(0)); + if (C->getOpcode() == Instruction::
[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)
heiher wrote: The purpose of cherry-picking this patch into the 19.x release branch is to resolve a build failure. This enables the Rust `loongarch64-unknown-linux-musl` target to support sanitizers starting from LLVM 19.x. This patch is a straightforward fix for a build issue and does not introduce any functional changes, so there is no apparent risk involved. https://github.com/llvm/llvm-project/pull/109110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] 1f67363 - Revert "[flang][runtime] Use cuda::std::complex in F18 runtime CUDA build. (#…"
Author: Slava Zakharin Date: 2024-09-18T11:22:08-07:00 New Revision: 1f6736320fde2fb17f9b3c74b571b620c5fee72e URL: https://github.com/llvm/llvm-project/commit/1f6736320fde2fb17f9b3c74b571b620c5fee72e DIFF: https://github.com/llvm/llvm-project/commit/1f6736320fde2fb17f9b3c74b571b620c5fee72e.diff LOG: Revert "[flang][runtime] Use cuda::std::complex in F18 runtime CUDA build. (#…" This reverts commit be187a6812fb6e8984886c28a502ec69bdaa4ad4. Added: Modified: flang/include/flang/Runtime/cpp-type.h flang/include/flang/Runtime/matmul-instances.inc flang/include/flang/Runtime/numeric.h flang/include/flang/Runtime/reduce.h flang/include/flang/Runtime/reduction.h flang/include/flang/Runtime/transformational.h flang/runtime/complex-powi.cpp flang/runtime/complex-reduction.c flang/runtime/dot-product.cpp flang/runtime/extrema.cpp flang/runtime/matmul-transpose.cpp flang/runtime/matmul.cpp flang/runtime/numeric.cpp flang/runtime/product.cpp flang/runtime/random.cpp flang/runtime/reduce.cpp flang/runtime/reduction-templates.h flang/runtime/sum.cpp flang/runtime/transformational.cpp flang/unittests/Runtime/Numeric.cpp flang/unittests/Runtime/Transformational.cpp Removed: flang/include/flang/Common/float80.h flang/include/flang/Runtime/complex.h diff --git a/flang/include/flang/Common/float80.h b/flang/include/flang/Common/float80.h deleted file mode 100644 index 1838f7b13c8bb2..00 --- a/flang/include/flang/Common/float80.h +++ /dev/null @@ -1,43 +0,0 @@ -/*===-- flang/Common/float80.h --*- C -*-=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===--===*/ - -/* This header is usable in both C and C++ code. - * Isolates build compiler checks to determine if the 80-bit - * floating point format is supported via a particular C type. - * It defines CFloat80Type and CppFloat80Type aliases for this - * C type. 
- */ - -#ifndef FORTRAN_COMMON_FLOAT80_H_ -#define FORTRAN_COMMON_FLOAT80_H_ - -#include "api-attrs.h" -#include - -#if LDBL_MANT_DIG == 64 -#undef HAS_FLOAT80 -#define HAS_FLOAT80 1 -#endif - -#if defined(RT_DEVICE_COMPILATION) && defined(__CUDACC__) -/* - * 'long double' is treated as 'double' in the CUDA device code, - * and there is no support for 80-bit floating point format. - * This is probably true for most offload devices, so RT_DEVICE_COMPILATION - * check should be enough. For the time being, guard it with __CUDACC__ - * as well. - */ -#undef HAS_FLOAT80 -#endif - -#if HAS_FLOAT80 -typedef long double CFloat80Type; -typedef long double CppFloat80Type; -#endif - -#endif /* FORTRAN_COMMON_FLOAT80_H_ */ diff --git a/flang/include/flang/Runtime/complex.h b/flang/include/flang/Runtime/complex.h deleted file mode 100644 index b7ad1376bffbf1..00 --- a/flang/include/flang/Runtime/complex.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- include/flang/Runtime/complex.h -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// - -// A single way to expose C++ complex class in files that can be used -// in F18 runtime build. With inclusion of this file std::complex -// and the related names become available, though, they may correspond -// to alternative definitions (e.g. from cuda::std namespace). 
- -#ifndef FORTRAN_RUNTIME_COMPLEX_H -#define FORTRAN_RUNTIME_COMPLEX_H - -#if RT_USE_LIBCUDACXX -#include -namespace Fortran::runtime::rtcmplx { -using cuda::std::complex; -using cuda::std::conj; -} // namespace Fortran::runtime::rtcmplx -#else // !RT_USE_LIBCUDACXX -#include -namespace Fortran::runtime::rtcmplx { -using std::complex; -using std::conj; -} // namespace Fortran::runtime::rtcmplx -#endif // !RT_USE_LIBCUDACXX - -#endif // FORTRAN_RUNTIME_COMPLEX_H diff --git a/flang/include/flang/Runtime/cpp-type.h b/flang/include/flang/Runtime/cpp-type.h index aef0fbd7ede586..fe21dd544cf7d8 100644 --- a/flang/include/flang/Runtime/cpp-type.h +++ b/flang/include/flang/Runtime/cpp-type.h @@ -13,9 +13,8 @@ #include "flang/Common/Fortran.h" #include "flang/Common/float128.h" -#include "flang/Common/float80.h" #include "flang/Common/uint128.h" -#include "flang/Runtime/complex.h" +#include #include #if __cplusplus >= 202302 #include @@ -71,9 +70,9 @@ template <> struct CppTypeForHelper { using type = double; #endif }; -#if HAS_FLOAT80 +#if LDBL_MANT_DIG == 6
[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/109125 Backport a111f9119a5ec77c19a514ec09454218f739454f 0f47e3aebdd2a4a938468a272ea4224552dbf176 Requested by: @heiher >From 67066255ada3b70def779630a473373aa36cb8df Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 10 Sep 2024 09:19:39 +0800 Subject: [PATCH 1/2] [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit constant. It will produce wrong result when `X == 2`: https://alive2.llvm.org/ce/z/pzfGZZ This patch adds additional `sexti32` checks to operands of `PatGprGpr_32`. Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp Fix #107414. (cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f) --- .../Target/LoongArch/LoongArchInstrInfo.td| 5 +- .../ir-instruction/sdiv-udiv-srem-urem.ll | 67 ++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ef647a42778737..339d50bd819217 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>; /// Generic pattern classes +def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ + return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32); +}]>; class PatGprGpr : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; class PatGprGpr_32 -: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; +: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>; class PatGpr : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index ab3eec240db3c1..c22acdb4969071 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64: # %bb.0: # %entry ; LA64-NEXT:addi.w $a1, $a1, 0 ; LA64-NEXT:addi.w $a0, $a0, 0 -; LA64-NEXT:div.w $a0, $a0, $a1 +; LA64-NEXT:div.d $a0, $a0, $a1 +; LA64-NEXT:addi.w $a0, $a0, 0 ; LA64-NEXT:ret ; ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32: @@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT:addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT:addi.w $a0, $a0, 0 -; LA64-TRAP-NEXT:div.w $a0, $a0, $a1 +; LA64-TRAP-NEXT:div.d $a0, $a0, $a1 ; LA64-TRAP-NEXT:bnez $a1, .LBB5_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT:break 7 ; LA64-TRAP-NEXT: .LBB5_2: # %entry +; LA64-TRAP-NEXT:addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT:ret entry: %r = sdiv i32 %a, %b @@ -1151,3 +1153,64 @@ entry: %r = urem i64 %a, %b ret i64 %r } + +define signext i32 @pr107414(i32 signext %x) { +; LA32-LABEL: pr107414: +; LA32: # %bb.0: # %entry +; LA32-NEXT:addi.w $sp, $sp, -16 +; LA32-NEXT:.cfi_def_cfa_offset 16 +; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT:.cfi_offset 1, -4 +; LA32-NEXT:move $a2, $a0 +; LA32-NEXT:srai.w $a3, $a0, 31 +; LA32-NEXT:lu12i.w $a0, -266831 +; LA32-NEXT:ori $a0, $a0, 3337 +; LA32-NEXT:move $a1, $zero +; LA32-NEXT:bl %plt(__divdi3) +; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT:addi.w $sp, $sp, 16 +; LA32-NEXT:ret +; +; LA64-LABEL: pr107414: +; LA64: # %bb.0: # %entry +; LA64-NEXT:lu12i.w $a1, -266831 +; LA64-NEXT:ori $a1, $a1, 3337 +; LA64-NEXT:lu32i.d $a1, 0 +; LA64-NEXT:div.d $a0, $a1, $a0 +; LA64-NEXT:addi.w $a0, $a0, 0 +; LA64-NEXT:ret +; +; LA32-TRAP-LABEL: pr107414: +; LA32-TRAP: # %bb.0: # %entry +; LA32-TRAP-NEXT:addi.w $sp, $sp, -16 
+; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16 +; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-TRAP-NEXT:.cfi_offset 1, -4 +; LA32-TRAP-NEXT:move $a2, $a0 +; LA32-TRAP-NEXT:srai.w $a3, $a0, 31 +; LA32-TRAP-NEXT:lu12i.w $a0, -266831 +; LA32-TRAP-NEXT:ori $a0, $a0, 3337 +; LA32-TRAP-NEXT:move $a1, $zero +; LA32-TRAP-NEXT:bl %plt(__divdi3) +; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-TRAP-NEXT:addi.w $sp, $sp, 16 +; LA32-TRAP-NEXT:ret +; +; LA64-TRAP-LABEL: pr107414: +; LA64-TRAP: # %bb.0: # %entry +; LA64-TRAP-NEXT:lu12i.w $a1, -266831 +; LA64-TRAP-NEXT:ori $a1, $a1, 3337 +; LA64-TRAP-NEXT:lu32i.d $a1, 0 +; LA64-TRAP-NEXT:div.d $
[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/109125 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)
llvmbot wrote: @wangleiat @SixWeining What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/109125 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)
llvmbot wrote: @llvm/pr-subscribers-backend-loongarch Author: None (llvmbot) Changes Backport a111f9119a5ec77c19a514ec09454218f739454f 0f47e3aebdd2a4a938468a272ea4224552dbf176 Requested by: @heiher --- Full diff: https://github.com/llvm/llvm-project/pull/109125.diff 3 Files Affected: - (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+4-1) - (modified) llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp (+15) - (modified) llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll (+61) ``diff diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ef647a42778737..339d50bd819217 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>; /// Generic pattern classes +def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ + return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32); +}]>; class PatGprGpr : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; class PatGprGpr_32 -: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; +: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>; class PatGpr : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp index abac69054f3b91..ab90409fdf47d0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp @@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, break; } return false; +// If all incoming values are sign-extended and all users only use +// the lower 32 bits, then convert them to W versions. 
+case LoongArch::DIV_D: { + if (!AddRegToWorkList(MI->getOperand(1).getReg())) +return false; + if (!AddRegToWorkList(MI->getOperand(2).getReg())) +return false; + if (hasAllWUsers(*MI, ST, MRI)) { +FixableDef.insert(MI); +break; + } + return false; +} } } @@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) { return LoongArch::ADDI_W; case LoongArch::ADD_D: return LoongArch::ADD_W; + case LoongArch::DIV_D: +return LoongArch::DIV_W; case LoongArch::LD_D: case LoongArch::LD_WU: return LoongArch::LD_W; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index ab3eec240db3c1..c5af79157eaadc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -1151,3 +1151,64 @@ entry: %r = urem i64 %a, %b ret i64 %r } + +define signext i32 @pr107414(i32 signext %x) { +; LA32-LABEL: pr107414: +; LA32: # %bb.0: # %entry +; LA32-NEXT:addi.w $sp, $sp, -16 +; LA32-NEXT:.cfi_def_cfa_offset 16 +; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT:.cfi_offset 1, -4 +; LA32-NEXT:move $a2, $a0 +; LA32-NEXT:srai.w $a3, $a0, 31 +; LA32-NEXT:lu12i.w $a0, -266831 +; LA32-NEXT:ori $a0, $a0, 3337 +; LA32-NEXT:move $a1, $zero +; LA32-NEXT:bl %plt(__divdi3) +; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT:addi.w $sp, $sp, 16 +; LA32-NEXT:ret +; +; LA64-LABEL: pr107414: +; LA64: # %bb.0: # %entry +; LA64-NEXT:lu12i.w $a1, -266831 +; LA64-NEXT:ori $a1, $a1, 3337 +; LA64-NEXT:lu32i.d $a1, 0 +; LA64-NEXT:div.d $a0, $a1, $a0 +; LA64-NEXT:addi.w $a0, $a0, 0 +; LA64-NEXT:ret +; +; LA32-TRAP-LABEL: pr107414: +; LA32-TRAP: # %bb.0: # %entry +; LA32-TRAP-NEXT:addi.w $sp, $sp, -16 +; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16 +; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-TRAP-NEXT:.cfi_offset 1, -4 +; LA32-TRAP-NEXT:move $a2, $a0 +; LA32-TRAP-NEXT:srai.w $a3, $a0, 31 
+; LA32-TRAP-NEXT:lu12i.w $a0, -266831 +; LA32-TRAP-NEXT:ori $a0, $a0, 3337 +; LA32-TRAP-NEXT:move $a1, $zero +; LA32-TRAP-NEXT:bl %plt(__divdi3) +; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-TRAP-NEXT:addi.w $sp, $sp, 16 +; LA32-TRAP-NEXT:ret +; +; LA64-TRAP-LABEL: pr107414: +; LA64-TRAP: # %bb.0: # %entry +; LA64-TRAP-NEXT:lu12i.w $a1, -266831 +; LA64-TRAP-NEXT:ori $a1, $a1, 3337 +; LA64-TRAP-NEXT:lu32i.d $a1, 0 +; LA64-TRAP-NEXT:div.d $a1, $a1, $a0 +; LA64-TRAP-NEXT:bnez $a0, .LBB32_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry +; LA64-TRAP-NEXT:break 7 +; LA64-TRAP-NEXT: .LBB32_2: # %entry +; LA64-TRAP-NEXT:addi.w $a0, $a1, 0 +; LA64-TRAP-NEXT:ret +entry: + %conv = sext i32 %x to i64 + %div = sdiv i64 3202030857, %conv + %c
[llvm-branch-commits] [llvm] [ADT] Use range-based helper functions in SmallSet (PR #108585)
@@ -234,19 +225,12 @@ class SmallSet { /// Check if the SmallSet contains the given element. bool contains(const T &V) const { if (isSmall()) - return vfind(V) != Vector.end(); -return Set.find(V) != Set.end(); + return llvm::is_contained(Vector, V); +return llvm::is_contained(Set, V); nikic wrote: If you want to have an abstraction over contains() and find() != end(), I'd suggest adding another helper, which does not also include the linear scan case. Though I don't think this is really worthwhile, as the existing find() != end() code works fine. https://github.com/llvm/llvm-project/pull/108585 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/107146 >From 64075837b5532108a1fe96a5b158feb7a9025694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 3 Sep 2024 22:45:54 +0300 Subject: [PATCH] [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca. That commit changed the clang-cl options /Zi and /Z7 to be implemented as aliases of -g rather than having separate handling. This had the unintended effect, that when assembling .s files with clang-cl, the /Z7 option (which implies using CodeView debug info) was treated as a -g option, which causes `ClangAs::ConstructJob` to pick up the option as part of `Args.getLastArg(options::OPT_g_Group)`, which sets the `WantDebug` variable. Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or `-gcodeview` options have been set, and if not, we pick the default debug format for the current toolchain. However, in `ClangAs`, if debug info has been enabled, it always adds DWARF debug info. Add similar logic in `ClangAs` - check if the user has explicitly requested either DWARF or CodeView, otherwise look up the toolchain default. If we (either implicitly or explicitly) should be producing CodeView, don't enable the default `ClangAs` DWARF generation. This fixes the issue, where assembling a single `.s` file with clang-cl, with the /Z7 option, causes the file to contain some DWARF sections. This causes the output executable to contain DWARF, in addition to the separate intended main PDB file. By having the output executable contain DWARF sections, LLDB only looks at the (very little) DWARF info in the executable, rather than looking for a separate standalone PDB file. This caused an issue with LLDB's tests, https://github.com/llvm/llvm-project/issues/101710. 
(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10) --- clang/lib/Driver/ToolChains/Clang.cpp | 26 ++ clang/test/Driver/debug-options-as.c | 17 - 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 366b147a052bf2..8858c318aba7a1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); + // If a -gdwarf argument appeared, remember it. + bool EmitDwarf = false; + if (const Arg *A = getDwarfNArg(Args)) +EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain()); + + bool EmitCodeView = false; + if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) +EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain()); + + // If the user asked for debug info but did not explicitly specify -gcodeview + // or -gdwarf, ask the toolchain for the default format. + if (!EmitCodeView && !EmitDwarf && WantDebug) { +switch (getToolChain().getDefaultDebugFormat()) { +case llvm::codegenoptions::DIF_CodeView: + EmitCodeView = true; + break; +case llvm::codegenoptions::DIF_DWARF: + EmitDwarf = true; + break; +} + } + + // If the arguments don't imply DWARF, don't emit any debug info here. + if (!EmitDwarf) +WantDebug = false; + llvm::codegenoptions::DebugInfoKind DebugInfoKind = llvm::codegenoptions::NoDebugInfo; diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c index c83c0cb90431d3..cb0492177ff47a 100644 --- a/clang/test/Driver/debug-options-as.c +++ b/clang/test/Driver/debug-options-as.c @@ -19,12 +19,27 @@ // GGDB0-NOT: -debug-info-kind= // Check to make sure clang with -g on a .s file gets passed. 
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ +// This requires a target that defaults to DWARF. +// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck %s // // CHECK: "-cc1as" // CHECK: "-debug-info-kind=constructor" +// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't +// trigger producing DWARF output. +// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x assembler %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s +// +// MSVC: "-cc1as" +// MSVC-NOT: "-debug-info-kind=constructor" + +// Check that clang-cl with the -Z7 option works the same, not triggering +// any DWARF output. +// +// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s + // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer. // RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck -check-prefix=P %s __
[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)
github-actions[bot] wrote: @mstorsjo (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/107146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 6407583 - [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686)
Author: Martin Storsjö Date: 2024-09-18T16:01:50+02:00 New Revision: 64075837b5532108a1fe96a5b158feb7a9025694 URL: https://github.com/llvm/llvm-project/commit/64075837b5532108a1fe96a5b158feb7a9025694 DIFF: https://github.com/llvm/llvm-project/commit/64075837b5532108a1fe96a5b158feb7a9025694.diff LOG: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca. That commit changed the clang-cl options /Zi and /Z7 to be implemented as aliases of -g rather than having separate handling. This had the unintended effect, that when assembling .s files with clang-cl, the /Z7 option (which implies using CodeView debug info) was treated as a -g option, which causes `ClangAs::ConstructJob` to pick up the option as part of `Args.getLastArg(options::OPT_g_Group)`, which sets the `WantDebug` variable. Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or `-gcodeview` options have been set, and if not, we pick the default debug format for the current toolchain. However, in `ClangAs`, if debug info has been enabled, it always adds DWARF debug info. Add similar logic in `ClangAs` - check if the user has explicitly requested either DWARF or CodeView, otherwise look up the toolchain default. If we (either implicitly or explicitly) should be producing CodeView, don't enable the default `ClangAs` DWARF generation. This fixes the issue, where assembling a single `.s` file with clang-cl, with the /Z7 option, causes the file to contain some DWARF sections. This causes the output executable to contain DWARF, in addition to the separate intended main PDB file. By having the output executable contain DWARF sections, LLDB only looks at the (very little) DWARF info in the executable, rather than looking for a separate standalone PDB file. This caused an issue with LLDB's tests, https://github.com/llvm/llvm-project/issues/101710. 
(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10) Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/debug-options-as.c Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 366b147a052bf2..8858c318aba7a1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); + // If a -gdwarf argument appeared, remember it. + bool EmitDwarf = false; + if (const Arg *A = getDwarfNArg(Args)) +EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain()); + + bool EmitCodeView = false; + if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) +EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain()); + + // If the user asked for debug info but did not explicitly specify -gcodeview + // or -gdwarf, ask the toolchain for the default format. + if (!EmitCodeView && !EmitDwarf && WantDebug) { +switch (getToolChain().getDefaultDebugFormat()) { +case llvm::codegenoptions::DIF_CodeView: + EmitCodeView = true; + break; +case llvm::codegenoptions::DIF_DWARF: + EmitDwarf = true; + break; +} + } + + // If the arguments don't imply DWARF, don't emit any debug info here. + if (!EmitDwarf) +WantDebug = false; + llvm::codegenoptions::DebugInfoKind DebugInfoKind = llvm::codegenoptions::NoDebugInfo; diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c index c83c0cb90431d3..cb0492177ff47a 100644 --- a/clang/test/Driver/debug-options-as.c +++ b/clang/test/Driver/debug-options-as.c @@ -19,12 +19,27 @@ // GGDB0-NOT: -debug-info-kind= // Check to make sure clang with -g on a .s file gets passed. -// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ +// This requires a target that defaults to DWARF. 
+// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck %s // // CHECK: "-cc1as" // CHECK: "-debug-info-kind=constructor" +// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't +// trigger producing DWARF output. +// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x assembler %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s +// +// MSVC: "-cc1as" +// MSVC-NOT: "-debug-info-kind=constructor" + +// Check that clang-cl with the -Z7 option works the same, not triggering +// any DWARF output. +// +// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s + // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer. // RUN: %clang -### -c -integrated-as
[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/107146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 4cabae1 - Revert "[clang] Increase VecLib bitfield size to 4 bits in CodeGenOptions.def…"
Author: Aaron Ballman Date: 2024-09-18T11:33:44-04:00 New Revision: 4cabae1e59f75c9be5a1156cc785c93ece49c531 URL: https://github.com/llvm/llvm-project/commit/4cabae1e59f75c9be5a1156cc785c93ece49c531 DIFF: https://github.com/llvm/llvm-project/commit/4cabae1e59f75c9be5a1156cc785c93ece49c531.diff LOG: Revert "[clang] Increase VecLib bitfield size to 4 bits in CodeGenOptions.def…" This reverts commit 475ceca859233b387c22f13ecef581158ef36346. Added: Modified: clang/include/clang/Basic/CodeGenOptions.def Removed: clang/unittests/CodeGen/AllLibrariesFit.cpp clang/unittests/CodeGen/EncodingDecodingTest.cpp clang/unittests/CodeGen/SimulatedOverflowTest.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index b78ae61e6509ea..b600198998d85b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -375,18 +375,8 @@ ENUM_CODEGENOPT(Inlining, InliningMethod, 2, NormalInlining) /// The maximum stack size a function can have to be considered for inlining. VALUE_CODEGENOPT(InlineMaxStackSize, 32, UINT_MAX) -// Define the number of bits required for the VecLib enum -#define VECLIB_BIT_COUNT (llvm::countPopulation(llvm::driver::VectorLibrary::MaxLibrary)) - -// Ensure the VecLib bitfield has enough space for future vector libraries. -// The number of bits is determined automatically based on the number of enum values. -static_assert(static_cast(llvm::driver::VectorLibrary::MaxLibrary) <= (1 << VECLIB_BIT_COUNT), - "VecLib bitfield size is too small to accommodate all vector libraries."); - -// VecLib definition in CodeGenOptions.def -ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, VECLIB_BIT_COUNT, llvm::driver::VectorLibrary::NoLibrary) - -#undef VECLIB_BIT_COUNT +// Vector functions library to use. +ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, llvm::driver::VectorLibrary::NoLibrary) /// The default TLS model to use. 
ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel) diff --git a/clang/unittests/CodeGen/AllLibrariesFit.cpp b/clang/unittests/CodeGen/AllLibrariesFit.cpp deleted file mode 100644 index dfe63b557729ee..00 --- a/clang/unittests/CodeGen/AllLibrariesFit.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "clang/Basic/CodeGenOptions.h" -#include "llvm/Driver/Options.h" -#include "gtest/gtest.h" - -TEST(VecLibBitfieldTest, AllLibrariesFit) { - // We expect that all vector libraries fit in the bitfield size - EXPECT_LE(static_cast(llvm::driver::VectorLibrary::MaxLibrary), -(1 << VECLIB_BIT_COUNT)) - << "VecLib bitfield size is too small!"; - } diff --git a/clang/unittests/CodeGen/EncodingDecodingTest.cpp b/clang/unittests/CodeGen/EncodingDecodingTest.cpp deleted file mode 100644 index 67c89ef07c428b..00 --- a/clang/unittests/CodeGen/EncodingDecodingTest.cpp +++ /dev/null @@ -1,17 +0,0 @@ -TEST(VecLibBitfieldTest, EncodingDecodingTest) { - clang::CodeGenOptions Opts; - - // Test encoding and decoding for each vector library - for (int i = static_cast(llvm::driver::VectorLibrary::Accelerate); - i <= static_cast(llvm::driver::VectorLibrary::MaxLibrary); ++i) { - -Opts.VecLib = static_cast(i); - -// Encode and then decode -llvm::driver::VectorLibrary decodedValue = -static_cast(Opts.VecLib); - -EXPECT_EQ(decodedValue, Opts.VecLib) -<< "Encoding/Decoding failed for vector library " << i; - } -} diff --git a/clang/unittests/CodeGen/SimulatedOverflowTest.cpp b/clang/unittests/CodeGen/SimulatedOverflowTest.cpp deleted file mode 100644 index acfeaf7498b6d0..00 --- a/clang/unittests/CodeGen/SimulatedOverflowTest.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Simulate the addition of a new library without increasing the bitfield size -enum class SimulatedVectorLibrary { - Accelerate = 0, - LIBMVEC, - MASSV, - SVML, - SLEEF, - Darwin_libsystem_m, - ArmPL, - AMDLIBM, - NoLibrary, - // Simulate new addition - NewLibrary, - MaxLibrary -}; - -#define SIMULATED_VECLIB_BIT_COUNT \ - 4 
// The current bitfield size (should be 4 for 9 options) - -TEST(VecLibBitfieldTest, SimulatedOverflowTest) { - // Simulate the addition of a new library and check if the bitfield size is - // sufficient - EXPECT_LE(static_cast(SimulatedVectorLibrary::MaxLibrary), -(1 << SIMULATED_VECLIB_BIT_COUNT)) - << "Simulated VecLib bitfield size overflow!"; -} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
yxd-ym wrote: > Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs? https://github.com/llvm/llvm-project/blob/43c9203d4946b7911d2ba69369717979900d7bc2/llvm/lib/CodeGen/TargetLoweringBase.cpp#L1356-L1374 From the logic above, the answer seems to be yes, because with this patch the following functions' return values for loongarch are - `softPromoteHalfType() => true` - `useFPRegsForHalfType() => false` https://github.com/llvm/llvm-project/blob/43c9203d4946b7911d2ba69369717979900d7bc2/llvm/include/llvm/CodeGen/TargetLowering.h#L519-L530 https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] test2 (PR #109137)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/109137 None From 32ae0b07276f7ccbdc5dd6675e0c46b507625449 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 18 Sep 2024 06:05:41 -0700 Subject: [PATCH] test2 --- llvm/lib/LTO/LTO.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 9a01edd70e08c9..d33815ff704128 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1371,6 +1371,7 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { /// This class defines the interface to the ThinLTO backend. /// Test +/// Test2 class lto::ThinBackendProc { protected: const Config &Conf; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -722,6 +770,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } } + if (opts::StaleMatchingWithPseudoProbes) { +const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); +assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); +for (const MCDecodedPseudoProbeInlineTree &TopLev : + Decoder->getDummyInlineRoot().getChildren()) + TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; + } + wlei-llvm wrote: How about moving this code closer to where it's used, i.e., `matchWeightsByHashes`? It seems the ` ... BC.getPseudoProbeDecoder() ...` call can be shared there. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -0,0 +1,62 @@ +## Tests stale block matching with pseudo probes. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile --profile-use-pseudo-probes --stale-matching-with-block-pseudo-probes 2>&1 | FileCheck %s + +# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale) + +#--- main.s + .text + .globl main# -- Begin function main + .p2align4, 0x90 + .type main,@function +main: # @main +# %bb.0: + pushq %rbp + movq%rsp, %rbp + movl$0, -4(%rbp) + .pseudoprobe15822663052811949562 1 0 0 main wlei-llvm wrote: I think this comment is still valid. Could you add a test case that checks the inline tree matching feature? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -722,12 +722,28 @@ class BinaryContext { /// Stats for stale profile matching: /// the total number of basic blocks in the profile uint32_t NumStaleBlocks{0}; -/// the number of matched basic blocks -uint32_t NumMatchedBlocks{0}; +/// the number of exactly matched basic blocks +uint32_t NumExactMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; -/// the count of matched samples -uint64_t MatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t ExactMatchedSampleCount{0}; +/// the count of exactly matched samples wlei-llvm wrote: typo: `exactly` --> `loosely` https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : wlei-llvm wrote: Is it possible to use `find` instead of the linear scanning for the callsite query? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -266,6 +305,69 @@ class StaleMatcher { } return BestBlock; } + + /// Matches a profile block with an binary block based on pseudo probes. + /// Returns the best matching block (or nullptr) and whether the match is + /// unambiguous. + std::pair matchWithPseudoProbes( + const ArrayRef BlockPseudoProbes, + const ArrayRef InlineTree) const { +if (!opts::StaleMatchingWithPseudoProbes) + return {nullptr, false}; + +DenseMap FlowBlockMatchCount; + +auto match = [&](uint32_t NodeId, uint64_t ProbeId) -> const FlowBlock * { + const MCDecodedPseudoProbeInlineTree *Node = + InlineTreeNodeMap.getInlineTreeNode(NodeId); + if (!Node) +return nullptr; + const MCDecodedPseudoProbe *BinaryProbe = nullptr; + for (const MCDecodedPseudoProbe &Probe : Node->getProbes()) { +if (Probe.getIndex() != ProbeId) wlei-llvm wrote: nit: just for the readability, is it possible to add the `Binary` or `Profile` prefix to the variable names, especially when they are used closely. Same to other places like `Node/Tree/Probe/...` but only when you think it's not too verbose:) https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + class InlineTreeNodeMapTy { +DenseMap Map; wlei-llvm wrote: add comments to explain the key and the value of the map. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -266,6 +305,69 @@ class StaleMatcher { } return BestBlock; } + + /// Matches a profile block with an binary block based on pseudo probes. wlei-llvm wrote: nit: `an` -> `a` https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + class InlineTreeNodeMapTy { +DenseMap Map; + +void mapInlineTreeNode(uint32_t ProfileNode, + const MCDecodedPseudoProbeInlineTree *BinaryNode) { + auto Res = Map.try_emplace(ProfileNode, BinaryNode); wlei-llvm wrote: nit: perhaps use `[[maybe_unused]]` to avoid the `(void)Res;` line https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + class InlineTreeNodeMapTy { +DenseMap Map; + +void mapInlineTreeNode(uint32_t ProfileNode, + const MCDecodedPseudoProbeInlineTree *BinaryNode) { + auto Res = Map.try_emplace(ProfileNode, BinaryNode); + assert(Res.second && + "Duplicate mapping from profile node index to binary inline tree"); + (void)Res; +} + + public: +/// Returns matched InlineTree * for a given profile inline_tree_id. +const MCDecodedPseudoProbeInlineTree * +getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const { + auto It = Map.find(ProfileInlineTreeNodeId); + if (It == Map.end()) +return nullptr; + return It->second; +} + +// Match up YAML inline tree with binary inline tree. +// \p GetRootCallback is invoked for matching up the first YAML inline tree +// node and has the following signature: +// const MCDecodedPseudoProbeInlineTree *GetRootCallback(uint64_t RootGUID) +void matchInlineTrees( wlei-llvm wrote: Maybe rename to `readAndMatchInlineTrees` or `buildAndMatchInlineTrees`? I see a big part of the code is to read and build the trees from YAML. Or maybe split it into two functions? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + class InlineTreeNodeMapTy { +DenseMap Map; + +void mapInlineTreeNode(uint32_t ProfileNode, + const MCDecodedPseudoProbeInlineTree *BinaryNode) { + auto Res = Map.try_emplace(ProfileNode, BinaryNode); + assert(Res.second && + "Duplicate mapping from profile node index to binary inline tree"); + (void)Res; +} + + public: +/// Returns matched InlineTree * for a given profile inline_tree_id. +const MCDecodedPseudoProbeInlineTree * +getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const { + auto It = Map.find(ProfileInlineTreeNodeId); + if (It == Map.end()) +return nullptr; + return It->second; +} + +// Match up YAML inline tree with binary inline tree. wlei-llvm wrote: Could you add more comments to explain the output of this function and how it will be used for the matching later? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); wlei-llvm wrote: nit: add assertion msg https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + class InlineTreeNodeMapTy { +DenseMap Map; + +void mapInlineTreeNode(uint32_t ProfileNode, wlei-llvm wrote: nit: `ProfileNode` --> `ProfileNodeIdx` https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -482,6 +584,30 @@ matchWeightsByHashes(BinaryContext &BC, << Twine::utohexstr(BB->getHash()) << "\n"); } StaleMatcher Matcher; + // Collects function pseudo probes for use in the StaleMatcher. + if (opts::StaleMatchingWithPseudoProbes) { +const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); +assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); +const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); +const uint64_t FuncAddr = BF.getAddress(); +auto GetTopLevelNodeByGUID = wlei-llvm wrote: Can we move this into the `matchInlineTrees`? I didn't find any other usages in this function. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : + Parent->getChildren()) { +if (Child.Guid == GUID) { + if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) +Cur = &Child; + break; +} + } +} +if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash) wlei-llvm wrote: Why does this need to satisfy `Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash`? Should this be an assertion instead? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/109185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin ready_for_review https://github.com/llvm/llvm-project/pull/109185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin ready_for_review https://github.com/llvm/llvm-project/pull/109184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) Changes The `step` instrumentation shouldn't be treated, during use, like an `increment`. The latter is treated as a BB ID. The step isn't that, it's more of a type of value profiling. We need to distinguish between the 2 when really looking for BB IDs (==increments), and handle appropriately `step`s. In particular, we need to know when to elide them because `select`s may get elided by function cloning, if the condition of the select is statically known. --- Full diff: https://github.com/llvm/llvm-project/pull/109185.diff 5 Files Affected: - (modified) llvm/include/llvm/Analysis/CtxProfAnalysis.h (+3) - (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+9) - (modified) llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp (+43-2) - (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+9-1) - (added) llvm/test/Analysis/CtxProfAnalysis/handle-select.ll (+76) ``diff diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index b3e64b26ee543c..0a5beb92fcbcc0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the instruction instrumenting a BB, or nullptr if not present. 
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB); + + /// Get the step instrumentation associated with a `select` + static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 3df72983862d98..7517011395a7d6 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { return nullptr; } +InstrProfIncrementInstStep * +CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { + Instruction *Prev = &SI; + while ((Prev = Prev->getPrevNode())) +if (auto *Step = dyn_cast(Prev)) + return Step; + return nullptr; +} + template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor, diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 91f950e2ba4c3e..30bb251364fdef 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,6 +154,8 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } +uint64_t getCount() const { return *Count;} + bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); @@ -266,6 +268,21 @@ class ProfileAnnotator final { return HitExit; } + bool allNonColdSelectsHaveProfile() const { +for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { +for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { +if (!SI->getMetadata(LLVMContext::MD_prof)) { + return false; +} + } +} + } +} +return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl &Counters, InstrProfSummaryBuilder &PB) @@ -324,12 +341,33 @@ class ProfileAnnotator final { 
PB.addEntryCount(Counters[0]); for (auto &BB : F) { + const auto &BBInfo = getBBInfo(BB); + if (BBInfo.getCount() > 0) { +for (auto &I : BB) { + if (auto *SI = dyn_cast(&I)) { +if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { + auto Index = Step->getIndex()->getZExtValue(); + assert(Index < Counters.size() && +"The index of the step instruction must be inside the " +"counters vector by " +"construction - tripping this assertion indicates a bug in " +"how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + auto TrueCount = Counters[Index]; + auto FalseCount = + (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); +} + } +} + } if (succ_size(&BB) < 2) continue; auto *Term = BB.getTerminator(); SmallVector EdgeCounts(Term->getNumSuccessors(), 0); uint64_t MaxCount = 0; - const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdge
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Mircea Trofin (mtrofin) Changes Reinforcing properties ensured at instrumentation time. --- Full diff: https://github.com/llvm/llvm-project/pull/109184.diff 1 Files Affected: - (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+9-2) ``diff diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index c29709b613410e..3df72983862d98 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, } InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { - for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) + if (!InstrProfCallsite::canInstrumentCallsite(CB)) +return nullptr; + for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) { if (auto *IPC = dyn_cast(Prev)) return IPC; +assert(!isa(Prev) && + "didn't expect to find another call, that's not the callsite " + "instrumentation, before an instrumentable callsite"); + } return nullptr; } InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { for (auto &I : BB) if (auto *Incr = dyn_cast(&I)) - return Incr; + if (!isa(&I)) +return Incr; return nullptr; } `` https://github.com/llvm/llvm-project/pull/109184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/109184 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` (PR #109185)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 987562aab1c409b62b1a4c5d6d8566ad812b8313 09642a4889da1d0e10f54b17b84e32dae5c8557e --extensions cpp,h -- llvm/include/llvm/Analysis/CtxProfAnalysis.h llvm/lib/Analysis/CtxProfAnalysis.cpp llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp llvm/lib/Transforms/Utils/InlineFunction.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 30bb251364..3a3c47e90a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,7 +154,7 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } -uint64_t getCount() const { return *Count;} +uint64_t getCount() const { return *Count; } bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { @@ -347,11 +347,12 @@ public: if (auto *SI = dyn_cast(&I)) { if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { auto Index = Step->getIndex()->getZExtValue(); - assert(Index < Counters.size() && -"The index of the step instruction must be inside the " -"counters vector by " -"construction - tripping this assertion indicates a bug in " -"how the contextual profile is managed by IPO transforms"); + assert( + Index < Counters.size() && + "The index of the step instruction must be inside the " + "counters vector by " + "construction - tripping this assertion indicates a bug in " + "how the contextual profile is managed by IPO transforms"); auto TotalCount = BBInfo.getCount(); auto TrueCount = Counters[Index]; auto FalseCount = `` https://github.com/llvm/llvm-project/pull/109185 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109184 >From 152a2a965e4c500f207b960293578c3715ffd903 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:03:30 -0700 Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index c29709b613410e..3df72983862d98 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, } InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { - for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) + if (!InstrProfCallsite::canInstrumentCallsite(CB)) +return nullptr; + for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) { if (auto *IPC = dyn_cast(Prev)) return IPC; +assert(!isa(Prev) && + "didn't expect to find another call, that's not the callsite " + "instrumentation, before an instrumentable callsite"); + } return nullptr; } InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { for (auto &I : BB) if (auto *Incr = dyn_cast(&I)) - return Incr; + if (!isa(&I)) +return Incr; return nullptr; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : + Parent->getChildren()) { +if (Child.Guid == GUID) { + if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) +Cur = &Child; + break; +} + } +} +if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash) wlei-llvm wrote: I see. Please add a comment explaining that this handles the stale-profile case: we don't match probes whose function checksum is mismatched. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109203 >From bcac23616ef0b321e4e7c48e4f08067241807ea7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Sep 2024 14:18:23 -0700 Subject: [PATCH] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 - .../transform-to-local.ll | 13 +++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea4..644effab9414ba 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) { if (F.isDeclaration() || !F.hasAvailableExternallyLinkage()) continue; -if (ConvertToLocal) +if (Convert || ConvertToLocal) convertToLocalCopy(M, F); else deleteFunction(F); @@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) { } PreservedAnalyses -EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { - if (!eliminateAvailableExternally(M)) -return PreservedAnalyses::all(); +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { + auto *CtxProf = MAM.getCachedResult(M); + if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf +; + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll index 786cc260d331c6..d0b96daf3bf3b1 100644 --- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll +++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll @@ -1,6 +1,11 @@ ; REQUIRES: asserts ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s +; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s +; If the profile doesn't apply to this module, nothing gets converted. 
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata +; RUN: opt -passes='assign-guid,require,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP declare void @call_out(ptr %fct) @@ -12,13 +17,15 @@ define available_externally hidden void @g() { ret void } -define void @hello(ptr %g) { +define void @hello(ptr %g) !guid !0 { call void @f() %f = load ptr, ptr @f call void @call_out(ptr %f) ret void } +!0 = !{i64 1234} + ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}() ; CHECK: declare hidden void @g() ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}() @@ -26,4 +33,6 @@ define void @hello(ptr %g) { ; CHECK-NEXT: call void @call_out(ptr %f) ; CHECK: Statistics Collected ; CHECK: 1 elim-avail-extern - Number of functions converted -; CHECK: 1 elim-avail-extern - Number of functions removed \ No newline at end of file +; CHECK: 1 elim-avail-extern - Number of functions removed + +; NOOP: 2 elim-avail-extern - Number of functions removed \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( +const MCPseudoProbeDecoder &Decoder, +const yaml::bolt::PseudoProbeDesc &YamlPD, +const std::vector &YamlInlineTree, +llvm::function_ref +GetRootCallback) { + + // Match inline tree nodes by GUID, checksum, parent, and call site. + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + uint32_t Index = 0; + for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint64_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +assert(GUIDIdx < YamlPD.GUID.size()); +assert(GUIDIdx < YamlPD.GUIDHashIdx.size()); +uint64_t GUID = YamlPD.GUID[GUIDIdx]; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +assert(HashIdx < YamlPD.Hash.size()); +uint64_t Hash = YamlPD.Hash[HashIdx]; +uint32_t InlineTreeNodeId = Index++; +ParentId += InlineTreeNode.ParentIndexDelta; +uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; +const MCDecodedPseudoProbeInlineTree *Cur = nullptr; +if (!InlineTreeNodeId) { + Cur = GetRootCallback(GUID); +} else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : wlei-llvm wrote: I see, I just remembered it has been changed to a vector rather than a map, so that would need pre-sorting. OK, I'm good with the current version, assuming the perf here is fine. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/109185 >From 7e92883f5ac815136d891b8c15728af1e7086df7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 17 Sep 2024 22:00:42 -0700 Subject: [PATCH] [ctx_prof] Handle `select` --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 3 + llvm/lib/Analysis/CtxProfAnalysis.cpp | 9 +++ .../Instrumentation/PGOCtxProfFlattening.cpp | 46 ++- llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++- .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++ 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index b3e64b26ee543c..0a5beb92fcbcc0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the instruction instrumenting a BB, or nullptr if not present. 
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB); + + /// Get the step instrumentation associated with a `select` + static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 3df72983862d98..7517011395a7d6 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { return nullptr; } +InstrProfIncrementInstStep * +CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { + Instruction *Prev = &SI; + while ((Prev = Prev->getPrevNode())) +if (auto *Step = dyn_cast(Prev)) + return Step; + return nullptr; +} + template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor, diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 91f950e2ba4c3e..3a3c47e90a168a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -154,6 +154,8 @@ class ProfileAnnotator final { bool hasCount() const { return Count.has_value(); } +uint64_t getCount() const { return *Count; } + bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); @@ -266,6 +268,21 @@ class ProfileAnnotator final { return HitExit; } + bool allNonColdSelectsHaveProfile() const { +for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { +for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { +if (!SI->getMetadata(LLVMContext::MD_prof)) { + return false; +} + } +} + } +} +return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl &Counters, InstrProfSummaryBuilder &PB) @@ -324,12 +341,34 @@ class ProfileAnnotator final { 
PB.addEntryCount(Counters[0]); for (auto &BB : F) { + const auto &BBInfo = getBBInfo(BB); + if (BBInfo.getCount() > 0) { +for (auto &I : BB) { + if (auto *SI = dyn_cast(&I)) { +if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { + auto Index = Step->getIndex()->getZExtValue(); + assert( + Index < Counters.size() && + "The index of the step instruction must be inside the " + "counters vector by " + "construction - tripping this assertion indicates a bug in " + "how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + auto TrueCount = Counters[Index]; + auto FalseCount = + (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); +} + } +} + } if (succ_size(&BB) < 2) continue; auto *Term = BB.getTerminator(); SmallVector EdgeCounts(Term->getNumSuccessors(), 0); uint64_t MaxCount = 0; - const auto &BBInfo = getBBInfo(BB); + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; ++SuccIdx) { uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); @@ -343,12 +382,15 @@ class ProfileAnnotator final { setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); } assert(allCountersAreAssigned() && - "Expected all counters have been ass
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
nikic wrote: Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs? https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits