[llvm-branch-commits] [llvm] [ADT] Use range-based helper functions in SmallSet (PR #108585)

2024-09-18 Thread David Blaikie via llvm-branch-commits


@@ -234,19 +225,12 @@ class SmallSet {
   /// Check if the SmallSet contains the given element.
   bool contains(const T &V) const {
 if (isSmall())
-  return vfind(V) != Vector.end();
-return Set.find(V) != Set.end();
+  return llvm::is_contained(Vector, V);
+return llvm::is_contained(Set, V);

dwblaikie wrote:

FWIW, I think is_contained is more legible than find!=end and seems fine to use 
here & I agree with the comment on is_contained suggesting it for uses like 
this.

https://github.com/llvm/llvm-project/pull/108585
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108713

>From fb2ed73b44facf865312d7efe32053718fcd6458 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH] [Attributor] Use more appropriate approach to check flat
 address space

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  7 ++---
 .../Transforms/IPO/AttributorAttributes.cpp   | 26 ++-
 .../CodeGen/AMDGPU/simple-indirect-call.ll|  5 ++--
 .../Attributor/address_space_info.ll  |  4 ++-
 .../Attributor/memory_locations_gpu.ll|  8 +++---
 .../test/Transforms/Attributor/nocapture-1.ll |  4 +--
 .../reduced/openmp_opt_constant_type_crash.ll |  1 -
 .../Transforms/Attributor/value-simplify.ll   |  3 +--
 .../Transforms/OpenMP/nested_parallelism.ll   |  4 +--
 .../OpenMP/spmdization_kernel_env_dep.ll  | 25 +-
 10 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h 
b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..59bae547522ea7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6267,11 +6267,12 @@ struct AAAddressSpace : public 
StateWrapper {
 return (AA->getIdAddr() == &ID);
   }
 
-  // No address space which indicates the associated value is dead.
-  static const uint32_t NoAddressSpace = ~0U;
-
   /// Unique ID (due to the unique address)
   static const char ID;
+
+protected:
+  // Invalid address space which indicates the associated value is dead.
+  static const uint32_t InvalidAddressSpace = ~0U;
 };
 
 struct AAAllocationInfo : public StateWrapper 
{
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..b2888f556d7d0d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
 assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
-if (getAssociatedType()->getPointerAddressSpace())
+
+if (!A.getInfoCache().getDL().getFlatAddressSpace().has_value()) {
+  indicatePessimisticFixpoint();
+  return;
+}
+
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value();
+unsigned AS = getAssociatedType()->getPointerAddressSpace();
+if (AS != FlatAS) {
+  [[maybe_unused]] bool R = takeAddressSpace(AS);
+  assert(R && "The take should happen");
   indicateOptimisticFixpoint();
+}
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-if (getAddressSpace() == NoAddressSpace ||
+if (getAddressSpace() == InvalidAddressSpace ||
 getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+Value *AssociatedValue = &getAssociatedValue();
+Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
 PointerType *NewPtrTy =
 PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
 bool UseOriginalValue =
@@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 if (!isValidState())
   return "addrspace()";
 return "addrspace(" +
-   (AssumedAddressSpace == NoAddressSpace
+   (AssumedAddressSpace == InvalidAddressSpace
 ? "none"
 : std::to_string(AssumedAddressSpace)) +
")";
   }
 
 private:
-  uint32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = InvalidAddressSpace;
 
   bool takeAddressSpace(uint32_t AS) {
-if (AssumedAddressSpace == NoAddressSpace) {
+if (AssumedAddressSpace == InvalidAddressSpace) {
   AssumedAddressSpace = AS;
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll 
b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index cca7b49996ff3b..971161a1c59855 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -34,8 +34,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_GCN-NEXT:[[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:store ptr @indirect, ptr addrspace(5) [[FPTR]], 
align 8
-; ATTRIBUTOR_GCN-NEXT:[[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], 
align 8
+; ATTRIBUTOR_GCN-NEXT:[[FPTR_CAST:%.*]] = a

[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin created 
https://github.com/llvm/llvm-project/pull/109203

None

>From 886b8947ae2dfa496a56ff0251f6fe22dca5060e Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)

2024-09-18 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/108923


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)

2024-09-18 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/108921


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)

2024-09-18 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/108923


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108787

>From e6fb6d58ce5cd8d02d769e1a4eb9664449fb785b Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 15 Sep 2024 23:06:14 -0400
Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace`

This has been moved to `DataLayout`.
---
 .../llvm/Analysis/TargetTransformInfo.h   | 21 ---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 --
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  5 -
 llvm/lib/Analysis/TargetTransformInfo.cpp |  4 
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  8 ---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |  4 
 .../Transforms/Scalar/InferAddressSpaces.cpp  |  5 +++--
 .../AMDGPU/noop-ptrint-pair.ll|  2 +-
 .../old-pass-regressions-inseltpoison.ll  |  2 +-
 .../AMDGPU/old-pass-regressions.ll|  2 +-
 .../InferAddressSpaces/AMDGPU/ptrmask.ll  |  2 +-
 11 files changed, 7 insertions(+), 50 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3411163549de2f..42d3ee328fc1a5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -451,24 +451,6 @@ class TargetTransformInfo {
   /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
   bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
 
-  /// Returns the address space ID for a target's 'flat' address space. Note
-  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
-  /// refers to as the generic address space. The flat address space is a
-  /// generic address space that can be used access multiple segments of memory
-  /// with different address spaces. Access of a memory location through a
-  /// pointer with this address space is expected to be legal but slower
-  /// compared to the same memory location accessed through a pointer with a
-  /// different address space.
-  //
-  /// This is for targets with different pointer representations which can
-  /// be converted with the addrspacecast instruction. If a pointer is 
converted
-  /// to this address space, optimizations should attempt to replace the access
-  /// with the source address space.
-  ///
-  /// \returns ~0u if the target does not have such a flat address space to
-  /// optimize away.
-  unsigned getFlatAddressSpace() const;
-
   /// Return any intrinsic address operand indexes which may be rewritten if
   /// they use a flat address space pointer.
   ///
@@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept {
   virtual bool isAlwaysUniform(const Value *V) = 0;
   virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
   virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
-  virtual unsigned getFlatAddressSpace() = 0;
   virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const = 0;
   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
@@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public 
TargetTransformInfo::Concept {
 return Impl.addrspacesMayAlias(AS0, AS1);
   }
 
-  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); 
}
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const override {
 return Impl.collectFlatAddressOperands(OpIndexes, IID);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2819af30cd1704..3902a19a90755a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -115,8 +115,6 @@ class TargetTransformInfoImplBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() const { return -1; }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2f2a6a09ffc44d..ce585dfc7a2f39 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -292,11 +292,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() {
-// Return an invalid address space.
-return -1;
-  }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp 
b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 67b626f300a101..42e3f277c1e291 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -305,10 +30

[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108258

>From 4f56200b16374ae69b717847872d53a80f505869 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 60 ++-
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll   | 33 ++
 2 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b2888f556d7d0d..22f983af85af3e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12587,16 +12587,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+assert(A.getInfoCache().getDL().getFlatAddressSpace().has_value());
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value();
 uint32_t OldAddressSpace = AssumedAddressSpace;
-auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
-DepClassTy::REQUIRED);
-auto Pred = [&](Value &Obj) {
+
+auto CheckAddressSpace = [&](Value &Obj) {
   if (isa(&Obj))
 return true;
+  // If an argument in flat address space only has addrspace cast uses, and
+  // those casts are same, then we take the dst addrspace.
+  if (auto *Arg = dyn_cast(&Obj)) {
+if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+  unsigned CastAddrSpace = FlatAS;
+  for (auto *U : Arg->users()) {
+auto *ASCI = dyn_cast(U);
+if (!ASCI)
+  return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+if (CastAddrSpace != FlatAS &&
+CastAddrSpace != ASCI->getDestAddressSpace())
+  return false;
+CastAddrSpace = ASCI->getDestAddressSpace();
+  }
+  if (CastAddrSpace != FlatAS)
+return takeAddressSpace(CastAddrSpace);
+}
+  }
   return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
 };
 
-if (!AUO->forallUnderlyingObjects(Pred))
+auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
+DepClassTy::REQUIRED);
+if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
   return indicatePessimisticFixpoint();
 
 return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12605,17 +12626,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-if (getAddressSpace() == InvalidAddressSpace ||
-getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+unsigned NewAS = getAddressSpace();
+
+if (NewAS == InvalidAddressSpace ||
+NewAS == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value();
+
 Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
 
 PointerType *NewPtrTy =
-PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+PointerType::get(getAssociatedType()->getContext(), NewAS);
 bool UseOriginalValue =
-OriginalValue->getType()->getPointerAddressSpace() == 
getAddressSpace();
+OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
 bool Changed = false;
 
@@ -12675,12 +12700,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 return AssumedAddressSpace == AS;
   }
 
-  static Value *peelAddrspacecast(Value *V) {
-if (auto *I = dyn_cast(V))
-  return peelAddrspacecast(I->getPointerOperand());
+  static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
+if (auto *I = dyn_cast(V)) {
+  assert(I->getSrcAddressSpace() != FlatAS &&
+ "there should not be flat AS -> non-flat AS");
+  return I->getPointerOperand();
+}
 if (auto *C = dyn_cast(V))
-  if (C->getOpcode() == Instruction::AddrSpaceCast)
-return peelAddrspacecast(C->getOperand(0));
+  if (C->getOpcode() == Instruction::AddrSpaceCast) {
+assert(C->getOperand(0)->getType()

[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/109184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval created 
https://github.com/llvm/llvm-project/pull/109214

Convert `cuf.allocate` and `cuf.deallocate` to the runtime entry points added 
in #109213

>From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 18 Sep 2024 15:42:19 -0700
Subject: [PATCH] [flang][cuda] Convert module allocation/deallocation to
 runtime calls

---
 .../Optimizer/Transforms/CufOpConversion.cpp  | 63 ---
 flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++-
 2 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 2dc37f4df3aeec..e61105491ca69f 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
@@ -14,6 +15,10 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+<<< HEAD
+===
+#include "flang/Runtime/CUDA/allocatable.h"
+>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate 
device module variable)
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
@@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda;
 namespace {
 
 template 
-static bool needDoubleDescriptor(OpTy op) {
+static bool isPinned(OpTy op) {
+  if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+return true;
+  return false;
+}
+
+template 
+static bool hasDoubleDescriptors(OpTy op) {
   if (auto declareOp =
   mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) 
{
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) {
  op.getBox().getDefiningOp())) {
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -108,17 +118,22 @@ struct CufAllocateOpConversion
 if (op.getPinned())
   return mlir::failure();
 
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
+auto mod = op->getParentOfType();
+fir::FirOpBuilder builder(rewriter, mod);
+mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Allocation for module variable are done with custom runtime entry 
point
+  // so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
 
 // Allocation for local descriptor falls back on the standard runtime
 // AllocatableAllocate as the dedicated allocator is set in the descriptor
 // before the call.
-auto mod = op->template getParentOfType();
-fir::FirOpBuilder builder(rewriter, mod);
-mlir::Location loc = op.getLoc();
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
builder);
@@ -133,17 +148,23 @@ struct CufDeallocateOpConversion
   mlir::LogicalResult
   matchAndRewrite(cuf::DeallocateOp op,
   mlir::PatternRewriter &rewriter) const override {
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
 
-// Deallocation for local descriptor falls back on the standard runtime
-// AllocatableDeallocate as the dedicated deallocator is set in the
-// descriptor before the call.
 auto mod = op->getParentOfType();
 fir::FirOpBuilder builder(rewriter, mod);
 mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Deallocation for module variable are done with custom runtime entry
+  // point so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builde

[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)


Changes

Convert `cuf.allocate` and `cuf.deallocate` to the runtime entry points added 
in #109213

---
Full diff: https://github.com/llvm/llvm-project/pull/109214.diff


2 Files Affected:

- (modified) flang/lib/Optimizer/Transforms/CufOpConversion.cpp (+40-23) 
- (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+38-2) 


```diff
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 2dc37f4df3aeec..e61105491ca69f 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
@@ -14,6 +15,10 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+<<< HEAD
+===
+#include "flang/Runtime/CUDA/allocatable.h"
+>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate 
device module variable)
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
@@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda;
 namespace {
 
 template 
-static bool needDoubleDescriptor(OpTy op) {
+static bool isPinned(OpTy op) {
+  if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+return true;
+  return false;
+}
+
+template 
+static bool hasDoubleDescriptors(OpTy op) {
   if (auto declareOp =
   mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) 
{
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) {
  op.getBox().getDefiningOp())) {
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -108,17 +118,22 @@ struct CufAllocateOpConversion
 if (op.getPinned())
   return mlir::failure();
 
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
+auto mod = op->getParentOfType();
+fir::FirOpBuilder builder(rewriter, mod);
+mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Allocation for module variable are done with custom runtime entry 
point
+  // so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
 
 // Allocation for local descriptor falls back on the standard runtime
 // AllocatableAllocate as the dedicated allocator is set in the descriptor
 // before the call.
-auto mod = op->template getParentOfType();
-fir::FirOpBuilder builder(rewriter, mod);
-mlir::Location loc = op.getLoc();
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
builder);
@@ -133,17 +148,23 @@ struct CufDeallocateOpConversion
   mlir::LogicalResult
   matchAndRewrite(cuf::DeallocateOp op,
   mlir::PatternRewriter &rewriter) const override {
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
 
-// Deallocation for local descriptor falls back on the standard runtime
-// AllocatableDeallocate as the dedicated deallocator is set in the
-// descriptor before the call.
 auto mod = op->getParentOfType();
 fir::FirOpBuilder builder(rewriter, mod);
 mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Deallocation for module variable are done with custom runtime entry
+  // point so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
+
+// Deallocation for local descriptor falls back on the standard runtime
+   

[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/109214

>From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 18 Sep 2024 15:42:19 -0700
Subject: [PATCH 1/3] [flang][cuda] Convert module allocation/deallocation to
 runtime calls

---
 .../Optimizer/Transforms/CufOpConversion.cpp  | 63 ---
 flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++-
 2 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 2dc37f4df3aeec..e61105491ca69f 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
@@ -14,6 +15,10 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+<<< HEAD
+===
+#include "flang/Runtime/CUDA/allocatable.h"
+>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate 
device module variable)
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
@@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda;
 namespace {
 
 template 
-static bool needDoubleDescriptor(OpTy op) {
+static bool isPinned(OpTy op) {
+  if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+return true;
+  return false;
+}
+
+template 
+static bool hasDoubleDescriptors(OpTy op) {
   if (auto declareOp =
   mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) 
{
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) {
  op.getBox().getDefiningOp())) {
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -108,17 +118,22 @@ struct CufAllocateOpConversion
 if (op.getPinned())
   return mlir::failure();
 
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
+auto mod = op->getParentOfType();
+fir::FirOpBuilder builder(rewriter, mod);
+mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Allocation for module variable are done with custom runtime entry 
point
+  // so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
 
 // Allocation for local descriptor falls back on the standard runtime
 // AllocatableAllocate as the dedicated allocator is set in the descriptor
 // before the call.
-auto mod = op->template getParentOfType();
-fir::FirOpBuilder builder(rewriter, mod);
-mlir::Location loc = op.getLoc();
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
builder);
@@ -133,17 +148,23 @@ struct CufDeallocateOpConversion
   mlir::LogicalResult
   matchAndRewrite(cuf::DeallocateOp op,
   mlir::PatternRewriter &rewriter) const override {
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
 
-// Deallocation for local descriptor falls back on the standard runtime
-// AllocatableDeallocate as the dedicated deallocator is set in the
-// descriptor before the call.
 auto mod = op->getParentOfType();
 fir::FirOpBuilder builder(rewriter, mod);
 mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Deallocation for module variable are done with custom runtime entry
+  // point so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
+
+// Deallocation fo

[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/109214

>From 0cf3e882111cf343be5e074ea1cf29893ab8ceb4 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 18 Sep 2024 15:42:19 -0700
Subject: [PATCH 1/2] [flang][cuda] Convert module allocation/deallocation to
 runtime calls

---
 .../Optimizer/Transforms/CufOpConversion.cpp  | 63 ---
 flang/test/Fir/CUDA/cuda-allocate.fir | 40 +++-
 2 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 2dc37f4df3aeec..e61105491ca69f 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
@@ -14,6 +15,10 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+<<< HEAD
+===
+#include "flang/Runtime/CUDA/allocatable.h"
+>>> 8dcdb01d463a ([flang][cuda] Add function to allocate and deallocate 
device module variable)
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
@@ -35,13 +40,19 @@ using namespace Fortran::runtime::cuda;
 namespace {
 
 template 
-static bool needDoubleDescriptor(OpTy op) {
+static bool isPinned(OpTy op) {
+  if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+return true;
+  return false;
+}
+
+template 
+static bool hasDoubleDescriptors(OpTy op) {
   if (auto declareOp =
   mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) 
{
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -49,8 +60,7 @@ static bool needDoubleDescriptor(OpTy op) {
  op.getBox().getDefiningOp())) {
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -108,17 +118,22 @@ struct CufAllocateOpConversion
 if (op.getPinned())
   return mlir::failure();
 
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
+auto mod = op->getParentOfType();
+fir::FirOpBuilder builder(rewriter, mod);
+mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Allocation for module variable are done with custom runtime entry 
point
+  // so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
 
 // Allocation for local descriptor falls back on the standard runtime
 // AllocatableAllocate as the dedicated allocator is set in the descriptor
 // before the call.
-auto mod = op->template getParentOfType();
-fir::FirOpBuilder builder(rewriter, mod);
-mlir::Location loc = op.getLoc();
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
builder);
@@ -133,17 +148,23 @@ struct CufDeallocateOpConversion
   mlir::LogicalResult
   matchAndRewrite(cuf::DeallocateOp op,
   mlir::PatternRewriter &rewriter) const override {
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
 
-// Deallocation for local descriptor falls back on the standard runtime
-// AllocatableDeallocate as the dedicated deallocator is set in the
-// descriptor before the call.
 auto mod = op->getParentOfType();
 fir::FirOpBuilder builder(rewriter, mod);
 mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Deallocation for module variable are done with custom runtime entry
+  // point so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
+
+// Deallocation fo

[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/109203
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Mircea Trofin (mtrofin)


Changes

For the modules containing context roots, the way IPO happens will potentially 
result in imported functions that are differently specialized (even if 
themselves not inlined) than their originals. So we want to convert them to 
local rather than elide them.

Eventually we'd perform this as a ThinLTO directive.

---
Full diff: https://github.com/llvm/llvm-project/pull/109203.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/IPO/ElimAvailExtern.cpp (+8-5) 
- (modified) 
llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll (+11-2) 


```diff
diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf))))
+    return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

```




https://github.com/llvm/llvm-project/pull/109203
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin ready_for_review 
https://github.com/llvm/llvm-project/pull/109203
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Slava Zakharin via llvm-branch-commits

https://github.com/vzakhari approved this pull request.

Great!

https://github.com/llvm/llvm-project/pull/109214
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval updated 
https://github.com/llvm/llvm-project/pull/109214

>From be4731f339d6fd9b45cd7cc93e3dd8ff83e80576 Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Wed, 18 Sep 2024 15:42:19 -0700
Subject: [PATCH] [flang][cuda] Convert module allocation/deallocation to
 runtime calls

---
 .../Optimizer/Transforms/CufOpConversion.cpp  | 59 +++
 flang/test/Fir/CUDA/cuda-allocate.fir | 40 -
 2 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 2dc37f4df3aeec..ac796e83b07078 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -14,6 +14,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Runtime/CUDA/allocatable.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
@@ -35,13 +36,19 @@ using namespace Fortran::runtime::cuda;
 namespace {
 
 template 
-static bool needDoubleDescriptor(OpTy op) {
+static bool isPinned(OpTy op) {
+  if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
+return true;
+  return false;
+}
+
+template 
+static bool hasDoubleDescriptors(OpTy op) {
   if (auto declareOp =
   mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) 
{
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -49,8 +56,7 @@ static bool needDoubleDescriptor(OpTy op) {
  op.getBox().getDefiningOp())) {
 if (mlir::isa_and_nonnull(
 declareOp.getMemref().getDefiningOp())) {
-  if (declareOp.getDataAttr() &&
-  *declareOp.getDataAttr() == cuf::DataAttribute::Pinned)
+  if (isPinned(declareOp))
 return false;
   return true;
 }
@@ -108,17 +114,22 @@ struct CufAllocateOpConversion
 if (op.getPinned())
   return mlir::failure();
 
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
+auto mod = op->getParentOfType();
+fir::FirOpBuilder builder(rewriter, mod);
+mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Allocation for module variable are done with custom runtime entry 
point
+  // so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
 
 // Allocation for local descriptor falls back on the standard runtime
 // AllocatableAllocate as the dedicated allocator is set in the descriptor
 // before the call.
-auto mod = op->template getParentOfType();
-fir::FirOpBuilder builder(rewriter, mod);
-mlir::Location loc = op.getLoc();
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
builder);
@@ -133,17 +144,23 @@ struct CufDeallocateOpConversion
   mlir::LogicalResult
   matchAndRewrite(cuf::DeallocateOp op,
   mlir::PatternRewriter &rewriter) const override {
-// TODO: Allocation of module variable will need more work as the 
descriptor
-// will be duplicated and needs to be synced after allocation.
-if (needDoubleDescriptor(op))
-  return mlir::failure();
 
-// Deallocation for local descriptor falls back on the standard runtime
-// AllocatableDeallocate as the dedicated deallocator is set in the
-// descriptor before the call.
 auto mod = op->getParentOfType();
 fir::FirOpBuilder builder(rewriter, mod);
 mlir::Location loc = op.getLoc();
+
+if (hasDoubleDescriptors(op)) {
+  // Deallocation for module variable are done with custom runtime entry
+  // point so the descriptors can be synchronized.
+  mlir::func::FuncOp func =
+  fir::runtime::getRuntimeFunc(
+  loc, builder);
+  return convertOpToCall(op, rewriter, func);
+}
+
+// Deallocation for local descriptor falls back on the standard runtime
+// AllocatableDeallocate as the dedicated deallocator is set in the
+// descriptor before the call.
 mlir::func::FuncOp func =
 fir::runtime::getRuntimeFunc(loc,
  builder);
@@ -448,10 +465,6 @@ class CufOpConversion : public 
fir::impl::CufOpConversionBase {
   }
   return true;
 });
-target.addDynamicallyLe

[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)

2024-09-18 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/108921
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][sanitizer] Move InitTlsSize (PR #108922)

2024-09-18 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/108922
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [sanitizer] Switch from lazy `ThreadDescriptorSize` (PR #108923)

2024-09-18 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/108923
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][sanitizer] Move `InitTlsSize` into `InitializePlatformEarly` (PR #108921)

2024-09-18 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/108921


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Amir Ayupov via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :

aaupov wrote:

Sure. But actually can we rely on the fact that children are ordered by their 
call site probe id? That would make binary search possible.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Amir Ayupov via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :
+   Parent->getChildren()) {
+if (Child.Guid == GUID) {
+  if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
+Cur = &Child;
+  break;
+}
+  }
+}
+if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash)

aaupov wrote:

`Hash` comes from YAML, we compare it against binary checksum.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/109185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Amir Ayupov via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());

aaupov wrote:

I'll drop these assertions as they are done by operator[] anyway.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin created 
https://github.com/llvm/llvm-project/pull/109185

None

>From 09642a4889da1d0e10f54b17b84e32dae5c8557e Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:00:42 -0700
Subject: [PATCH] [ctx_prof] Handle `select`

---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  3 +
 llvm/lib/Analysis/CtxProfAnalysis.cpp |  9 +++
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 45 ++-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 10 ++-
 .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++
 5 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public 
AnalysisInfoMixin {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst 
*CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
+  return Step;
+  return nullptr;
+}
+
 template 
 static void preorderVisit(ProfilesTy &Profiles,
   function_ref Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..30bb251364fdef 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
+uint64_t getCount() const { return *Count;}
+
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
 setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
 return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+for (const auto &BB : F) {
+  if (getBBInfo(BB).getCount() > 0) {
+for (const auto &I : BB) {
+  if (const auto *SI = dyn_cast(&I)) {
+if (!SI->getMetadata(LLVMContext::MD_prof)) {
+  return false;
+}
+  }
+}
+  }
+}
+return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,33 @@ class ProfileAnnotator final {
 PB.addEntryCount(Counters[0]);
 
 for (auto &BB : F) {
+  const auto &BBInfo = getBBInfo(BB);
+  if (BBInfo.getCount() > 0) {
+for (auto &I : BB) {
+  if (auto *SI = dyn_cast(&I)) {
+if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+  auto Index = Step->getIndex()->getZExtValue();
+  assert(Index < Counters.size() &&
+"The index of the step instruction must be inside the "
+"counters vector by "
+"construction - tripping this assertion indicates a bug in 
"
+"how the contextual profile is managed by IPO transforms");
+  auto TotalCount = BBInfo.getCount();
+  auto TrueCount = Counters[Index];
+  auto FalseCount =
+  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+  setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+  std::max(TrueCount, FalseCount));
+}
+  }
+}
+  }
   if (succ_size(&BB) < 2)
 continue;
   auto *Term = BB.getTerminator();
   SmallVector EdgeCounts(Term->getNumSuccessors(), 0);
   uint64_t MaxCount = 0;
-  const auto &BBInfo = getBBInfo(BB);
+
   for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < 
Size;
++SuccIdx) {
 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +381,15 @@ class ProfileAnnotator final {
 setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
 }
 assert(allCountersAreAssigned() &&
-   "Expected all counters have been assigned.

[llvm-branch-commits] [llvm] [ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin created 
https://github.com/llvm/llvm-project/pull/109184

None

>From 987562aab1c409b62b1a4c5d6d8566ad812b8313 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:03:30 -0700
Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for
 un-instrumentable callsites

---
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index c29709b613410e..3df72983862d98 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module 
&M,
 }
 
 InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
-  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode())
+  if (!InstrProfCallsite::canInstrumentCallsite(CB))
+return nullptr;
+  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
 if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
   return IPC;
+assert(!isa(Prev) &&
+   "didn't expect to find another call, that's not the callsite "
+   "instrumentation, before an instrumentable callsite");
+  }
   return nullptr;
 }
 
 InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   for (auto &I : BB)
 if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
-  return Incr;
+  if (!isa(&I))
+return Incr;
   return nullptr;
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][cuda] Convert module allocation/deallocation to runtime calls (PR #109214)

2024-09-18 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

https://github.com/clementval closed 
https://github.com/llvm/llvm-project/pull/109214
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109170

Backport 7153a4bbf6d46e58ce32d59220515c5ab9f35691

Requested by: @owenca

>From 2cc6c7bde964931be2c9a6691da944678bcb31a8 Mon Sep 17 00:00:00 2001
From: Owen Pan 
Date: Tue, 17 Sep 2024 21:16:20 -0700
Subject: [PATCH] [clang-format] Reimplement InsertNewlineAtEOF (#108513)

Fixes #108333.

(cherry picked from commit 7153a4bbf6d46e58ce32d59220515c5ab9f35691)
---
 clang/lib/Format/FormatTokenLexer.cpp | 7 +++
 clang/lib/Format/TokenAnnotator.cpp   | 5 -
 clang/unittests/Format/FormatTest.cpp | 6 ++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp 
b/clang/lib/Format/FormatTokenLexer.cpp
index e21b5a882b7773..63949b2e26bdc1 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -100,6 +100,13 @@ ArrayRef FormatTokenLexer::lex() {
 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
   FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->isNot(tok::eof));
+  if (Style.InsertNewlineAtEOF) {
+auto &TokEOF = *Tokens.back();
+if (TokEOF.NewlinesBefore == 0) {
+  TokEOF.NewlinesBefore = 1;
+  TokEOF.OriginalColumn = 0;
+}
+  }
   return Tokens;
 }
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index 3f00a28e62988a..4512e539cc7947 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
   auto *First = Line.First;
   First->SpacesRequiredBefore = 1;
   First->CanBreakBefore = First->MustBreakBefore;
-
-  if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
-  Style.InsertNewlineAtEOF) {
-First->NewlinesBefore = 1;
-  }
 }
 
 // This function heuristically determines whether 'Current' starts the name of 
a
diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 29200b72d3d008..b7d8fc8ea72c6c 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) {
 
   verifyNoChange("int i;\n", Style);
   verifyFormat("int i;\n", "int i;", Style);
+
+  constexpr StringRef Code{"namespace {\n"
+   "int i;\n"
+   "} // namespace"};
+  verifyFormat(Code.str() + '\n', Code, Style,
+   {tooling::Range(19, 13)}); // line 3
 }
 
 TEST_F(FormatTest, KeepEmptyLinesAtEOF) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109170
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-format

Author: None (llvmbot)


Changes

Backport 7153a4bbf6d46e58ce32d59220515c5ab9f35691

Requested by: @owenca

---
Full diff: https://github.com/llvm/llvm-project/pull/109170.diff


3 Files Affected:

- (modified) clang/lib/Format/FormatTokenLexer.cpp (+7) 
- (modified) clang/lib/Format/TokenAnnotator.cpp (-5) 
- (modified) clang/unittests/Format/FormatTest.cpp (+6) 


``diff
diff --git a/clang/lib/Format/FormatTokenLexer.cpp 
b/clang/lib/Format/FormatTokenLexer.cpp
index e21b5a882b7773..63949b2e26bdc1 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -100,6 +100,13 @@ ArrayRef FormatTokenLexer::lex() {
 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
   FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->isNot(tok::eof));
+  if (Style.InsertNewlineAtEOF) {
+auto &TokEOF = *Tokens.back();
+if (TokEOF.NewlinesBefore == 0) {
+  TokEOF.NewlinesBefore = 1;
+  TokEOF.OriginalColumn = 0;
+}
+  }
   return Tokens;
 }
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp 
b/clang/lib/Format/TokenAnnotator.cpp
index 3f00a28e62988a..4512e539cc7947 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
   auto *First = Line.First;
   First->SpacesRequiredBefore = 1;
   First->CanBreakBefore = First->MustBreakBefore;
-
-  if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
-  Style.InsertNewlineAtEOF) {
-First->NewlinesBefore = 1;
-  }
 }
 
 // This function heuristically determines whether 'Current' starts the name of 
a
diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 29200b72d3d008..b7d8fc8ea72c6c 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) {
 
   verifyNoChange("int i;\n", Style);
   verifyFormat("int i;\n", "int i;", Style);
+
+  constexpr StringRef Code{"namespace {\n"
+   "int i;\n"
+   "} // namespace"};
+  verifyFormat(Code.str() + '\n', Code, Style,
+   {tooling::Range(19, 13)}); // line 3
 }
 
 TEST_F(FormatTest, KeepEmptyLinesAtEOF) {

``




https://github.com/llvm/llvm-project/pull/109170
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:

@mydeveloperday What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/109170
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109185

>From dfde0036f6dd98f859c7c3984c4e44d6224d17f0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:00:42 -0700
Subject: [PATCH] [ctx_prof] Handle `select`

---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  3 +
 llvm/lib/Analysis/CtxProfAnalysis.cpp |  9 +++
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 46 ++-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 10 ++-
 .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public 
AnalysisInfoMixin {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst 
*CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+if (auto *Step = dyn_cast(Prev))
+  return Step;
+  return nullptr;
+}
+
 template 
 static void preorderVisit(ProfilesTy &Profiles,
   function_ref Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..3a3c47e90a168a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
+uint64_t getCount() const { return *Count; }
+
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
 setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
 return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+for (const auto &BB : F) {
+  if (getBBInfo(BB).getCount() > 0) {
+for (const auto &I : BB) {
+  if (const auto *SI = dyn_cast(&I)) {
+if (!SI->getMetadata(LLVMContext::MD_prof)) {
+  return false;
+}
+  }
+}
+  }
+}
+return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,34 @@ class ProfileAnnotator final {
 PB.addEntryCount(Counters[0]);
 
 for (auto &BB : F) {
+  const auto &BBInfo = getBBInfo(BB);
+  if (BBInfo.getCount() > 0) {
+for (auto &I : BB) {
+  if (auto *SI = dyn_cast(&I)) {
+if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+  auto Index = Step->getIndex()->getZExtValue();
+  assert(
+  Index < Counters.size() &&
+  "The index of the step instruction must be inside the "
+  "counters vector by "
+  "construction - tripping this assertion indicates a bug in "
+  "how the contextual profile is managed by IPO transforms");
+  auto TotalCount = BBInfo.getCount();
+  auto TrueCount = Counters[Index];
+  auto FalseCount =
+  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+  setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+  std::max(TrueCount, FalseCount));
+}
+  }
+}
+  }
   if (succ_size(&BB) < 2)
 continue;
   auto *Term = BB.getTerminator();
   SmallVector EdgeCounts(Term->getNumSuccessors(), 0);
   uint64_t MaxCount = 0;
-  const auto &BBInfo = getBBInfo(BB);
+
   for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < 
Size;
++SuccIdx) {
 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +382,15 @@ class ProfileAnnotator final {
 setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
 }
 assert(allCountersAreAssigned() &&
-   "Expected all counters have been ass

[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From 49a3d00864cf1850a8f5f1aff71b66603e2a8d8c Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From ccc89f9225f33508098c9a0c457f3f8d02a6a8e8 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From 3cafcfb2e786e48c53214c2767b3e72b415aa3bb Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From 24c376930f98887c88476fd6a41af0b5a452acc1 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

mtrofin wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


 Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/109184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

mtrofin wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


 Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/109185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

mtrofin wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#109203** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109203?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#109185** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109185?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109184** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109184?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#109183** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/109183?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


 Join @mtrofin and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/109203
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109185

>From b68a12f999df971ca42c8c68e4f3ac091034c47a Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:00:42 -0700
Subject: [PATCH] [ctx_prof] Handle `select`

---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  3 +
 llvm/lib/Analysis/CtxProfAnalysis.cpp |  9 +++
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 46 ++-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 10 ++-
 .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public 
AnalysisInfoMixin {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst 
*CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+if (auto *Step = dyn_cast(Prev))
+  return Step;
+  return nullptr;
+}
+
 template 
 static void preorderVisit(ProfilesTy &Profiles,
   function_ref Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..3a3c47e90a168a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
+uint64_t getCount() const { return *Count; }
+
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
 setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
 return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+for (const auto &BB : F) {
+  if (getBBInfo(BB).getCount() > 0) {
+for (const auto &I : BB) {
+  if (const auto *SI = dyn_cast(&I)) {
+if (!SI->getMetadata(LLVMContext::MD_prof)) {
+  return false;
+}
+  }
+}
+  }
+}
+return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,34 @@ class ProfileAnnotator final {
 PB.addEntryCount(Counters[0]);
 
 for (auto &BB : F) {
+  const auto &BBInfo = getBBInfo(BB);
+  if (BBInfo.getCount() > 0) {
+for (auto &I : BB) {
+  if (auto *SI = dyn_cast(&I)) {
+if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+  auto Index = Step->getIndex()->getZExtValue();
+  assert(
+  Index < Counters.size() &&
+  "The index of the step instruction must be inside the "
+  "counters vector by "
+  "construction - tripping this assertion indicates a bug in "
+  "how the contextual profile is managed by IPO transforms");
+  auto TotalCount = BBInfo.getCount();
+  auto TrueCount = Counters[Index];
+  auto FalseCount =
+  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+  setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+  std::max(TrueCount, FalseCount));
+}
+  }
+}
+  }
   if (succ_size(&BB) < 2)
 continue;
   auto *Term = BB.getTerminator();
   SmallVector EdgeCounts(Term->getNumSuccessors(), 0);
   uint64_t MaxCount = 0;
-  const auto &BBInfo = getBBInfo(BB);
+
   for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < 
Size;
++SuccIdx) {
 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +382,15 @@ class ProfileAnnotator final {
 setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
 }
 assert(allCountersAreAssigned() &&
-   "Expected all counters have been ass

[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109184

>From f654c77a3f1902b8dc7d9674d89f08f7fca0c85f Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:03:30 -0700
Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for
 un-instrumentable callsites

---
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index c29709b613410e..3df72983862d98 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module 
&M,
 }
 
 InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
-  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode())
+  if (!InstrProfCallsite::canInstrumentCallsite(CB))
+return nullptr;
+  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
 if (auto *IPC = dyn_cast(Prev))
   return IPC;
+assert(!isa(Prev) &&
+   "didn't expect to find another call, that's not the callsite "
+   "instrumentation, before an instrumentable callsite");
+  }
   return nullptr;
 }
 
 InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   for (auto &I : BB)
 if (auto *Incr = dyn_cast(&I))
-  return Incr;
+  if (!isa(&I))
+return Incr;
   return nullptr;
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109093

Backport 13280d99aec5b4f383a2f3d5c10ecb148a07384e

Requested by: @nikic

>From 3539b82c0ba5b412ed51e2031880c1999a401b3d Mon Sep 17 00:00:00 2001
From: YANG Xudong 
Date: Fri, 13 Sep 2024 08:49:54 +0800
Subject: [PATCH] [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16)
 type on loongarch (#107791)

For zig with LLVM 19.1.0rc4, we are seeing the following error when
bootstrapping a `loongarch64-linux-musl` target.

https://github.com/ziglang/zig-bootstrap/issues/164#issuecomment-2332357069

It seems that this issue is caused by `PromoteFloatResult` not
handling the FREEZE op on loongarch.

Here is the reproduction of the error: https://godbolt.org/z/PPfvWjjG5

~~This patch adds the FREEZE OP handling with `PromoteFloatRes_UnaryOp`
and adds a test case.~~

This patch changes loongarch's way of floating point promotion to soft
promotion to avoid this problem.

See: loongarch's handling of `half`:
- https://github.com/llvm/llvm-project/issues/93894
- https://github.com/llvm/llvm-project/pull/94456

Also see: other float promotion FREEZE handling
-
https://github.com/llvm/llvm-project/commit/0019c2f194a5e1f4cd65c5284e204328cc40ab3d

(cherry picked from commit 13280d99aec5b4f383a2f3d5c10ecb148a07384e)
---
 .../Target/LoongArch/LoongArchISelLowering.h  |   2 +
 llvm/test/CodeGen/LoongArch/fp16-promote.ll   | 198 +++---
 2 files changed, 128 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e01..267837add575dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering {
   bool isEligibleForTailCallOptimization(
   CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
   const SmallVectorImpl &ArgLocs) const;
+
+  bool softPromoteHalfType() const override { return true; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll 
b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b43a06ce..03965ac81f3763 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) 
nounwind {
 define half @test_fadd_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fadd_reg:
 ; LA32:   # %bb.0:
-; LA32-NEXT:addi.w $sp, $sp, -32
-; LA32-NEXT:st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:move $fp, $a0
+; LA32-NEXT:move $a0, $a1
+; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:fmov.s $fs0, $fa0
-; LA32-NEXT:fmov.s $fa0, $fa1
-; LA32-NEXT:bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:move $a0, $fp
 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:fmov.s $fs1, $fa0
-; LA32-NEXT:fmov.s $fa0, $fs0
+; LA32-NEXT:fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:addi.w $sp, $sp, 32
+; LA32-NEXT:fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
 ; LA32-NEXT:ret
 ;
 ; LA64-LABEL: test_fadd_reg:
 ; LA64:   # %bb.0:
 ; LA64-NEXT:addi.d $sp, $sp, -32
 ; LA64-NEXT:st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:move $fp, $a0
+; LA64-NEXT:move $a0, $a1
+; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:fmov.s $fs0, $fa0
-; LA64-NEXT:fmov.s $fa0, $fa1
-; LA64-NEXT:bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:move $a0, $fp
 ; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:fmov.s $fs1, $fa0
-; LA64-NEXT:fmov.s $fa0, $fs0
+; LA64-NEXT:fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folde

[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)


Changes

Backport 13280d99aec5b4f383a2f3d5c10ecb148a07384e

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/109093.diff


2 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2) 
- (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+126-72) 


``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e01..267837add575dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering {
   bool isEligibleForTailCallOptimization(
   CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
   const SmallVectorImpl &ArgLocs) const;
+
+  bool softPromoteHalfType() const override { return true; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll 
b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b43a06ce..03965ac81f3763 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) 
nounwind {
 define half @test_fadd_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fadd_reg:
 ; LA32:   # %bb.0:
-; LA32-NEXT:addi.w $sp, $sp, -32
-; LA32-NEXT:st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:move $fp, $a0
+; LA32-NEXT:move $a0, $a1
+; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:fmov.s $fs0, $fa0
-; LA32-NEXT:fmov.s $fa0, $fa1
-; LA32-NEXT:bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:move $a0, $fp
 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:fmov.s $fs1, $fa0
-; LA32-NEXT:fmov.s $fa0, $fs0
+; LA32-NEXT:fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:addi.w $sp, $sp, 32
+; LA32-NEXT:fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
 ; LA32-NEXT:ret
 ;
 ; LA64-LABEL: test_fadd_reg:
 ; LA64:   # %bb.0:
 ; LA64-NEXT:addi.d $sp, $sp, -32
 ; LA64-NEXT:st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:move $fp, $a0
+; LA64-NEXT:move $a0, $a1
+; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:fmov.s $fs0, $fa0
-; LA64-NEXT:fmov.s $fa0, $fa1
-; LA64-NEXT:bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:move $a0, $fp
 ; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:fmov.s $fs1, $fa0
-; LA64-NEXT:fmov.s $fa0, $fs0
+; LA64-NEXT:fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT:fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:addi.d $sp, $sp, 32
 ; LA64-NEXT:ret
@@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 ; LA32-NEXT:st.w $fp, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:st.w $s0, $sp, 20 # 4-byte Folded Spill
 ; LA32-NEXT:fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT:move $fp, $a1
-; LA32-NEXT:move $s0, $a0
-; LA32-NEXT:ld.hu $a0, $a0, 0
+; LA32-NEXT:move $fp, $a0
+; LA32-NEXT:ld.hu $s0, $a0, 0
+; LA32-NEXT:ld.hu $a0, $a1, 0
 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:fmov.s $fs0, $fa0
-; LA32-NEXT:ld.hu $a0, $fp, 0
+; LA32-NEXT:move $a0, $s0
 ; LA32-NEXT:bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT:fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:st.h $a0, $s0, 0
+; LA32-NEXT:st.h $a0, $fp, 0
 ; LA32-NEXT:fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:ld.w

[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:

@arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108787

>From ad6ff10e0612cce572b0d950b76b9d3c80c6dee3 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 15 Sep 2024 23:06:14 -0400
Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace`

This has been moved to `DataLayout`.
---
 .../llvm/Analysis/TargetTransformInfo.h   | 21 ---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 --
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  5 -
 llvm/lib/Analysis/TargetTransformInfo.cpp |  4 
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  8 ---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |  4 
 .../Transforms/Scalar/InferAddressSpaces.cpp  |  2 +-
 7 files changed, 1 insertion(+), 45 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3411163549de2f..42d3ee328fc1a5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -451,24 +451,6 @@ class TargetTransformInfo {
   /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
   bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
 
-  /// Returns the address space ID for a target's 'flat' address space. Note
-  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
-  /// refers to as the generic address space. The flat address space is a
-  /// generic address space that can be used access multiple segments of memory
-  /// with different address spaces. Access of a memory location through a
-  /// pointer with this address space is expected to be legal but slower
-  /// compared to the same memory location accessed through a pointer with a
-  /// different address space.
-  //
-  /// This is for targets with different pointer representations which can
-  /// be converted with the addrspacecast instruction. If a pointer is 
converted
-  /// to this address space, optimizations should attempt to replace the access
-  /// with the source address space.
-  ///
-  /// \returns ~0u if the target does not have such a flat address space to
-  /// optimize away.
-  unsigned getFlatAddressSpace() const;
-
   /// Return any intrinsic address operand indexes which may be rewritten if
   /// they use a flat address space pointer.
   ///
@@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept {
   virtual bool isAlwaysUniform(const Value *V) = 0;
   virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
   virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
-  virtual unsigned getFlatAddressSpace() = 0;
   virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const = 0;
   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
@@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public 
TargetTransformInfo::Concept {
 return Impl.addrspacesMayAlias(AS0, AS1);
   }
 
-  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); 
}
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const override {
 return Impl.collectFlatAddressOperands(OpIndexes, IID);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2819af30cd1704..3902a19a90755a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -115,8 +115,6 @@ class TargetTransformInfoImplBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() const { return -1; }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2f2a6a09ffc44d..ce585dfc7a2f39 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -292,11 +292,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() {
-// Return an invalid address space.
-return -1;
-  }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp 
b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 67b626f300a101..42e3f277c1e291 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -305,10 +305,6 @@ bool 
llvm::TargetTransformInfo::addrspacesMayAlias(unsigned FromAS,
   return TTIImpl->addrspacesMayAlias(FromAS, ToAS);
 }
 
-unsigned TargetTransformInfo::getFlatAddressSpace() const {
-  return TTIImpl->getFlatAddr

[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108713

>From 02b52beaefab0b63c3c09fd1f84b907b89c0cf7b Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH] [Attributor] Use more appropriate approach to check flat
 address space

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  3 ---
 .../Transforms/IPO/AttributorAttributes.cpp   | 26 ++-
 .../Attributor/address_space_info.ll  |  4 ++-
 .../test/Transforms/Attributor/nocapture-1.ll |  4 +--
 .../Transforms/Attributor/value-simplify.ll   |  3 +--
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h 
b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..ee7c930f8a26ac 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6267,9 +6267,6 @@ struct AAAddressSpace : public StateWrapper {
 return (AA->getIdAddr() == &ID);
   }
 
-  // No address space which indicates the associated value is dead.
-  static const uint32_t NoAddressSpace = ~0U;
-
   /// Unique ID (due to the unique address)
   static const char ID;
 };
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..642aeae3c12783 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
 assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
-if (getAssociatedType()->getPointerAddressSpace())
+
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+if (FlatAS == DataLayout::AS_INVALID) {
+  indicatePessimisticFixpoint();
+  return;
+}
+
+unsigned AS = getAssociatedType()->getPointerAddressSpace();
+if (AS != FlatAS) {
+  [[maybe_unused]] bool R = takeAddressSpace(AS);
+  assert(R && "The take should happen");
   indicateOptimisticFixpoint();
+}
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-if (getAddressSpace() == NoAddressSpace ||
+if (getAddressSpace() == DataLayout::AS_INVALID ||
 getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+Value *AssociatedValue = &getAssociatedValue();
+Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
 PointerType *NewPtrTy =
 PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
 bool UseOriginalValue =
@@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 if (!isValidState())
   return "addrspace()";
 return "addrspace(" +
-   (AssumedAddressSpace == NoAddressSpace
+   (AssumedAddressSpace == DataLayout::AS_INVALID
 ? "none"
 : std::to_string(AssumedAddressSpace)) +
")";
   }
 
 private:
-  uint32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = DataLayout::AS_INVALID;
 
   bool takeAddressSpace(uint32_t AS) {
-if (AssumedAddressSpace == NoAddressSpace) {
+if (AssumedAddressSpace == DataLayout::AS_INVALID) {
   AssumedAddressSpace = AS;
   return true;
 }
diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll 
b/llvm/test/Transforms/Attributor/address_space_info.ll
index 73dd93c55b819b..0c8b06aca4 100644
--- a/llvm/test/Transforms/Attributor/address_space_info.ll
+++ b/llvm/test/Transforms/Attributor/address_space_info.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --function-signature --check-attributes --check-globals 
--prefix-filecheck-ir-name true
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck 
%s --check-prefixes=CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck 
%s --check-prefix=CHECK
+
+; REQUIRES: amdgpu-registered-target
 
 @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4
 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll 
b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 3401ddfdd7d758..de5f31e470edfc 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocap

[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108258

>From f85ea4140cb95e46e0c2341cb93090f755e44cef Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 62 ++-
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll   | 33 ++
 2 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 642aeae3c12783..b67d95bc31110d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12587,16 +12587,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+assert(FlatAS != DataLayout::AS_INVALID);
 uint32_t OldAddressSpace = AssumedAddressSpace;
-auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
-DepClassTy::REQUIRED);
-auto Pred = [&](Value &Obj) {
+
+auto CheckAddressSpace = [&](Value &Obj) {
   if (isa(&Obj))
 return true;
+  // If an argument in flat address space only has addrspace cast uses, and
+  // those casts are same, then we take the dst addrspace.
+  if (auto *Arg = dyn_cast(&Obj)) {
+if (FlatAS != DataLayout::AS_INVALID &&
+Arg->getType()->getPointerAddressSpace() == FlatAS) {
+  unsigned CastAddrSpace = FlatAS;
+  for (auto *U : Arg->users()) {
+auto *ASCI = dyn_cast(U);
+if (!ASCI)
+  return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+if (CastAddrSpace != FlatAS &&
+CastAddrSpace != ASCI->getDestAddressSpace())
+  return false;
+CastAddrSpace = ASCI->getDestAddressSpace();
+  }
+  if (CastAddrSpace != FlatAS)
+return takeAddressSpace(CastAddrSpace);
+}
+  }
   return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
 };
 
-if (!AUO->forallUnderlyingObjects(Pred))
+auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
+DepClassTy::REQUIRED);
+if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
   return indicatePessimisticFixpoint();
 
 return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12605,17 +12627,22 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-if (getAddressSpace() == DataLayout::AS_INVALID ||
-getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+unsigned NewAS = getAddressSpace();
+
+if (NewAS == DataLayout::AS_INVALID ||
+NewAS == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+assert(FlatAS != DataLayout::AS_INVALID);
+
 Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
 
 PointerType *NewPtrTy =
-PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+PointerType::get(getAssociatedType()->getContext(), NewAS);
 bool UseOriginalValue =
-OriginalValue->getType()->getPointerAddressSpace() == 
getAddressSpace();
+OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
 bool Changed = false;
 
@@ -12675,12 +12702,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 return AssumedAddressSpace == AS;
   }
 
-  static Value *peelAddrspacecast(Value *V) {
-if (auto *I = dyn_cast(V))
-  return peelAddrspacecast(I->getPointerOperand());
+  static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
+if (auto *I = dyn_cast(V)) {
+  assert(I->getSrcAddressSpace() != FlatAS &&
+ "there should not be flat AS -> non-flat AS");
+  return I->getPointerOperand();
+}
 if (auto *C = dyn_cast(V))
-  if (C->getOpcode() == Instruction::AddrSpaceCast)
-return peelAddrspacecast(C->getOperand(0));
+  if (C->getOpcode() == Instruction::

[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109110

Backport 1825cf28dc83113200b623ebcf063eea35ade79a

Requested by: @heiher

>From 4d8867e154dfd99ae6d64de1d97f895c4a44d317 Mon Sep 17 00:00:00 2001
From: hev 
Date: Sat, 14 Sep 2024 11:19:34 +0800
Subject: [PATCH] [LoongArch][sanitizer] Fix SC_ADDRERR_{RD,WR} missing in the
 musl environment (#108557)

Fixes #108550

(cherry picked from commit 1825cf28dc83113200b623ebcf063eea35ade79a)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 12 
 1 file changed, 12 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 648df0c4e5a760..b9b1f496df7c98 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() 
const {
 return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#  define SC_ADDRERR_RD (1 << 30)
+#endif
+#ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#  define SC_ADDRERR_WR (1 << 31)
+#endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
 return SignalContext::Read;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (llvmbot)


Changes

Backport 1825cf28dc83113200b623ebcf063eea35ade79a

Requested by: @heiher

---
Full diff: https://github.com/llvm/llvm-project/pull/109110.diff


1 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+12) 


``diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 648df0c4e5a760..b9b1f496df7c98 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() 
const {
 return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#  define SC_ADDRERR_RD (1 << 30)
+#endif
+#ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#  define SC_ADDRERR_WR (1 << 31)
+#endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
 return SignalContext::Read;

``




https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)

2024-09-18 Thread Tobias Hieta via llvm-branch-commits
Martin Storsjö ,
Martin Storsjö 
Message-ID:
In-Reply-To: 


https://github.com/tru updated https://github.com/llvm/llvm-project/pull/107146

>From a82122d63db137b7210e54b127cc2e45fc31fd69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Tue, 3 Sep 2024 22:45:54 +0300
Subject: [PATCH 1/3] [clang] Don't add DWARF debug info when assembling .s
 with clang-cl /Z7 (#106686)

This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca.

That commit changed the clang-cl options /Zi and /Z7 to be implemented
as aliases of -g rather than having separate handling.

This had the unintended effect that, when assembling .s files with
clang-cl, the /Z7 option (which implies using CodeView debug info) was
treated as a -g option, which causes `ClangAs::ConstructJob` to pick up
the option as part of `Args.getLastArg(options::OPT_g_Group)`, which
sets the `WantDebug` variable.

Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or
`-gcodeview` options have been set, and if not, we pick the default
debug format for the current toolchain. However, in `ClangAs`, if debug
info has been enabled, it always adds DWARF debug info.

Add similar logic in `ClangAs` - check if the user has explicitly
requested either DWARF or CodeView, otherwise look up the toolchain
default. If we (either implicitly or explicitly) should be producing
CodeView, don't enable the default `ClangAs` DWARF generation.

This fixes the issue, where assembling a single `.s` file with clang-cl,
with the /Z7 option, causes the file to contain some DWARF sections.
This causes the output executable to contain DWARF, in addition to the
separate intended main PDB file.

By having the output executable contain DWARF sections, LLDB only looks
at the (very little) DWARF info in the executable, rather than looking
for a separate standalone PDB file. This caused an issue with LLDB's
tests, https://github.com/llvm/llvm-project/issues/101710.

(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 26 ++
 clang/test/Driver/debug-options-as.c  | 17 -
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 366b147a052bf2..8858c318aba7a1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const 
JobAction &JA,
 WantDebug = !A->getOption().matches(options::OPT_g0) &&
 !A->getOption().matches(options::OPT_ggdb0);
 
+  // If a -gdwarf argument appeared, remember it.
+  bool EmitDwarf = false;
+  if (const Arg *A = getDwarfNArg(Args))
+EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  bool EmitCodeView = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_gcodeview))
+EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  // If the user asked for debug info but did not explicitly specify -gcodeview
+  // or -gdwarf, ask the toolchain for the default format.
+  if (!EmitCodeView && !EmitDwarf && WantDebug) {
+switch (getToolChain().getDefaultDebugFormat()) {
+case llvm::codegenoptions::DIF_CodeView:
+  EmitCodeView = true;
+  break;
+case llvm::codegenoptions::DIF_DWARF:
+  EmitDwarf = true;
+  break;
+}
+  }
+
+  // If the arguments don't imply DWARF, don't emit any debug info here.
+  if (!EmitDwarf)
+WantDebug = false;
+
   llvm::codegenoptions::DebugInfoKind DebugInfoKind =
   llvm::codegenoptions::NoDebugInfo;
 
diff --git a/clang/test/Driver/debug-options-as.c 
b/clang/test/Driver/debug-options-as.c
index c83c0cb90431d3..3e1ae109711003 100644
--- a/clang/test/Driver/debug-options-as.c
+++ b/clang/test/Driver/debug-options-as.c
@@ -19,12 +19,27 @@
 // GGDB0-NOT: -debug-info-kind=
 
 // Check to make sure clang with -g on a .s file gets passed.
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
+// This requires a target that defaults to DWARF.
+// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x 
assembler %s 2>&1 \
 // RUN:   | FileCheck %s
 //
 // CHECK: "-cc1as"
 // CHECK: "-debug-info-kind=constructor"
 
+// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't
+// trigger producing DWARF output.
+// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x 
assembler %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+//
+// MSVC: "-cc1as"
+// MSVC-NOT: "-debug-info-kind=constructor"
+
+// Check that clang-cl with the -Z7 option works the same, not triggering
+// any DWARF output.
+//
+// RUN: %clang_cl -### -c -Z7 -x assembler %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+
 // Check to make sure clang with -g on a .s file gets passed 
-dwarf-debug-producer.
 // RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
 

[llvm-branch-commits] [lldb] release/19.x: [lldb] Fix some tests that fail with system libstdc++ (#106885) (PR #107938)

2024-09-18 Thread Felipe de Azevedo Piovezan via llvm-branch-commits

https://github.com/felipepiovezan approved this pull request.

This LGTM!

https://github.com/llvm/llvm-project/pull/107938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [TargetTransformInfo] Remove `getFlatAddressSpace` (PR #108787)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108787

>From 3541dca143c46f0c775bb0c51c8d562dda322cb2 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Sun, 15 Sep 2024 23:06:14 -0400
Subject: [PATCH] [TargetTransformInfo] Remove `getFlatAddressSpace`

This has been moved to `DataLayout`.
---
 .../llvm/Analysis/TargetTransformInfo.h   | 21 ---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 --
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  5 -
 llvm/lib/Analysis/TargetTransformInfo.cpp |  4 
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  8 ---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |  4 
 .../Transforms/Scalar/InferAddressSpaces.cpp  |  2 +-
 .../AMDGPU/noop-ptrint-pair.ll|  2 +-
 .../old-pass-regressions-inseltpoison.ll  |  2 +-
 .../AMDGPU/old-pass-regressions.ll|  2 +-
 .../InferAddressSpaces/AMDGPU/ptrmask.ll  |  2 +-
 11 files changed, 5 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3411163549de2f..42d3ee328fc1a5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -451,24 +451,6 @@ class TargetTransformInfo {
   /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
   bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
 
-  /// Returns the address space ID for a target's 'flat' address space. Note
-  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
-  /// refers to as the generic address space. The flat address space is a
-  /// generic address space that can be used access multiple segments of memory
-  /// with different address spaces. Access of a memory location through a
-  /// pointer with this address space is expected to be legal but slower
-  /// compared to the same memory location accessed through a pointer with a
-  /// different address space.
-  //
-  /// This is for targets with different pointer representations which can
-  /// be converted with the addrspacecast instruction. If a pointer is 
converted
-  /// to this address space, optimizations should attempt to replace the access
-  /// with the source address space.
-  ///
-  /// \returns ~0u if the target does not have such a flat address space to
-  /// optimize away.
-  unsigned getFlatAddressSpace() const;
-
   /// Return any intrinsic address operand indexes which may be rewritten if
   /// they use a flat address space pointer.
   ///
@@ -1838,7 +1820,6 @@ class TargetTransformInfo::Concept {
   virtual bool isAlwaysUniform(const Value *V) = 0;
   virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
   virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
-  virtual unsigned getFlatAddressSpace() = 0;
   virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const = 0;
   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
@@ -2266,8 +2247,6 @@ class TargetTransformInfo::Model final : public 
TargetTransformInfo::Concept {
 return Impl.addrspacesMayAlias(AS0, AS1);
   }
 
-  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); 
}
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const override {
 return Impl.collectFlatAddressOperands(OpIndexes, IID);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2819af30cd1704..3902a19a90755a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -115,8 +115,6 @@ class TargetTransformInfoImplBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() const { return -1; }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2f2a6a09ffc44d..ce585dfc7a2f39 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -292,11 +292,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 return true;
   }
 
-  unsigned getFlatAddressSpace() {
-// Return an invalid address space.
-return -1;
-  }
-
   bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes,
   Intrinsic::ID IID) const {
 return false;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp 
b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 67b626f300a101..42e3f277c1e291 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -305,10 +305,6

[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108713

>From cfce39f2434a0a9b2cab278c909380edef9ce896 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH] [Attributor] Use more appropriate approach to check flat
 address space

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  3 ---
 .../Transforms/IPO/AttributorAttributes.cpp   | 26 ++-
 .../Attributor/address_space_info.ll  |  4 ++-
 .../test/Transforms/Attributor/nocapture-1.ll |  4 +--
 .../Transforms/Attributor/value-simplify.ll   |  3 +--
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h 
b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..ee7c930f8a26ac 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6267,9 +6267,6 @@ struct AAAddressSpace : public StateWrapper {
 return (AA->getIdAddr() == &ID);
   }
 
-  // No address space which indicates the associated value is dead.
-  static const uint32_t NoAddressSpace = ~0U;
-
   /// Unique ID (due to the unique address)
   static const char ID;
 };
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..642aeae3c12783 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
 assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
-if (getAssociatedType()->getPointerAddressSpace())
+
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+if (FlatAS == DataLayout::AS_INVALID) {
+  indicatePessimisticFixpoint();
+  return;
+}
+
+unsigned AS = getAssociatedType()->getPointerAddressSpace();
+if (AS != FlatAS) {
+  [[maybe_unused]] bool R = takeAddressSpace(AS);
+  assert(R && "The take should happen");
   indicateOptimisticFixpoint();
+}
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-if (getAddressSpace() == NoAddressSpace ||
+if (getAddressSpace() == DataLayout::AS_INVALID ||
 getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+Value *AssociatedValue = &getAssociatedValue();
+Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
 PointerType *NewPtrTy =
 PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
 bool UseOriginalValue =
@@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 if (!isValidState())
   return "addrspace()";
 return "addrspace(" +
-   (AssumedAddressSpace == NoAddressSpace
+   (AssumedAddressSpace == DataLayout::AS_INVALID
 ? "none"
 : std::to_string(AssumedAddressSpace)) +
")";
   }
 
 private:
-  uint32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = DataLayout::AS_INVALID;
 
   bool takeAddressSpace(uint32_t AS) {
-if (AssumedAddressSpace == NoAddressSpace) {
+if (AssumedAddressSpace == DataLayout::AS_INVALID) {
   AssumedAddressSpace = AS;
   return true;
 }
diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll 
b/llvm/test/Transforms/Attributor/address_space_info.ll
index 73dd93c55b819b..0c8b06aca4 100644
--- a/llvm/test/Transforms/Attributor/address_space_info.ll
+++ b/llvm/test/Transforms/Attributor/address_space_info.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --function-signature --check-attributes --check-globals 
--prefix-filecheck-ir-name true
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck 
%s --check-prefixes=CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck 
%s --check-prefix=CHECK
+
+; REQUIRES: amdgpu-registered-target
 
 @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4
 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll 
b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 3401ddfdd7d758..de5f31e470edfc 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocap

[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-18 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108258

>From 081015ee9e9988d7340eff720af47e9292fb3d1c Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 62 ++-
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll   | 33 ++
 2 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 642aeae3c12783..b67d95bc31110d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12587,16 +12587,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+assert(FlatAS != DataLayout::AS_INVALID);
 uint32_t OldAddressSpace = AssumedAddressSpace;
-auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
-DepClassTy::REQUIRED);
-auto Pred = [&](Value &Obj) {
+
+auto CheckAddressSpace = [&](Value &Obj) {
   if (isa(&Obj))
 return true;
+  // If an argument in flat address space only has addrspace cast uses, and
+  // those casts are same, then we take the dst addrspace.
+  if (auto *Arg = dyn_cast(&Obj)) {
+if (FlatAS != DataLayout::AS_INVALID &&
+Arg->getType()->getPointerAddressSpace() == FlatAS) {
+  unsigned CastAddrSpace = FlatAS;
+  for (auto *U : Arg->users()) {
+auto *ASCI = dyn_cast(U);
+if (!ASCI)
+  return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+if (CastAddrSpace != FlatAS &&
+CastAddrSpace != ASCI->getDestAddressSpace())
+  return false;
+CastAddrSpace = ASCI->getDestAddressSpace();
+  }
+  if (CastAddrSpace != FlatAS)
+return takeAddressSpace(CastAddrSpace);
+}
+  }
   return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
 };
 
-if (!AUO->forallUnderlyingObjects(Pred))
+auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
+DepClassTy::REQUIRED);
+if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
   return indicatePessimisticFixpoint();
 
 return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12605,17 +12627,22 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-if (getAddressSpace() == DataLayout::AS_INVALID ||
-getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+unsigned NewAS = getAddressSpace();
+
+if (NewAS == DataLayout::AS_INVALID ||
+NewAS == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace();
+assert(FlatAS != DataLayout::AS_INVALID);
+
 Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
 
 PointerType *NewPtrTy =
-PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+PointerType::get(getAssociatedType()->getContext(), NewAS);
 bool UseOriginalValue =
-OriginalValue->getType()->getPointerAddressSpace() == 
getAddressSpace();
+OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
 bool Changed = false;
 
@@ -12675,12 +12702,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 return AssumedAddressSpace == AS;
   }
 
-  static Value *peelAddrspacecast(Value *V) {
-if (auto *I = dyn_cast(V))
-  return peelAddrspacecast(I->getPointerOperand());
+  static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
+if (auto *I = dyn_cast(V)) {
+  assert(I->getSrcAddressSpace() != FlatAS &&
+ "there should not be flat AS -> non-flat AS");
+  return I->getPointerOperand();
+}
 if (auto *C = dyn_cast(V))
-  if (C->getOpcode() == Instruction::AddrSpaceCast)
-return peelAddrspacecast(C->getOperand(0));
+  if (C->getOpcode() == Instruction::

[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-18 Thread via llvm-branch-commits

heiher wrote:

The purpose of cherry-picking this patch into the 19.x release branch is to 
resolve a build failure. This enables the Rust `loongarch64-unknown-linux-musl` 
target to support sanitizers starting from LLVM 19.x. This patch is a 
straightforward fix for a build issue and does not introduce any functional 
changes, so there is no apparent risk involved.

https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] 1f67363 - Revert "[flang][runtime] Use cuda::std::complex in F18 runtime CUDA build. (#…"

2024-09-18 Thread via llvm-branch-commits

Author: Slava Zakharin
Date: 2024-09-18T11:22:08-07:00
New Revision: 1f6736320fde2fb17f9b3c74b571b620c5fee72e

URL: 
https://github.com/llvm/llvm-project/commit/1f6736320fde2fb17f9b3c74b571b620c5fee72e
DIFF: 
https://github.com/llvm/llvm-project/commit/1f6736320fde2fb17f9b3c74b571b620c5fee72e.diff

LOG: Revert "[flang][runtime] Use cuda::std::complex in F18 runtime CUDA build. 
(#…"

This reverts commit be187a6812fb6e8984886c28a502ec69bdaa4ad4.

Added: 


Modified: 
flang/include/flang/Runtime/cpp-type.h
flang/include/flang/Runtime/matmul-instances.inc
flang/include/flang/Runtime/numeric.h
flang/include/flang/Runtime/reduce.h
flang/include/flang/Runtime/reduction.h
flang/include/flang/Runtime/transformational.h
flang/runtime/complex-powi.cpp
flang/runtime/complex-reduction.c
flang/runtime/dot-product.cpp
flang/runtime/extrema.cpp
flang/runtime/matmul-transpose.cpp
flang/runtime/matmul.cpp
flang/runtime/numeric.cpp
flang/runtime/product.cpp
flang/runtime/random.cpp
flang/runtime/reduce.cpp
flang/runtime/reduction-templates.h
flang/runtime/sum.cpp
flang/runtime/transformational.cpp
flang/unittests/Runtime/Numeric.cpp
flang/unittests/Runtime/Transformational.cpp

Removed: 
flang/include/flang/Common/float80.h
flang/include/flang/Runtime/complex.h



diff  --git a/flang/include/flang/Common/float80.h 
b/flang/include/flang/Common/float80.h
deleted file mode 100644
index 1838f7b13c8bb2..00
--- a/flang/include/flang/Common/float80.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*===-- flang/Common/float80.h --*- C 
-*-===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- 
*===--===*/
-
-/* This header is usable in both C and C++ code.
- * Isolates build compiler checks to determine if the 80-bit
- * floating point format is supported via a particular C type.
- * It defines CFloat80Type and CppFloat80Type aliases for this
- * C type.
- */
-
-#ifndef FORTRAN_COMMON_FLOAT80_H_
-#define FORTRAN_COMMON_FLOAT80_H_
-
-#include "api-attrs.h"
-#include 
-
-#if LDBL_MANT_DIG == 64
-#undef HAS_FLOAT80
-#define HAS_FLOAT80 1
-#endif
-
-#if defined(RT_DEVICE_COMPILATION) && defined(__CUDACC__)
-/*
- * 'long double' is treated as 'double' in the CUDA device code,
- * and there is no support for 80-bit floating point format.
- * This is probably true for most offload devices, so RT_DEVICE_COMPILATION
- * check should be enough. For the time being, guard it with __CUDACC__
- * as well.
- */
-#undef HAS_FLOAT80
-#endif
-
-#if HAS_FLOAT80
-typedef long double CFloat80Type;
-typedef long double CppFloat80Type;
-#endif
-
-#endif /* FORTRAN_COMMON_FLOAT80_H_ */

diff  --git a/flang/include/flang/Runtime/complex.h 
b/flang/include/flang/Runtime/complex.h
deleted file mode 100644
index b7ad1376bffbf1..00
--- a/flang/include/flang/Runtime/complex.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- include/flang/Runtime/complex.h -*- C++ 
-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-
-// A single way to expose C++ complex class in files that can be used
-// in F18 runtime build. With inclusion of this file std::complex
-// and the related names become available, though, they may correspond
-// to alternative definitions (e.g. from cuda::std namespace).
-
-#ifndef FORTRAN_RUNTIME_COMPLEX_H
-#define FORTRAN_RUNTIME_COMPLEX_H
-
-#if RT_USE_LIBCUDACXX
-#include 
-namespace Fortran::runtime::rtcmplx {
-using cuda::std::complex;
-using cuda::std::conj;
-} // namespace Fortran::runtime::rtcmplx
-#else // !RT_USE_LIBCUDACXX
-#include 
-namespace Fortran::runtime::rtcmplx {
-using std::complex;
-using std::conj;
-} // namespace Fortran::runtime::rtcmplx
-#endif // !RT_USE_LIBCUDACXX
-
-#endif // FORTRAN_RUNTIME_COMPLEX_H

diff  --git a/flang/include/flang/Runtime/cpp-type.h 
b/flang/include/flang/Runtime/cpp-type.h
index aef0fbd7ede586..fe21dd544cf7d8 100644
--- a/flang/include/flang/Runtime/cpp-type.h
+++ b/flang/include/flang/Runtime/cpp-type.h
@@ -13,9 +13,8 @@
 
 #include "flang/Common/Fortran.h"
 #include "flang/Common/float128.h"
-#include "flang/Common/float80.h"
 #include "flang/Common/uint128.h"
-#include "flang/Runtime/complex.h"
+#include 
 #include 
 #if __cplusplus >= 202302
 #include 
@@ -71,9 +70,9 @@ template <> struct CppTypeForHelper {
   using type = double;
 #endif
 };
-#if HAS_FLOAT80
+#if LDBL_MANT_DIG == 6

[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109125

Backport a111f9119a5ec77c19a514ec09454218f739454f 
0f47e3aebdd2a4a938468a272ea4224552dbf176

Requested by: @heiher

>From 67066255ada3b70def779630a473373aa36cb8df Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Tue, 10 Sep 2024 09:19:39 +0800
Subject: [PATCH 1/2]  [LoongArch][ISel] Check the number of sign bits in
 `PatGprGpr_32` (#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce a wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)
---
 .../Target/LoongArch/LoongArchInstrInfo.td|  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll | 67 ++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a42778737..339d50bd819217 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr
 : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32
-: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, 
GPR:$rk)>;
+: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 
GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr
 : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c1..c22acdb4969071 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:   # %bb.0: # %entry
 ; LA64-NEXT:addi.w $a1, $a1, 0
 ; LA64-NEXT:addi.w $a0, $a0, 0
-; LA64-NEXT:div.w $a0, $a0, $a1
+; LA64-NEXT:div.d $a0, $a0, $a1
+; LA64-NEXT:addi.w $a0, $a0, 0
 ; LA64-NEXT:ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:   # %bb.0: # %entry
 ; LA64-TRAP-NEXT:addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:   # %bb.0: # %entry
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:.cfi_def_cfa_offset 16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:.cfi_offset 1, -4
+; LA32-NEXT:move $a2, $a0
+; LA32-NEXT:srai.w $a3, $a0, 31
+; LA32-NEXT:lu12i.w $a0, -266831
+; LA32-NEXT:ori $a0, $a0, 3337
+; LA32-NEXT:move $a1, $zero
+; LA32-NEXT:bl %plt(__divdi3)
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: pr107414:
+; LA64:   # %bb.0: # %entry
+; LA64-NEXT:lu12i.w $a1, -266831
+; LA64-NEXT:ori $a1, $a1, 3337
+; LA64-NEXT:lu32i.d $a1, 0
+; LA64-NEXT:div.d $a0, $a1, $a0
+; LA64-NEXT:addi.w $a0, $a0, 0
+; LA64-NEXT:ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:   # %bb.0: # %entry
+; LA32-TRAP-NEXT:addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:.cfi_offset 1, -4
+; LA32-TRAP-NEXT:move $a2, $a0
+; LA32-TRAP-NEXT:srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:move $a1, $zero
+; LA32-TRAP-NEXT:bl %plt(__divdi3)
+; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:   # %bb.0: # %entry
+; LA64-TRAP-NEXT:lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:lu32i.d $a1, 0
+; LA64-TRAP-NEXT:div.d $

[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-18 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:

@wangleiat @SixWeining What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)


Changes

Backport a111f9119a5ec77c19a514ec09454218f739454f 
0f47e3aebdd2a4a938468a272ea4224552dbf176

Requested by: @heiher

---
Full diff: https://github.com/llvm/llvm-project/pull/109125.diff


3 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+4-1) 
- (modified) llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp (+15) 
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
(+61) 


``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a42778737..339d50bd819217 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr
 : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32
-: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, 
GPR:$rk)>;
+: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 
GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr
 : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp 
b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index abac69054f3b91..ab90409fdf47d0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const 
LoongArchSubtarget &ST,
 break;
   }
   return false;
+// If all incoming values are sign-extended and all users only use
+// the lower 32 bits, then convert them to W versions.
+case LoongArch::DIV_D: {
+  if (!AddRegToWorkList(MI->getOperand(1).getReg()))
+return false;
+  if (!AddRegToWorkList(MI->getOperand(2).getReg()))
+return false;
+  if (hasAllWUsers(*MI, ST, MRI)) {
+FixableDef.insert(MI);
+break;
+  }
+  return false;
+}
 }
   }
 
@@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) {
 return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
 return LoongArch::ADD_W;
+  case LoongArch::DIV_D:
+return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
 return LoongArch::LD_W;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c1..c5af79157eaadc 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -1151,3 +1151,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:   # %bb.0: # %entry
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:.cfi_def_cfa_offset 16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:.cfi_offset 1, -4
+; LA32-NEXT:move $a2, $a0
+; LA32-NEXT:srai.w $a3, $a0, 31
+; LA32-NEXT:lu12i.w $a0, -266831
+; LA32-NEXT:ori $a0, $a0, 3337
+; LA32-NEXT:move $a1, $zero
+; LA32-NEXT:bl %plt(__divdi3)
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: pr107414:
+; LA64:   # %bb.0: # %entry
+; LA64-NEXT:lu12i.w $a1, -266831
+; LA64-NEXT:ori $a1, $a1, 3337
+; LA64-NEXT:lu32i.d $a1, 0
+; LA64-NEXT:div.d $a0, $a1, $a0
+; LA64-NEXT:addi.w $a0, $a0, 0
+; LA64-NEXT:ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:   # %bb.0: # %entry
+; LA32-TRAP-NEXT:addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:.cfi_offset 1, -4
+; LA32-TRAP-NEXT:move $a2, $a0
+; LA32-TRAP-NEXT:srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:move $a1, $zero
+; LA32-TRAP-NEXT:bl %plt(__divdi3)
+; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:   # %bb.0: # %entry
+; LA64-TRAP-NEXT:lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:lu32i.d $a1, 0
+; LA64-TRAP-NEXT:div.d $a1, $a1, $a0
+; LA64-TRAP-NEXT:bnez $a0, .LBB32_2
+; LA64-TRAP-NEXT:  # %bb.1: # %entry
+; LA64-TRAP-NEXT:break 7
+; LA64-TRAP-NEXT:  .LBB32_2: # %entry
+; LA64-TRAP-NEXT:addi.w $a0, $a1, 0
+; LA64-TRAP-NEXT:ret
+entry:
+  %conv = sext i32 %x to i64
+  %div = sdiv i64 3202030857, %conv
+  %c

[llvm-branch-commits] [llvm] [ADT] Use range-based helper functions in SmallSet (PR #108585)

2024-09-18 Thread Nikita Popov via llvm-branch-commits


@@ -234,19 +225,12 @@ class SmallSet {
   /// Check if the SmallSet contains the given element.
   bool contains(const T &V) const {
 if (isSmall())
-  return vfind(V) != Vector.end();
-return Set.find(V) != Set.end();
+  return llvm::is_contained(Vector, V);
+return llvm::is_contained(Set, V);

nikic wrote:

If you want to have an abstraction over contains() and find() != end(), I'd 
suggest adding another helper, which does not also include the linear scan 
case. Though I don't think this is really worthwhile, as the existing find() != 
end() code works fine.

https://github.com/llvm/llvm-project/pull/108585
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)

2024-09-18 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/107146

>From 64075837b5532108a1fe96a5b158feb7a9025694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Tue, 3 Sep 2024 22:45:54 +0300
Subject: [PATCH] [clang] Don't add DWARF debug info when assembling .s with
 clang-cl /Z7 (#106686)

This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca.

That commit changed the clang-cl options /Zi and /Z7 to be implemented
as aliases of -g rather than having separate handling.

This had the unintended effect, that when assembling .s files with
clang-cl, the /Z7 option (which implies using CodeView debug info) was
treated as a -g option, which causes `ClangAs::ConstructJob` to pick up
the option as part of `Args.getLastArg(options::OPT_g_Group)`, which
sets the `WantDebug` variable.

Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or
`-gcodeview` options have been set, and if not, we pick the default
debug format for the current toolchain. However, in `ClangAs`, if debug
info has been enabled, it always adds DWARF debug info.

Add similar logic in `ClangAs` - check if the user has explicitly
requested either DWARF or CodeView, otherwise look up the toolchain
default. If we (either implicitly or explicitly) should be producing
CodeView, don't enable the default `ClangAs` DWARF generation.

This fixes the issue, where assembling a single `.s` file with clang-cl,
with the /Z7 option, causes the file to contain some DWARF sections.
This causes the output executable to contain DWARF, in addition to the
separate intended main PDB file.

By having the output executable contain DWARF sections, LLDB only looks
at the (very little) DWARF info in the executable, rather than looking
for a separate standalone PDB file. This caused an issue with LLDB's
tests, https://github.com/llvm/llvm-project/issues/101710.

(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 26 ++
 clang/test/Driver/debug-options-as.c  | 17 -
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 366b147a052bf2..8858c318aba7a1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const 
JobAction &JA,
 WantDebug = !A->getOption().matches(options::OPT_g0) &&
 !A->getOption().matches(options::OPT_ggdb0);
 
+  // If a -gdwarf argument appeared, remember it.
+  bool EmitDwarf = false;
+  if (const Arg *A = getDwarfNArg(Args))
+EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  bool EmitCodeView = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_gcodeview))
+EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  // If the user asked for debug info but did not explicitly specify -gcodeview
+  // or -gdwarf, ask the toolchain for the default format.
+  if (!EmitCodeView && !EmitDwarf && WantDebug) {
+switch (getToolChain().getDefaultDebugFormat()) {
+case llvm::codegenoptions::DIF_CodeView:
+  EmitCodeView = true;
+  break;
+case llvm::codegenoptions::DIF_DWARF:
+  EmitDwarf = true;
+  break;
+}
+  }
+
+  // If the arguments don't imply DWARF, don't emit any debug info here.
+  if (!EmitDwarf)
+WantDebug = false;
+
   llvm::codegenoptions::DebugInfoKind DebugInfoKind =
   llvm::codegenoptions::NoDebugInfo;
 
diff --git a/clang/test/Driver/debug-options-as.c 
b/clang/test/Driver/debug-options-as.c
index c83c0cb90431d3..cb0492177ff47a 100644
--- a/clang/test/Driver/debug-options-as.c
+++ b/clang/test/Driver/debug-options-as.c
@@ -19,12 +19,27 @@
 // GGDB0-NOT: -debug-info-kind=
 
 // Check to make sure clang with -g on a .s file gets passed.
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
+// This requires a target that defaults to DWARF.
+// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x 
assembler %s 2>&1 \
 // RUN:   | FileCheck %s
 //
 // CHECK: "-cc1as"
 // CHECK: "-debug-info-kind=constructor"
 
+// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't
+// trigger producing DWARF output.
+// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x 
assembler %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+//
+// MSVC: "-cc1as"
+// MSVC-NOT: "-debug-info-kind=constructor"
+
+// Check that clang-cl with the -Z7 option works the same, not triggering
+// any DWARF output.
+//
+// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- 
%s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+
 // Check to make sure clang with -g on a .s file gets passed 
-dwarf-debug-producer.
 // RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=P %s

__

[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)

2024-09-18 Thread via llvm-branch-commits

github-actions[bot] wrote:

@mstorsjo (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/107146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 6407583 - [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686)

2024-09-18 Thread Tobias Hieta via llvm-branch-commits

Author: Martin Storsjö
Date: 2024-09-18T16:01:50+02:00
New Revision: 64075837b5532108a1fe96a5b158feb7a9025694

URL: 
https://github.com/llvm/llvm-project/commit/64075837b5532108a1fe96a5b158feb7a9025694
DIFF: 
https://github.com/llvm/llvm-project/commit/64075837b5532108a1fe96a5b158feb7a9025694.diff

LOG: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 
(#106686)

This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca.

That commit changed the clang-cl options /Zi and /Z7 to be implemented
as aliases of -g rather than having separate handling.

This had the unintended effect, that when assembling .s files with
clang-cl, the /Z7 option (which implies using CodeView debug info) was
treated as a -g option, which causes `ClangAs::ConstructJob` to pick up
the option as part of `Args.getLastArg(options::OPT_g_Group)`, which
sets the `WantDebug` variable.

Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or
`-gcodeview` options have been set, and if not, we pick the default
debug format for the current toolchain. However, in `ClangAs`, if debug
info has been enabled, it always adds DWARF debug info.

Add similar logic in `ClangAs` - check if the user has explicitly
requested either DWARF or CodeView, otherwise look up the toolchain
default. If we (either implicitly or explicitly) should be producing
CodeView, don't enable the default `ClangAs` DWARF generation.

This fixes the issue, where assembling a single `.s` file with clang-cl,
with the /Z7 option, causes the file to contain some DWARF sections.
This causes the output executable to contain DWARF, in addition to the
separate intended main PDB file.

By having the output executable contain DWARF sections, LLDB only looks
at the (very little) DWARF info in the executable, rather than looking
for a separate standalone PDB file. This caused an issue with LLDB's
tests, https://github.com/llvm/llvm-project/issues/101710.

(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10)

Added: 


Modified: 
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/debug-options-as.c

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 366b147a052bf2..8858c318aba7a1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const 
JobAction &JA,
 WantDebug = !A->getOption().matches(options::OPT_g0) &&
 !A->getOption().matches(options::OPT_ggdb0);
 
+  // If a -gdwarf argument appeared, remember it.
+  bool EmitDwarf = false;
+  if (const Arg *A = getDwarfNArg(Args))
+EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  bool EmitCodeView = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_gcodeview))
+EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  // If the user asked for debug info but did not explicitly specify -gcodeview
+  // or -gdwarf, ask the toolchain for the default format.
+  if (!EmitCodeView && !EmitDwarf && WantDebug) {
+switch (getToolChain().getDefaultDebugFormat()) {
+case llvm::codegenoptions::DIF_CodeView:
+  EmitCodeView = true;
+  break;
+case llvm::codegenoptions::DIF_DWARF:
+  EmitDwarf = true;
+  break;
+}
+  }
+
+  // If the arguments don't imply DWARF, don't emit any debug info here.
+  if (!EmitDwarf)
+WantDebug = false;
+
   llvm::codegenoptions::DebugInfoKind DebugInfoKind =
   llvm::codegenoptions::NoDebugInfo;
 

diff  --git a/clang/test/Driver/debug-options-as.c 
b/clang/test/Driver/debug-options-as.c
index c83c0cb90431d3..cb0492177ff47a 100644
--- a/clang/test/Driver/debug-options-as.c
+++ b/clang/test/Driver/debug-options-as.c
@@ -19,12 +19,27 @@
 // GGDB0-NOT: -debug-info-kind=
 
 // Check to make sure clang with -g on a .s file gets passed.
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
+// This requires a target that defaults to DWARF.
+// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x 
assembler %s 2>&1 \
 // RUN:   | FileCheck %s
 //
 // CHECK: "-cc1as"
 // CHECK: "-debug-info-kind=constructor"
 
+// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't
+// trigger producing DWARF output.
+// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x 
assembler %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+//
+// MSVC: "-cc1as"
+// MSVC-NOT: "-debug-info-kind=constructor"
+
+// Check that clang-cl with the -Z7 option works the same, not triggering
+// any DWARF output.
+//
+// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- 
%s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+
 // Check to make sure clang with -g on a .s file gets passed 
-dwarf-debug-producer.
 // RUN: %clang -### -c -integrated-as

[llvm-branch-commits] [clang] release/19.x: [clang] Don't add DWARF debug info when assembling .s with clang-cl /Z7 (#106686) (PR #107146)

2024-09-18 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/107146
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 4cabae1 - Revert "[clang] Increase VecLib bitfield size to 4 bits in CodeGenOptions.def…"

2024-09-18 Thread via llvm-branch-commits

Author: Aaron Ballman
Date: 2024-09-18T11:33:44-04:00
New Revision: 4cabae1e59f75c9be5a1156cc785c93ece49c531

URL: 
https://github.com/llvm/llvm-project/commit/4cabae1e59f75c9be5a1156cc785c93ece49c531
DIFF: 
https://github.com/llvm/llvm-project/commit/4cabae1e59f75c9be5a1156cc785c93ece49c531.diff

LOG: Revert "[clang] Increase VecLib bitfield size to 4 bits in 
CodeGenOptions.def…"

This reverts commit 475ceca859233b387c22f13ecef581158ef36346.

Added: 


Modified: 
clang/include/clang/Basic/CodeGenOptions.def

Removed: 
clang/unittests/CodeGen/AllLibrariesFit.cpp
clang/unittests/CodeGen/EncodingDecodingTest.cpp
clang/unittests/CodeGen/SimulatedOverflowTest.cpp



diff  --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index b78ae61e6509ea..b600198998d85b 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -375,18 +375,8 @@ ENUM_CODEGENOPT(Inlining, InliningMethod, 2, 
NormalInlining)
 /// The maximum stack size a function can have to be considered for inlining.
 VALUE_CODEGENOPT(InlineMaxStackSize, 32, UINT_MAX)
 
-// Define the number of bits required for the VecLib enum
-#define VECLIB_BIT_COUNT 
(llvm::countPopulation(llvm::driver::VectorLibrary::MaxLibrary))
-
-// Ensure the VecLib bitfield has enough space for future vector libraries.
-// The number of bits is determined automatically based on the number of enum 
values.
-static_assert(static_cast(llvm::driver::VectorLibrary::MaxLibrary) <= 
(1 << VECLIB_BIT_COUNT),
-  "VecLib bitfield size is too small to accommodate all vector 
libraries.");
-  
-// VecLib definition in CodeGenOptions.def
-ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, VECLIB_BIT_COUNT, 
llvm::driver::VectorLibrary::NoLibrary)
-
-#undef VECLIB_BIT_COUNT
+// Vector functions library to use.
+ENUM_CODEGENOPT(VecLib, llvm::driver::VectorLibrary, 3, 
llvm::driver::VectorLibrary::NoLibrary)
 
 /// The default TLS model to use.
 ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)

diff  --git a/clang/unittests/CodeGen/AllLibrariesFit.cpp 
b/clang/unittests/CodeGen/AllLibrariesFit.cpp
deleted file mode 100644
index dfe63b557729ee..00
--- a/clang/unittests/CodeGen/AllLibrariesFit.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "clang/Basic/CodeGenOptions.h"
-#include "llvm/Driver/Options.h"
-#include "gtest/gtest.h"
-
-TEST(VecLibBitfieldTest, AllLibrariesFit) {
-  // We expect that all vector libraries fit in the bitfield size
-  EXPECT_LE(static_cast(llvm::driver::VectorLibrary::MaxLibrary),
-(1 << VECLIB_BIT_COUNT))
-  << "VecLib bitfield size is too small!";
- }

diff  --git a/clang/unittests/CodeGen/EncodingDecodingTest.cpp 
b/clang/unittests/CodeGen/EncodingDecodingTest.cpp
deleted file mode 100644
index 67c89ef07c428b..00
--- a/clang/unittests/CodeGen/EncodingDecodingTest.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-TEST(VecLibBitfieldTest, EncodingDecodingTest) {
-  clang::CodeGenOptions Opts;
-
-  // Test encoding and decoding for each vector library
-  for (int i = static_cast(llvm::driver::VectorLibrary::Accelerate);
-   i <= static_cast(llvm::driver::VectorLibrary::MaxLibrary); ++i) {
-
-Opts.VecLib = static_cast(i);
-
-// Encode and then decode
-llvm::driver::VectorLibrary decodedValue =
-static_cast(Opts.VecLib);
-
-EXPECT_EQ(decodedValue, Opts.VecLib)
-<< "Encoding/Decoding failed for vector library " << i;
-  }
-}

diff  --git a/clang/unittests/CodeGen/SimulatedOverflowTest.cpp 
b/clang/unittests/CodeGen/SimulatedOverflowTest.cpp
deleted file mode 100644
index acfeaf7498b6d0..00
--- a/clang/unittests/CodeGen/SimulatedOverflowTest.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// Simulate the addition of a new library without increasing the bitfield size
-enum class SimulatedVectorLibrary {
-  Accelerate = 0,
-  LIBMVEC,
-  MASSV,
-  SVML,
-  SLEEF,
-  Darwin_libsystem_m,
-  ArmPL,
-  AMDLIBM,
-  NoLibrary,
-  // Simulate new addition
-  NewLibrary,
-  MaxLibrary
-};
-
-#define SIMULATED_VECLIB_BIT_COUNT 
\
-  4 // The current bitfield size (should be 4 for 9 options)
-
-TEST(VecLibBitfieldTest, SimulatedOverflowTest) {
-  // Simulate the addition of a new library and check if the bitfield size is
-  // sufficient
-  EXPECT_LE(static_cast(SimulatedVectorLibrary::MaxLibrary),
-(1 << SIMULATED_VECLIB_BIT_COUNT))
-  << "Simulated VecLib bitfield size overflow!";
-}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread YANG Xudong via llvm-branch-commits

yxd-ym wrote:

> Do I understand correctly that a side effect of this change is to change the 
> half float ABI on loongarch from passing via FP regs to passing via GPR regs?

https://github.com/llvm/llvm-project/blob/43c9203d4946b7911d2ba69369717979900d7bc2/llvm/lib/CodeGen/TargetLoweringBase.cpp#L1356-L1374

From the logic above, the answer seems to be yes: with this patch, the
following functions return these values for loongarch:

- `softPromoteHalfType() => true`
- `useFPRegsForHalfType() => false`

https://github.com/llvm/llvm-project/blob/43c9203d4946b7911d2ba69369717979900d7bc2/llvm/include/llvm/CodeGen/TargetLowering.h#L519-L530


https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] test2 (PR #109137)

2024-09-18 Thread Kyungwoo Lee via llvm-branch-commits

https://github.com/kyulee-com created 
https://github.com/llvm/llvm-project/pull/109137

None

>From 32ae0b07276f7ccbdc5dd6675e0c46b507625449 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee 
Date: Wed, 18 Sep 2024 06:05:41 -0700
Subject: [PATCH] test2

---
 llvm/lib/LTO/LTO.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 9a01edd70e08c9..d33815ff704128 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1371,6 +1371,7 @@ SmallVector 
LTO::getRuntimeLibcallSymbols(const Triple &TT) {
 
 /// This class defines the interface to the ThinLTO backend.
 /// Test
+/// Test2
 class lto::ThinBackendProc {
 protected:
   const Config &Conf;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -722,6 +770,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 }
   }
 
+  if (opts::StaleMatchingWithPseudoProbes) {
+const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
+assert(Decoder &&
+   "If pseudo probes are in use, pseudo probe decoder should exist");
+for (const MCDecodedPseudoProbeInlineTree &TopLev :
+ Decoder->getDummyInlineRoot().getChildren())
+  TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
+  }
+

wlei-llvm wrote:

How about moving this code closer to where it's used, i.e., into
`matchWeightsByHashes`? It seems the ` ... BC.getPseudoProbeDecoder() ...` code
can be shared there.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -0,0 +1,62 @@
+## Tests stale block matching with pseudo probes.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 
--infer-stale-profile --profile-use-pseudo-probes 
--stale-matching-with-block-pseudo-probes 2>&1 | FileCheck %s
+
+# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic 
blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale)
+
+#--- main.s
+ .text
+  .globl  main# -- Begin function main
+  .p2align4, 0x90
+  .type   main,@function
+main:   # @main
+# %bb.0:
+  pushq   %rbp
+  movq%rsp, %rbp
+  movl$0, -4(%rbp)
+  .pseudoprobe15822663052811949562 1 0 0 main

wlei-llvm wrote:

I think this comment is still valid. Could you add a test case that checks the
inline tree matching feature?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -722,12 +722,28 @@ class BinaryContext {
 /// Stats for stale profile matching:
 ///   the total number of basic blocks in the profile
 uint32_t NumStaleBlocks{0};
-///   the number of matched basic blocks
-uint32_t NumMatchedBlocks{0};
+///   the number of exactly matched basic blocks
+uint32_t NumExactMatchedBlocks{0};
+///   the number of loosely matched basic blocks
+uint32_t NumLooseMatchedBlocks{0};
+///   the number of exactly pseudo probe matched basic blocks
+uint32_t NumPseudoProbeExactMatchedBlocks{0};
+///   the number of loosely pseudo probe matched basic blocks
+uint32_t NumPseudoProbeLooseMatchedBlocks{0};
+///   the number of call matched basic blocks
+uint32_t NumCallMatchedBlocks{0};
 ///   the total count of samples in the profile
 uint64_t StaleSampleCount{0};
-///   the count of matched samples
-uint64_t MatchedSampleCount{0};
+///   the count of exactly matched samples
+uint64_t ExactMatchedSampleCount{0};
+///   the count of exactly matched samples

wlei-llvm wrote:

typo: `exactly` --> `loosely`

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :

wlei-llvm wrote:

Is it possible to use `find` instead of a linear scan for the call-site
lookup?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -266,6 +305,69 @@ class StaleMatcher {
 }
 return BestBlock;
   }
+
+  /// Matches a profile block with an binary block based on pseudo probes.
+  /// Returns the best matching block (or nullptr) and whether the match is
+  /// unambiguous.
+  std::pair matchWithPseudoProbes(
+  const ArrayRef BlockPseudoProbes,
+  const ArrayRef InlineTree) const {
+if (!opts::StaleMatchingWithPseudoProbes)
+  return {nullptr, false};
+
+DenseMap FlowBlockMatchCount;
+
+auto match = [&](uint32_t NodeId, uint64_t ProbeId) -> const FlowBlock * {
+  const MCDecodedPseudoProbeInlineTree *Node =
+  InlineTreeNodeMap.getInlineTreeNode(NodeId);
+  if (!Node)
+return nullptr;
+  const MCDecodedPseudoProbe *BinaryProbe = nullptr;
+  for (const MCDecodedPseudoProbe &Probe : Node->getProbes()) {
+if (Probe.getIndex() != ProbeId)

wlei-llvm wrote:

nit: for readability, would it be possible to add a `Binary` or `Profile` 
prefix to the variable names, especially where the two are used close together? 
The same applies to other names like `Node/Tree/Probe/...`, but only where you 
think it's not too verbose :)

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase {
 YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+DenseMap Map;

wlei-llvm wrote:

add comments to explain the key and the value of the map.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -266,6 +305,69 @@ class StaleMatcher {
 }
 return BestBlock;
   }
+
+  /// Matches a profile block with an binary block based on pseudo probes.

wlei-llvm wrote:

nit: `an` -> `a`

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase {
 YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+DenseMap Map;
+
+void mapInlineTreeNode(uint32_t ProfileNode,
+   const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+  auto Res = Map.try_emplace(ProfileNode, BinaryNode);

wlei-llvm wrote:

nit: perhaps use `[[maybe_unused]]` to avoid the `(void)Res;` line

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase {
 YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+DenseMap Map;
+
+void mapInlineTreeNode(uint32_t ProfileNode,
+   const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+  auto Res = Map.try_emplace(ProfileNode, BinaryNode);
+  assert(Res.second &&
+ "Duplicate mapping from profile node index to binary inline 
tree");
+  (void)Res;
+}
+
+  public:
+/// Returns matched InlineTree * for a given profile inline_tree_id.
+const MCDecodedPseudoProbeInlineTree *
+getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
+  auto It = Map.find(ProfileInlineTreeNodeId);
+  if (It == Map.end())
+return nullptr;
+  return It->second;
+}
+
+// Match up YAML inline tree with binary inline tree.
+// \p GetRootCallback is invoked for matching up the first YAML inline tree
+// node and has the following signature:
+// const MCDecodedPseudoProbeInlineTree *GetRootCallback(uint64_t RootGUID)
+void matchInlineTrees(

wlei-llvm wrote:

Maybe rename to `readAndMatchInlineTrees` or `buildAndMatchInlineTrees`? I 
see that a big part of the code reads and builds the trees from YAML. Or maybe 
split it into two functions?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase {
 YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+DenseMap Map;
+
+void mapInlineTreeNode(uint32_t ProfileNode,
+   const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+  auto Res = Map.try_emplace(ProfileNode, BinaryNode);
+  assert(Res.second &&
+ "Duplicate mapping from profile node index to binary inline 
tree");
+  (void)Res;
+}
+
+  public:
+/// Returns matched InlineTree * for a given profile inline_tree_id.
+const MCDecodedPseudoProbeInlineTree *
+getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
+  auto It = Map.find(ProfileInlineTreeNodeId);
+  if (It == Map.end())
+return nullptr;
+  return It->second;
+}
+
+// Match up YAML inline tree with binary inline tree.

wlei-llvm wrote:

Could you add more comments to explain the output of this function and how it 
will be used for the matching later?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());

wlei-llvm wrote:

nit: add assertion msg

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -96,6 +101,42 @@ class YAMLProfileReader : public ProfileReaderBase {
 YamlBFAdjacencyMap;
   };
 
+  // A class for matching inline tree nodes between profile and binary.
+  class InlineTreeNodeMapTy {
+DenseMap Map;
+
+void mapInlineTreeNode(uint32_t ProfileNode,

wlei-llvm wrote:

nit: `ProfileNode` --> `ProfileNodeIdx`

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -482,6 +584,30 @@ matchWeightsByHashes(BinaryContext &BC,
   << Twine::utohexstr(BB->getHash()) << "\n");
   }
   StaleMatcher Matcher;
+  // Collects function pseudo probes for use in the StaleMatcher.
+  if (opts::StaleMatchingWithPseudoProbes) {
+const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
+assert(Decoder &&
+   "If pseudo probes are in use, pseudo probe decoder should exist");
+const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
+const uint64_t FuncAddr = BF.getAddress();
+auto GetTopLevelNodeByGUID =

wlei-llvm wrote:

Can we move this into the `matchInlineTrees`? I didn't find any other usages in 
this function.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :
+   Parent->getChildren()) {
+if (Child.Guid == GUID) {
+  if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
+Cur = &Child;
+  break;
+}
+  }
+}
+if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash)

wlei-llvm wrote:

Why does this need to satisfy 
`Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash`? Should this be an 
assertion?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/109185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin ready_for_review 
https://github.com/llvm/llvm-project/pull/109185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin ready_for_review 
https://github.com/llvm/llvm-project/pull/109184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-pgo

Author: Mircea Trofin (mtrofin)


Changes

The `step` instrumentation shouldn't be treated, during use, like an 
`increment`. The latter is treated as a BB ID. The step isn't that; it's more 
of a type of value profiling. We need to distinguish between the two when really 
looking for BB IDs (== increments), and handle `step`s appropriately. In 
particular, we need to know when to elide them because `select`s may get elided 
by function cloning, if the condition of the select is statically known.

---
Full diff: https://github.com/llvm/llvm-project/pull/109185.diff


5 Files Affected:

- (modified) llvm/include/llvm/Analysis/CtxProfAnalysis.h (+3) 
- (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+9) 
- (modified) llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
(+43-2) 
- (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+9-1) 
- (added) llvm/test/Analysis/CtxProfAnalysis/handle-select.ll (+76) 


``diff
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public 
AnalysisInfoMixin {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst 
*CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+if (auto *Step = dyn_cast(Prev))
+  return Step;
+  return nullptr;
+}
+
 template 
 static void preorderVisit(ProfilesTy &Profiles,
   function_ref Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..30bb251364fdef 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
+uint64_t getCount() const { return *Count;}
+
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
 setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
 return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+for (const auto &BB : F) {
+  if (getBBInfo(BB).getCount() > 0) {
+for (const auto &I : BB) {
+  if (const auto *SI = dyn_cast(&I)) {
+if (!SI->getMetadata(LLVMContext::MD_prof)) {
+  return false;
+}
+  }
+}
+  }
+}
+return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,33 @@ class ProfileAnnotator final {
 PB.addEntryCount(Counters[0]);
 
 for (auto &BB : F) {
+  const auto &BBInfo = getBBInfo(BB);
+  if (BBInfo.getCount() > 0) {
+for (auto &I : BB) {
+  if (auto *SI = dyn_cast(&I)) {
+if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+  auto Index = Step->getIndex()->getZExtValue();
+  assert(Index < Counters.size() &&
+"The index of the step instruction must be inside the "
+"counters vector by "
+"construction - tripping this assertion indicates a bug in 
"
+"how the contextual profile is managed by IPO transforms");
+  auto TotalCount = BBInfo.getCount();
+  auto TrueCount = Counters[Index];
+  auto FalseCount =
+  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+  setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+  std::max(TrueCount, FalseCount));
+}
+  }
+}
+  }
   if (succ_size(&BB) < 2)
 continue;
   auto *Term = BB.getTerminator();
   SmallVector EdgeCounts(Term->getNumSuccessors(), 0);
   uint64_t MaxCount = 0;
-  const auto &BBInfo = getBBInfo(BB);
+
   for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdge

[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Mircea Trofin (mtrofin)


Changes

Reinforcing properties ensured at instrumentation time.

---
Full diff: https://github.com/llvm/llvm-project/pull/109184.diff


1 Files Affected:

- (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+9-2) 


``diff
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index c29709b613410e..3df72983862d98 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module 
&M,
 }
 
 InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
-  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode())
+  if (!InstrProfCallsite::canInstrumentCallsite(CB))
+return nullptr;
+  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
 if (auto *IPC = dyn_cast(Prev))
   return IPC;
+assert(!isa(Prev) &&
+   "didn't expect to find another call, that's not the callsite "
+   "instrumentation, before an instrumentable callsite");
+  }
   return nullptr;
 }
 
 InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   for (auto &I : BB)
 if (auto *Incr = dyn_cast(&I))
-  return Incr;
+  if (!isa(&I))
+return Incr;
   return nullptr;
 }
 

``




https://github.com/llvm/llvm-project/pull/109184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/109184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` (PR #109185)

2024-09-18 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff 987562aab1c409b62b1a4c5d6d8566ad812b8313 
09642a4889da1d0e10f54b17b84e32dae5c8557e --extensions cpp,h -- 
llvm/include/llvm/Analysis/CtxProfAnalysis.h 
llvm/lib/Analysis/CtxProfAnalysis.cpp 
llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
llvm/lib/Transforms/Utils/InlineFunction.cpp
``





View the diff from clang-format here.


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 30bb251364..3a3c47e90a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,7 +154,7 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
-uint64_t getCount() const { return *Count;}
+uint64_t getCount() const { return *Count; }
 
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
@@ -347,11 +347,12 @@ public:
   if (auto *SI = dyn_cast(&I)) {
 if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
   auto Index = Step->getIndex()->getZExtValue();
-  assert(Index < Counters.size() &&
-"The index of the step instruction must be inside the "
-"counters vector by "
-"construction - tripping this assertion indicates a bug in 
"
-"how the contextual profile is managed by IPO transforms");
+  assert(
+  Index < Counters.size() &&
+  "The index of the step instruction must be inside the "
+  "counters vector by "
+  "construction - tripping this assertion indicates a bug in "
+  "how the contextual profile is managed by IPO transforms");
   auto TotalCount = BBInfo.getCount();
   auto TrueCount = Counters[Index];
   auto FalseCount =

``




https://github.com/llvm/llvm-project/pull/109185
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [nfc][ctx_prof] Don't try finding callsite annotation for un-instrumentable callsites (PR #109184)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109184

>From 152a2a965e4c500f207b960293578c3715ffd903 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:03:30 -0700
Subject: [PATCH] [ctx_prof] Don't try finding callsite annotation for
 un-instrumentable callsites

---
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index c29709b613410e..3df72983862d98 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -234,16 +234,23 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module 
&M,
 }
 
 InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
-  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode())
+  if (!InstrProfCallsite::canInstrumentCallsite(CB))
+return nullptr;
+  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
 if (auto *IPC = dyn_cast(Prev))
   return IPC;
+assert(!isa(Prev) &&
+   "didn't expect to find another call, that's not the callsite "
+   "instrumentation, before an instrumentable callsite");
+  }
   return nullptr;
 }
 
 InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   for (auto &I : BB)
 if (auto *Incr = dyn_cast(&I))
-  return Incr;
+  if (!isa(&I))
+return Incr;
   return nullptr;
 }
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :
+   Parent->getChildren()) {
+if (Child.Guid == GUID) {
+  if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
+Cur = &Child;
+  break;
+}
+  }
+}
+if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash)

wlei-llvm wrote:

I see. Please add a comment explaining that this handles the stale-profile 
case: we don't match probes whose function checksum is mismatched.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From bcac23616ef0b321e4e7c48e4f08067241807ea7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-09-18 Thread Lei Wang via llvm-branch-commits


@@ -587,6 +590,51 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext 
&BC) {
   return MatchedWithCallGraph;
 }
 
+void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees(
+const MCPseudoProbeDecoder &Decoder,
+const yaml::bolt::PseudoProbeDesc &YamlPD,
+const std::vector &YamlInlineTree,
+llvm::function_ref
+GetRootCallback) {
+
+  // Match inline tree nodes by GUID, checksum, parent, and call site.
+  uint32_t ParentId = 0;
+  uint32_t PrevGUIDIdx = 0;
+  uint32_t Index = 0;
+  for (const yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) {
+uint64_t GUIDIdx = InlineTreeNode.GUIDIndex;
+if (GUIDIdx)
+  PrevGUIDIdx = GUIDIdx;
+else
+  GUIDIdx = PrevGUIDIdx;
+assert(GUIDIdx < YamlPD.GUID.size());
+assert(GUIDIdx < YamlPD.GUIDHashIdx.size());
+uint64_t GUID = YamlPD.GUID[GUIDIdx];
+uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx];
+assert(HashIdx < YamlPD.Hash.size());
+uint64_t Hash = YamlPD.Hash[HashIdx];
+uint32_t InlineTreeNodeId = Index++;
+ParentId += InlineTreeNode.ParentIndexDelta;
+uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+if (!InlineTreeNodeId) {
+  Cur = GetRootCallback(GUID);
+} else if (const MCDecodedPseudoProbeInlineTree *Parent =
+   getInlineTreeNode(ParentId)) {
+  for (const MCDecodedPseudoProbeInlineTree &Child :

wlei-llvm wrote:

I see, I just remembered that it has been changed to a vector rather than a map, 
so a direct lookup would require pre-sorting. OK, I'm good with the current 
version, assuming the perf here is fine.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

2024-09-18 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109185

>From 7e92883f5ac815136d891b8c15728af1e7086df7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 17 Sep 2024 22:00:42 -0700
Subject: [PATCH] [ctx_prof] Handle `select`

---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  3 +
 llvm/lib/Analysis/CtxProfAnalysis.cpp |  9 +++
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 46 ++-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 10 ++-
 .../Analysis/CtxProfAnalysis/handle-select.ll | 76 +++
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/handle-select.ll

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public 
AnalysisInfoMixin {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp 
b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst 
*CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
+  return Step;
+  return nullptr;
+}
+
 template 
 static void preorderVisit(ProfilesTy &Profiles,
   function_ref Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..3a3c47e90a168a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
 bool hasCount() const { return Count.has_value(); }
 
+uint64_t getCount() const { return *Count; }
+
 bool trySetSingleUnknownInEdgeCount() {
   if (UnknownCountInEdges == 1) {
 setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
 return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+for (const auto &BB : F) {
+  if (getBBInfo(BB).getCount() > 0) {
+for (const auto &I : BB) {
+  if (const auto *SI = dyn_cast<SelectInst>(&I)) {
+if (!SI->getMetadata(LLVMContext::MD_prof)) {
+  return false;
+}
+  }
+}
+  }
+}
+return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,34 @@ class ProfileAnnotator final {
 PB.addEntryCount(Counters[0]);
 
 for (auto &BB : F) {
+  const auto &BBInfo = getBBInfo(BB);
+  if (BBInfo.getCount() > 0) {
+for (auto &I : BB) {
+  if (auto *SI = dyn_cast<SelectInst>(&I)) {
+if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+  auto Index = Step->getIndex()->getZExtValue();
+  assert(
+  Index < Counters.size() &&
+  "The index of the step instruction must be inside the "
+  "counters vector by "
+  "construction - tripping this assertion indicates a bug in "
+  "how the contextual profile is managed by IPO transforms");
+  auto TotalCount = BBInfo.getCount();
+  auto TrueCount = Counters[Index];
+  auto FalseCount =
+  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+  setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+  std::max(TrueCount, FalseCount));
+}
+  }
+}
+  }
   if (succ_size(&BB) < 2)
 continue;
   auto *Term = BB.getTerminator();
   SmallVector EdgeCounts(Term->getNumSuccessors(), 0);
   uint64_t MaxCount = 0;
-  const auto &BBInfo = getBBInfo(BB);
+
   for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < 
Size;
++SuccIdx) {
 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +382,15 @@ class ProfileAnnotator final {
 setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
 }
 assert(allCountersAreAssigned() &&
-   "Expected all counters have been ass

[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-18 Thread Nikita Popov via llvm-branch-commits

nikic wrote:

Do I understand correctly that a side effect of this change is to change the 
half float ABI on loongarch from passing via FP regs to passing via GPR regs?

https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >