[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96739 These have been replaced with atomicrmw. >From e95c252f91dea9dbb89711eb3b851fcfe6555f7c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 11:46:15 +0200 Subject: [PATCH] AMDGPU: Remove ds_fmin/ds_fmax intrinsics These have been replaced with atomicrmw. --- llvm/docs/ReleaseNotes.rst| 5 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 - llvm/lib/IR/AutoUpgrade.cpp | 8 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 -- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +- llvm/test/Bitcode/amdgcn-atomic.ll| 52 +++ 9 files changed, 65 insertions(+), 86 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I
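For readers following the migration described in the release note above, a minimal illustration of the replacement form may help. This sketch is not part of the patch; the `monotonic` ordering and `"agent"` sync scope are placeholder assumptions standing in for the removed intrinsic's explicit ordering/scope/volatile operands. It emits the `atomicrmw fmin` equivalent of a former `llvm.amdgcn.ds.fmin` call on an LDS (addrspace(3)) pointer, i.e. textual IR of the shape `atomicrmw fmin ptr addrspace(3) %p, float %v syncscope("agent") monotonic`.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Emit the atomicrmw that replaces a removed llvm.amdgcn.ds.fmin call.
// Ptr is the addrspace(3) pointer operand, Val the float/double value.
static Value *emitLDSFMin(IRBuilder<> &B, Value *Ptr, Value *Val) {
  return B.CreateAtomicRMW(AtomicRMWInst::FMin, Ptr, Val, MaybeAlign(),
                           AtomicOrdering::Monotonic,
                           B.getContext().getOrInsertSyncScopeID("agent"));
}
```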
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96739). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96739** 👈 (this PR)
* **#96738**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes These have been replaced with atomicrmw. --- Full diff: https://github.com/llvm/llvm-project/pull/96739.diff 9 Files Affected: - (modified) llvm/docs/ReleaseNotes.rst (+5) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-14) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+6-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (-32) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+1-19) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-14) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+52) ``diff diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I) -MI.removeOperand(I); - - MI.removeOperand(1); // Remove the intrinsic ID. - Observer.changedInstr(MI); - return true; -} - // TODO: Fix pointer
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96739 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; jayfoad wrote: What does "AtomicMemoryAtomic" mean? https://github.com/llvm/llvm-project/pull/96443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443 >From 78edc216186854e3320ec5e16b78a26af19dee66 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 16:44:08 +0200 Subject: [PATCH 1/3] AMDGPU: Add subtarget feature for global atomic fadd denormal support Not sure what the behavior for gfx90a is. The SPG says it always flushes. The instruction documentation says it does not. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 -- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 56ec5e9c4cfc2..6b212e1b2af03 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureMemoryAtomicFaddF32DenormalSupport + : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", + "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "true", + "global/flat/buffer atomic fadd for float supports denormal handling" +>; + def FeatureAgentScopeFineGrainedRemoteMemoryAtomics : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", "HasAgentScopeFineGrainedRemoteMemoryAtomics", @@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureAgentScopeFineGrainedRemoteMemoryAtomics + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1631,7 +1639,9 @@ def FeatureISAVersion12 : FeatureSet< FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, - Feature1_5xVGPRs]>; + Feature1_5xVGPRs, + FeatureMemoryAtomicFaddF32DenormalSupport]>; + ]>; def FeatureISAVersion12_Generic: FeatureSet< !listconcat(FeatureISAVersion12.Features, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 9e2a316a9ed28..db0b2b67a0388 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target's flat, global, and buffer atomic fadd for + /// float supports denormal handling. + bool hasMemoryAtomicFaddF32DenormalSupport() const { +return HasAtomicMemoryAtomicFaddF32DenormalSupport; + } + /// \return true if atomic operations targeting fine-grained memory work /// correctly at device scope, in allocations in host or peer PCIe device /// memory. >From 47017c26844bc49a9842b2c40056392184119943 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 12:10:37 +0200 Subject: [PATCH 2/3] Add to gfx11. RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm" thought I'm not sure I trust it. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b212e1b2af03..39a1d629a4aea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1547,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard]>; + FeatureVcmpxPermlaneHazard, + FeatureMemoryAtomicFaddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen @@ -1640,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, Feature1_5xVGPRs, - FeatureMemoryAtomicFaddF32DenormalSupport]>; + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion12_Generic: FeatureSet< >From 23ec97c971fb5a93a39908da6e652899830dcb4e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 11:30:51 +0200 Subject: [PATCH 3/3] Rename --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 39a1d629a4aea..34c6f6ff19bff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -78
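The accessor `hasMemoryAtomicFaddF32DenormalSupport()` is only defined in this patch; consumers arrive later in the stack. As a hedged sketch of the kind of query a lowering decision might make (illustrative only, assuming the AMDGPU target-internal `GCNSubtarget.h` header; the helper name and surrounding logic are not taken from the patch):

```cpp
#include "GCNSubtarget.h"          // AMDGPU target-internal header
#include "llvm/IR/Instructions.h"

// True when an f32 atomicrmw fadd can use the native memory atomic without
// flushing denormals; other operations/types are unaffected by the feature.
static bool faddF32KeepsDenormals(const llvm::GCNSubtarget &ST,
                                  const llvm::AtomicRMWInst &RMW) {
  if (RMW.getOperation() != llvm::AtomicRMWInst::FAdd ||
      !RMW.getType()->isFloatTy())
    return true;
  return ST.hasMemoryAtomicFaddF32DenormalSupport();
}
```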
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From db519863301bd95fe0d50b56d74584b0f7f2fbf6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. 
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
[llvm-branch-commits] [llvm] [RISCV] Support select optimization (PR #80124)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/80124 >From e3fb1fe7bdd4b7c24f9361c4d14dd1206fc8c067 Mon Sep 17 00:00:00 2001 From: wangpc Date: Sun, 18 Feb 2024 11:12:16 +0800 Subject: [PATCH 1/2] Move after addIRPasses Created using spr 1.3.4 --- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index fdf1c023fff87..7a26e1956424c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -450,15 +450,15 @@ void RISCVPassConfig::addIRPasses() { if (EnableLoopDataPrefetch) addPass(createLoopDataPrefetchPass()); -if (EnableSelectOpt && getOptLevel() == CodeGenOptLevel::Aggressive) - addPass(createSelectOptimizePass()); - addPass(createRISCVGatherScatterLoweringPass()); addPass(createInterleavedAccessPass()); addPass(createRISCVCodeGenPreparePass()); } TargetPassConfig::addIRPasses(); + + if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt) +addPass(createSelectOptimizePass()); } bool RISCVPassConfig::addPreISel() { >From 5d5398596dc30c47c67572ec20137fb3f9434940 Mon Sep 17 00:00:00 2001 From: wangpc Date: Wed, 21 Feb 2024 21:21:28 +0800 Subject: [PATCH 2/2] Fix test Created using spr 1.3.4 --- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 62c1af52e6c20..8b52e3fe7b2f1 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -34,15 +34,6 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Data Prefetch -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Optimize selects -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: RISC-V gather/scatter lowering ; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: RISC-V CodeGenPrepare @@ -77,6 +68,15 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Optimize selects +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
mjklemm wrote: @skatrak Is this ready for final review? https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
https://github.com/hokein edited https://github.com/llvm/llvm-project/pull/96475 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96760). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96760** 👈 (this PR)
* **#96759**
* **#96444**
* **#96443**
* **#96442**
* **#95930**
* **#95929**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96760
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96759). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96760**
* **#96759** 👈 (this PR)
* **#96444**
* **#96443**
* **#96442**
* **#95930**
* **#95929**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96759
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
hokein wrote: I have separated the refactoring change into #96758. This PR now focuses only on the assignment support. https://github.com/llvm/llvm-project/pull/96475 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Consider the new atomic metadata when choosing to expand as cmpxchg instead. --- Patch is 1.01 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96759.diff 13 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+53-30) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll (+203-130) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll (+203-130) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+148-298) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+148-298) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll (+191-388) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll (+191-388) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+634-1766) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+634-1766) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll (+1786-266) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll (+1294-202) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll (+888-128) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+642-96) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fc34277c580a8..11ebfe7511f7b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16093,6 +16093,34 @@ static bool isBFloat2(Type *Ty) { return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy(); } +/// \returns true if it's valid to emit a native instruction for \p RMW, based +/// on the properties of the target memory. +static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, +const AtomicRMWInst *RMW, +bool HasSystemScope) { + // The remote/fine-grained access logic is different from the integer + // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support, + // fine-grained access does not work, even for a device local allocation. + // + // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local + // allocations work. + if (HasSystemScope) { +if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() && +RMW->hasMetadata("amdgpu.no.remote.memory")) + return true; + } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics()) +return true; + + if (RMW->hasMetadata("amdgpu.no.fine.grained.memory")) +return true; + + // TODO: Auto-upgrade this attribute to the metadata in function body and stop + // checking it. + return RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsBool(); +} + TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { unsigned AS = RMW->getPointerAddressSpace(); @@ -16236,37 +16264,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { Type *Ty = RMW->getType(); // LDS float and double fmin/fmax were always supported. -if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy())) - return AtomicExpansionKind::None; - -if (unsafeFPAtomicsDisabled(RMW->getFunction())) - return AtomicExpansionKind::CmpXChg; - -// Always expand system scope fp atomics. 
-if (HasSystemScope) - return AtomicExpansionKind::CmpXChg; +if (AS == AMDGPUAS::LOCAL_ADDRESS) { + return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None + : AtomicExpansionKind::CmpXChg; +} -// For flat and global cases: -// float, double in gfx7. Manual claims denormal support. -// Removed in gfx8. -// float, double restored in gfx10. -// double removed again in gfx11, so only f32 for gfx11/gfx12. -// -// For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no -// f32. -// -// FIXME: Check scope and fine grained memory -if (AS == AMDGPUAS::FLAT_ADDRESS) { - if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); - if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); -} else if (AMDGPU::isExtendedGlobalAddrSpace(AS) || - AS == AMDGPUAS::BUFFER_FAT_POINTER) { - if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); - if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy()) -return ReportUnsafeHWInst(Ato
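The `globalMemoryFPAtomicIsLegal` helper above keys off the `amdgpu.no.remote.memory` and `amdgpu.no.fine.grained.memory` metadata attached to the `atomicrmw` instruction. A hedged sketch of how a frontend or pass might attach that metadata when it knows the accessed allocation is neither remote nor fine-grained (not code from this patch; the empty `MDNode` payload mirrors the usual presence-only metadata convention):

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Mark an atomicrmw as not touching remote or fine-grained memory so the
// AMDGPU backend may pick the native fmin/fmax instruction instead of
// expanding it to a cmpxchg loop.
static void markNoRemoteNoFineGrained(AtomicRMWInst *RMW) {
  LLVMContext &Ctx = RMW->getContext();
  MDNode *Empty = MDNode::get(Ctx, {});
  RMW->setMetadata("amdgpu.no.remote.memory", Empty);
  RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
}
```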
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96760 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This is the most complex atomicrmw support case. Note we don't have accurate remarks for all of the cases, which I'm planning on fixing in a later change with more precise wording. Continue respecting amdgpu-unsafe-fp-atomics until it's eventual removal. Also seems to fix a few cases not interpreting amdgpu-unsafe-fp-atomics appropriately aaggressively. --- Patch is 1.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96760.diff 37 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+69-81) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+61-178) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll (+420-101) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll (+262-17) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll (+110-39) - (modified) llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll (+51-69) - (modified) llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll (+11-9) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll (+236-87) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+736-958) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+13-50) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+59-156) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll (+73-28) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd-wrong-subtarget.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll (+746-232) - (modified) llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+78-182) - (modified) llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll (+53-51) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll (+52-676) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll (+182-1186) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll (+4-52) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+20-175) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll (+30-30) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll (+10-22) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll (+45-45) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll (+1471-3143) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll (+3-3) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll (+33-223) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll (+52-4) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll 
(+59-201) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll (+13-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 11ebfe7511f7b..f9b5aea10 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16037,26 +16037,15 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, SNaN, Depth); } -#if 0 -// FIXME: This should be checked before unsafe fp atomics are enabled -// Global FP atomic instructions have a hardcoded FP mode and do not support -// FP32 denormals, and only support v2f16 denormals. -static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) { - const fltSemantics &Flt = RMW->getType()->getScalarType()->getFltSemantics(); - auto DenormMode = RMW->getParent()->getParent()->getDenormalMode(Flt); - if (&Flt == &APFloat::IEEEsingle()) -return DenormMode == DenormalMode::getPreserveSign(); - return DenormMode == DenormalMode::getIEEE(); -} -#endif +// On older subtargets, global FP atomic instructions have a hardcoded FP mode +// and do not support FP32 denormals, and only support v2f16/f64 denormals. +static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW) { + if (RMW
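The new `atomicIgnoresDenormalModeOrFPModeIsFTZ` helper replaces the previously commented-out check: a hardware atomic that flushes f32 denormals is acceptable when the function already runs with flush-to-zero denormal handling for that type. A rough sketch of the idea (paraphrasing the removed `#if 0` block rather than quoting the new code; the helper name is invented for illustration):

```cpp
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// True when the function's denormal mode for the atomic's float type is
// already preserve-sign (FTZ), so a flushing hardware atomic does not change
// observable denormal behaviour.
static bool fpModeAlreadyFlushes(const llvm::AtomicRMWInst *RMW) {
  const llvm::fltSemantics &Flt =
      RMW->getType()->getScalarType()->getFltSemantics();
  llvm::DenormalMode Mode = RMW->getFunction()->getDenormalMode(Flt);
  return Mode == llvm::DenormalMode::getPreserveSign();
}
```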
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 2b5d1fb889fca7287858db0791bfecc1465f23e1 43ffbc27fe7d128586b54dbd33fd676532233032 --extensions 'c,cpp,h' -- clang/lib/Sema/CheckExprLifetime.cpp clang/lib/Sema/CheckExprLifetime.h clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaInit.cpp clang/test/Parser/compound_literal.c clang/test/SemaCXX/attr-lifetimebound.cpp clang/test/SemaCXX/warn-dangling-local.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 73b3fd2d3a..bbca1b209f 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -42,21 +42,20 @@ enum LifetimeKind { }; using LifetimeResult = llvm::PointerIntPair; -} +} // namespace /// Determine the declaration which an initialized entity ultimately refers to, /// for the purpose of lifetime-extending a temporary bound to a reference in /// the initialization of \p Entity. -static LifetimeResult getEntityLifetime( -const InitializedEntity *Entity, -const InitializedEntity *InitField = nullptr) { +static LifetimeResult +getEntityLifetime(const InitializedEntity *Entity, + const InitializedEntity *InitField = nullptr) { // C++11 [class.temporary]p5: switch (Entity->getKind()) { case InitializedEntity::EK_Variable: // The temporary [...] persists for the lifetime of the reference return {Entity, LK_Extended}; - case InitializedEntity::EK_Member: // For subobjects, we look at the complete object. if (Entity->getParent()) @@ -90,7 +89,8 @@ static LifetimeResult getEntityLifetime( return {nullptr, LK_FullExpression}; case InitializedEntity::EK_TemplateParameter: -// FIXME: This will always be ill-formed; should we eagerly diagnose it here? +// FIXME: This will always be ill-formed; should we eagerly diagnose it +// here? return {nullptr, LK_FullExpression}; case InitializedEntity::EK_Result: @@ -171,7 +171,7 @@ enum ReferenceKind { /// * A DeclRefExpr whose declaration is a local. /// * An AddrLabelExpr. /// * A BlockExpr for a block with captures. -using Local = Expr*; +using Local = Expr *; /// Expressions we stepped over when looking for the local state. Any steps /// that would inhibit lifetime extension or take us out of subexpressions of @@ -359,9 +359,9 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, } else if (auto *OCE = dyn_cast(Call)) { FunctionDecl *Callee = OCE->getDirectCallee(); if (Callee && Callee->isCXXInstanceMember() && -shouldTrackImplicitObjectArg(cast(Callee))) +shouldTrackImplicitObjectArg(cast(Callee))) VisitPointerArg(Callee, OCE->getArg(0), - !Callee->getReturnType()->isReferenceType()); + !Callee->getReturnType()->isReferenceType()); return; } else if (auto *CE = dyn_cast(Call)) { FunctionDecl *Callee = CE->getDirectCallee(); @@ -419,7 +419,7 @@ static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { const FunctionDecl *Callee; - ArrayRef Args; + ArrayRef Args; if (auto *CE = dyn_cast(Call)) { Callee = CE->getDirectCallee(); @@ -610,7 +610,7 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, break; } - // FIXME: Visit the left-hand side of an -> or ->*. +// FIXME: Visit the left-hand side of an -> or ->*. 
default: break; @@ -632,7 +632,8 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path, // Step into CXXDefaultInitExprs so we can diagnose cases where a // constructor inherits one as an implicit mem-initializer. if (auto *DIE = dyn_cast(Init)) { - Path.push_back({IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()}); + Path.push_back( + {IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()}); Init = DIE->getExpr(); } @@ -657,21 +658,23 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path, return visitLocalsRetainedByReferenceBinding( Path, Init, RK_ReferenceBinding, [&](IndirectLocalPath &Path, Local L, ReferenceKind RK) -> bool { - if (auto *DRE = dyn_cast(L)) { -auto *VD = dyn_cast(DRE->getDecl()); -if (VD && VD->getType().isConstQualified() && VD->getInit() && -!isVarOnPath(Path, VD)) { - Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD}); - visitLocalsRetainedByInitializer(Path, VD->getInit(), Visit, true, -
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
skatrak wrote: > @skatrak Is this ready for final review? Yes, it may need very minor changes when rebasing due to recent additions to the main branch, but this should be it for the most part. https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Clause-based OpenMP operation definition (PR #92523)
skatrak wrote: > > I guess fixing byref is on me (#92244). Unfortunately I can't work on this > > immediately so I won't hold up this PR for it. > > @skatrak does #96215 cover everything you need? Thank you for the heads-up, that certainly helps. My plan is to update the PR stack after yours and one or two other PRs that conflict with this change land and hopefully by then all patches in this stack should be reviewed/approved to be merged. https://github.com/llvm/llvm-project/pull/92523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] d1ed32e - Revert "[clang][dataflow] Teach `AnalysisASTVisitor` that `typeid()` can be e…"
Author: martinboehme Date: 2024-06-26T15:40:06+02:00 New Revision: d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b URL: https://github.com/llvm/llvm-project/commit/d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b DIFF: https://github.com/llvm/llvm-project/commit/d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b.diff LOG: Revert "[clang][dataflow] Teach `AnalysisASTVisitor` that `typeid()` can be e…" This reverts commit dfe80a73223edff5c53f8be7925d302883cb40bc. Added: Modified: clang/include/clang/Analysis/FlowSensitive/ASTOps.h clang/unittests/Analysis/FlowSensitive/TransferTest.cpp Removed: diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h index f9c923a36ad22..925b99af9141a 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h +++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h @@ -113,11 +113,7 @@ class AnalysisASTVisitor : public RecursiveASTVisitor { // nevertheless it appears in the Clang CFG, so we don't exclude it here. bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; } bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; } - bool TraverseCXXTypeidExpr(CXXTypeidExpr *TIE) { -if (TIE->isPotentiallyEvaluated()) - return RecursiveASTVisitor::TraverseCXXTypeidExpr(TIE); -return true; - } + bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; } bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) { return true; } diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 39e7001393e5e..e743eefa5d458 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1637,49 +1637,6 @@ TEST(TransferTest, StructModeledFieldsWithAccessor) { }); } -TEST(TransferTest, StructModeledFieldsInTypeid) { - // Test that we model fields mentioned inside a `typeid()` expression only if - // that expression is potentially evaluated -- i.e. if the expression inside - // `typeid()` is a glvalue of polymorphic type (see - // `CXXTypeidExpr::isPotentiallyEvaluated()` and [expr.typeid]p3). - std::string Code = R"( -// Definitions needed for `typeid`. -namespace std { - class type_info {}; - class bad_typeid {}; -} // namespace std - -struct NonPolymorphic {}; - -struct Polymorphic { - virtual ~Polymorphic() = default; -}; - -struct S { - NonPolymorphic *NonPoly; - Polymorphic *Poly; -}; - -void target(S &s) { - typeid(*s.NonPoly); - typeid(*s.Poly); - // [[p]] -} - )"; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { -const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); -auto &SLoc = getLocForDecl(ASTCtx, Env, "s"); -std::vector Fields; -for (auto [Field, _] : SLoc.children()) - Fields.push_back(Field); -EXPECT_THAT(Fields, -UnorderedElementsAre(findValueDecl(ASTCtx, "Poly"))); - }); -} - TEST(TransferTest, StructModeledFieldsWithComplicatedInheritance) { std::string Code = R"( struct Base1 { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
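For context on the behaviour the reverted test exercised: `typeid` evaluates its operand only when that operand is a glvalue of polymorphic class type ([expr.typeid]p3); otherwise the operand is unevaluated. A standalone illustration (not code from the commit):

```cpp
#include <typeinfo>

struct NonPoly {};
struct Poly { virtual ~Poly() = default; };

void probe(NonPoly *NP, Poly *P) {
  (void)typeid(*NP); // unevaluated operand: *NP is never dereferenced
  (void)typeid(*P);  // potentially evaluated: dereferences P and may throw
                     // std::bad_typeid if P is null
}
```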
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
https://github.com/tblah approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize --split-input-file | FileCheck %s + +func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x8x8x2xf32> { + %0 = tensor.empty() : tensor<2x8x8x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x8x8x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x8x8x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + return %2 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d +// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>) -> tensor<2x8x8x2xf32> { +// CHECK:%[[S0:.*]] = tensor.empty() : tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S1:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG2]] : tensor<1xf32>) outs(%[[S0]] : tensor<2x8x8x2xf32>) { +// CHECK-NEXT: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): +// CHECK-NEXT: linalg.yield %[[IN]] : f32 +// CHECK-NEXT: } -> tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<2x2x6x6x5x2xf32>) -> tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<2x2x6x6x2x5xf32>) -> tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x5x2xf32> into tensor<144x5x2xf32> +// CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x2x5xf32> into tensor<144x2x5xf32> +// CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<144x2x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<144x2x5xf32>, tensor<144x5x2xf32>) outs(%[[S6]] : tensor<144x2x2xf32>) -> tensor<144x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1, 2, 3], [4], [5]] output_shape [2, 2, 6, 6, 2, 2] : tensor<144x2x2xf32> into tensor<2x2x6x6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<2x2x6x6x2x2xf32>) outs(%[[S1]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: return %[[S8]] : tensor<2x8x8x2xf32> +// CHECK-NEXT: } + +// - + +func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, 
%arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x9x9x2xf32> { + %0 = tensor.empty() : tensor<2x9x9x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x9x9x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x9x9x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x11x11x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> + return %2 : tensor<2x9x9x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d_unaligned
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize --split-input-file | FileCheck %s + +func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x8x8x2xf32> { + %0 = tensor.empty() : tensor<2x8x8x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x8x8x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x8x8x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + return %2 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d +// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>) -> tensor<2x8x8x2xf32> { +// CHECK:%[[S0:.*]] = tensor.empty() : tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S1:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG2]] : tensor<1xf32>) outs(%[[S0]] : tensor<2x8x8x2xf32>) { +// CHECK-NEXT: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): +// CHECK-NEXT: linalg.yield %[[IN]] : f32 +// CHECK-NEXT: } -> tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<2x2x6x6x5x2xf32>) -> tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<2x2x6x6x2x5xf32>) -> tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x5x2xf32> into tensor<144x5x2xf32> +// CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x2x5xf32> into tensor<144x2x5xf32> +// CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<144x2x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<144x2x5xf32>, tensor<144x5x2xf32>) outs(%[[S6]] : tensor<144x2x2xf32>) -> tensor<144x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1, 2, 3], [4], [5]] output_shape [2, 2, 6, 6, 2, 2] : tensor<144x2x2xf32> into tensor<2x2x6x6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<2x2x6x6x2x2xf32>) outs(%[[S1]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: return %[[S8]] : tensor<2x8x8x2xf32> +// CHECK-NEXT: } Hsiangkai wrote: Done. 
https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -3480,6 +3480,31 @@ DiagnosedSilenceableFailure transform::MapCopyToThreadsOp::applyToOne( return DiagnosedSilenceableFailure::success(); } +//===--===// +// WinogradConv2DOp +//===--===// + +DiagnosedSilenceableFailure transform::WinogradConv2DOp::applyToOne( +transform::TransformRewriter &rewriter, linalg::LinalgOp target, +transform::ApplyToEachResultList &results, +transform::TransformState &state) { + rewriter.setInsertionPoint(target); + auto maybeTransformed = + TypeSwitch>(target) + .Case([&](linalg::Conv2DNhwcFhwcOp op) { +return winogradConv2D(rewriter, op, getM(), getR()); + }) + .Default([&](Operation *op) { +return rewriter.notifyMatchFailure(op, "not supported"); Hsiangkai wrote: Use `emitError` to output error messages. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -2587,4 +2587,55 @@ def MapCopyToThreadsOp : }]; } +//===--===// +// Winograd Conv2D +//===--===// + +def WinogradConv2DOp : Op { + let description = [{ +Winograd Conv2D algorithm will convert linalg Conv2D operator into batched +matrix multiply. Before the matrix multiply, it will convert filter and +input into a format suitable for batched matrix multiply. After the matrix +multiply, it will convert output to the final result tensor. + +The algorithm F(m x m, r x r) is + +Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A + +The size of output Y is m x m. The size of filter g is r x r. The size of +input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are +transformation matrices. + + Return modes: + +This operation fails if `target` is unsupported. Otherwise, the operation Hsiangkai wrote: Fixed. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
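For the m = 4, r = 3 configuration used by the tests in this PR, the F(m x m, r x r) formula quoted above instantiates as follows (a worked example added for orientation, not text from the patch; \odot is written for the element-wise product the description denotes with `@`). The input tile is m + r - 1 = 6 on a side, which is where the 6x6 dimensions of the transformed filter and input tensors come from:

\[
Y_{4\times 4} \;=\; A^{T}_{4\times 6}\,\Bigl[(G_{6\times 3}\,g_{3\times 3}\,G^{T}_{3\times 6}) \odot (B^{T}_{6\times 6}\,d_{6\times 6}\,B_{6\times 6})\Bigr]\,A_{6\times 4}
\]

Each 6x6 input tile yields one 4x4 output tile, so the 8x8 outputs in the tests are covered by a 2x2 grid of tiles, matching the tensor<2x2x6x6x...> shapes in the CHECK lines.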
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -2587,4 +2587,55 @@ def MapCopyToThreadsOp : }]; } +//===--===// +// Winograd Conv2D +//===--===// + +def WinogradConv2DOp : Op { + let description = [{ +Winograd Conv2D algorithm will convert linalg Conv2D operator into batched Hsiangkai wrote: Fixed. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
https://github.com/Hsiangkai edited https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ce1e4ad - Revert "[GlobalISel] Add support for lowering byref attribute"
Author: Thorsten Schütt Date: 2024-06-26T17:34:33+02:00 New Revision: ce1e4ade530a75921dada55f1211c85343c98d42 URL: https://github.com/llvm/llvm-project/commit/ce1e4ade530a75921dada55f1211c85343c98d42 DIFF: https://github.com/llvm/llvm-project/commit/ce1e4ade530a75921dada55f1211c85343c98d42.diff LOG: Revert "[GlobalISel] Add support for lowering byref attribute" This reverts commit 3e1ebd77e4e9a772e4f06f12d19c64860fb1f070. Added: Modified: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp Removed: diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 5efb3be0e53ae..2ee035790eff1 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -49,8 +49,6 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, Flags.setNest(); if (AttrFn(Attribute::ByVal)) Flags.setByVal(); - if (AttrFn(Attribute::ByRef)) -Flags.setByRef(); if (AttrFn(Attribute::Preallocated)) Flags.setPreallocated(); if (AttrFn(Attribute::InAlloca)) @@ -223,26 +221,17 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, } Align MemAlign = DL.getABITypeAlign(Arg.Ty); - if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || - Flags.isByRef()) { + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { assert(OpIdx >= AttributeList::FirstArgIndex); unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); -if (!ElementTy) - ElementTy = FuncInfo.getParamByRefType(ParamIdx); if (!ElementTy) ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); if (!ElementTy) ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); - assert(ElementTy && "Must have byval, inalloca or preallocated type"); - -uint64_t MemSize = DL.getTypeAllocSize(ElementTy); -if (Flags.isByRef()) - Flags.setByRefSize(MemSize); -else - Flags.setByValSize(MemSize); +Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96739 >From 401d82fb69592c8715e6ffa367ffdedd923746ae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 11:46:15 +0200 Subject: [PATCH] AMDGPU: Remove ds_fmin/ds_fmax intrinsics These have been replaced with atomicrmw. --- llvm/docs/ReleaseNotes.rst| 5 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 - llvm/lib/IR/AutoUpgrade.cpp | 8 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 -- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +- llvm/test/Bitcode/amdgcn-atomic.ll| 52 +++ 9 files changed, 65 insertions(+), 86 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I) -MI.removeOperand(I); - - MI.remove
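For users migrating per the release note quoted in the patch above, a minimal hand-written IR sketch (my own example, not taken from the patch or its tests) of the replacement pattern on an LDS pointer:

; atomicrmw fmin/fmax on an addrspace(3) pointer replaces the removed
; llvm.amdgcn.ds.fmin / llvm.amdgcn.ds.fmax intrinsic calls.
define float @lds_fmin_fmax(ptr addrspace(3) %ptr, float %val) {
  %old_min = atomicrmw fmin ptr addrspace(3) %ptr, float %val seq_cst
  %old_max = atomicrmw fmax ptr addrspace(3) %ptr, float %val seq_cst
  %sum = fadd float %old_min, %old_max
  ret float %sum
}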
[llvm-branch-commits] [flang] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #96266)
agozillon wrote: A small ping on this PR stack for some reviewer attention if at all possible please, it would be greatly appreciated! Thank you very much ahead of time :-) https://github.com/llvm/llvm-project/pull/96266 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/96739 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < 
opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
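The matching added in the patch above buckets binary functions by demangled namespace and then compares names with `llvm::StringRef::edit_distance` against the strict `MinEditDistance < Threshold` check. A tiny standalone illustration of that comparison (my own example, assuming LLVM's ADT headers are available):

#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  // Two demangled names in the same namespace differing by one character
  // have edit distance 1, so they would match under a threshold of 2.
  llvm::StringRef Profiled = "foo::bar(int)";
  llvm::StringRef Candidate = "foo::baz(int)";
  assert(Profiled.edit_distance(Candidate) == 1);
  return 0;
}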
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 00dcd9a - Revert "[ADT] Always use 32-bit size type for SmallVector with 16-bit element…"
Author: Chelsea Cassanova Date: 2024-06-26T15:21:36-07:00 New Revision: 00dcd9a85ca77ee5e19fa90353b8bab361de983e URL: https://github.com/llvm/llvm-project/commit/00dcd9a85ca77ee5e19fa90353b8bab361de983e DIFF: https://github.com/llvm/llvm-project/commit/00dcd9a85ca77ee5e19fa90353b8bab361de983e.diff LOG: Revert "[ADT] Always use 32-bit size type for SmallVector with 16-bit element…" This reverts commit 2582d11f1a8a5783828156d3ced354727f422885. Added: Modified: llvm/include/llvm/ADT/SmallVector.h llvm/lib/Support/SmallVector.cpp Removed: diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index db34b16ecf9e7..09676d792dfeb 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -116,7 +116,8 @@ template class SmallVectorBase { template using SmallVectorSizeType = -std::conditional_t; +std::conditional_t= 8, uint64_t, + uint32_t>; /// Figure out the offset of the first element. template struct SmallVectorAlignmentAndSize { diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index e77b747984173..b6ce37842040b 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -37,7 +37,9 @@ struct Struct32B { #pragma GCC diagnostic pop #endif } - +static_assert(sizeof(SmallVector) == + sizeof(unsigned) * 2 + sizeof(void *), + "wasted space in SmallVector size 0"); static_assert(alignof(SmallVector) >= alignof(Struct16B), "wrong alignment for 16-byte aligned T"); static_assert(alignof(SmallVector) >= alignof(Struct32B), @@ -46,19 +48,13 @@ static_assert(sizeof(SmallVector) >= alignof(Struct16B), "missing padding for 16-byte aligned T"); static_assert(sizeof(SmallVector) >= alignof(Struct32B), "missing padding for 32-byte aligned T"); - -static_assert(sizeof(SmallVector) == - sizeof(unsigned) * 2 + sizeof(void *), - "wasted space in SmallVector size 0"); static_assert(sizeof(SmallVector) == sizeof(unsigned) * 2 + sizeof(void *) * 2, "wasted space in SmallVector size 1"); + static_assert(sizeof(SmallVector) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); -static_assert(sizeof(SmallVector) == - sizeof(unsigned) * 2 + sizeof(void *), - "2 byte elements have 32-bit type for size and capacity"); /// Report that MinSize doesn't fit into this vector's size type. Throws /// std::length_error or calls report_fatal_error. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
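A small standalone check (my own illustration, not part of the revert; it assumes LLVM's ADT headers) of what the restored behaviour means: with the 32-bit size-type change reverted, 2-byte element types go back to a word-sized size/capacity field, so an empty SmallVector<T, 0> of a 1-byte or 2-byte element type is pointer + size + capacity, i.e. three pointer-sized words:

#include "llvm/ADT/SmallVector.h"
#include <cstdint>

// Both asserts hold after the revert; before it, the second type packed
// size and capacity into 32 bits each, saving 8 bytes on 64-bit hosts.
static_assert(sizeof(llvm::SmallVector<char, 0>) == 3 * sizeof(void *),
              "1-byte elements: word-sized size and capacity");
static_assert(sizeof(llvm::SmallVector<int16_t, 0>) == 3 * sizeof(void *),
              "2-byte elements: word-sized size and capacity again");

int main() { return 0; }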
[llvm-branch-commits] [libcxx] 379cd11 - Revert "[libc++] Use _If for conditional_t (#96193)"
Author: James Y Knight Date: 2024-06-26T23:57:04-04:00 New Revision: 379cd1193a8453850f0c2e12c005160d7535b373 URL: https://github.com/llvm/llvm-project/commit/379cd1193a8453850f0c2e12c005160d7535b373 DIFF: https://github.com/llvm/llvm-project/commit/379cd1193a8453850f0c2e12c005160d7535b373.diff LOG: Revert "[libc++] Use _If for conditional_t (#96193)" This reverts commit 2274c66e6faaaf29ad693b1ae3e5a7b0228a1950. Added: Modified: libcxx/include/__type_traits/conditional.h Removed: diff --git a/libcxx/include/__type_traits/conditional.h b/libcxx/include/__type_traits/conditional.h index 7d5849ee824e3..5b5445a837427 100644 --- a/libcxx/include/__type_traits/conditional.h +++ b/libcxx/include/__type_traits/conditional.h @@ -44,14 +44,15 @@ struct _LIBCPP_TEMPLATE_VIS conditional { using type _LIBCPP_NODEBUG = _Then; }; -template -using __conditional_t _LIBCPP_NODEBUG = _If<_Bp, _IfRes, _ElseRes>; - #if _LIBCPP_STD_VER >= 14 template -using conditional_t _LIBCPP_NODEBUG = __conditional_t<_Bp, _IfRes, _ElseRes>; +using conditional_t _LIBCPP_NODEBUG = typename conditional<_Bp, _IfRes, _ElseRes>::type; #endif +// Helper so we can use "conditional_t" in all language versions. +template +using __conditional_t _LIBCPP_NODEBUG = typename conditional<_Bp, _If, _Then>::type; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_CONDITIONAL_H ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 5b2feee - Revert "[CodeGen] Introduce `MachineDomTreeUpdater` (#95369)"
Author: paperchalice Date: 2024-06-27T12:30:50+08:00 New Revision: 5b2feeef6364981528c1cf083bf8d952f7104b02 URL: https://github.com/llvm/llvm-project/commit/5b2feeef6364981528c1cf083bf8d952f7104b02 DIFF: https://github.com/llvm/llvm-project/commit/5b2feeef6364981528c1cf083bf8d952f7104b02.diff LOG: Revert "[CodeGen] Introduce `MachineDomTreeUpdater` (#95369)" This reverts commit 6ca387cbcb207abe2a07bbb1b536f099c2e246e7. Added: Modified: llvm/include/llvm/Analysis/DomTreeUpdater.h llvm/include/llvm/CodeGen/MachineBasicBlock.h llvm/include/llvm/CodeGen/MachinePostDominators.h llvm/lib/Analysis/DomTreeUpdater.cpp llvm/lib/CodeGen/CMakeLists.txt llvm/lib/CodeGen/MachineBasicBlock.cpp llvm/unittests/CodeGen/CMakeLists.txt Removed: llvm/include/llvm/Analysis/GenericDomTreeUpdater.h llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h llvm/lib/CodeGen/MachineDomTreeUpdater.cpp llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp diff --git a/llvm/include/llvm/Analysis/DomTreeUpdater.h b/llvm/include/llvm/Analysis/DomTreeUpdater.h index 2b838a311440e..ddb958455ccd7 100644 --- a/llvm/include/llvm/Analysis/DomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/DomTreeUpdater.h @@ -15,8 +15,6 @@ #define LLVM_ANALYSIS_DOMTREEUPDATER_H #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/GenericDomTreeUpdater.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Compiler.h" @@ -25,17 +23,66 @@ #include namespace llvm { +class PostDominatorTree; -class DomTreeUpdater -: public GenericDomTreeUpdater { - friend GenericDomTreeUpdater; - +class DomTreeUpdater { public: - using Base = - GenericDomTreeUpdater; - using Base::Base; + enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 }; + + explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_) + : DT(&DT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_) + : DT(DT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_) + : PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_) + : PDT(PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_, + UpdateStrategy Strategy_) + : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_, + UpdateStrategy Strategy_) + : DT(DT_), PDT(PDT_), Strategy(Strategy_) {} + + ~DomTreeUpdater() { flush(); } + + /// Returns true if the current strategy is Lazy. + bool isLazy() const { return Strategy == UpdateStrategy::Lazy; }; + + /// Returns true if the current strategy is Eager. + bool isEager() const { return Strategy == UpdateStrategy::Eager; }; + + /// Returns true if it holds a DominatorTree. + bool hasDomTree() const { return DT != nullptr; } + + /// Returns true if it holds a PostDominatorTree. + bool hasPostDomTree() const { return PDT != nullptr; } + + /// Returns true if there is BasicBlock awaiting deletion. + /// The deletion will only happen until a flush event and + /// all available trees are up-to-date. + /// Returns false under Eager UpdateStrategy. + bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); } + + /// Returns true if DelBB is awaiting deletion. + /// Returns false under Eager UpdateStrategy. 
+ bool isBBPendingDeletion(BasicBlock *DelBB) const; + + /// Returns true if either of DT or PDT is valid and the tree has at + /// least one update pending. If DT or PDT is nullptr it is treated + /// as having no pending updates. This function does not check + /// whether there is BasicBlock awaiting deletion. + /// Returns false under Eager UpdateStrategy. + bool hasPendingUpdates() const; + + /// Returns true if there are DominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or DT is nullptr. + bool hasPendingDomTreeUpdates() const; + + /// Returns true if there are PostDominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or PDT is nullptr. + bool hasPendingPostDomTreeUpdates() const; ///@{ /// \name Mutation APIs @@ -58,6 +105,51 @@ class DomTreeUpdater /// Although GenericDomTree provides several update primitives, /// it is not encouraged to use these APIs directly. + /// Submit updates to all available trees. + /// The Eager Strategy flushes updates immediately while the Lazy Strategy + /// queues the updates. + /// + /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is + /// in sync with + all updates before that single update. + /// + /// CAUTION! + /// 1. It is required for t
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase &rewriter, Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// +Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, + Value retValue, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to G transform matrix. + static const llvm::SmallDenseMap + GMatrices = { + {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + }; + + // Map from (m, r) to GT transform matrix. + static const llvm::SmallDenseMap + GTMatrices = { + {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + }; + + auto filterType = cast(filter.getType()); + Type elementType = filterType.getElementType(); + auto filterShape = filterType.getShape(); // F, H, W, C + int64_t filterF = filterShape[0]; + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + int64_t filterC = filterShape[3]; + + if (filterH != r && filterH != 1) +return Value(); + if (filterW != r && filterW != 1) +return Value(); + + // Return shape is + auto zeroIdx = rewriter.create(loc, 0); + auto fUpperBound = rewriter.create(loc, filterF); + auto cUpperBound = rewriter.create(loc, filterC); + auto oneStep = rewriter.create(loc, 1); + auto outerForOp = + rewriter.create(loc, zeroIdx, fUpperBound, oneStep, retValue); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value FIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value CIter = innerForBody->getArgument(0); + + // Extract (H, W) from (F, H, W, C) + auto extractFilter = extract2DData( + rewriter, loc, filter, FIter, CIter, /*outLoopIdx=*/0, + /*inLoopIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2, /*srcSize=*/4); + + TransformMapKeyTy key = {m, r}; + int64_t retRows = 1; + Value matmulRetValue = extractFilter; + if (leftTransform) { +// Get constant transform matrix G +auto it = GMatrices.find(key); +if (it == GMatrices.end()) + return Value(); +const TransformMatrix &GMatrix = it->second; + +retRows = GMatrix.rows; +auto matmulType = RankedTensorType::get({retRows, filterW}, elementType); +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value G = create2DTransformMatrix(rewriter, loc, GMatrix, elementType); Hsiangkai wrote: There is a `ConstantOpInterface` that can convert `arith.constant` to `memref.get_global` after bufferization. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
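As background for that last reply: one-shot bufferization turns a dense tensor `arith.constant` into a module-level global that is read back with `memref.get_global`, so constant transform matrices built with `arith.constant` need no special handling. A small hand-written sketch of that behaviour (my own example, not from the PR; the global's name is whatever the bufferizer picks):

// Before bufferization: the transform matrix is an ordinary constant tensor.
func.func @transform_matrix() -> tensor<6x3xf32> {
  %g = arith.constant dense<1.0> : tensor<6x3xf32>
  return %g : tensor<6x3xf32>
}

// After `mlir-opt --one-shot-bufferize="bufferize-function-boundaries"`, roughly:
//   memref.global "private" constant @__constant_6x3xf32 : memref<6x3xf32> = dense<1.000000e+00>
//   func.func @transform_matrix() -> memref<6x3xf32> {
//     %0 = memref.get_global @__constant_6x3xf32 : memref<6x3xf32>
//     return %0 : memref<6x3xf32>
//   }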
[llvm-branch-commits] [llvm] 5a5ab74 - Revert "[CodeGen][NewPM] Port machine-branch-prob to new pass manager (#96389)"
Author: paperchalice Date: 2024-06-27T14:55:50+08:00 New Revision: 5a5ab746879bf0d7248e23978e56849e96ab67e8 URL: https://github.com/llvm/llvm-project/commit/5a5ab746879bf0d7248e23978e56849e96ab67e8 DIFF: https://github.com/llvm/llvm-project/commit/5a5ab746879bf0d7248e23978e56849e96ab67e8.diff LOG: Revert "[CodeGen][NewPM] Port machine-branch-prob to new pass manager (#96389)" This reverts commit 73e46c2bb4949be986d9eac98d95a206d7f003e2. Added: Modified: llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h llvm/include/llvm/InitializePasses.h llvm/include/llvm/Passes/MachinePassRegistry.def llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/lib/CodeGen/BranchFolding.cpp llvm/lib/CodeGen/EarlyIfConversion.cpp llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp llvm/lib/CodeGen/IfConversion.cpp llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp llvm/lib/CodeGen/MachineBlockPlacement.cpp llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp llvm/lib/CodeGen/MachineSink.cpp llvm/lib/CodeGen/MachineTraceMetrics.cpp llvm/lib/CodeGen/TailDuplication.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp llvm/test/CodeGen/Generic/MachineBranchProb.ll Removed: diff --git a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h index 12d33f96edd11..bd544421bc0ff 100644 --- a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -14,13 +14,14 @@ #define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" namespace llvm { -class MachineBranchProbabilityInfo { +class MachineBranchProbabilityInfo : public ImmutablePass { + virtual void anchor(); + // Default weight value. Used when we don't have information about the edge. // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of // the successors have a weight yet. But it doesn't make sense when providing @@ -30,8 +31,13 @@ class MachineBranchProbabilityInfo { static const uint32_t DEFAULT_WEIGHT = 16; public: - bool invalidate(MachineFunction &, const PreservedAnalyses &PA, - MachineFunctionAnalysisManager::Invalidator &); + static char ID; + + MachineBranchProbabilityInfo(); + + void getAnalysisUsage(AnalysisUsage &AU) const override { +AU.setPreservesAll(); + } // Return edge probability. 
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, @@ -55,45 +61,6 @@ class MachineBranchProbabilityInfo { const MachineBasicBlock *Dst) const; }; -class MachineBranchProbabilityAnalysis -: public AnalysisInfoMixin { - friend AnalysisInfoMixin; - - static AnalysisKey Key; - -public: - using Result = MachineBranchProbabilityInfo; - - Result run(MachineFunction &, MachineFunctionAnalysisManager &); -}; - -class MachineBranchProbabilityPrinterPass -: public PassInfoMixin { - raw_ostream &OS; - -public: - MachineBranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(MachineFunction &MF, -MachineFunctionAnalysisManager &MFAM); -}; - -class MachineBranchProbabilityInfoWrapperPass : public ImmutablePass { - virtual void anchor(); - - MachineBranchProbabilityInfo MBPI; - -public: - static char ID; - - MachineBranchProbabilityInfoWrapperPass(); - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.setPreservesAll(); - } - - MachineBranchProbabilityInfo &getMBPI() { return MBPI; } - const MachineBranchProbabilityInfo &getMBPI() const { return MBPI; } -}; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 421c09ada7a19..4ddb7112a47bb 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -182,7 +182,7 @@ void initializeMIRPrintingPassPass(PassRegistry&); void initializeMachineBlockFrequencyInfoPass(PassRegistry&); void initializeMachineBlockPlacementPass(PassRegistry&); void initializeMachineBlockPlacementStatsPass(PassRegistry&); -void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); +void initializeMachineBranchProbabilityInfoPass(PassRegistry&); void initializeMachineCFGPrinterPass(PassRegistry &); void initializeMachineCSEPas