[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96739 These have been replaced with atomicrmw. >From e95c252f91dea9dbb89711eb3b851fcfe6555f7c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 11:46:15 +0200 Subject: [PATCH] AMDGPU: Remove ds_fmin/ds_fmax intrinsics These have been replaced with atomicrmw. --- llvm/docs/ReleaseNotes.rst| 5 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 - llvm/lib/IR/AutoUpgrade.cpp | 8 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 -- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +- llvm/test/Bitcode/amdgcn-atomic.ll| 52 +++ 9 files changed, 65 insertions(+), 86 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I
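For readers following the migration described in the release note above, a minimal illustration of the replacement form may help. This sketch is not part of the patch; the `monotonic` ordering and `"agent"` sync scope are placeholder assumptions standing in for the removed intrinsic's explicit ordering/scope/volatile operands. It emits the `atomicrmw fmin` equivalent of a former `llvm.amdgcn.ds.fmin` call on an LDS (addrspace(3)) pointer, i.e. textual IR of the shape `atomicrmw fmin ptr addrspace(3) %p, float %v syncscope("agent") monotonic`.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Emit the atomicrmw that replaces a removed llvm.amdgcn.ds.fmin call.
// Ptr is the addrspace(3) pointer operand, Val the float/double value.
static Value *emitLDSFMin(IRBuilder<> &B, Value *Ptr, Value *Val) {
  return B.CreateAtomicRMW(AtomicRMWInst::FMin, Ptr, Val, MaybeAlign(),
                           AtomicOrdering::Monotonic,
                           B.getContext().getOrInsertSyncScopeID("agent"));
}
```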
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96739). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96739** 👈 (this PR)
* **#96738**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96739
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes These have been replaced with atomicrmw. --- Full diff: https://github.com/llvm/llvm-project/pull/96739.diff 9 Files Affected: - (modified) llvm/docs/ReleaseNotes.rst (+5) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-14) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+6-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (-32) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+1-19) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-14) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+52) ``diff diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I) -MI.removeOperand(I); - - MI.removeOperand(1); // Remove the intrinsic ID. - Observer.changedInstr(MI); - return true; -} - // TODO: Fix pointer
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96739 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; jayfoad wrote: What does "AtomicMemoryAtomic" mean? https://github.com/llvm/llvm-project/pull/96443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443 >From 78edc216186854e3320ec5e16b78a26af19dee66 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 16:44:08 +0200 Subject: [PATCH 1/3] AMDGPU: Add subtarget feature for global atomic fadd denormal support Not sure what the behavior for gfx90a is. The SPG says it always flushes. The instruction documentation says it does not. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 -- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 56ec5e9c4cfc2..6b212e1b2af03 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureMemoryAtomicFaddF32DenormalSupport + : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", + "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "true", + "global/flat/buffer atomic fadd for float supports denormal handling" +>; + def FeatureAgentScopeFineGrainedRemoteMemoryAtomics : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", "HasAgentScopeFineGrainedRemoteMemoryAtomics", @@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureAgentScopeFineGrainedRemoteMemoryAtomics + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1631,7 +1639,9 @@ def FeatureISAVersion12 : FeatureSet< FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, - Feature1_5xVGPRs]>; + Feature1_5xVGPRs, + FeatureMemoryAtomicFaddF32DenormalSupport]>; + ]>; def FeatureISAVersion12_Generic: FeatureSet< !listconcat(FeatureISAVersion12.Features, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 9e2a316a9ed28..db0b2b67a0388 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target's flat, global, and buffer atomic fadd for + /// float supports denormal handling. + bool hasMemoryAtomicFaddF32DenormalSupport() const { +return HasAtomicMemoryAtomicFaddF32DenormalSupport; + } + /// \return true if atomic operations targeting fine-grained memory work /// correctly at device scope, in allocations in host or peer PCIe device /// memory. >From 47017c26844bc49a9842b2c40056392184119943 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 12:10:37 +0200 Subject: [PATCH 2/3] Add to gfx11. RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm" thought I'm not sure I trust it. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b212e1b2af03..39a1d629a4aea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1547,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard]>; + FeatureVcmpxPermlaneHazard, + FeatureMemoryAtomicFaddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen @@ -1640,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, Feature1_5xVGPRs, - FeatureMemoryAtomicFaddF32DenormalSupport]>; + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion12_Generic: FeatureSet< >From 23ec97c971fb5a93a39908da6e652899830dcb4e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 11:30:51 +0200 Subject: [PATCH 3/3] Rename --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 39a1d629a4aea..34c6f6ff19bff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -78
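The accessor `hasMemoryAtomicFaddF32DenormalSupport()` is only defined in this patch; consumers arrive later in the stack. As a hedged sketch of the kind of query a lowering decision might make (illustrative only, assuming the AMDGPU target-internal `GCNSubtarget.h` header; the helper name and surrounding logic are not taken from the patch):

```cpp
#include "GCNSubtarget.h"          // AMDGPU target-internal header
#include "llvm/IR/Instructions.h"

// True when an f32 atomicrmw fadd can use the native memory atomic without
// flushing denormals; other operations/types are unaffected by the feature.
static bool faddF32KeepsDenormals(const llvm::GCNSubtarget &ST,
                                  const llvm::AtomicRMWInst &RMW) {
  if (RMW.getOperation() != llvm::AtomicRMWInst::FAdd ||
      !RMW.getType()->isFloatTy())
    return true;
  return ST.hasMemoryAtomicFaddF32DenormalSupport();
}
```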
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From db519863301bd95fe0d50b56d74584b0f7f2fbf6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. 
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 98054dde398b3..89946a4719557 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
[llvm-branch-commits] [llvm] [RISCV] Support select optimization (PR #80124)
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/80124 >From e3fb1fe7bdd4b7c24f9361c4d14dd1206fc8c067 Mon Sep 17 00:00:00 2001 From: wangpc Date: Sun, 18 Feb 2024 11:12:16 +0800 Subject: [PATCH 1/2] Move after addIRPasses Created using spr 1.3.4 --- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index fdf1c023fff87..7a26e1956424c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -450,15 +450,15 @@ void RISCVPassConfig::addIRPasses() { if (EnableLoopDataPrefetch) addPass(createLoopDataPrefetchPass()); -if (EnableSelectOpt && getOptLevel() == CodeGenOptLevel::Aggressive) - addPass(createSelectOptimizePass()); - addPass(createRISCVGatherScatterLoweringPass()); addPass(createInterleavedAccessPass()); addPass(createRISCVCodeGenPreparePass()); } TargetPassConfig::addIRPasses(); + + if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt) +addPass(createSelectOptimizePass()); } bool RISCVPassConfig::addPreISel() { >From 5d5398596dc30c47c67572ec20137fb3f9434940 Mon Sep 17 00:00:00 2001 From: wangpc Date: Wed, 21 Feb 2024 21:21:28 +0800 Subject: [PATCH 2/2] Fix test Created using spr 1.3.4 --- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 62c1af52e6c20..8b52e3fe7b2f1 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -34,15 +34,6 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Data Prefetch -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Branch Probability Analysis -; CHECK-NEXT: Block Frequency Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis -; CHECK-NEXT: Optimization Remark Emitter -; CHECK-NEXT: Optimize selects -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: RISC-V gather/scatter lowering ; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: RISC-V CodeGenPrepare @@ -77,6 +68,15 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Optimize selects +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
mjklemm wrote: @skatrak Is this ready for final review? https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
https://github.com/hokein edited https://github.com/llvm/llvm-project/pull/96475 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96760). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96760** 👈 (this PR)
* **#96759**
* **#96444**
* **#96443**
* **#96442**
* **#95930**
* **#95929**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96760
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96759). Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96760**
* **#96759** 👈 (this PR)
* **#96444**
* **#96443**
* **#96442**
* **#95930**
* **#95929**
* `main`

This stack of pull requests is managed by Graphite; see https://stacking.dev/ for more about stacking.

https://github.com/llvm/llvm-project/pull/96759
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
hokein wrote: I have separated the refactoring change into #96758. This PR now focuses only on the assignment support. https://github.com/llvm/llvm-project/pull/96475 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Consider the new atomic metadata when choosing to expand as cmpxchg instead. --- Patch is 1.01 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96759.diff 13 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+53-30) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll (+203-130) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll (+203-130) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+148-298) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+148-298) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll (+191-388) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll (+191-388) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+634-1766) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+634-1766) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll (+1786-266) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll (+1294-202) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll (+888-128) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+642-96) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fc34277c580a8..11ebfe7511f7b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16093,6 +16093,34 @@ static bool isBFloat2(Type *Ty) { return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy(); } +/// \returns true if it's valid to emit a native instruction for \p RMW, based +/// on the properties of the target memory. +static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, +const AtomicRMWInst *RMW, +bool HasSystemScope) { + // The remote/fine-grained access logic is different from the integer + // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support, + // fine-grained access does not work, even for a device local allocation. + // + // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local + // allocations work. + if (HasSystemScope) { +if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() && +RMW->hasMetadata("amdgpu.no.remote.memory")) + return true; + } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics()) +return true; + + if (RMW->hasMetadata("amdgpu.no.fine.grained.memory")) +return true; + + // TODO: Auto-upgrade this attribute to the metadata in function body and stop + // checking it. + return RMW->getFunction() + ->getFnAttribute("amdgpu-unsafe-fp-atomics") + .getValueAsBool(); +} + TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { unsigned AS = RMW->getPointerAddressSpace(); @@ -16236,37 +16264,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { Type *Ty = RMW->getType(); // LDS float and double fmin/fmax were always supported. -if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy())) - return AtomicExpansionKind::None; - -if (unsafeFPAtomicsDisabled(RMW->getFunction())) - return AtomicExpansionKind::CmpXChg; - -// Always expand system scope fp atomics. 
-if (HasSystemScope) - return AtomicExpansionKind::CmpXChg; +if (AS == AMDGPUAS::LOCAL_ADDRESS) { + return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None + : AtomicExpansionKind::CmpXChg; +} -// For flat and global cases: -// float, double in gfx7. Manual claims denormal support. -// Removed in gfx8. -// float, double restored in gfx10. -// double removed again in gfx11, so only f32 for gfx11/gfx12. -// -// For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no -// f32. -// -// FIXME: Check scope and fine grained memory -if (AS == AMDGPUAS::FLAT_ADDRESS) { - if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); - if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); -} else if (AMDGPU::isExtendedGlobalAddrSpace(AS) || - AS == AMDGPUAS::BUFFER_FAT_POINTER) { - if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy()) -return ReportUnsafeHWInst(AtomicExpansionKind::None); - if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy()) -return ReportUnsafeHWInst(Ato
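The `globalMemoryFPAtomicIsLegal` helper above keys off the `amdgpu.no.remote.memory` and `amdgpu.no.fine.grained.memory` metadata attached to the `atomicrmw` instruction. A hedged sketch of how a frontend or pass might attach that metadata when it knows the accessed allocation is neither remote nor fine-grained (not code from this patch; the empty `MDNode` payload mirrors the usual presence-only metadata convention):

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Mark an atomicrmw as not touching remote or fine-grained memory so the
// AMDGPU backend may pick the native fmin/fmax instruction instead of
// expanding it to a cmpxchg loop.
static void markNoRemoteNoFineGrained(AtomicRMWInst *RMW) {
  LLVMContext &Ctx = RMW->getContext();
  MDNode *Empty = MDNode::get(Ctx, {});
  RMW->setMetadata("amdgpu.no.remote.memory", Empty);
  RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
}
```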
[llvm-branch-commits] [llvm] AMDGPU: Handle remote/fine-grained memory in atomicrmw fmin/fmax lowering (PR #96759)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/96760 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This is the most complex atomicrmw support case. Note we don't have accurate remarks for all of the cases, which I'm planning on fixing in a later change with more precise wording. Continue respecting amdgpu-unsafe-fp-atomics until it's eventual removal. Also seems to fix a few cases not interpreting amdgpu-unsafe-fp-atomics appropriately aaggressively. --- Patch is 1.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96760.diff 37 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+69-81) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+61-178) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll (+420-101) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll (+262-17) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll (+110-39) - (modified) llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll (+51-69) - (modified) llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll (+11-9) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll (+236-87) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+736-958) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+13-50) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+59-156) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll (+73-28) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd-wrong-subtarget.ll (+5-3) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll (+746-232) - (modified) llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+78-182) - (modified) llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll (+53-51) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll (+52-676) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll (+182-1186) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll (+4-52) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll (+20-175) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i128.ll (+30-30) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll (+10-22) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll (+45-45) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll (+1471-3143) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll (+3-3) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll (+33-223) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll (+52-4) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll 
(+59-201) - (modified) llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll (+13-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 11ebfe7511f7b..f9b5aea10 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16037,26 +16037,15 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, SNaN, Depth); } -#if 0 -// FIXME: This should be checked before unsafe fp atomics are enabled -// Global FP atomic instructions have a hardcoded FP mode and do not support -// FP32 denormals, and only support v2f16 denormals. -static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) { - const fltSemantics &Flt = RMW->getType()->getScalarType()->getFltSemantics(); - auto DenormMode = RMW->getParent()->getParent()->getDenormalMode(Flt); - if (&Flt == &APFloat::IEEEsingle()) -return DenormMode == DenormalMode::getPreserveSign(); - return DenormMode == DenormalMode::getIEEE(); -} -#endif +// On older subtargets, global FP atomic instructions have a hardcoded FP mode +// and do not support FP32 denormals, and only support v2f16/f64 denormals. +static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW) { + if (RMW
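The new `atomicIgnoresDenormalModeOrFPModeIsFTZ` helper replaces the previously commented-out check: a hardware atomic that flushes f32 denormals is acceptable when the function already runs with flush-to-zero denormal handling for that type. A rough sketch of the idea (paraphrasing the removed `#if 0` block rather than quoting the new code; the helper name is invented for illustration):

```cpp
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// True when the function's denormal mode for the atomic's float type is
// already preserve-sign (FTZ), so a flushing hardware atomic does not change
// observable denormal behaviour.
static bool fpModeAlreadyFlushes(const llvm::AtomicRMWInst *RMW) {
  const llvm::fltSemantics &Flt =
      RMW->getType()->getScalarType()->getFltSemantics();
  llvm::DenormalMode Mode = RMW->getFunction()->getDenormalMode(Flt);
  return Mode == llvm::DenormalMode::getPreserveSign();
}
```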
[llvm-branch-commits] [clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 2b5d1fb889fca7287858db0791bfecc1465f23e1 43ffbc27fe7d128586b54dbd33fd676532233032 --extensions 'c,cpp,h' -- clang/lib/Sema/CheckExprLifetime.cpp clang/lib/Sema/CheckExprLifetime.h clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaInit.cpp clang/test/Parser/compound_literal.c clang/test/SemaCXX/attr-lifetimebound.cpp clang/test/SemaCXX/warn-dangling-local.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 73b3fd2d3a..bbca1b209f 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -42,21 +42,20 @@ enum LifetimeKind { }; using LifetimeResult = llvm::PointerIntPair; -} +} // namespace /// Determine the declaration which an initialized entity ultimately refers to, /// for the purpose of lifetime-extending a temporary bound to a reference in /// the initialization of \p Entity. -static LifetimeResult getEntityLifetime( -const InitializedEntity *Entity, -const InitializedEntity *InitField = nullptr) { +static LifetimeResult +getEntityLifetime(const InitializedEntity *Entity, + const InitializedEntity *InitField = nullptr) { // C++11 [class.temporary]p5: switch (Entity->getKind()) { case InitializedEntity::EK_Variable: // The temporary [...] persists for the lifetime of the reference return {Entity, LK_Extended}; - case InitializedEntity::EK_Member: // For subobjects, we look at the complete object. if (Entity->getParent()) @@ -90,7 +89,8 @@ static LifetimeResult getEntityLifetime( return {nullptr, LK_FullExpression}; case InitializedEntity::EK_TemplateParameter: -// FIXME: This will always be ill-formed; should we eagerly diagnose it here? +// FIXME: This will always be ill-formed; should we eagerly diagnose it +// here? return {nullptr, LK_FullExpression}; case InitializedEntity::EK_Result: @@ -171,7 +171,7 @@ enum ReferenceKind { /// * A DeclRefExpr whose declaration is a local. /// * An AddrLabelExpr. /// * A BlockExpr for a block with captures. -using Local = Expr*; +using Local = Expr *; /// Expressions we stepped over when looking for the local state. Any steps /// that would inhibit lifetime extension or take us out of subexpressions of @@ -359,9 +359,9 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, } else if (auto *OCE = dyn_cast(Call)) { FunctionDecl *Callee = OCE->getDirectCallee(); if (Callee && Callee->isCXXInstanceMember() && -shouldTrackImplicitObjectArg(cast(Callee))) +shouldTrackImplicitObjectArg(cast(Callee))) VisitPointerArg(Callee, OCE->getArg(0), - !Callee->getReturnType()->isReferenceType()); + !Callee->getReturnType()->isReferenceType()); return; } else if (auto *CE = dyn_cast(Call)) { FunctionDecl *Callee = CE->getDirectCallee(); @@ -419,7 +419,7 @@ static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { const FunctionDecl *Callee; - ArrayRef Args; + ArrayRef Args; if (auto *CE = dyn_cast(Call)) { Callee = CE->getDirectCallee(); @@ -610,7 +610,7 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, break; } - // FIXME: Visit the left-hand side of an -> or ->*. +// FIXME: Visit the left-hand side of an -> or ->*. 
default: break; @@ -632,7 +632,8 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path, // Step into CXXDefaultInitExprs so we can diagnose cases where a // constructor inherits one as an implicit mem-initializer. if (auto *DIE = dyn_cast(Init)) { - Path.push_back({IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()}); + Path.push_back( + {IndirectLocalPathEntry::DefaultInit, DIE, DIE->getField()}); Init = DIE->getExpr(); } @@ -657,21 +658,23 @@ static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path, return visitLocalsRetainedByReferenceBinding( Path, Init, RK_ReferenceBinding, [&](IndirectLocalPath &Path, Local L, ReferenceKind RK) -> bool { - if (auto *DRE = dyn_cast(L)) { -auto *VD = dyn_cast(DRE->getDecl()); -if (VD && VD->getType().isConstQualified() && VD->getInit() && -!isVarOnPath(Path, VD)) { - Path.push_back({IndirectLocalPathEntry::VarInit, DRE, VD}); - visitLocalsRetainedByInitializer(Path, VD->getInit(), Visit, true, -
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
skatrak wrote: > @skatrak Is this ready for final review? Yes, it may need very minor changes when rebasing due to recent additions to the main branch, but this should be it for the most part. https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Clause-based OpenMP operation definition (PR #92523)
skatrak wrote: > > I guess fixing byref is on me (#92244). Unfortunately I can't work on this > > immediately so I won't hold up this PR for it. > > @skatrak does #96215 cover everything you need? Thank you for the heads-up, that certainly helps. My plan is to update the PR stack after yours and one or two other PRs that conflict with this change land and hopefully by then all patches in this stack should be reviewed/approved to be merged. https://github.com/llvm/llvm-project/pull/92523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] d1ed32e - Revert "[clang][dataflow] Teach `AnalysisASTVisitor` that `typeid()` can be e…"
Author: martinboehme Date: 2024-06-26T15:40:06+02:00 New Revision: d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b URL: https://github.com/llvm/llvm-project/commit/d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b DIFF: https://github.com/llvm/llvm-project/commit/d1ed32e5cb1cb43acf2d9085960ff37c3fe6b09b.diff LOG: Revert "[clang][dataflow] Teach `AnalysisASTVisitor` that `typeid()` can be e…" This reverts commit dfe80a73223edff5c53f8be7925d302883cb40bc. Added: Modified: clang/include/clang/Analysis/FlowSensitive/ASTOps.h clang/unittests/Analysis/FlowSensitive/TransferTest.cpp Removed: diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h index f9c923a36ad22..925b99af9141a 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h +++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h @@ -113,11 +113,7 @@ class AnalysisASTVisitor : public RecursiveASTVisitor { // nevertheless it appears in the Clang CFG, so we don't exclude it here. bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; } bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; } - bool TraverseCXXTypeidExpr(CXXTypeidExpr *TIE) { -if (TIE->isPotentiallyEvaluated()) - return RecursiveASTVisitor::TraverseCXXTypeidExpr(TIE); -return true; - } + bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; } bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) { return true; } diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 39e7001393e5e..e743eefa5d458 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1637,49 +1637,6 @@ TEST(TransferTest, StructModeledFieldsWithAccessor) { }); } -TEST(TransferTest, StructModeledFieldsInTypeid) { - // Test that we model fields mentioned inside a `typeid()` expression only if - // that expression is potentially evaluated -- i.e. if the expression inside - // `typeid()` is a glvalue of polymorphic type (see - // `CXXTypeidExpr::isPotentiallyEvaluated()` and [expr.typeid]p3). - std::string Code = R"( -// Definitions needed for `typeid`. -namespace std { - class type_info {}; - class bad_typeid {}; -} // namespace std - -struct NonPolymorphic {}; - -struct Polymorphic { - virtual ~Polymorphic() = default; -}; - -struct S { - NonPolymorphic *NonPoly; - Polymorphic *Poly; -}; - -void target(S &s) { - typeid(*s.NonPoly); - typeid(*s.Poly); - // [[p]] -} - )"; - runDataflow( - Code, - [](const llvm::StringMap> &Results, - ASTContext &ASTCtx) { -const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); -auto &SLoc = getLocForDecl(ASTCtx, Env, "s"); -std::vector Fields; -for (auto [Field, _] : SLoc.children()) - Fields.push_back(Field); -EXPECT_THAT(Fields, -UnorderedElementsAre(findValueDecl(ASTCtx, "Poly"))); - }); -} - TEST(TransferTest, StructModeledFieldsWithComplicatedInheritance) { std::string Code = R"( struct Base1 { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
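For context on the behaviour the reverted test exercised: `typeid` evaluates its operand only when that operand is a glvalue of polymorphic class type ([expr.typeid]p3); otherwise the operand is unevaluated. A standalone illustration (not code from the commit):

```cpp
#include <typeinfo>

struct NonPoly {};
struct Poly { virtual ~Poly() = default; };

void probe(NonPoly *NP, Poly *P) {
  (void)typeid(*NP); // unevaluated operand: *NP is never dereferenced
  (void)typeid(*P);  // potentially evaluated: dereferences P and may throw
                     // std::bad_typeid if P is null
}
```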
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
https://github.com/tblah approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/92524 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize --split-input-file | FileCheck %s + +func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x8x8x2xf32> { + %0 = tensor.empty() : tensor<2x8x8x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x8x8x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x8x8x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + return %2 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d +// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>) -> tensor<2x8x8x2xf32> { +// CHECK:%[[S0:.*]] = tensor.empty() : tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S1:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG2]] : tensor<1xf32>) outs(%[[S0]] : tensor<2x8x8x2xf32>) { +// CHECK-NEXT: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): +// CHECK-NEXT: linalg.yield %[[IN]] : f32 +// CHECK-NEXT: } -> tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<2x2x6x6x5x2xf32>) -> tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<2x2x6x6x2x5xf32>) -> tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x5x2xf32> into tensor<144x5x2xf32> +// CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x2x5xf32> into tensor<144x2x5xf32> +// CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<144x2x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<144x2x5xf32>, tensor<144x5x2xf32>) outs(%[[S6]] : tensor<144x2x2xf32>) -> tensor<144x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1, 2, 3], [4], [5]] output_shape [2, 2, 6, 6, 2, 2] : tensor<144x2x2xf32> into tensor<2x2x6x6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<2x2x6x6x2x2xf32>) outs(%[[S1]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: return %[[S8]] : tensor<2x8x8x2xf32> +// CHECK-NEXT: } + +// - + +func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, 
%arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x9x9x2xf32> { + %0 = tensor.empty() : tensor<2x9x9x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x9x9x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x9x9x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x11x11x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> + return %2 : tensor<2x9x9x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d_unaligned
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize --split-input-file | FileCheck %s + +func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>) -> tensor<2x8x8x2xf32> { + %0 = tensor.empty() : tensor<2x8x8x2xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2 : tensor<1xf32>) outs(%0 : tensor<2x8x8x2xf32>) { + ^bb0(%in: f32, %out: f32): +linalg.yield %in : f32 + } -> tensor<2x8x8x2xf32> + %2 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x5xf32>, tensor<2x3x3x5xf32>) outs(%1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + return %2 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { +%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op +%1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) +transform.yield + } +} + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv2d +// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>) -> tensor<2x8x8x2xf32> { +// CHECK:%[[S0:.*]] = tensor.empty() : tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S1:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG2]] : tensor<1xf32>) outs(%[[S0]] : tensor<2x8x8x2xf32>) { +// CHECK-NEXT: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): +// CHECK-NEXT: linalg.yield %[[IN]] : f32 +// CHECK-NEXT: } -> tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<2x2x6x6x5x2xf32>) -> tensor<2x2x6x6x5x2xf32> +// CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<2x2x6x6x2x5xf32>) -> tensor<2x2x6x6x2x5xf32> +// CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x5x2xf32> into tensor<144x5x2xf32> +// CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1, 2, 3], [4], [5]] : tensor<2x2x6x6x2x5xf32> into tensor<144x2x5xf32> +// CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<144x2x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<144x2x5xf32>, tensor<144x5x2xf32>) outs(%[[S6]] : tensor<144x2x2xf32>) -> tensor<144x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1, 2, 3], [4], [5]] output_shape [2, 2, 6, 6, 2, 2] : tensor<144x2x2xf32> into tensor<2x2x6x6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<2x2x6x6x2x2xf32>) outs(%[[S1]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: return %[[S8]] : tensor<2x8x8x2xf32> +// CHECK-NEXT: } Hsiangkai wrote: Done. 
https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -3480,6 +3480,31 @@ DiagnosedSilenceableFailure transform::MapCopyToThreadsOp::applyToOne( return DiagnosedSilenceableFailure::success(); } +//===--===// +// WinogradConv2DOp +//===--===// + +DiagnosedSilenceableFailure transform::WinogradConv2DOp::applyToOne( +transform::TransformRewriter &rewriter, linalg::LinalgOp target, +transform::ApplyToEachResultList &results, +transform::TransformState &state) { + rewriter.setInsertionPoint(target); + auto maybeTransformed = + TypeSwitch>(target) + .Case([&](linalg::Conv2DNhwcFhwcOp op) { +return winogradConv2D(rewriter, op, getM(), getR()); + }) + .Default([&](Operation *op) { +return rewriter.notifyMatchFailure(op, "not supported"); Hsiangkai wrote: Use `emitError` to output error messages. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -2587,4 +2587,55 @@ def MapCopyToThreadsOp : }]; } +//===--===// +// Winograd Conv2D +//===--===// + +def WinogradConv2DOp : Op { + let description = [{ +Winograd Conv2D algorithm will convert linalg Conv2D operator into batched +matrix multiply. Before the matrix multiply, it will convert filter and +input into a format suitable for batched matrix multiply. After the matrix +multiply, it will convert output to the final result tensor. + +The algorithm F(m x m, r x r) is + +Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A + +The size of output Y is m x m. The size of filter g is r x r. The size of +input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are +transformation matrices. + + Return modes: + +This operation fails if `target` is unsupported. Otherwise, the operation Hsiangkai wrote: Fixed. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
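For the m = 4, r = 3 configuration used by the tests in this PR, the F(m x m, r x r) formula quoted above instantiates as follows (a worked example added for orientation, not text from the patch; \odot is written for the element-wise product the description denotes with `@`). The input tile is m + r - 1 = 6 on a side, which is where the 6x6 dimensions of the transformed filter and input tensors come from:

\[
Y_{4\times 4} \;=\; A^{T}_{4\times 6}\,\Bigl[(G_{6\times 3}\,g_{3\times 3}\,G^{T}_{3\times 6}) \odot (B^{T}_{6\times 6}\,d_{6\times 6}\,B_{6\times 6})\Bigr]\,A_{6\times 4}
\]

Each 6x6 input tile yields one 4x4 output tile, so the 8x8 outputs in the tests are covered by a 2x2 grid of tiles, matching the tensor<2x2x6x6x...> shapes in the CHECK lines.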
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
@@ -2587,4 +2587,55 @@ def MapCopyToThreadsOp : }]; } +//===--===// +// Winograd Conv2D +//===--===// + +def WinogradConv2DOp : Op { + let description = [{ +Winograd Conv2D algorithm will convert linalg Conv2D operator into batched Hsiangkai wrote: Fixed. https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)
https://github.com/Hsiangkai edited https://github.com/llvm/llvm-project/pull/96182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ce1e4ad - Revert "[GlobalISel] Add support for lowering byref attribute"
Author: Thorsten Schütt Date: 2024-06-26T17:34:33+02:00 New Revision: ce1e4ade530a75921dada55f1211c85343c98d42 URL: https://github.com/llvm/llvm-project/commit/ce1e4ade530a75921dada55f1211c85343c98d42 DIFF: https://github.com/llvm/llvm-project/commit/ce1e4ade530a75921dada55f1211c85343c98d42.diff LOG: Revert "[GlobalISel] Add support for lowering byref attribute" This reverts commit 3e1ebd77e4e9a772e4f06f12d19c64860fb1f070. Added: Modified: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp Removed: diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 5efb3be0e53ae..2ee035790eff1 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -49,8 +49,6 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, Flags.setNest(); if (AttrFn(Attribute::ByVal)) Flags.setByVal(); - if (AttrFn(Attribute::ByRef)) -Flags.setByRef(); if (AttrFn(Attribute::Preallocated)) Flags.setPreallocated(); if (AttrFn(Attribute::InAlloca)) @@ -223,26 +221,17 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, } Align MemAlign = DL.getABITypeAlign(Arg.Ty); - if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || - Flags.isByRef()) { + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { assert(OpIdx >= AttributeList::FirstArgIndex); unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); -if (!ElementTy) - ElementTy = FuncInfo.getParamByRefType(ParamIdx); if (!ElementTy) ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); if (!ElementTy) ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); - assert(ElementTy && "Must have byval, inalloca or preallocated type"); - -uint64_t MemSize = DL.getTypeAllocSize(ElementTy); -if (Flags.isByRef()) - Flags.setByRefSize(MemSize); -else - Flags.setByValSize(MemSize); +Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96739 >From 401d82fb69592c8715e6ffa367ffdedd923746ae Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 11:46:15 +0200 Subject: [PATCH] AMDGPU: Remove ds_fmin/ds_fmax intrinsics These have been replaced with atomicrmw. --- llvm/docs/ReleaseNotes.rst| 5 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 - llvm/lib/IR/AutoUpgrade.cpp | 8 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 -- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +- llvm/test/Bitcode/amdgcn-atomic.ll| 52 +++ 9 files changed, 65 insertions(+), 86 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` and :ref:`llvm.set.rounding ` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd`, `fmin` and + `fmax` with addrspace(3) instead. + Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], -[LLVMQualPointerType<3>, -LLVMMatchType<0>, -llvm_i32_ty, // ordering -llvm_i32_ty, // scope -llvm_i1_ty], // isVolatile -[IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { -// Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { +// Replaced with atomicrmw fadd/fmin/fmax, so there's no new +// declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: -return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: -return AMDGPU::G_ATOMICRMW_FMAX; - default: -llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I) -MI.removeOperand(I); - - MI.remove
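For users migrating per the release note quoted in the patch above, a minimal hand-written IR sketch (my own example, not taken from the patch or its tests) of the replacement pattern on an LDS pointer:

; atomicrmw fmin/fmax on an addrspace(3) pointer replaces the removed
; llvm.amdgcn.ds.fmin / llvm.amdgcn.ds.fmax intrinsic calls.
define float @lds_fmin_fmax(ptr addrspace(3) %ptr, float %val) {
  %old_min = atomicrmw fmin ptr addrspace(3) %ptr, float %val seq_cst
  %old_max = atomicrmw fmax ptr addrspace(3) %ptr, float %val seq_cst
  %sum = fadd float %old_min, %old_max
  ret float %sum
}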
[llvm-branch-commits] [flang] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #96266)
agozillon wrote: A small ping on this PR stack for some reviewer attention if at all possible please, it would be greatly appreciated! Thank you very much ahead of time :-) https://github.com/llvm/llvm-project/pull/96266 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove ds_fmin/ds_fmax intrinsics (PR #96739)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/96739 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < 
opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
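The matching added in the patch above buckets binary functions by demangled namespace and then compares names with `llvm::StringRef::edit_distance` against the strict `MinEditDistance < Threshold` check. A tiny standalone illustration of that comparison (my own example, assuming LLVM's ADT headers are available):

#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  // Two demangled names in the same namespace differing by one character
  // have edit distance 1, so they would match under a threshold of 2.
  llvm::StringRef Profiled = "foo::bar(int)";
  llvm::StringRef Candidate = "foo::baz(int)";
  assert(Profiled.edit_distance(Candidate) == 1);
  return 0;
}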
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 00dcd9a - Revert "[ADT] Always use 32-bit size type for SmallVector with 16-bit element…"
Author: Chelsea Cassanova Date: 2024-06-26T15:21:36-07:00 New Revision: 00dcd9a85ca77ee5e19fa90353b8bab361de983e URL: https://github.com/llvm/llvm-project/commit/00dcd9a85ca77ee5e19fa90353b8bab361de983e DIFF: https://github.com/llvm/llvm-project/commit/00dcd9a85ca77ee5e19fa90353b8bab361de983e.diff LOG: Revert "[ADT] Always use 32-bit size type for SmallVector with 16-bit element…" This reverts commit 2582d11f1a8a5783828156d3ced354727f422885. Added: Modified: llvm/include/llvm/ADT/SmallVector.h llvm/lib/Support/SmallVector.cpp Removed: diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index db34b16ecf9e7..09676d792dfeb 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -116,7 +116,8 @@ template class SmallVectorBase { template using SmallVectorSizeType = -std::conditional_t; +std::conditional_t= 8, uint64_t, + uint32_t>; /// Figure out the offset of the first element. template struct SmallVectorAlignmentAndSize { diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index e77b747984173..b6ce37842040b 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -37,7 +37,9 @@ struct Struct32B { #pragma GCC diagnostic pop #endif } - +static_assert(sizeof(SmallVector) == + sizeof(unsigned) * 2 + sizeof(void *), + "wasted space in SmallVector size 0"); static_assert(alignof(SmallVector) >= alignof(Struct16B), "wrong alignment for 16-byte aligned T"); static_assert(alignof(SmallVector) >= alignof(Struct32B), @@ -46,19 +48,13 @@ static_assert(sizeof(SmallVector) >= alignof(Struct16B), "missing padding for 16-byte aligned T"); static_assert(sizeof(SmallVector) >= alignof(Struct32B), "missing padding for 32-byte aligned T"); - -static_assert(sizeof(SmallVector) == - sizeof(unsigned) * 2 + sizeof(void *), - "wasted space in SmallVector size 0"); static_assert(sizeof(SmallVector) == sizeof(unsigned) * 2 + sizeof(void *) * 2, "wasted space in SmallVector size 1"); + static_assert(sizeof(SmallVector) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); -static_assert(sizeof(SmallVector) == - sizeof(unsigned) * 2 + sizeof(void *), - "2 byte elements have 32-bit type for size and capacity"); /// Report that MinSize doesn't fit into this vector's size type. Throws /// std::length_error or calls report_fatal_error. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
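A small standalone check (my own illustration, not part of the revert; it assumes LLVM's ADT headers) of what the restored behaviour means: with the 32-bit size-type change reverted, 2-byte element types go back to a word-sized size/capacity field, so an empty SmallVector<T, 0> of a 1-byte or 2-byte element type is pointer + size + capacity, i.e. three pointer-sized words:

#include "llvm/ADT/SmallVector.h"
#include <cstdint>

// Both asserts hold after the revert; before it, the second type packed
// size and capacity into 32 bits each, saving 8 bytes on 64-bit hosts.
static_assert(sizeof(llvm::SmallVector<char, 0>) == 3 * sizeof(void *),
              "1-byte elements: word-sized size and capacity");
static_assert(sizeof(llvm::SmallVector<int16_t, 0>) == 3 * sizeof(void *),
              "2-byte elements: word-sized size and capacity again");

int main() { return 0; }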
[llvm-branch-commits] [libcxx] 379cd11 - Revert "[libc++] Use _If for conditional_t (#96193)"
Author: James Y Knight Date: 2024-06-26T23:57:04-04:00 New Revision: 379cd1193a8453850f0c2e12c005160d7535b373 URL: https://github.com/llvm/llvm-project/commit/379cd1193a8453850f0c2e12c005160d7535b373 DIFF: https://github.com/llvm/llvm-project/commit/379cd1193a8453850f0c2e12c005160d7535b373.diff LOG: Revert "[libc++] Use _If for conditional_t (#96193)" This reverts commit 2274c66e6faaaf29ad693b1ae3e5a7b0228a1950. Added: Modified: libcxx/include/__type_traits/conditional.h Removed: diff --git a/libcxx/include/__type_traits/conditional.h b/libcxx/include/__type_traits/conditional.h index 7d5849ee824e3..5b5445a837427 100644 --- a/libcxx/include/__type_traits/conditional.h +++ b/libcxx/include/__type_traits/conditional.h @@ -44,14 +44,15 @@ struct _LIBCPP_TEMPLATE_VIS conditional { using type _LIBCPP_NODEBUG = _Then; }; -template -using __conditional_t _LIBCPP_NODEBUG = _If<_Bp, _IfRes, _ElseRes>; - #if _LIBCPP_STD_VER >= 14 template -using conditional_t _LIBCPP_NODEBUG = __conditional_t<_Bp, _IfRes, _ElseRes>; +using conditional_t _LIBCPP_NODEBUG = typename conditional<_Bp, _IfRes, _ElseRes>::type; #endif +// Helper so we can use "conditional_t" in all language versions. +template +using __conditional_t _LIBCPP_NODEBUG = typename conditional<_Bp, _If, _Then>::type; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_CONDITIONAL_H ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 5b2feee - Revert "[CodeGen] Introduce `MachineDomTreeUpdater` (#95369)"
Author: paperchalice Date: 2024-06-27T12:30:50+08:00 New Revision: 5b2feeef6364981528c1cf083bf8d952f7104b02 URL: https://github.com/llvm/llvm-project/commit/5b2feeef6364981528c1cf083bf8d952f7104b02 DIFF: https://github.com/llvm/llvm-project/commit/5b2feeef6364981528c1cf083bf8d952f7104b02.diff LOG: Revert "[CodeGen] Introduce `MachineDomTreeUpdater` (#95369)" This reverts commit 6ca387cbcb207abe2a07bbb1b536f099c2e246e7. Added: Modified: llvm/include/llvm/Analysis/DomTreeUpdater.h llvm/include/llvm/CodeGen/MachineBasicBlock.h llvm/include/llvm/CodeGen/MachinePostDominators.h llvm/lib/Analysis/DomTreeUpdater.cpp llvm/lib/CodeGen/CMakeLists.txt llvm/lib/CodeGen/MachineBasicBlock.cpp llvm/unittests/CodeGen/CMakeLists.txt Removed: llvm/include/llvm/Analysis/GenericDomTreeUpdater.h llvm/include/llvm/CodeGen/MachineDomTreeUpdater.h llvm/lib/CodeGen/MachineDomTreeUpdater.cpp llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp diff --git a/llvm/include/llvm/Analysis/DomTreeUpdater.h b/llvm/include/llvm/Analysis/DomTreeUpdater.h index 2b838a311440e..ddb958455ccd7 100644 --- a/llvm/include/llvm/Analysis/DomTreeUpdater.h +++ b/llvm/include/llvm/Analysis/DomTreeUpdater.h @@ -15,8 +15,6 @@ #define LLVM_ANALYSIS_DOMTREEUPDATER_H #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/GenericDomTreeUpdater.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Compiler.h" @@ -25,17 +23,66 @@ #include namespace llvm { +class PostDominatorTree; -class DomTreeUpdater -: public GenericDomTreeUpdater { - friend GenericDomTreeUpdater; - +class DomTreeUpdater { public: - using Base = - GenericDomTreeUpdater; - using Base::Base; + enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 }; + + explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_) + : DT(&DT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_) + : DT(DT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_) + : PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_) + : PDT(PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_, + UpdateStrategy Strategy_) + : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_, + UpdateStrategy Strategy_) + : DT(DT_), PDT(PDT_), Strategy(Strategy_) {} + + ~DomTreeUpdater() { flush(); } + + /// Returns true if the current strategy is Lazy. + bool isLazy() const { return Strategy == UpdateStrategy::Lazy; }; + + /// Returns true if the current strategy is Eager. + bool isEager() const { return Strategy == UpdateStrategy::Eager; }; + + /// Returns true if it holds a DominatorTree. + bool hasDomTree() const { return DT != nullptr; } + + /// Returns true if it holds a PostDominatorTree. + bool hasPostDomTree() const { return PDT != nullptr; } + + /// Returns true if there is BasicBlock awaiting deletion. + /// The deletion will only happen until a flush event and + /// all available trees are up-to-date. + /// Returns false under Eager UpdateStrategy. + bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); } + + /// Returns true if DelBB is awaiting deletion. + /// Returns false under Eager UpdateStrategy. 
+ bool isBBPendingDeletion(BasicBlock *DelBB) const; + + /// Returns true if either of DT or PDT is valid and the tree has at + /// least one update pending. If DT or PDT is nullptr it is treated + /// as having no pending updates. This function does not check + /// whether there is BasicBlock awaiting deletion. + /// Returns false under Eager UpdateStrategy. + bool hasPendingUpdates() const; + + /// Returns true if there are DominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or DT is nullptr. + bool hasPendingDomTreeUpdates() const; + + /// Returns true if there are PostDominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or PDT is nullptr. + bool hasPendingPostDomTreeUpdates() const; ///@{ /// \name Mutation APIs @@ -58,6 +105,51 @@ class DomTreeUpdater /// Although GenericDomTree provides several update primitives, /// it is not encouraged to use these APIs directly. + /// Submit updates to all available trees. + /// The Eager Strategy flushes updates immediately while the Lazy Strategy + /// queues the updates. + /// + /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is + /// in sync with + all updates before that single update. + /// + /// CAUTION! + /// 1. It is required for t
[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)
@@ -48,6 +287,261 @@ Value collapse2DData(RewriterBase &rewriter, Location loc, Value data) { reassociation); } +// This function transforms the filter. The data layout of the filter is FHWC. +// The transformation matrix is 2-dimension. We need to extract H x W from +// FHWC first. We need to generate 2 levels of loops to iterate on F and C. +// After the transformation, we get +// +// scf.for %f = lo_f to hi_f step 1 +// scf.for %c = lo_c to hi_c step 1 +// %extracted = extract filter from filter +// %ret = linalg.matmul G, %extracted +// %ret = linalg.matmul %ret, GT +// %inserted = insert %ret into filter +// +Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, + Value retValue, int64_t m, int64_t r, + bool leftTransform = true, bool rightTransform = true) { + // Map from (m, r) to G transform matrix. + static const llvm::SmallDenseMap + GMatrices = { + {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + }; + + // Map from (m, r) to GT transform matrix. + static const llvm::SmallDenseMap + GTMatrices = { + {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + }; + + auto filterType = cast(filter.getType()); + Type elementType = filterType.getElementType(); + auto filterShape = filterType.getShape(); // F, H, W, C + int64_t filterF = filterShape[0]; + int64_t filterH = filterShape[1]; + int64_t filterW = filterShape[2]; + int64_t filterC = filterShape[3]; + + if (filterH != r && filterH != 1) +return Value(); + if (filterW != r && filterW != 1) +return Value(); + + // Return shape is + auto zeroIdx = rewriter.create(loc, 0); + auto fUpperBound = rewriter.create(loc, filterF); + auto cUpperBound = rewriter.create(loc, filterC); + auto oneStep = rewriter.create(loc, 1); + auto outerForOp = + rewriter.create(loc, zeroIdx, fUpperBound, oneStep, retValue); + Block *outerForBody = outerForOp.getBody(); + rewriter.setInsertionPointToStart(outerForBody); + Value FIter = outerForBody->getArgument(0); + + auto innerForOp = rewriter.create( + loc, zeroIdx, cUpperBound, oneStep, outerForOp.getRegionIterArgs()[0]); + Block *innerForBody = innerForOp.getBody(); + rewriter.setInsertionPointToStart(innerForBody); + Value CIter = innerForBody->getArgument(0); + + // Extract (H, W) from (F, H, W, C) + auto extractFilter = extract2DData( + rewriter, loc, filter, FIter, CIter, /*outLoopIdx=*/0, + /*inLoopIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2, /*srcSize=*/4); + + TransformMapKeyTy key = {m, r}; + int64_t retRows = 1; + Value matmulRetValue = extractFilter; + if (leftTransform) { +// Get constant transform matrix G +auto it = GMatrices.find(key); +if (it == GMatrices.end()) + return Value(); +const TransformMatrix &GMatrix = it->second; + +retRows = GMatrix.rows; +auto matmulType = RankedTensorType::get({retRows, filterW}, elementType); +auto init = rewriter.create(loc, matmulType.getShape(), + elementType); + +Value G = create2DTransformMatrix(rewriter, loc, GMatrix, elementType); Hsiangkai wrote: There is a `ConstantOpInterface` that can convert `arith.constant` to `memref.get_global` after bufferization. https://github.com/llvm/llvm-project/pull/96183 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
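As background for that last reply: one-shot bufferization turns a dense tensor `arith.constant` into a module-level global that is read back with `memref.get_global`, so constant transform matrices built with `arith.constant` need no special handling. A small hand-written sketch of that behaviour (my own example, not from the PR; the global's name is whatever the bufferizer picks):

// Before bufferization: the transform matrix is an ordinary constant tensor.
func.func @transform_matrix() -> tensor<6x3xf32> {
  %g = arith.constant dense<1.0> : tensor<6x3xf32>
  return %g : tensor<6x3xf32>
}

// After `mlir-opt --one-shot-bufferize="bufferize-function-boundaries"`, roughly:
//   memref.global "private" constant @__constant_6x3xf32 : memref<6x3xf32> = dense<1.000000e+00>
//   func.func @transform_matrix() -> memref<6x3xf32> {
//     %0 = memref.get_global @__constant_6x3xf32 : memref<6x3xf32>
//     return %0 : memref<6x3xf32>
//   }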
[llvm-branch-commits] [llvm] 5a5ab74 - Revert "[CodeGen][NewPM] Port machine-branch-prob to new pass manager (#96389)"
Author: paperchalice Date: 2024-06-27T14:55:50+08:00 New Revision: 5a5ab746879bf0d7248e23978e56849e96ab67e8 URL: https://github.com/llvm/llvm-project/commit/5a5ab746879bf0d7248e23978e56849e96ab67e8 DIFF: https://github.com/llvm/llvm-project/commit/5a5ab746879bf0d7248e23978e56849e96ab67e8.diff LOG: Revert "[CodeGen][NewPM] Port machine-branch-prob to new pass manager (#96389)" This reverts commit 73e46c2bb4949be986d9eac98d95a206d7f003e2. Added: Modified: llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h llvm/include/llvm/InitializePasses.h llvm/include/llvm/Passes/MachinePassRegistry.def llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/lib/CodeGen/BranchFolding.cpp llvm/lib/CodeGen/EarlyIfConversion.cpp llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp llvm/lib/CodeGen/IfConversion.cpp llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp llvm/lib/CodeGen/MachineBlockPlacement.cpp llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp llvm/lib/CodeGen/MachineSink.cpp llvm/lib/CodeGen/MachineTraceMetrics.cpp llvm/lib/CodeGen/TailDuplication.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp llvm/test/CodeGen/Generic/MachineBranchProb.ll Removed: diff --git a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h index 12d33f96edd11..bd544421bc0ff 100644 --- a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -14,13 +14,14 @@ #define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" namespace llvm { -class MachineBranchProbabilityInfo { +class MachineBranchProbabilityInfo : public ImmutablePass { + virtual void anchor(); + // Default weight value. Used when we don't have information about the edge. // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of // the successors have a weight yet. But it doesn't make sense when providing @@ -30,8 +31,13 @@ class MachineBranchProbabilityInfo { static const uint32_t DEFAULT_WEIGHT = 16; public: - bool invalidate(MachineFunction &, const PreservedAnalyses &PA, - MachineFunctionAnalysisManager::Invalidator &); + static char ID; + + MachineBranchProbabilityInfo(); + + void getAnalysisUsage(AnalysisUsage &AU) const override { +AU.setPreservesAll(); + } // Return edge probability. 
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, @@ -55,45 +61,6 @@ class MachineBranchProbabilityInfo { const MachineBasicBlock *Dst) const; }; -class MachineBranchProbabilityAnalysis -: public AnalysisInfoMixin { - friend AnalysisInfoMixin; - - static AnalysisKey Key; - -public: - using Result = MachineBranchProbabilityInfo; - - Result run(MachineFunction &, MachineFunctionAnalysisManager &); -}; - -class MachineBranchProbabilityPrinterPass -: public PassInfoMixin { - raw_ostream &OS; - -public: - MachineBranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(MachineFunction &MF, -MachineFunctionAnalysisManager &MFAM); -}; - -class MachineBranchProbabilityInfoWrapperPass : public ImmutablePass { - virtual void anchor(); - - MachineBranchProbabilityInfo MBPI; - -public: - static char ID; - - MachineBranchProbabilityInfoWrapperPass(); - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.setPreservesAll(); - } - - MachineBranchProbabilityInfo &getMBPI() { return MBPI; } - const MachineBranchProbabilityInfo &getMBPI() const { return MBPI; } -}; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 421c09ada7a19..4ddb7112a47bb 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -182,7 +182,7 @@ void initializeMIRPrintingPassPass(PassRegistry&); void initializeMachineBlockFrequencyInfoPass(PassRegistry&); void initializeMachineBlockPlacementPass(PassRegistry&); void initializeMachineBlockPlacementStatsPass(PassRegistry&); -void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); +void initializeMachineBranchProbabilityInfoPass(PassRegistry&); void initializeMachineCFGPrinterPass(PassRegistry &); void initializeMachineCSEPas