[llvm-branch-commits] [mlir] [MLIR][OpenMP] Clause-based OpenMP operation definition (PR #92523)
https://github.com/TIFitis approved this pull request. I'm happy with the patch. Thanks for the good work :) https://github.com/llvm/llvm-project/pull/92523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443 >From a17cf3323f6bffc9eab50a1bdb52a2568e3314a2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 16:44:08 +0200 Subject: [PATCH 1/3] AMDGPU: Add subtarget feature for global atomic fadd denormal support Not sure what the behavior for gfx90a is. The SPG says it always flushes. The instruction documentation says it does not. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 -- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 56ec5e9c4cfc2..6b212e1b2af03 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureMemoryAtomicFaddF32DenormalSupport + : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", + "HasAtomicMemoryAtomicFaddF32DenormalSupport", + "true", + "global/flat/buffer atomic fadd for float supports denormal handling" +>; + def FeatureAgentScopeFineGrainedRemoteMemoryAtomics : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", "HasAgentScopeFineGrainedRemoteMemoryAtomics", @@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureAgentScopeFineGrainedRemoteMemoryAtomics + FeatureAgentScopeFineGrainedRemoteMemoryAtomics, + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1631,7 +1639,9 @@ def FeatureISAVersion12 : FeatureSet< FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, - Feature1_5xVGPRs]>; + Feature1_5xVGPRs, + FeatureMemoryAtomicFaddF32DenormalSupport]>; + ]>; def FeatureISAVersion12_Generic: FeatureSet< !listconcat(FeatureISAVersion12.Features, diff --git 
a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 9e2a316a9ed28..db0b2b67a0388 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasAtomicFlatPkAdd16Insts = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false; bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; bool HasAtomicBufferGlobalPkAddF16Insts = false; bool HasAtomicCSubNoRtnInsts = false; @@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + /// \return true if the target's flat, global, and buffer atomic fadd for + /// float supports denormal handling. + bool hasMemoryAtomicFaddF32DenormalSupport() const { +return HasAtomicMemoryAtomicFaddF32DenormalSupport; + } + /// \return true if atomic operations targeting fine-grained memory work /// correctly at device scope, in allocations in host or peer PCIe device /// memory. >From c9310a21c1a8aa451c73f1ac727400bb2e94df0c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Jun 2024 12:10:37 +0200 Subject: [PATCH 2/3] Add to gfx11. RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm" though I'm not sure I trust it. 
--- llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b212e1b2af03..39a1d629a4aea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1547,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard]>; + FeatureVcmpxPermlaneHazard, + FeatureMemoryAtomicFaddF32DenormalSupport]>; // There are few workarounds that need to be // added to all targets. This pessimizes codegen @@ -1640,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, Feature1_5xVGPRs, - FeatureMemoryAtomicFaddF32DenormalSupport]>; + FeatureMemoryAtomicFaddF32DenormalSupport ]>; def FeatureISAVersion12_Generic: FeatureSet< >From ab5278870202875ebc28736da8182a751ae6ee92 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 11:30:51 +0200 Subject: [PATCH 3/3] Rename --- llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 39a1d629a4aea..34c6f6ff19bff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -78
[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444 >From 20d2b3f20bfd4a9a919ae6281d436b070d87c289 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 23 Jun 2024 17:07:53 +0200 Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++--- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++ llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 34c6f6ff19bff..84ea040477763 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst "Has flat_atomic_add_f32 instruction" >; +def FeatureFlatBufferGlobalAtomicFaddF64Inst + : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", + "HasFlatBufferGlobalAtomicFaddF64Inst", + "true", + "Has flat, buffer, and global instructions for f64 atomic fadd" +>; + def FeatureMemoryAtomicFAddF32DenormalSupport : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", "HasMemoryAtomicFaddF32DenormalSupport", @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureBackOffBarrier, FeatureKernargPreload, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF64FlatInsts, + FeatureFlatBufferGlobalAtomicFaddF64Inst ])>; def FeatureISAVersion9_0_C : FeatureSet< @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF64FlatInsts, FeatureAgentScopeFineGrainedRemoteMemoryAtomics, - FeatureMemoryAtomicFAddF32DenormalSupport + FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureFlatBufferGlobalAtomicFaddF64Inst ]>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -1932,11 +1941,9 @@ def isGFX12Plus : def HasFlatAddressSpace 
: Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; - -def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd - Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">, - // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction. - AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>; +def HasFlatBufferGlobalAtomicFaddF64Inst : + Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, + AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; def HasAtomicFMinFMaxF32GlobalInsts : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3b8d94b744000..a904c8483dbf5 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst +let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2 // depending on some subtargets. 
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { let SubtargetPredicate = isGFX940Plus; @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>; } // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts -let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in { +let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; -} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 +} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 4bf8f20269a15..16dc019ede810 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97048?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#97048** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97048?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#96760** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96760?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>: 1 other dependent PR ([#96872](https://github.com/llvm/llvm-project/pull/96872) <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96872?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>) * **#96759** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96759?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96444** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96444?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96443** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96443?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96442** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95930** 
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95930?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95929** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95929?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/97048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes We only handled the easy LDS case before. Handle the other address spaces with the more complicated legality logic. --- Patch is 185.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97048.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+14-2) - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll (+2125) - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll (+2125) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index a219d01518458..88e40da110555 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1670,10 +1670,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasAtomicFlatPkAdd16Insts()) Atomic.legalFor({{V2F16, FlatPtr}, {V2BF16, FlatPtr}}); - // FIXME: Handle flat, global and buffer cases. - getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) + + // Most of the legalization work here is done by AtomicExpand. We could + // probably use a simpler legality rule that just assumes anything is OK. 
+ auto &AtomicFMinFMax = +getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) .legalFor({{F32, LocalPtr}, {F64, LocalPtr}}); + if (ST.hasAtomicFMinFMaxF32GlobalInsts()) +AtomicFMinFMax.legalFor({{F32, GlobalPtr},{F32, BufferFatPtr}}); + if (ST.hasAtomicFMinFMaxF64GlobalInsts()) +AtomicFMinFMax.legalFor({{F64, GlobalPtr}, {F64, BufferFatPtr}}); + if (ST.hasAtomicFMinFMaxF32FlatInsts()) +AtomicFMinFMax.legalFor({F32, FlatPtr}); + if (ST.hasAtomicFMinFMaxF64FlatInsts()) +AtomicFMinFMax.legalFor({F64, FlatPtr}); + // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output // demarshalling getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll new file mode 100644 index 0..cbcaaa8530597 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll @@ -0,0 +1,2125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s + +; TODO: Delete this and add run lines to use *-atomicrmw-fmax.ll tests + +define float @local_atomic_fmax_ret_f32(ptr 
addrspace(3) %ptr, float %val) { +; GFX12-LABEL: local_atomic_fmax_ret_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT:s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:s_wait_expcnt 0x0 +; GFX12-NEXT:s_wait_samplecnt 0x0 +; GFX12-NEXT:s_wait_bvhcnt 0x0 +; GFX12-NEXT:s_wait_kmcnt 0x0 +; GFX12-NEXT:s_wait_storecnt 0x0 +; GFX12-NEXT:ds_max_num_rtn_f32 v0, v0, v1 +; GFX12-NEXT:s_wait_dscnt 0x0 +; GFX12-NEXT:global_inv scope:SCOPE_SE +; GFX12-NEXT:s_setpc_b64 s[30:31] +; +; GFX940-LABEL: local_atomic_fmax_ret_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT:ds_max_rtn_f32 v0, v0, v1 +; GFX940-NEXT:s_waitcnt lgkmcnt(0) +; GFX940-NEXT:s_setpc_b64 s[30:31] +; +; GFX11-LABEL: local_atomic_fmax_ret_f32: +; GFX11: ; %bb.0: +; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT:ds_max_rtn_f32 v0, v0, v1 +; GFX11-NEXT:s_waitcnt lgkmcnt(0) +; GFX11-NEXT:buffer_gl0_inv +; GFX11-NEXT:s_setpc_b64 s[30:31] +; +; GFX10-LABEL: local_atomic_fmax_ret_f32: +; GFX10: ; %bb.0: +; GFX10-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT:ds_max_rtn_f32 v0, v0, v1 +; GFX10-NEXT:s_waitcnt lgkmcnt(0) +; GFX10-NEXT:buffer_gl0_inv +; GFX10-NEXT:s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: local_atomic_fmax_ret_f32: +; GFX90A: ; %bb.0: +; GFX90A-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT:ds_max_rtn_f32 v0, v0, v1 +; GFX90A-NEXT:s_waitcnt lgkmcn
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/97048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96872 >From bca1faaabf11381164e0f85704c217d7b1d77fa0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 10:58:44 +0200 Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} Need to emit syncscope and new metadata to get the native instruction, most of the time. --- clang/lib/CodeGen/CGBuiltin.cpp | 39 +-- .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl| 2 +- .../builtins-fp-atomics-gfx12.cl | 4 +- .../builtins-fp-atomics-gfx90a.cl | 4 +- .../builtins-fp-atomics-gfx940.cl | 4 +- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 98c2f70664ec7..382812aeecf7f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18632,8 +18633,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: @@ -18645,18 +18644,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = 
Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -19071,7 +19063,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19087,6 +19081,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19121,8 +19117,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { - // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. - SSID = llvm::SyncScope::System; + // Most of the builtins do not have syncscope/order arguments. For DS + // atomics the scope doesn't really matter, as they implicitly operate at + // workgroup scope. 
+ // + // The global/flat cases need to use agent scope to consistently produce + // the native instruction instead of a cmpxchg expansion. + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::SequentiallyConsistent; // The v2bf16 builtin uses i16 instead of a natural bfloat type. @@ -19137,6 +19138,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); + +unsigned AddrSpace = Ptr.getType()->getAddressSpace(); +if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) { + // Most targets require "amdgpu.no.fine.grained.memory" to emit the nativ
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96873 >From 9d3d3997c1becc06537a463b7e00d10d0a98ff46 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:12:59 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 20 ++- .../builtins-fp-atomics-gfx12.cl | 9 ++--- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../builtins-fp-atomics-gfx940.cl | 3 ++- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3015a5de4e690..4729f81a48707 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18633,22 +18633,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -18668,11 +18661,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 
IID = Intrinsic::amdgcn_flat_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19065,7 +19053,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19083,6 +19073,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 6b8a6d14575db..07e63a8711c7f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_flat_add_2f16 // GFX12: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { @@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { } // CHECK-LABEL: test_global_add_half2 -// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_global_add_half2 // GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2, off
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From fb7699e8c35f70bd6adc55b148b6386239c01fe3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4729f81a48707..6354051c77ebd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18635,10 +18635,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18648,19 +18646,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19055,7 +19046,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19075,6 +19068,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079..02e289427238f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d..bd9b8c7268e06 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef half _
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96875 >From 16b9d36210a78d36b99e809bef87b6c51f204734 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:34:43 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 26 ++- .../builtins-fp-atomics-gfx12.cl | 24 - .../builtins-fp-atomics-gfx90a.cl | 6 ++--- .../builtins-fp-atomics-gfx940.cl | 14 +++--- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6354051c77ebd..e5a9c715c8a07 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18659,22 +18659,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { -Intrinsic::ID IID; -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19048,7 +19032,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case 
AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19070,6 +19056,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19114,7 +19102,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. - if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 07e63a8711c7f..e8b6eb57c38d7 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 +// CHECK-NEXT: 
[[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL: test_flat_add_2f
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96876 >From 6e2f7566e6367d438f95c452536f123fbd8d0e2a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 23:18:32 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +-- .../builtins-fp-atomics-gfx90a.cl | 18 ++ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e5a9c715c8a07..e925b02ca110a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18633,32 +18633,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { -Intrinsic::ID IID; -llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - IID = Intrinsic::amdgcn_global_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - IID = Intrinsic::amdgcn_global_atomic_fmax; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmax; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = -CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19034,7 +19008,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19061,8 +19039,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: BinOp = llvm::AtomicRMWInst::FMin; break; +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = llvm::AtomicRMWInst::FMax; break; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 9381ce951df3e..556e553903d1a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) { } // CHECK-LABEL: test_global_global_min_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: 
test_global_global_min_f64$local // GFX90A: global_atomic_min_f64 void test_global_global_min_f64(__global double *addr, double x){ @@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){ } // CHECK-LABEL: test_global_max_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_global_max_f64$local // GFX90A: global_atomic_max_f64 void test_global_max_f64(__global double *addr, double x){ @@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr, doub
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/97050 These are now fully covered by atomicrmw. >From 09c8b097c1a305a8c4ab82324b60c8c64c533742 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Jun 2024 16:32:48 +0200 Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics These are now fully covered by atomicrmw. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 14 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - llvm/lib/Target/AMDGPU/FLATInstructions.td| 2 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/test/Bitcode/amdgcn-atomic.ll| 22 ++ .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 54 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 100 - llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 -- 11 files changed, 33 insertions(+), 368 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 71b1e832bde3c..9cf4d6352d23d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2907,10 +2907,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 5beefaa1ec701..8faaff5636665 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -2352,7 +2354,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -2407,8 +2411,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48..db8b44149cf47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9e7694f41d6b8..74686cd10512c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4897,8 +4897,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97050?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#97050** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97050?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#96876** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96876?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96875** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96875?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96874** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96874?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96873** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96873?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96872** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96872?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96760** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96760?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>: 1 other dependent PR ([#97048](https://github.com/llvm/llvm-project/pull/97048) 
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97048?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>) * **#96759** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96759?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96444** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96444?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96443** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96443?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96442** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95930** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95930?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95929** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95929?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> 
Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/97050 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97051?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#97051** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97051?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#97050** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97050?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96876** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96876?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96875** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96875?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96874** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96874?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96873** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96873?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96872** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96872?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96760** 
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96760?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>: 1 other dependent PR ([#97048](https://github.com/llvm/llvm-project/pull/97048) <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97048?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>) * **#96759** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96759?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96444** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96444?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96443** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96443?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#96442** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/96442?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95930** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95930?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#95929** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/95929?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking.</a> 
Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/97051 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/97050 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes These are now fully covered by atomicrmw. --- Patch is 27.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97050.diff 11 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-4) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+10-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-2) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (-2) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-5) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+22) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (-54) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (-100) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (-193) ```diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 71b1e832bde3c..9cf4d6352d23d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2907,10 +2907,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 5beefaa1ec701..8faaff5636665 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -2352,7 +2354,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -2407,8 +2411,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48..db8b44149cf47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9e7694f41d6b8..74686cd10512c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4897,8 +4897,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgc
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) Changes These have been replaced with atomicrmw. --- Patch is 266.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97051.diff 42 Files Affected: - (modified) llvm/docs/ReleaseNotes.rst (+5) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+2-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (-5) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (-2) - (modified) llvm/lib/Target/AMDGPU/DSInstructions.td (+1-5) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (-13) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (-4) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+32) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+2-44) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+4-37) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+1-54) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+34-237) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll (+5-116) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll (+2-121) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll (-167) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+2-112) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+2-70) - (removed) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll (-21) - (removed) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll (-126) - (modified) 
llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll (+23-15) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (+2-2) - (removed) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll (-63) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (+20-2) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+56-6) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+40-440) - (modified) llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (-115) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll (+4-124) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll (-171) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll (+29-29) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll (+19-20) - (modified) llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll (+14-9) - (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll (-77) - (modified) llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll (+3-3) - (removed) llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll (-28) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll (-34) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll (+69-30) ``diff diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7eafc49059dd6..b5ee1f3a13bda 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -137,6 +137,11 @@ Changes to the AMDGPU Backend :ref:`atomicrmw ` instruction with `fadd`, `fmin` and `fmax` with addrspace(3) instead. +* Removed ``llvm.amdgcn.flat.atomic.fadd`` and + ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd` and + addrspace(0) or addrspace(1) instead. 
+ Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9cf4d6352d23d..3b4b3614b59a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2820,8 +2820,6 @@ def int_amdgcn_dot4_f32_bf8_bf8 : AMDGPU8bitFloatDot4Intrinsic; // gfx908 intrinsics // ===--===// -def int_amdgcn_global_atomic_fadd : AMDGPUAtomicRtn; - // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp class AMDGPUMfmaIntrinsic : ClangBuiltin, @@ -2860,7 +2858,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic; def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd : AMDGPUAtomicRtn; def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn; def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn; diff --git a/llvm/lib/IR/AutoUpgra
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes These have been replaced with atomicrmw. --- Patch is 266.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97051.diff 42 Files Affected: - (modified) llvm/docs/ReleaseNotes.rst (+5) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+2-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (-5) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (-2) - (modified) llvm/lib/Target/AMDGPU/DSInstructions.td (+1-5) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (-13) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (-4) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+32) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+2-44) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+4-37) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+1-54) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+34-237) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll (+5-116) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll (+2-121) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll (-167) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+2-112) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+2-70) - (removed) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll (-21) - (removed) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll (-126) - (modified) 
llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll (+23-15) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (+2-2) - (removed) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll (-63) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (+20-2) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+56-6) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+40-440) - (modified) llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (-115) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll (+4-124) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll (-171) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll (+29-29) - (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll (+19-20) - (modified) llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll (+14-9) - (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll (-77) - (modified) llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll (+3-3) - (removed) llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll (-28) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll (-34) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll (+69-30) ``diff diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7eafc49059dd6..b5ee1f3a13bda 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -137,6 +137,11 @@ Changes to the AMDGPU Backend :ref:`atomicrmw ` instruction with `fadd`, `fmin` and `fmax` with addrspace(3) instead. +* Removed ``llvm.amdgcn.flat.atomic.fadd`` and + ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the + :ref:`atomicrmw ` instruction with `fadd` and + addrspace(0) or addrspace(1) instead. 
+ Changes to the ARM Backend -- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9cf4d6352d23d..3b4b3614b59a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2820,8 +2820,6 @@ def int_amdgcn_dot4_f32_bf8_bf8 : AMDGPU8bitFloatDot4Intrinsic; // gfx908 intrinsics // ===--===// -def int_amdgcn_global_atomic_fadd : AMDGPUAtomicRtn; - // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp class AMDGPUMfmaIntrinsic : ClangBuiltin, @@ -2860,7 +2858,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic; def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd : AMDGPUAtomicRtn; def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn; def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn; diff --git a/llvm/lib/IR/AutoUpgrade
[llvm-branch-commits] [llvm] AMDGPU: Remove global/flat atomic fadd intrinsics (PR #97051)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/97051 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
@@ -1670,10 +1670,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasAtomicFlatPkAdd16Insts()) Atomic.legalFor({{V2F16, FlatPtr}, {V2BF16, FlatPtr}}); - // FIXME: Handle flat, global and buffer cases. - getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) + + // Most of the legalization work here is done by AtomicExpand. We could + // probably use a simpler legality rule that just assumes anything is OK. + auto &AtomicFMinFMax = +getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) .legalFor({{F32, LocalPtr}, {F64, LocalPtr}}); + if (ST.hasAtomicFMinFMaxF32GlobalInsts()) +AtomicFMinFMax.legalFor({{F32, GlobalPtr},{F32, BufferFatPtr}}); shiltian wrote: so those targets that support global ptrs also support buffer fat ptrs? https://github.com/llvm/llvm-project/pull/97048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
@@ -1670,10 +1670,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasAtomicFlatPkAdd16Insts()) Atomic.legalFor({{V2F16, FlatPtr}, {V2BF16, FlatPtr}}); - // FIXME: Handle flat, global and buffer cases. - getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) + + // Most of the legalization work here is done by AtomicExpand. We could + // probably use a simpler legality rule that just assumes anything is OK. + auto &AtomicFMinFMax = +getActionDefinitionsBuilder({G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX}) .legalFor({{F32, LocalPtr}, {F64, LocalPtr}}); + if (ST.hasAtomicFMinFMaxF32GlobalInsts()) +AtomicFMinFMax.legalFor({{F32, GlobalPtr},{F32, BufferFatPtr}}); arsenm wrote: Yes, it works for the fmin/fmax case. The fadd had some has-global-no-buffer cases https://github.com/llvm/llvm-project/pull/97048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Legalize atomicrmw fmin/fmax (PR #97048)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/97048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Update flang with changes to the OpenMP dialect (PR #92524)
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/92524 >From 441b83ca7fdc31b8717207529b6e86e33a1f130a Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 17 May 2024 11:38:36 +0100 Subject: [PATCH] [Flang][OpenMP] Update flang with changes to the OpenMP dialect This patch applies fixes after the updates to OpenMP clause operands, as well as updating some tests that were impacted by changes to the ordering or assembly format of some clauses in MLIR. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp| 4 ++-- flang/lib/Lower/OpenMP/ClauseProcessor.h | 4 ++-- flang/lib/Lower/OpenMP/OpenMP.cpp | 19 --- flang/test/Lower/OpenMP/atomic-capture.f90| 2 +- flang/test/Lower/OpenMP/copyin-order.f90 | 2 +- flang/test/Lower/OpenMP/parallel-wsloop.f90 | 2 +- flang/test/Lower/OpenMP/parallel.f90 | 24 +-- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/target.f90| 24 +-- .../use-device-ptr-to-use-device-addr.f90 | 2 +- 10 files changed, 43 insertions(+), 42 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index f78cd0f9df1a1..d507e58b164dd 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1073,7 +1073,7 @@ bool ClauseProcessor::processEnter( } bool ClauseProcessor::processUseDeviceAddr( -mlir::omp::UseDeviceClauseOps &result, +mlir::omp::UseDeviceAddrClauseOps &result, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSyms) const { @@ -1085,7 +1085,7 @@ bool ClauseProcessor::processUseDeviceAddr( } bool ClauseProcessor::processUseDevicePtr( -mlir::omp::UseDeviceClauseOps &result, +mlir::omp::UseDevicePtrClauseOps &result, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSyms) const { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 
53571ae5abc20..43795d5c25399 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -129,12 +129,12 @@ class ClauseProcessor { mlir::omp::ReductionClauseOps &result) const; bool processTo(llvm::SmallVectorImpl &result) const; bool processUseDeviceAddr( - mlir::omp::UseDeviceClauseOps &result, + mlir::omp::UseDeviceAddrClauseOps &result, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSyms) const; bool processUseDevicePtr( - mlir::omp::UseDeviceClauseOps &result, + mlir::omp::UseDevicePtrClauseOps &result, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSyms) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 22c41fce31723..d8679fb693659 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -244,7 +244,8 @@ createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, // clause. Support for such list items in a use_device_ptr clause // is deprecated." static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( -mlir::omp::UseDeviceClauseOps &clauseOps, +llvm::SmallVectorImpl &useDeviceAddrVars, +llvm::SmallVectorImpl &useDevicePtrVars, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSymbols) { @@ -256,10 +257,9 @@ static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( // Iterate over our use_device_ptr list and shift all non-cptr arguments into // use_device_addr. 
- for (auto *it = clauseOps.useDevicePtrVars.begin(); - it != clauseOps.useDevicePtrVars.end();) { + for (auto *it = useDevicePtrVars.begin(); it != useDevicePtrVars.end();) { if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType( { - clauseOps.useDeviceAddrVars.push_back(*it); + useDeviceAddrVars.push_back(*it); // We have to shuffle the symbols around as well, to maintain // the correct Input -> BlockArg for use_device_ptr/use_device_addr. // NOTE: However, as map's do not seem to be included currently @@ -267,11 +267,11 @@ static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( // future alterations. I believe the reason they are not currently // is that the BlockArg assign/lowering needs to be extended // to a greater set of types. - auto idx = std::distance(clauseOps.useDevicePtrVars.begin(), it); + auto idx = std::distance(useDevicePtrVars.begin(), it); moveElementToBack(idx, useDeviceTypes); moveElementToBack(idx, useDeviceLocs); moveEl
[llvm-branch-commits] [mlir] 684b928 - Revert "[mlir][NVVM] Disallow results on kernel functions (#96399)"
Author: Jeff Niu Date: 2024-06-28T08:32:18-07:00 New Revision: 684b928f1075bd094ed2fc8cc001aa5cff889fb4 URL: https://github.com/llvm/llvm-project/commit/684b928f1075bd094ed2fc8cc001aa5cff889fb4 DIFF: https://github.com/llvm/llvm-project/commit/684b928f1075bd094ed2fc8cc001aa5cff889fb4.diff LOG: Revert "[mlir][NVVM] Disallow results on kernel functions (#96399)" This reverts commit 346c4a88afedcef3da40f68c83f0a5b3e0ac61ea. Added: Modified: mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp mlir/test/Target/LLVMIR/nvvmir.mlir Removed: diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 3d6a911f36541..94197e473ce01 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -214,8 +214,7 @@ void MmaOp::print(OpAsmPrinter &p) { p.printOptionalAttrDict(this->getOperation()->getAttrs(), ignoreAttrNames); // Print the types of the operands and result. - p << " : " -<< "("; + p << " : " << "("; llvm::interleaveComma(SmallVector{frags[0].regs[0].getType(), frags[1].regs[0].getType(), frags[2].regs[0].getType()}, @@ -956,9 +955,7 @@ std::string NVVM::WgmmaMmaAsyncOp::getPtx() { ss << "},"; // Need to map read/write registers correctly. regCnt = (regCnt * 2); - ss << " $" << (regCnt) << "," - << " $" << (regCnt + 1) << "," - << " p"; + ss << " $" << (regCnt) << "," << " $" << (regCnt + 1) << "," << " p"; if (getTypeD() != WGMMATypes::s32) { ss << ", $" << (regCnt + 3) << ", $" << (regCnt + 4); } @@ -1056,14 +1053,10 @@ LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op, StringAttr attrName = attr.getName(); // Kernel function attribute should be attached to functions. 
if (attrName == NVVMDialect::getKernelFuncAttrName()) { -auto funcOp = dyn_cast(op); -if (!funcOp) { +if (!isa(op)) { return op->emitError() << "'" << NVVMDialect::getKernelFuncAttrName() << "' attribute attached to unexpected op"; } -if (!funcOp.getResultTypes().empty()) { - return op->emitError() << "kernel function cannot have results"; -} } // If maxntid and reqntid exist, it must be an array with max 3 dim if (attrName == NVVMDialect::getMaxntidAttrName() || diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 26ba80cba6ed5..a8ae4d97888c9 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -574,10 +574,3 @@ llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}) llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}, %arg1: f32, %arg2: !llvm.ptr {llvm.byval = f32, nvvm.grid_constant}) attributes {nvvm.kernel} { llvm.return } - -// - - -// expected-error @below{{kernel function cannot have results}} -llvm.func @kernel_with_result(%i: i32) -> i32 attributes {nvvm.kernel} { - llvm.return %i : i32 -} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Fixed false positive when accessing offset member variables (PR #95387)
https://github.com/gbMattN updated https://github.com/llvm/llvm-project/pull/95387 >From 547444a1cfe493ae6c698e083ebb4f8f73e5e040 Mon Sep 17 00:00:00 2001 From: Matthew Nagy Date: Fri, 28 Jun 2024 16:12:31 + Subject: [PATCH] [TySan] Fixed false positive when accessing global object's member variables --- compiler-rt/lib/tysan/tysan.cpp | 19 +++- .../test/tysan/global-struct-members.c| 31 +++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/test/tysan/global-struct-members.c diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index f627851d049e6..8235b0ec2b55e 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -221,7 +221,24 @@ __tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) { OldTDPtr -= i; OldTD = *OldTDPtr; -if (!isAliasingLegal(td, OldTD)) +// When shadow memory is set for global objects, the entire object is tagged with the struct type +// This means that when you access a member variable, tysan reads that as you accessing a struct midway +// through, with 'i' being the offset +// Therefore, if you are accessing a struct, we need to find the member type. We can go through the +// members of the struct type and see if there is a member at the offset you are accessing the struct by. +// If there is indeed a member starting at offset 'i' in the struct, we should check aliasing legality +// with that type. If there isn't, we run alias checking on the struct with will give us the correct error. 
+tysan_type_descriptor *InternalMember = OldTD; +if (OldTD->Tag == TYSAN_STRUCT_TD) { + for (int j = 0; j < OldTD->Struct.MemberCount; j++) { +if (OldTD->Struct.Members[j].Offset == i) { + InternalMember = OldTD->Struct.Members[j].Type; + break; +} + } +} + +if (!isAliasingLegal(td, InternalMember)) reportError(addr, size, td, OldTD, AccessStr, "accesses part of an existing object", -i, pc, bp, sp); diff --git a/compiler-rt/test/tysan/global-struct-members.c b/compiler-rt/test/tysan/global-struct-members.c new file mode 100644 index 0..67496a30a820f --- /dev/null +++ b/compiler-rt/test/tysan/global-struct-members.c @@ -0,0 +1,31 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include + +struct X { + int a, b, c; +} x; + +static struct X xArray[2]; + +int main() { + x.a = 1; + x.b = 2; + x.c = 3; + + printf("%d %d %d\n", x.a, x.b, x.c); + // CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation + + for (size_t i = 0; i < 2; i++) { +xArray[i].a = 1; +xArray[i].b = 1; +xArray[i].c = 1; + } + + struct X *xPtr = (struct X *)&(xArray[0].c); + xPtr->a = 1; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in X at offset 0) accesses an existing object of type int (in X at offset 8) + // CHECK: {{#0 0x.* in main .*struct-members.c:}}[[@LINE-3]] +} \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatibility for tests (PR #96507)
https://github.com/gbMattN updated https://github.com/llvm/llvm-project/pull/96507 >From 9df832f5886f6e9555e58e007e3e261937af4a96 Mon Sep 17 00:00:00 2001 From: Matthew Nagy Date: Fri, 28 Jun 2024 16:48:53 + Subject: [PATCH] [TySan] Improves compatability for tests --- compiler-rt/test/tysan/violation-pr45282.c | 2 +- compiler-rt/test/tysan/violation-pr47137.c | 5 +++-- compiler-rt/test/tysan/violation-pr62544.c | 2 +- compiler-rt/test/tysan/violation-pr62828.cpp | 2 +- compiler-rt/test/tysan/violation-pr68655.cpp | 2 +- compiler-rt/test/tysan/violation-pr86685.c | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/compiler-rt/test/tysan/violation-pr45282.c b/compiler-rt/test/tysan/violation-pr45282.c index f3583d6be6f6a..9529dd21e7b7e 100644 --- a/compiler-rt/test/tysan/violation-pr45282.c +++ b/compiler-rt/test/tysan/violation-pr45282.c @@ -18,7 +18,7 @@ int main(void) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing object of type float - // CHECK-NEXT: in main violation-pr45282.c:25 + // CHECK-NEXT: in main {{.*violation-pr45282.c:25.*}} // loop of problems for (j = 2; j <= 4; ++j) { diff --git a/compiler-rt/test/tysan/violation-pr47137.c b/compiler-rt/test/tysan/violation-pr47137.c index 3987128ff6fc6..f6a0b353ad944 100644 --- a/compiler-rt/test/tysan/violation-pr47137.c +++ b/compiler-rt/test/tysan/violation-pr47137.c @@ -4,6 +4,7 @@ // https://github.com/llvm/llvm-project/issues/47137 #include #include +#include void f(int m) { int n = (4 * m + 2) / 3; @@ -23,8 +24,8 @@ void f(int m) { } // CHECK: TypeSanitizer: type-aliasing-violation on address - // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type long long - // CHECK-NEXT:in f violation-pr47137.c:30 + // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type {{(long)+}} + // CHECK-NEXT:in f {{.*violation-pr47137.c:30.*}} for 
(int i = 0, j = 0; j < 4 * m; i += 4, j += 3) { for (int k = 0; k < 3; k++) { ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k]; diff --git a/compiler-rt/test/tysan/violation-pr62544.c b/compiler-rt/test/tysan/violation-pr62544.c index 30610925ba385..50311ebb4f35c 100644 --- a/compiler-rt/test/tysan/violation-pr62544.c +++ b/compiler-rt/test/tysan/violation-pr62544.c @@ -18,7 +18,7 @@ int main() { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing object of type int - // CHECK-NEXT: in main violation-pr62544.c:22 + // CHECK-NEXT: in main {{.*violation-pr62544.c:22.*}} *e = 3; printf("%d\n", a); } diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp b/compiler-rt/test/tysan/violation-pr62828.cpp index 33003df9761f5..2845f4c926257 100644 --- a/compiler-rt/test/tysan/violation-pr62828.cpp +++ b/compiler-rt/test/tysan/violation-pr62828.cpp @@ -24,7 +24,7 @@ short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, int *ptr) { // CHECK: ERROR: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type int - // CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) violation-pr62828.cpp:29 + // CHECK-NEXT:in test1(int (*) [8], short (*) [8], int*) {{.*violation-pr62828.cpp:29.*}} for (int i3 = 0; i3 < 4; ++i3) { output2[i3] = input2[(i3 * 2)]; } diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp b/compiler-rt/test/tysan/violation-pr68655.cpp index ac20f8c94e1ff..615971c75d20e 100644 --- a/compiler-rt/test/tysan/violation-pr68655.cpp +++ b/compiler-rt/test/tysan/violation-pr68655.cpp @@ -9,7 +9,7 @@ struct S1 { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: READ of size 4 at {{.+}} with type int accesses an existing object of type long long (in S1 at offset 0) -// CHECK-NEXT: in copyMem(S1*, S1*) violation-pr68655.cpp:19 +// CHECK-NEXT: in copyMem(S1*, S1*) 
{{.*violation-pr68655.cpp:19.*}} void inline copyMem(S1 *dst, S1 *src) { unsigned *d = reinterpret_cast(dst); diff --git a/compiler-rt/test/tysan/violation-pr86685.c b/compiler-rt/test/tysan/violation-pr86685.c index fe4fd82af5fdd..b0a84fa9e14c7 100644 --- a/compiler-rt/test/tysan/violation-pr86685.c +++ b/compiler-rt/test/tysan/violation-pr86685.c @@ -13,7 +13,7 @@ void foo(int *s, float *f, long n) { // CHECK: TypeSanitizer: type-aliasing-violation on address // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing object of type float -// CHECK-NEXT: #0 {{.+}} in foo violation-pr86685.c:17 +// CHECK-NEXT: #0 {{.+}} in foo {{.*violation-pr86685.c:17.*}} *s = 4; } } ___ llvm-branch-commits mailing list llvm-
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
MaskRay wrote: Eli is happier now. I plan to land this in a few hours. https://github.com/llvm/llvm-project/pull/96282 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [clang] [clang][OpenMP] Implement `isOpenMPCapturingDirective` (PR #97090)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/97090 Check if the given directive can capture variables, and thus needs a captured statement. Simplify some code using this function. >From 0ee7c0154dee86e25c05f09828637eaf9bb8ec27 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 28 Jun 2024 12:31:35 -0500 Subject: [PATCH] [clang][OpenMP] Implement `isOpenMPCapturingDirective` Check if the given directive can capture variables, and thus needs a captured statement. Simplify some code using this function. --- clang/include/clang/Basic/OpenMPKinds.h | 6 +++ clang/lib/Basic/OpenMPKinds.cpp | 66 + clang/lib/Sema/SemaOpenMP.cpp | 14 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6d9d6ebc58e2c..3f21766f392cf 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -376,6 +376,12 @@ bool checkFailClauseParameter(OpenMPClauseKind FailClauseParameter); /// \return true - if the above condition is met for this directive /// otherwise - false. bool isOpenMPExecutableDirective(OpenMPDirectiveKind DKind); + +/// Checks if the specified directive need to capture variables. +/// \param DKind Specified directive. +/// \return true - if the above condition is met for this directive +/// otherwise - false. 
+bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind); } #endif diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 7c8990880fae3..30c34c207ae23 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -709,10 +709,44 @@ bool clang::isOpenMPExecutableDirective(OpenMPDirectiveKind DKind) { return Cat == Category::Executable || Cat == Category::Subsidiary; } +bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) { + if (isOpenMPExecutableDirective(DKind)) { +switch (DKind) { +case OMPD_atomic: +case OMPD_barrier: +case OMPD_cancel: +case OMPD_cancellation_point: +case OMPD_critical: +case OMPD_depobj: +case OMPD_error: +case OMPD_flush: +case OMPD_masked: +case OMPD_master: +case OMPD_section: +case OMPD_taskwait: +case OMPD_taskyield: + return false; +default: + return !isOpenMPLoopTransformationDirective(DKind); +} + } + // Non-executable directives. + switch (DKind) { + case OMPD_metadirective: + case OMPD_nothing: +return true; + default: +break; + } + return false; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { assert(unsigned(DKind) < llvm::omp::Directive_enumSize); + assert(isOpenMPCapturingDirective(DKind)); + switch (DKind) { case OMPD_metadirective: CaptureRegions.push_back(OMPD_metadirective); @@ -799,48 +833,18 @@ void clang::getOpenMPCaptureRegions( case OMPD_for: case OMPD_for_simd: case OMPD_sections: - case OMPD_section: case OMPD_single: - case OMPD_master: - case OMPD_critical: case OMPD_taskgroup: case OMPD_distribute: case OMPD_ordered: - case OMPD_atomic: case OMPD_target_data: case OMPD_distribute_simd: case OMPD_scope: case OMPD_dispatch: CaptureRegions.push_back(OMPD_unknown); break; - case OMPD_tile: - case OMPD_unroll: -// loop transformations do not introduce captures. 
-break; - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_error: - case OMPD_taskwait: - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_declare_simd: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_requires: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: -llvm_unreachable("OpenMP Directive is not allowed"); - case OMPD_unknown: default: -llvm_unreachable("Unknown OpenMP directive"); +llvm_unreachable("Unhandled OpenMP directive"); } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b17c7e2be968e..a741339a7d669 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4862,11 +4862,7 @@ StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), /* ScopeEntry */ false); - if (DSAStack->getCurrentDirective() == OMPD_atomic || - DSAStack->getCurrentDirective() == OMPD_critical || - DSAStack->getCurrentDirective() == OMPD_section || - DSAStack->getCurrentDirective() == OMPD_master || - DSAStack->getCurrentDirective
[llvm-branch-commits] [clang] [clang][OpenMP] Implement `isOpenMPCapturingDirective` (PR #97090)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Krzysztof Parzyszek (kparzysz) Changes Check if the given directive can capture variables, and thus needs a captured statement. Simplify some code using this function. --- Full diff: https://github.com/llvm/llvm-project/pull/97090.diff 3 Files Affected: - (modified) clang/include/clang/Basic/OpenMPKinds.h (+6) - (modified) clang/lib/Basic/OpenMPKinds.cpp (+35-31) - (modified) clang/lib/Sema/SemaOpenMP.cpp (+2-12) ```diff diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6d9d6ebc58e2c..3f21766f392cf 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -376,6 +376,12 @@ bool checkFailClauseParameter(OpenMPClauseKind FailClauseParameter); /// \return true - if the above condition is met for this directive /// otherwise - false. bool isOpenMPExecutableDirective(OpenMPDirectiveKind DKind); + +/// Checks if the specified directive need to capture variables. +/// \param DKind Specified directive. +/// \return true - if the above condition is met for this directive +/// otherwise - false. 
+bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind); } #endif diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 7c8990880fae3..30c34c207ae23 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -709,10 +709,44 @@ bool clang::isOpenMPExecutableDirective(OpenMPDirectiveKind DKind) { return Cat == Category::Executable || Cat == Category::Subsidiary; } +bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) { + if (isOpenMPExecutableDirective(DKind)) { +switch (DKind) { +case OMPD_atomic: +case OMPD_barrier: +case OMPD_cancel: +case OMPD_cancellation_point: +case OMPD_critical: +case OMPD_depobj: +case OMPD_error: +case OMPD_flush: +case OMPD_masked: +case OMPD_master: +case OMPD_section: +case OMPD_taskwait: +case OMPD_taskyield: + return false; +default: + return !isOpenMPLoopTransformationDirective(DKind); +} + } + // Non-executable directives. + switch (DKind) { + case OMPD_metadirective: + case OMPD_nothing: +return true; + default: +break; + } + return false; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { assert(unsigned(DKind) < llvm::omp::Directive_enumSize); + assert(isOpenMPCapturingDirective(DKind)); + switch (DKind) { case OMPD_metadirective: CaptureRegions.push_back(OMPD_metadirective); @@ -799,48 +833,18 @@ void clang::getOpenMPCaptureRegions( case OMPD_for: case OMPD_for_simd: case OMPD_sections: - case OMPD_section: case OMPD_single: - case OMPD_master: - case OMPD_critical: case OMPD_taskgroup: case OMPD_distribute: case OMPD_ordered: - case OMPD_atomic: case OMPD_target_data: case OMPD_distribute_simd: case OMPD_scope: case OMPD_dispatch: CaptureRegions.push_back(OMPD_unknown); break; - case OMPD_tile: - case OMPD_unroll: -// loop transformations do not introduce captures. 
-break; - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_error: - case OMPD_taskwait: - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_declare_simd: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_requires: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: -llvm_unreachable("OpenMP Directive is not allowed"); - case OMPD_unknown: default: -llvm_unreachable("Unknown OpenMP directive"); +llvm_unreachable("Unhandled OpenMP directive"); } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b17c7e2be968e..a741339a7d669 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4862,11 +4862,7 @@ StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), /* ScopeEntry */ false); - if (DSAStack->getCurrentDirective() == OMPD_atomic || - DSAStack->getCurrentDirective() == OMPD_critical || - DSAStack->getCurrentDirective() == OMPD_section || - DSAStack->getCurrentDirective() == OMPD_master || - DSAStack->getCurrentDirective() == OMPD_masked) + if (!isOpenMPCapturingDirective(DSAStack->getCurrentDirective())) return S; bool ErrorFound = false; @@ -4909,10 +4905,6 @@ StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, } } DSAStack->setForceVarCapturing(/*V=*/false); -
[llvm-branch-commits] [clang] [clang][OpenMP] Implement `isOpenMPCapturingDirective` (PR #97090)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/97090 >From 0ee7c0154dee86e25c05f09828637eaf9bb8ec27 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 28 Jun 2024 12:31:35 -0500 Subject: [PATCH 1/2] [clang][OpenMP] Implement `isOpenMPCapturingDirective` Check if the given directive can capture variables, and thus needs a captured statement. Simplify some code using this function. --- clang/include/clang/Basic/OpenMPKinds.h | 6 +++ clang/lib/Basic/OpenMPKinds.cpp | 66 + clang/lib/Sema/SemaOpenMP.cpp | 14 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6d9d6ebc58e2c..3f21766f392cf 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -376,6 +376,12 @@ bool checkFailClauseParameter(OpenMPClauseKind FailClauseParameter); /// \return true - if the above condition is met for this directive /// otherwise - false. bool isOpenMPExecutableDirective(OpenMPDirectiveKind DKind); + +/// Checks if the specified directive need to capture variables. +/// \param DKind Specified directive. +/// \return true - if the above condition is met for this directive +/// otherwise - false. 
+bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind); } #endif diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 7c8990880fae3..30c34c207ae23 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -709,10 +709,44 @@ bool clang::isOpenMPExecutableDirective(OpenMPDirectiveKind DKind) { return Cat == Category::Executable || Cat == Category::Subsidiary; } +bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) { + if (isOpenMPExecutableDirective(DKind)) { +switch (DKind) { +case OMPD_atomic: +case OMPD_barrier: +case OMPD_cancel: +case OMPD_cancellation_point: +case OMPD_critical: +case OMPD_depobj: +case OMPD_error: +case OMPD_flush: +case OMPD_masked: +case OMPD_master: +case OMPD_section: +case OMPD_taskwait: +case OMPD_taskyield: + return false; +default: + return !isOpenMPLoopTransformationDirective(DKind); +} + } + // Non-executable directives. + switch (DKind) { + case OMPD_metadirective: + case OMPD_nothing: +return true; + default: +break; + } + return false; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { assert(unsigned(DKind) < llvm::omp::Directive_enumSize); + assert(isOpenMPCapturingDirective(DKind)); + switch (DKind) { case OMPD_metadirective: CaptureRegions.push_back(OMPD_metadirective); @@ -799,48 +833,18 @@ void clang::getOpenMPCaptureRegions( case OMPD_for: case OMPD_for_simd: case OMPD_sections: - case OMPD_section: case OMPD_single: - case OMPD_master: - case OMPD_critical: case OMPD_taskgroup: case OMPD_distribute: case OMPD_ordered: - case OMPD_atomic: case OMPD_target_data: case OMPD_distribute_simd: case OMPD_scope: case OMPD_dispatch: CaptureRegions.push_back(OMPD_unknown); break; - case OMPD_tile: - case OMPD_unroll: -// loop transformations do not introduce captures. 
-break; - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_error: - case OMPD_taskwait: - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_declare_simd: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_requires: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: -llvm_unreachable("OpenMP Directive is not allowed"); - case OMPD_unknown: default: -llvm_unreachable("Unknown OpenMP directive"); +llvm_unreachable("Unhandled OpenMP directive"); } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b17c7e2be968e..a741339a7d669 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4862,11 +4862,7 @@ StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), /* ScopeEntry */ false); - if (DSAStack->getCurrentDirective() == OMPD_atomic || - DSAStack->getCurrentDirective() == OMPD_critical || - DSAStack->getCurrentDirective() == OMPD_section || - DSAStack->getCurrentDirective() == OMPD_master || - DSAStack->getCurrentDirective() == OMPD_masked) + if (!isOpenMPCapturingDirective(DSAStack->getCurrentDirective())) return S; bool ErrorFound = fa
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Implement `isOpenMPCapturingDirective` (PR #97090)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/97090 >From 0ee7c0154dee86e25c05f09828637eaf9bb8ec27 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 28 Jun 2024 12:31:35 -0500 Subject: [PATCH 1/2] [clang][OpenMP] Implement `isOpenMPCapturingDirective` Check if the given directive can capture variables, and thus needs a captured statement. Simplify some code using this function. --- clang/include/clang/Basic/OpenMPKinds.h | 6 +++ clang/lib/Basic/OpenMPKinds.cpp | 66 + clang/lib/Sema/SemaOpenMP.cpp | 14 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6d9d6ebc58e2c..3f21766f392cf 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -376,6 +376,12 @@ bool checkFailClauseParameter(OpenMPClauseKind FailClauseParameter); /// \return true - if the above condition is met for this directive /// otherwise - false. bool isOpenMPExecutableDirective(OpenMPDirectiveKind DKind); + +/// Checks if the specified directive need to capture variables. +/// \param DKind Specified directive. +/// \return true - if the above condition is met for this directive +/// otherwise - false. 
+bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind); } #endif diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 7c8990880fae3..30c34c207ae23 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -709,10 +709,44 @@ bool clang::isOpenMPExecutableDirective(OpenMPDirectiveKind DKind) { return Cat == Category::Executable || Cat == Category::Subsidiary; } +bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) { + if (isOpenMPExecutableDirective(DKind)) { +switch (DKind) { +case OMPD_atomic: +case OMPD_barrier: +case OMPD_cancel: +case OMPD_cancellation_point: +case OMPD_critical: +case OMPD_depobj: +case OMPD_error: +case OMPD_flush: +case OMPD_masked: +case OMPD_master: +case OMPD_section: +case OMPD_taskwait: +case OMPD_taskyield: + return false; +default: + return !isOpenMPLoopTransformationDirective(DKind); +} + } + // Non-executable directives. + switch (DKind) { + case OMPD_metadirective: + case OMPD_nothing: +return true; + default: +break; + } + return false; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { assert(unsigned(DKind) < llvm::omp::Directive_enumSize); + assert(isOpenMPCapturingDirective(DKind)); + switch (DKind) { case OMPD_metadirective: CaptureRegions.push_back(OMPD_metadirective); @@ -799,48 +833,18 @@ void clang::getOpenMPCaptureRegions( case OMPD_for: case OMPD_for_simd: case OMPD_sections: - case OMPD_section: case OMPD_single: - case OMPD_master: - case OMPD_critical: case OMPD_taskgroup: case OMPD_distribute: case OMPD_ordered: - case OMPD_atomic: case OMPD_target_data: case OMPD_distribute_simd: case OMPD_scope: case OMPD_dispatch: CaptureRegions.push_back(OMPD_unknown); break; - case OMPD_tile: - case OMPD_unroll: -// loop transformations do not introduce captures. 
-break; - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_error: - case OMPD_taskwait: - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_declare_simd: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_requires: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: -llvm_unreachable("OpenMP Directive is not allowed"); - case OMPD_unknown: default: -llvm_unreachable("Unknown OpenMP directive"); +llvm_unreachable("Unhandled OpenMP directive"); } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b17c7e2be968e..a741339a7d669 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4862,11 +4862,7 @@ StmtResult SemaOpenMP::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), /* ScopeEntry */ false); - if (DSAStack->getCurrentDirective() == OMPD_atomic || - DSAStack->getCurrentDirective() == OMPD_critical || - DSAStack->getCurrentDirective() == OMPD_section || - DSAStack->getCurrentDirective() == OMPD_master || - DSAStack->getCurrentDirective() == OMPD_masked) + if (!isOpenMPCapturingDirective(DSAStack->getCurrentDirective())) return S; bool ErrorFound = fa
[llvm-branch-commits] [clang] [clang][OpenMP] Rewrite `getOpenMPCaptureRegions` in terms of leafs (PR #97110)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/97110 Replace the switch in `getOpenMPCaptureRegions` with a loop collecting capture regions based on the constituent directives. >From 2d25e0d32672ecae3dc3ad42c50446e651eceb06 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 28 Jun 2024 15:27:42 -0500 Subject: [PATCH] [clang][OpenMP] Rewrite `getOpenMPCaptureRegions` in term of leafs Replace the switch in `getOpenMPCaptureRegions` with a loop collecting capture regions based on the constituent directives. --- clang/lib/Basic/OpenMPKinds.cpp | 170 ++-- 1 file changed, 72 insertions(+), 98 deletions(-) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 30c34c207ae23..152891dfa27dc 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -747,105 +747,79 @@ void clang::getOpenMPCaptureRegions( assert(unsigned(DKind) < llvm::omp::Directive_enumSize); assert(isOpenMPCapturingDirective(DKind)); - switch (DKind) { - case OMPD_metadirective: -CaptureRegions.push_back(OMPD_metadirective); -break; - case OMPD_parallel: - case OMPD_parallel_for: - case OMPD_parallel_for_simd: - case OMPD_parallel_master: - case OMPD_parallel_masked: - case OMPD_parallel_sections: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_parallel_loop: -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_target_teams: - case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_teams); -break; - case OMPD_teams: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: -CaptureRegions.push_back(OMPD_teams); -break; - case OMPD_target: - case OMPD_target_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -break; - case OMPD_teams_loop: - case OMPD_teams_distribute_parallel_for: - case 
OMPD_teams_distribute_parallel_for_simd: -CaptureRegions.push_back(OMPD_teams); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_target_parallel: - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - case OMPD_target_parallel_loop: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_task: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: -CaptureRegions.push_back(OMPD_task); -break; - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_masked_taskloop: - case OMPD_masked_taskloop_simd: -CaptureRegions.push_back(OMPD_taskloop); -break; - case OMPD_parallel_masked_taskloop: - case OMPD_parallel_masked_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: -CaptureRegions.push_back(OMPD_parallel); -CaptureRegions.push_back(OMPD_taskloop); -break; - case OMPD_target_teams_loop: - case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_teams); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_nothing: -CaptureRegions.push_back(OMPD_nothing); -break; - case OMPD_loop: -// TODO: 'loop' may require different capture regions depending on the bind -// clause or the parent directive when there is no bind clause. Use -// OMPD_unknown for now. 
- case OMPD_simd: - case OMPD_for: - case OMPD_for_simd: - case OMPD_sections: - case OMPD_single: - case OMPD_taskgroup: - case OMPD_distribute: - case OMPD_ordered: - case OMPD_target_data: - case OMPD_distribute_simd: - case OMPD_scope: - case OMPD_dispatch: + auto getRegionsForLeaf = [&](OpenMPDirectiveKind LKind) { +assert(isLeafConstruct(LKind) && "Epecting leaf directive"); +switch (LKind) { +case OMPD_metadirective: + CaptureRegions.push_back(OMPD_metadirective); + break; +case OMPD_nothing: + CaptureRegions.push_back(OMPD_nothing); + break; +case OMPD_parallel: + CaptureRegions.push_back(OMPD_parallel); + break; +case OMPD_target: + CaptureRegions.push_back(OMPD_task); + CaptureRegions.push_back(OMPD_target); + break; +case OMPD_task: +case OMPD_target_enter_data: +case OMPD_target_exit_data: +case OMPD_target_update: + CaptureRegions.push_back(OMPD_task); + break; +case OMPD_teams: + CaptureRegions.push_back(OMPD_teams); + break; +case OMPD_taskloop: + CaptureRegions.push_back(OMPD_taskloop); + break; +case OMP
[llvm-branch-commits] [clang] [clang][OpenMP] Rewrite `getOpenMPCaptureRegions` in terms of leafs (PR #97110)
https://github.com/kparzysz edited https://github.com/llvm/llvm-project/pull/97110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Rewrite `getOpenMPCaptureRegions` in terms of leafs (PR #97110)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Krzysztof Parzyszek (kparzysz) Changes Replace the switch in `getOpenMPCaptureRegions` with a loop collecting capture regions based on the constituent directives. --- Full diff: https://github.com/llvm/llvm-project/pull/97110.diff 1 Files Affected: - (modified) clang/lib/Basic/OpenMPKinds.cpp (+72-98) ``diff diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 30c34c207ae23..152891dfa27dc 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -747,105 +747,79 @@ void clang::getOpenMPCaptureRegions( assert(unsigned(DKind) < llvm::omp::Directive_enumSize); assert(isOpenMPCapturingDirective(DKind)); - switch (DKind) { - case OMPD_metadirective: -CaptureRegions.push_back(OMPD_metadirective); -break; - case OMPD_parallel: - case OMPD_parallel_for: - case OMPD_parallel_for_simd: - case OMPD_parallel_master: - case OMPD_parallel_masked: - case OMPD_parallel_sections: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_parallel_loop: -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_target_teams: - case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_teams); -break; - case OMPD_teams: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: -CaptureRegions.push_back(OMPD_teams); -break; - case OMPD_target: - case OMPD_target_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -break; - case OMPD_teams_loop: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: -CaptureRegions.push_back(OMPD_teams); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_target_parallel: - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - case OMPD_target_parallel_loop: 
-CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_task: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: -CaptureRegions.push_back(OMPD_task); -break; - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_masked_taskloop: - case OMPD_masked_taskloop_simd: -CaptureRegions.push_back(OMPD_taskloop); -break; - case OMPD_parallel_masked_taskloop: - case OMPD_parallel_masked_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: -CaptureRegions.push_back(OMPD_parallel); -CaptureRegions.push_back(OMPD_taskloop); -break; - case OMPD_target_teams_loop: - case OMPD_target_teams_distribute_parallel_for: - case OMPD_target_teams_distribute_parallel_for_simd: -CaptureRegions.push_back(OMPD_task); -CaptureRegions.push_back(OMPD_target); -CaptureRegions.push_back(OMPD_teams); -CaptureRegions.push_back(OMPD_parallel); -break; - case OMPD_nothing: -CaptureRegions.push_back(OMPD_nothing); -break; - case OMPD_loop: -// TODO: 'loop' may require different capture regions depending on the bind -// clause or the parent directive when there is no bind clause. Use -// OMPD_unknown for now. 
- case OMPD_simd: - case OMPD_for: - case OMPD_for_simd: - case OMPD_sections: - case OMPD_single: - case OMPD_taskgroup: - case OMPD_distribute: - case OMPD_ordered: - case OMPD_target_data: - case OMPD_distribute_simd: - case OMPD_scope: - case OMPD_dispatch: + auto getRegionsForLeaf = [&](OpenMPDirectiveKind LKind) { +assert(isLeafConstruct(LKind) && "Epecting leaf directive"); +switch (LKind) { +case OMPD_metadirective: + CaptureRegions.push_back(OMPD_metadirective); + break; +case OMPD_nothing: + CaptureRegions.push_back(OMPD_nothing); + break; +case OMPD_parallel: + CaptureRegions.push_back(OMPD_parallel); + break; +case OMPD_target: + CaptureRegions.push_back(OMPD_task); + CaptureRegions.push_back(OMPD_target); + break; +case OMPD_task: +case OMPD_target_enter_data: +case OMPD_target_exit_data: +case OMPD_target_update: + CaptureRegions.push_back(OMPD_task); + break; +case OMPD_teams: + CaptureRegions.push_back(OMPD_teams); + break; +case OMPD_taskloop: + CaptureRegions.push_back(OMPD_taskloop); + break; +case OMPD_loop: + // TODO: 'loop' may require different capture regions depending on the + // bind clause or the parent directive when there is no bind clause. + // If any of the directives that push regions here are parents of 'loop', + // assume 'parallel'. Otherwise do nothing. + if
[llvm-branch-commits] [clang] [clang][OpenMP] Implement `isOpenMPCapturingDirective` (PR #97090)
@@ -709,10 +709,44 @@ bool clang::isOpenMPExecutableDirective(OpenMPDirectiveKind DKind) { return Cat == Category::Executable || Cat == Category::Subsidiary; } +bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) { + if (isOpenMPExecutableDirective(DKind)) { +switch (DKind) { +case OMPD_atomic: +case OMPD_barrier: +case OMPD_cancel: +case OMPD_cancellation_point: +case OMPD_critical: +case OMPD_depobj: +case OMPD_error: +case OMPD_flush: +case OMPD_masked: +case OMPD_master: +case OMPD_section: +case OMPD_taskwait: +case OMPD_taskyield: + return false; +default: + return !isOpenMPLoopTransformationDirective(DKind); +} + } + // Non-executable directives. + switch (DKind) { + case OMPD_metadirective: + case OMPD_nothing: +return true; + default: +break; + } + return false; +} + void clang::getOpenMPCaptureRegions( SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind) { assert(unsigned(DKind) < llvm::omp::Directive_enumSize); + assert(isOpenMPCapturingDirective(DKind)); alexey-bataev wrote: Add assertion message https://github.com/llvm/llvm-project/pull/97090 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -342,6 +350,108 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { return false; } +uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { + uint64_t MatchedWithNameSimilarity = 0; + ItaniumPartialDemangler ItaniumPartialDemangler; + + // Demangle and derive namespace from function name. + auto DemangleName = [&](std::string &FunctionName) { +StringRef RestoredName = NameResolver::restore(FunctionName); +return demangle(RestoredName); + }; + auto DeriveNameSpace = [&](std::string &DemangledName) { +if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) + return std::string(""); +std::vector Buffer(DemangledName.begin(), DemangledName.end()); +size_t BufferSize = Buffer.size(); +char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( +&Buffer[0], &BufferSize); +return NameSpace ? std::string(NameSpace) : std::string(""); + }; + + // Maps namespaces to associated function block counts and gets profile + // function names and namespaces to minimize the number of BFs to process and + // avoid repeated name demangling/namespace derivision. + StringMap> +NamespaceToProfiledBFSizes; + std::vector ProfileBFDemangledNames; + ProfileBFDemangledNames.reserve(YamlBP.Functions.size()); + std::vector ProfiledBFNamespaces; + ProfiledBFNamespaces.reserve(YamlBP.Functions.size()); + + for (auto &YamlBF : YamlBP.Functions) { +std::string YamlBFDemangledName = DemangleName(YamlBF.Name); +ProfileBFDemangledNames.push_back(YamlBFDemangledName); +std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName); +ProfiledBFNamespaces.push_back(YamlBFNamespace); +NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks); + } + + StringMap> + NamespaceToBFs; maksfb wrote: nit: formatting looks off. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match functions with name similarity (PR #95884)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match functions with calls as anchors (PR #96596)
https://github.com/shawbyoung edited https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match functions with name similarity (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [llvm] [BOLT] Match functions with name similarity (PR #95884)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884 >From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:42:00 -0700 Subject: [PATCH 1/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 73 -- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 66cabc236f4b2..c9f6d88f0b13a 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses name similarity to match functions that were not matched by name. uint64_t MatchedWithDemangledName = 0; - if (opts::NameSimilarityFunctionMatchingThreshold > 0) { - -std::unordered_map NameToBinaryFunction; -NameToBinaryFunction.reserve(BC.getBinaryFunctions().size()); -for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +auto DemangleName = [&](const char* String) { int Status = 0; - char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(), + char *DemangledName = abi::__cxa_demangle(String, nullptr, nullptr, &Status); - if (Status == 0) -NameToBinaryFunction[std::string(DemangledName)] = &BF; + return Status == 0 ? new std::string(DemangledName) : nullptr; +}; + +auto DeriveNameSpace = [&](std::string DemangledName) { + size_t LParen = std::string(DemangledName).find("("); + std::string FunctionName = std::string(DemangledName).substr(0, LParen); + size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::"); + return ScopeResolutionOperator == std::string::npos ? 
std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator); +}; + +std::unordered_map> NamespaceToBFs; +NamespaceToBFs.reserve(BC.getBinaryFunctions().size()); + +for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { + std::string* DemangledName = DemangleName(BF->getOneName().str().c_str()); + if (!DemangledName) +continue; + std::string Namespace = DeriveNameSpace(*DemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) +NamespaceToBFs[Namespace] = {BF}; + else +It->second.push_back(BF); } for (auto YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; - int Status = 0; - char *DemangledName = - abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status); - if (Status != 0) + std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str()); + if (!YamlBFDemangledName) continue; - auto It = NameToBinaryFunction.find(DemangledName); - if (It == NameToBinaryFunction.end()) + std::string Namespace = DeriveNameSpace(*YamlBFDemangledName); + auto It = NamespaceToBFs.find(Namespace); + if (It == NamespaceToBFs.end()) continue; - BinaryFunction *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - ++MatchedWithDemangledName; + std::vector BFs = It->second; + + unsigned MinEditDistance = UINT_MAX; + BinaryFunction *ClosestNameBF = nullptr; + + for (BinaryFunction *BF : BFs) { +if (ProfiledFunctions.count(BF)) + continue; +std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str()); +if (!BFDemangledName) + continue; +unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName); +if (BFEditDistance < MinEditDistance) { + MinEditDistance = BFEditDistance; + ClosestNameBF = BF; +} + } + + if (ClosestNameBF && +MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) { +matchProfileToFunction(YamlBF, *ClosestNameBF); +++MatchedWithDemangledName; + } } } + outs() << MatchedWithDemangledName << ": functions matched by name similarity\n"; + for 
(yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name >From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Thu, 20 Jun 2024 23:45:27 -0700 Subject: [PATCH 2/7] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index c9f6d88f0b13a..cf4a5393df8f4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -491,8 +491,6 @@ Error YAMLProfileReader::read
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
https://github.com/MaskRay edited https://github.com/llvm/llvm-project/pull/96282 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/96282 >From a341e03cb6376d50a4fa219933d3f161e41a567a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 27 Jun 2024 14:44:02 -0700 Subject: [PATCH] move seed inside #if Created using spr 1.3.5-bogner --- llvm/include/llvm/ADT/Hashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 397109880bb02..177fb0318bf80 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -311,11 +311,11 @@ struct hash_state { /// hash values. On platforms without ASLR, this is still likely /// non-deterministic per build. inline uint64_t get_execution_seed() { - [[maybe_unused]] static const char seed = 0; // Work around x86-64 negative offset folding for old Clang -fno-pic // https://reviews.llvm.org/D93931 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && \ (!defined(__clang__) || __clang_major__ > 11) + static const char seed = 0; return static_cast(reinterpret_cast(&seed)); #else return 0xff51afd7ed558ccdULL; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/96282 >From a341e03cb6376d50a4fa219933d3f161e41a567a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 27 Jun 2024 14:44:02 -0700 Subject: [PATCH] move seed inside #if Created using spr 1.3.5-bogner --- llvm/include/llvm/ADT/Hashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 397109880bb02..177fb0318bf80 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -311,11 +311,11 @@ struct hash_state { /// hash values. On platforms without ASLR, this is still likely /// non-deterministic per build. inline uint64_t get_execution_seed() { - [[maybe_unused]] static const char seed = 0; // Work around x86-64 negative offset folding for old Clang -fno-pic // https://reviews.llvm.org/D93931 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && \ (!defined(__clang__) || __clang_major__ > 11) + static const char seed = 0; return static_cast(reinterpret_cast(&seed)); #else return 0xff51afd7ed558ccdULL; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
https://github.com/MaskRay edited https://github.com/llvm/llvm-project/pull/96282 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Hashing] Use a non-deterministic seed if LLVM_ENABLE_ABI_BREAKING_CHECKS (PR #96282)
https://github.com/MaskRay edited https://github.com/llvm/llvm-project/pull/96282 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] e277504 - Revert "[IR] Don't include Module.h in Analysis.h (NFC) (#97023)"
Author: Zijun Date: 2024-06-29T00:14:49Z New Revision: e277504246c0f077fec20e917c29ac8e5c193325 URL: https://github.com/llvm/llvm-project/commit/e277504246c0f077fec20e917c29ac8e5c193325 DIFF: https://github.com/llvm/llvm-project/commit/e277504246c0f077fec20e917c29ac8e5c193325.diff LOG: Revert "[IR] Don't include Module.h in Analysis.h (NFC) (#97023)" This reverts commit 4169338e75cdce73d34063532db598c95ee82ae4. Added: Modified: clang/lib/Interpreter/DeviceOffload.cpp llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/VectorUtils.h llvm/include/llvm/IR/Analysis.h llvm/include/llvm/Transforms/IPO/Attributor.h llvm/include/llvm/Transforms/Utils/Debugify.h llvm/lib/Analysis/CallPrinter.cpp llvm/lib/Analysis/InlineAdvisor.cpp llvm/lib/Analysis/LazyValueInfo.cpp llvm/lib/Analysis/LoopInfo.cpp llvm/lib/Analysis/LoopPass.cpp llvm/lib/Analysis/MLInlineAdvisor.cpp llvm/lib/Analysis/StructuralHash.cpp llvm/lib/Analysis/TargetLibraryInfo.cpp llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp llvm/lib/CodeGen/AsmPrinter/AIXException.cpp llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp llvm/lib/CodeGen/CFGuardLongjmp.cpp llvm/lib/CodeGen/EHContGuardCatchret.cpp llvm/lib/CodeGen/GCMetadata.cpp llvm/lib/CodeGen/KCFI.cpp llvm/lib/CodeGen/MachineCheckDebugify.cpp llvm/lib/CodeGen/MachineFunctionPass.cpp llvm/lib/CodeGen/MachineModuleSlotTracker.cpp llvm/lib/CodeGen/RegAllocBase.cpp llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm/lib/CodeGen/WinEHPrepare.cpp llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp llvm/lib/IR/PassManager.cpp llvm/lib/LTO/UpdateCompilerUsed.cpp llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp llvm/lib/Target/AArch64/AArch64FastISel.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h 
llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMInstrInfo.cpp llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp llvm/lib/Target/ARM/Thumb1InstrInfo.cpp llvm/lib/Target/ARM/Thumb2InstrInfo.cpp llvm/lib/Target/AVR/AVRAsmPrinter.cpp llvm/lib/Target/BPF/BPFAsmPrinter.cpp llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp llvm/lib/Target/BPF/BTFDebug.cpp llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp llvm/lib/Target/Mips/MipsAsmPrinter.cpp llvm/lib/Target/Mips/MipsISelLowering.cpp llvm/lib/Target/PowerPC/PPCMCInstLower.cpp llvm/lib/Target/PowerPC/PPCSubtarget.cpp llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp llvm/lib/Target/RISCV/RISCVInstrInfo.cpp llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp llvm/lib/Target/SystemZ/SystemZISelLowering.cpp llvm/lib/Target/Target.cpp llvm/lib/Target/TargetMachine.cpp llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp llvm/lib/Target/X86/X86FrameLowering.cpp llvm/lib/Target/X86/X86ISelDAGToDAG.cpp llvm/lib/Target/X86/X86IndirectBranchTracking.cpp llvm/lib/Target/X86/X86InsertPrefetch.cpp llvm/lib/Target/X86/X86InstrInfo.cpp llvm/lib/Target/X86/X86PreTileConfig.cpp llvm/lib/Target/X86/X86ReturnThunks.cpp llvm/lib/Target/X86/X86Subtarget.cpp llvm/lib/Transforms/IPO/CalledValuePropagation.cpp llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp llvm/lib/Transforms/Instrumentation/PGOForceFunctionAttrs.cpp llvm/lib/Transforms/Scalar/ConstantHoisting.cpp llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp llvm/lib/Transforms/Utils/DemoteRegToStack.cpp llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp 
llvm/unittests/Analysis/DDGTest.cpp llvm/unittests/Analysis/GlobalsModRefTest.cpp llvm/unittests/Analysis/IVDescriptorsTest.cpp llvm/unittests/Analysis/LoopInfoTest.cpp llvm/unittests/Analysis/LoopNestTest.cpp llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp llvm/unittests/CodeGen/AMDGPUMetadataTest.cpp llvm/unittests/CodeGen/MLRegAllocDevelopmentFeatures.cpp llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp llvm/unittests/MI/LiveIntervalTest.cpp llvm/unittests/MIR/MachineMetadata.cpp llvm/unittests/Target/AArch64/InstSizes.cpp llvm/unittests/Target/ARM/InstSizes.cpp
[llvm-branch-commits] [llvm] 8215dfd - Revert "[Memprof] Reduce test binary sizes for memprof tests. (#97114)"
Author: Vitaly Buka Date: 2024-06-28T23:07:25-07:00 New Revision: 8215dfd0f8c95f774a01229b2a4430b9657910b9 URL: https://github.com/llvm/llvm-project/commit/8215dfd0f8c95f774a01229b2a4430b9657910b9 DIFF: https://github.com/llvm/llvm-project/commit/8215dfd0f8c95f774a01229b2a4430b9657910b9.diff LOG: Revert "[Memprof] Reduce test binary sizes for memprof tests. (#97114)" This reverts commit b6ba10c6d4455a237ff2e65772d332821721cdf3. Added: Modified: llvm/test/Transforms/PGOProfile/Inputs/memprof.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw Removed: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe 
b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe index 710e49ce7ec89..361354d7d0a3a 100755 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe differ diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw index 255f7012e333d..1ff4352a07d1f 100644 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw differ diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe index c24a0fdbb0e95..e9e6897a4428e 100755 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe differ diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw index 8886204350470..1ff4f1d9a5c01 100644 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw differ diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe index 4d7c54b14ed42..c9f81fc911151 100755 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe differ diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw index a4c5ad8b53d64..c496a134bf3ce 100644 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw differ diff --git 
a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe index 4de044a97a21c..d555a8cea0ad7 100755 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe diff er diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw index 20ad99db1f081..923d309a0e560 100644 Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw diff er diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe b/llvm/test/Trans