[llvm-branch-commits] [llvm] [BOLT][docs] Expand Heatmaps.md (PR #98162)
paschalis-mpeis wrote: Thanks for your review @aaupov . This is a stacked pull request as the updated docs refer to the `-print-mappings` option. So it will be merged after: - #97567 https://github.com/llvm/llvm-project/pull/98162 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][BOLT] Rename createDummyReturnFunction to createReturnBody (PR #98448)
https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/98448 >From 2e9d663a9a164735fbe1a2408994acc1abaa8c21 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Thu, 11 Jul 2024 09:32:12 +0100 Subject: [PATCH] [NFC][BOLT] Rename createDummyReturnFunction to createReturnInstructionList createDummyReturnFunction is not creating a function but instead only a function body that is simply a return statement. This patch renames it to: createReturnInstructionList --- bolt/include/bolt/Core/MCPlusBuilder.h | 3 ++- bolt/lib/Passes/Instrumentation.cpp| 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 885d627f7b64f..c916c6f95751f 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -2044,7 +2044,8 @@ class MCPlusBuilder { /// Returns a function body that contains only a return instruction. An /// example usage is a workaround for the '__bolt_fini_trampoline' of // Instrumentation. - virtual InstructionListType createDummyReturnFunction(MCContext *Ctx) const { + virtual InstructionListType + createReturnInstructionList(MCContext *Ctx) const { InstructionListType Insts(1); createReturn(Insts[0]); return Insts; diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp index e824a42d82696..ebb3925749b4d 100644 --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -754,7 +754,7 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { // with unknown symbol in runtime library. E.g. for static PIE // executable createSimpleFunction("__bolt_fini_trampoline", - BC.MIB->createDummyReturnFunction(BC.Ctx.get())); + BC.MIB->createReturnInstructionList(BC.Ctx.get())); } } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][BOLT] Rename createDummyReturnFunction to createReturnBody (PR #98448)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/98448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][BOLT] Rename createDummyReturnFunction to createReturnInstructionList (PR #98448)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/98448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][BOLT] Rename createDummyReturnFunction to createReturnInstructi.. (PR #98448)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/98448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][BOLT] Rename createDummyReturnFunction to createReturnInstructi.. (PR #98448)
paschalis-mpeis wrote: Rebased on top of the updated #96626. Thanks for suggesting `createReturnInstructionList`; I think it's better so I used it. https://github.com/llvm/llvm-project/pull/98448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] 017a499 - Revert "[flang] Adjust semantics of the char length of an array constructor (…"
Author: David Spickett Date: 2024-07-12T11:34:32+01:00 New Revision: 017a49990189cc9513ff088eebbeeb1f54e3ff93 URL: https://github.com/llvm/llvm-project/commit/017a49990189cc9513ff088eebbeeb1f54e3ff93 DIFF: https://github.com/llvm/llvm-project/commit/017a49990189cc9513ff088eebbeeb1f54e3ff93.diff LOG: Revert "[flang] Adjust semantics of the char length of an array constructor (…" This reverts commit 2d7e136fc0e31207b962397250bc1581203c8d59. Added: Modified: flang/lib/Semantics/expression.cpp flang/test/Lower/HLFIR/array-ctor-character.f90 flang/test/Semantics/array-constr-len.f90 Removed: diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 844ffd44604ba..90900d9acb6ad 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1584,8 +1584,7 @@ class ArrayConstructorContext { std::optional> LengthIfGood() const { if (type_) { auto len{type_->LEN()}; - if (explicitType_ || - (len && IsConstantExpr(*len) && !ContainsAnyImpliedDoIndex(*len))) { + if (len && IsConstantExpr(*len) && !ContainsAnyImpliedDoIndex(*len)) { return len; } } diff --git a/flang/test/Lower/HLFIR/array-ctor-character.f90 b/flang/test/Lower/HLFIR/array-ctor-character.f90 index 881085b370ffe..85e6ed27fe077 100644 --- a/flang/test/Lower/HLFIR/array-ctor-character.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-character.f90 @@ -93,11 +93,10 @@ subroutine test_set_length_sanitize(i, c1) call takes_char([character(len=i):: c1]) end subroutine ! CHECK-LABEL: func.func @_QPtest_set_length_sanitize( -! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare {{.*}}Ec1 -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %arg0 -! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref -! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare {{.*}}Ec1 +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare {{.*}}Ei +! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref ! CHECK: %[[VAL_26:.*]] = arith.constant 0 : i64 ! 
CHECK: %[[VAL_27:.*]] = arith.cmpi sgt, %[[VAL_25]], %[[VAL_26]] : i64 ! CHECK: %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_25]], %[[VAL_26]] : i64 -! CHECK: %[[VAL_29:.*]] = hlfir.set_length %[[VAL_2]]#0 len %[[VAL_28]] : (!fir.boxchar<1>, i64) -> !hlfir.expr> +! CHECK: %[[VAL_29:.*]] = hlfir.set_length %[[VAL_6]]#0 len %[[VAL_28]] : (!fir.boxchar<1>, i64) -> !hlfir.expr> diff --git a/flang/test/Semantics/array-constr-len.f90 b/flang/test/Semantics/array-constr-len.f90 index 4de9c76c7041c..a785c9e2ece6d 100644 --- a/flang/test/Semantics/array-constr-len.f90 +++ b/flang/test/Semantics/array-constr-len.f90 @@ -10,5 +10,6 @@ subroutine subr(s,n) print *, [(s(1:j),j=1,0)] print *, [(s(1:1),j=1,0)] ! ok print *, [character(2)::(s(1:n),j=1,0)] ! ok + !ERROR: Array constructor implied DO loop has no iterations and indeterminate character length print *, [character(n)::(s(1:n),j=1,0)] end ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] f1a5264 - Revert "[clang] Catch missing format attributes (#70024)"
Author: Aaron Ballman Date: 2024-07-12T06:57:49-04:00 New Revision: f1a52644e84021dded1fa5e58401cf23b8c519e6 URL: https://github.com/llvm/llvm-project/commit/f1a52644e84021dded1fa5e58401cf23b8c519e6 DIFF: https://github.com/llvm/llvm-project/commit/f1a52644e84021dded1fa5e58401cf23b8c519e6.diff LOG: Revert "[clang] Catch missing format attributes (#70024)" This reverts commit 70f57d25743ca7230bcad3cae7e3072f0aded6f7. Added: Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Attr.h clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaDeclAttr.cpp Removed: clang/test/Sema/attr-format-missing.c clang/test/Sema/attr-format-missing.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index afcd3c655f0f6..781fc8ab1de1e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -716,9 +716,6 @@ Improvements to Clang's diagnostics - Clang now diagnoses integer constant expressions that are folded to a constant value as an extension in more circumstances. Fixes #GH59863 -- Clang now diagnoses missing format attributes for non-template functions and - class/struct/union members. 
Fixes #GH60718 - Improvements to Clang's time-trace -- diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index da6a3b2fe3571..2241f8481484e 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -525,6 +525,7 @@ def MainReturnType : DiagGroup<"main-return-type">; def MaxUnsignedZero : DiagGroup<"max-unsigned-zero">; def MissingBraces : DiagGroup<"missing-braces">; def MissingDeclarations: DiagGroup<"missing-declarations">; +def : DiagGroup<"missing-format-attribute">; def MissingIncludeDirs : DiagGroup<"missing-include-dirs">; def MissingNoreturn : DiagGroup<"missing-noreturn">; def MultiChar : DiagGroup<"multichar">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f5c18edb65217..0ea3677355169 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1031,9 +1031,6 @@ def err_opencl_invalid_param : Error< def err_opencl_invalid_return : Error< "declaring function return value of type %0 is not allowed %select{; did you forget * ?|}1">; def warn_enum_value_overflow : Warning<"overflow in enumeration value">; -def warn_missing_format_attribute : Warning< - "diagnostic behavior may be improved by adding the %0 format attribute to the declaration of %1">, - InGroup>, DefaultIgnore; def warn_pragma_options_align_reset_failed : Warning< "#pragma options align=reset failed: %0">, InGroup; diff --git a/clang/include/clang/Sema/Attr.h b/clang/include/clang/Sema/Attr.h index 37c124ca7b454..3f0b10212789a 100644 --- a/clang/include/clang/Sema/Attr.h +++ b/clang/include/clang/Sema/Attr.h @@ -123,13 +123,6 @@ inline bool isInstanceMethod(const Decl *D) { return false; } -inline bool checkIfMethodHasImplicitObjectParameter(const Decl *D) { - if (const auto *MethodDecl = dyn_cast(D)) -return MethodDecl->isInstance() && - 
!MethodDecl->hasCXXExplicitFunctionObjectParameter(); - return false; -} - /// Diagnose mutually exclusive attributes when present on a given /// declaration. Returns true if diagnosed. template diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e30252749d2c3..6be6f6725e5b7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4594,10 +4594,6 @@ class Sema final : public SemaBase { enum class RetainOwnershipKind { NS, CF, OS }; - void DiagnoseMissingFormatAttributes(Stmt *Body, const FunctionDecl *FDecl); - std::vector - GetMissingFormatAttributes(Stmt *Body, const FunctionDecl *FDecl); - UuidAttr *mergeUuidAttr(Decl *D, const AttributeCommonInfo &CI, StringRef UuidAsWritten, MSGuidDecl *GuidDecl); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0039df2a21bc6..80b5a8cd4bae6 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -15934,8 +15934,6 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, } } - DiagnoseMissingFormatAttributes(Body, FD); - // We might not have found a prototype because we didn't wish to warn on // the lack of a missing prototype. Try again without the checks for // whether we want to warn on the missing prototype. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5166e61bb41d4..f2cd46d1e7c93 100644 --- a/clang/lib/Sema
[llvm-branch-commits] [clang] [clang][OpenMP] Update validity check for reduction with `inscan` (PR #98500)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/98500 >From b58e4fae4b0c22c11e8a0d0e29b6f21c28d4da86 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 11 Jul 2024 10:47:41 -0500 Subject: [PATCH] [clang][OpenMP] Update validity check for reduction with `inscan` Follow-up to 81cdf9472c (check for `scan` nesting). Also, it eliminates the mentions of combined directives in `ActOnOpenMPReductionClause`. --- clang/lib/Sema/SemaOpenMP.cpp | 24 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7ca89b0d4eb70..dc77a51b6569e 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -18603,14 +18603,22 @@ OMPClause *SemaOpenMP::ActOnOpenMPReductionClause( // worksharing-loop construct, a worksharing-loop SIMD construct, a simd // construct, a parallel worksharing-loop construct or a parallel // worksharing-loop SIMD construct. - if (Modifier == OMPC_REDUCTION_inscan && - (DSAStack->getCurrentDirective() != OMPD_for && - DSAStack->getCurrentDirective() != OMPD_for_simd && - DSAStack->getCurrentDirective() != OMPD_simd && - DSAStack->getCurrentDirective() != OMPD_parallel_for && - DSAStack->getCurrentDirective() != OMPD_parallel_for_simd)) { -Diag(ModifierLoc, diag::err_omp_wrong_inscan_reduction); -return nullptr; + // [5.2:136:1-4] A reduction clause with the inscan reduction-modifier may + // only appear on a worksharing-loop construct, a simd construct or a + // combined or composite construct for which any of the aforementioned + // constructs is a constituent construct and distribute is not a constituent + // construct. 
+ if (Modifier == OMPC_REDUCTION_inscan) { +SmallVector LeafOrComposite; +ArrayRef CurrentLOC = getLeafOrCompositeConstructs( +DSAStack->getCurrentDirective(), LeafOrComposite); +bool Valid = llvm::any_of(CurrentLOC, [](OpenMPDirectiveKind DK) { + return llvm::is_contained({OMPD_for, OMPD_simd, OMPD_for_simd}, DK); +}); +if (!Valid) { + Diag(ModifierLoc, diag::err_omp_wrong_inscan_reduction); + return nullptr; +} } ReductionData RD(VarList.size(), Modifier); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/98633 …lization` Replace the explicit list of compound directives ending with taskloop with checking for the last leaf construct. >From 08ddba05e36c6c5b6fa5b4b158fbef21b77ab415 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 11 Jul 2024 14:02:41 -0500 Subject: [PATCH] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitialization` Replace the explicit list of compound directives ending with taskloop with checking for the last leaf construct. --- clang/lib/Sema/SemaOpenMP.cpp | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2340873c8fdd9..118cda253a437 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9090,14 +9090,15 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; + auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { +return getLeafConstructsOrSelf(DK).back() == OMPD_taskloop; + }; if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != PredeterminedCKind && DVar.RefExpr && (getLangOpts().OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || - ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || - DKind == OMPD_master_taskloop || DKind == OMPD_masked_taskloop || - DKind == OMPD_parallel_master_taskloop || - DKind == OMPD_parallel_masked_taskloop || + ((isOpenMPWorksharingDirective(DKind) || + isOpenMPTaskloopDirective(DKind) || isOpenMPDistributeDirective(DKind)) && !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Krzysztof Parzyszek (kparzysz) Changes …lization` Replace the explicit list of compound directives ending with taskloop with checking for the last leaf construct. --- Full diff: https://github.com/llvm/llvm-project/pull/98633.diff 1 Files Affected: - (modified) clang/lib/Sema/SemaOpenMP.cpp (+5-4) ``diff diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2340873c8fdd9..118cda253a437 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9090,14 +9090,15 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; + auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { +return getLeafConstructsOrSelf(DK).back() == OMPD_taskloop; + }; if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != PredeterminedCKind && DVar.RefExpr && (getLangOpts().OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || - ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || - DKind == OMPD_master_taskloop || DKind == OMPD_masked_taskloop || - DKind == OMPD_parallel_master_taskloop || - DKind == OMPD_parallel_masked_taskloop || + ((isOpenMPWorksharingDirective(DKind) || + isOpenMPTaskloopDirective(DKind) || isOpenMPDistributeDirective(DKind)) && !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && `` https://github.com/llvm/llvm-project/pull/98633 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
@@ -9090,14 +9090,15 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; + auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { alexey-bataev wrote: IsOpenMPTaskloopDirective https://github.com/llvm/llvm-project/pull/98633 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/98633 >From 08ddba05e36c6c5b6fa5b4b158fbef21b77ab415 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 11 Jul 2024 14:02:41 -0500 Subject: [PATCH 1/2] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitialization` Replace the explicit list of compound directives ending with taskloop with checking for the last leaf construct. --- clang/lib/Sema/SemaOpenMP.cpp | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2340873c8fdd9..118cda253a437 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9090,14 +9090,15 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; + auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { +return getLeafConstructsOrSelf(DK).back() == OMPD_taskloop; + }; if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != PredeterminedCKind && DVar.RefExpr && (getLangOpts().OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || - ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || - DKind == OMPD_master_taskloop || DKind == OMPD_masked_taskloop || - DKind == OMPD_parallel_master_taskloop || - DKind == OMPD_parallel_masked_taskloop || + ((isOpenMPWorksharingDirective(DKind) || + isOpenMPTaskloopDirective(DKind) || isOpenMPDistributeDirective(DKind)) && !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && >From f38af28828a08daf3ba3a7f70104c38ccc6e4732 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 12 Jul 2024 08:58:28 -0500 Subject: [PATCH 2/2] Capitalize IsOpenMPTaskloopDirective --- clang/lib/Sema/SemaOpenMP.cpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 118cda253a437..c1a58fc7f2076 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9090,7 +9090,7 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; - auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { + auto IsOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { return getLeafConstructsOrSelf(DK).back() == OMPD_taskloop; }; if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && @@ -9098,7 +9098,7 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, (getLangOpts().OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || ((isOpenMPWorksharingDirective(DKind) || - isOpenMPTaskloopDirective(DKind) || + IsOpenMPTaskloopDirective(DKind) || isOpenMPDistributeDirective(DKind)) && !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
@@ -9090,14 +9090,15 @@ void SemaOpenMP::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, isOpenMPSimdDirective(DKind) ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) : OMPC_private; + auto isOpenMPTaskloopDirective = [](OpenMPDirectiveKind DK) { kparzysz wrote: Done https://github.com/llvm/llvm-project/pull/98633 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Simplify check for taskloop in `ActOnOpenMPLoopInitia… (PR #98633)
https://github.com/alexey-bataev approved this pull request. LG https://github.com/llvm/llvm-project/pull/98633 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Fixed false positive when accessing offset member variables (PR #95387)
https://github.com/gbMattN updated https://github.com/llvm/llvm-project/pull/95387 >From 8099113d68bd7c47c29f635bb10a048ddb99833b Mon Sep 17 00:00:00 2001 From: Matthew Nagy Date: Fri, 28 Jun 2024 16:12:31 + Subject: [PATCH] [TySan] Fixed false positive when accessing global object's member variables --- compiler-rt/lib/tysan/tysan.cpp | 19 +++- .../test/tysan/global-struct-members.c| 31 +++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/test/tysan/global-struct-members.c diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index f627851d049e6..8235b0ec2b55e 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -221,7 +221,24 @@ __tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) { OldTDPtr -= i; OldTD = *OldTDPtr; -if (!isAliasingLegal(td, OldTD)) +// When shadow memory is set for global objects, the entire object is tagged with the struct type +// This means that when you access a member variable, tysan reads that as you accessing a struct midway +// through, with 'i' being the offset +// Therefore, if you are accessing a struct, we need to find the member type. We can go through the +// members of the struct type and see if there is a member at the offset you are accessing the struct by. +// If there is indeed a member starting at offset 'i' in the struct, we should check aliasing legality +// with that type. If there isn't, we run alias checking on the struct with will give us the correct error. 
+tysan_type_descriptor *InternalMember = OldTD; +if (OldTD->Tag == TYSAN_STRUCT_TD) { + for (int j = 0; j < OldTD->Struct.MemberCount; j++) { +if (OldTD->Struct.Members[j].Offset == i) { + InternalMember = OldTD->Struct.Members[j].Type; + break; +} + } +} + +if (!isAliasingLegal(td, InternalMember)) reportError(addr, size, td, OldTD, AccessStr, "accesses part of an existing object", -i, pc, bp, sp); diff --git a/compiler-rt/test/tysan/global-struct-members.c b/compiler-rt/test/tysan/global-struct-members.c new file mode 100644 index 0..76ea3c431dd7b --- /dev/null +++ b/compiler-rt/test/tysan/global-struct-members.c @@ -0,0 +1,31 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include + +struct X { + int a, b, c; +} x; + +static struct X xArray[2]; + +int main() { + x.a = 1; + x.b = 2; + x.c = 3; + + printf("%d %d %d\n", x.a, x.b, x.c); + // CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation + + for (size_t i = 0; i < 2; i++) { +xArray[i].a = 1; +xArray[i].b = 1; +xArray[i].c = 1; + } + + struct X *xPtr = (struct X *)&(xArray[0].c); + xPtr->a = 1; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in X at offset 0) accesses an existing object of type int (in X at offset 8) + // CHECK: {{#0 0x.* in main .*struct-members.c:}}[[@LINE-3]] +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [TySan] Improved compatibility for tests (PR #96507)
gbMattN wrote: @fhahn updated to remove merge conflicts https://github.com/llvm/llvm-project/pull/96507 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] 6ea50a9 - Revert "[libc] Add erase function to blockstore (#97641)"
Author: Michael Jones Date: 2024-07-12T10:50:56-07:00 New Revision: 6ea50a98e3f6db5c0fbd3ff6c16e2d9ac01fae37 URL: https://github.com/llvm/llvm-project/commit/6ea50a98e3f6db5c0fbd3ff6c16e2d9ac01fae37 DIFF: https://github.com/llvm/llvm-project/commit/6ea50a98e3f6db5c0fbd3ff6c16e2d9ac01fae37.diff LOG: Revert "[libc] Add erase function to blockstore (#97641)" This reverts commit 9e452c1af430e5090de92aebfe422abbf5e51ff3. Added: Modified: libc/src/__support/blockstore.h libc/test/src/__support/blockstore_test.cpp Removed: diff --git a/libc/src/__support/blockstore.h b/libc/src/__support/blockstore.h index 41f5f196b7cef..8d13e0ed290df 100644 --- a/libc/src/__support/blockstore.h +++ b/libc/src/__support/blockstore.h @@ -9,11 +9,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_BLOCKSTORE_H #define LLVM_LIBC_SRC___SUPPORT_BLOCKSTORE_H -#include "src/__support/CPP/array.h" -#include "src/__support/CPP/new.h" -#include "src/__support/CPP/type_traits.h" -#include "src/__support/libc_assert.h" #include "src/__support/macros/config.h" +#include +#include #include #include @@ -100,16 +98,6 @@ class BlockStore { return *reinterpret_cast(block->data + sizeof(T) * true_index); } -LIBC_INLINE Iterator operator+(int i) { - LIBC_ASSERT(i >= 0 && - "BlockStore iterators only support incrementation."); - auto other = *this; - for (int j = 0; j < i; ++j) -++other; - - return other; -} - LIBC_INLINE bool operator==(const Iterator &rhs) const { return block == rhs.block && index == rhs.index; } @@ -188,45 +176,6 @@ class BlockStore { else return Iterator(current, fill_count); } - - // Removes the element at pos, then moves all the objects after back by one to - // fill the hole. It's assumed that pos is a valid iterator to somewhere in - // this block_store. 
- LIBC_INLINE void erase(Iterator pos) { -const Iterator last_item = Iterator(current, fill_count); -if (pos == last_item) { - pop_back(); - return; -} - -if constexpr (REVERSE_ORDER) { - // REVERSE: Iterate from begin to pos - const Iterator range_end = pos; - Iterator cur = begin(); - T prev_val = *cur; - ++cur; - T cur_val = *cur; - - for (; cur != range_end; ++cur) { -cur_val = *cur; -*cur = prev_val; -prev_val = cur_val; - } - // We will always need to move at least one item (since we know that pos - // isn't the last item due to the check above). - *cur = prev_val; -} else { - // FORWARD: Iterate from pos to end - const Iterator range_end = end(); - Iterator cur = pos; - Iterator prev = cur; - ++cur; - - for (; cur != range_end; prev = cur, ++cur) -*prev = *cur; -} -pop_back(); - } }; template diff --git a/libc/test/src/__support/blockstore_test.cpp b/libc/test/src/__support/blockstore_test.cpp index dd74ea18f2c02..5fe8fef1b6edc 100644 --- a/libc/test/src/__support/blockstore_test.cpp +++ b/libc/test/src/__support/blockstore_test.cpp @@ -64,99 +64,6 @@ class LlvmLibcBlockStoreTest : public LIBC_NAMESPACE::testing::Test { } block_store.destroy(&block_store); } - - template void erase_test() { -using LIBC_NAMESPACE::BlockStore; -BlockStore block_store; -int i; - -constexpr int ARR_SIZE = 6; - -ASSERT_TRUE(block_store.empty()); -for (int i = 0; i < ARR_SIZE; i++) { - ASSERT_TRUE(block_store.push_back(i + 1)); -} - -// block_store state should be {1,2,3,4,5,6} - -block_store.erase(block_store.begin()); - -// FORWARD: block_store state should be {2,3,4,5,6} -// REVERSE: block_store state should be {1,2,3,4,5} - -auto iter = block_store.begin(); -for (i = 0; iter != block_store.end(); ++i, ++iter) { - if (!REVERSE) { -ASSERT_EQ(*iter, i + 2); - } else { -ASSERT_EQ(*iter, (ARR_SIZE - 1) - i); - } -} - -// Assert that there were the correct number of elements -ASSERT_EQ(i, ARR_SIZE - 1); - -block_store.erase(block_store.end()); - -// BOTH: block_store state should be 
{2,3,4,5} - -iter = block_store.begin(); -for (i = 0; iter != block_store.end(); ++i, ++iter) { - if (!REVERSE) { -ASSERT_EQ(*iter, i + 2); - } else { -ASSERT_EQ(*iter, (ARR_SIZE - 1) - i); - } -} - -ASSERT_EQ(i, ARR_SIZE - 2); - -block_store.erase(block_store.begin() + 1); - -// FORWARD: block_store state should be {2,4,5} -// REVERSE: block_store state should be {2,3,5} - -const int FORWARD_RESULTS[] = {2, 4, 5}; -const int REVERSE_RESULTS[] = {2, 3, 5}; - -iter = block_store.begin(); -for (i = 0; iter != block_store.end(); ++i, ++iter) { - if (!REVERSE) { -ASSERT_EQ(*iter, FORWARD_RESULTS[i]); - } else { -ASSERT_EQ(*i
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (PR #96872)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96872 >From e79b7f20e3a78de7c09c96e0b8f76132c9942a09 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jun 2024 10:58:44 +0200 Subject: [PATCH 1/2] clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} Need to emit syncscope and new metadata to get the native instruction, most of the time. --- clang/lib/CodeGen/CGBuiltin.cpp | 39 +-- .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl| 2 +- .../builtins-fp-atomics-gfx12.cl | 4 +- .../builtins-fp-atomics-gfx90a.cl | 4 +- .../builtins-fp-atomics-gfx940.cl | 4 +- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6cc0d9485720c..15d5e80fcd402 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18658,8 +18659,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: @@ -18671,18 +18670,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = 
Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -19097,7 +19089,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19113,6 +19107,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19147,8 +19143,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { - // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. - SSID = llvm::SyncScope::System; + // Most of the builtins do not have syncscope/order arguments. For DS + // atomics the scope doesn't really matter, as they implicitly operate at + // workgroup scope. 
+ // + // The global/flat cases need to use agent scope to consistently produce + // the native instruction instead of a cmpxchg expansion. + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::SequentiallyConsistent; // The v2bf16 builtin uses i16 instead of a natural bfloat type. @@ -19163,6 +19164,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); + +unsigned AddrSpace = Ptr.getType()->getAddressSpace(); +if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) { + // Most targets require "amdgpu.no.fine.grained.memory" to emit the nativ
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96873 >From 9a6ec14c6e2acdae6c5c5a50a0f1fd4847f4cd9a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:12:59 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 20 ++- .../builtins-fp-atomics-gfx12.cl | 9 ++--- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../builtins-fp-atomics-gfx940.cl | 3 ++- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3cf708da269b3..105566b584108 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18659,22 +18659,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -18694,11 +18687,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 
IID = Intrinsic::amdgcn_flat_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19091,7 +19079,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19109,6 +19099,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 6b8a6d14575db..07e63a8711c7f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_flat_add_2f16 // GFX12: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { @@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { } // CHECK-LABEL: test_global_add_half2 -// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_global_add_half2 // GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2, off
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From f124301ced6c21ab35a3af57c839e32e6b3ec269 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 105566b584108..91702246c9571 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18661,10 +18661,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18674,19 +18672,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19081,7 +19072,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19101,6 +19094,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079..02e289427238f 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d..bd9b8c7268e06 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef half _
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96875 >From fc87b903a7007710f97eae4b99b6d518c0ad5c81 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:34:43 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 26 ++- .../builtins-fp-atomics-gfx12.cl | 24 - .../builtins-fp-atomics-gfx90a.cl | 6 ++--- .../builtins-fp-atomics-gfx940.cl | 14 +++--- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 91702246c9571..6c97b1d1108a5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18685,22 +18685,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { -Intrinsic::ID IID; -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19074,7 +19058,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case 
AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19096,6 +19082,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19140,7 +19128,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. - if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 07e63a8711c7f..e8b6eb57c38d7 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 +// CHECK-NEXT: 
[[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL: test_flat_add_2f
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96876 >From e0715016475eaaff55ef7143ea148c4a47e7f825 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 23:18:32 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +-- .../builtins-fp-atomics-gfx90a.cl | 18 ++ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6c97b1d1108a5..42d57671f9c76 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18659,32 +18659,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { -Intrinsic::ID IID; -llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - IID = Intrinsic::amdgcn_global_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - IID = Intrinsic::amdgcn_global_atomic_fmax; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmax; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = -CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19060,7 +19034,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19087,8 +19065,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: BinOp = llvm::AtomicRMWInst::FMin; break; +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = llvm::AtomicRMWInst::FMax; break; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 9381ce951df3e..556e553903d1a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) { } // CHECK-LABEL: test_global_global_min_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: 
test_global_global_min_f64$local // GFX90A: global_atomic_min_f64 void test_global_global_min_f64(__global double *addr, double x){ @@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){ } // CHECK-LABEL: test_global_max_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_global_max_f64$local // GFX90A: global_atomic_max_f64 void test_global_max_f64(__global double *addr, double x){ @@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr, doub
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/97050 >From f708d5e78f8ac415c2dce8057304f13bf3236b2b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Jun 2024 16:32:48 +0200 Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics These are now fully covered by atomicrmw. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 14 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - llvm/lib/Target/AMDGPU/FLATInstructions.td| 2 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/test/Bitcode/amdgcn-atomic.ll| 22 ++ .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 54 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 100 - llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 -- 11 files changed, 33 insertions(+), 368 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 71b1e832bde3c..9cf4d6352d23d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2907,10 +2907,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 53de9eef516b3..f566a0e3c3043 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48..db8b44149cf47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9e7694f41d6b8..74686cd10512c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4897,8 +4897,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_fl
[llvm-branch-commits] [clang] [clang][test] remove unused `run` overload in `BoundNodesCallback` (PR #94244)
https://github.com/5chmidti updated https://github.com/llvm/llvm-project/pull/94244 >From 51de6278688635cdcd5a8dd73a2ef0ede2a31b37 Mon Sep 17 00:00:00 2001 From: Julian Schmidt Date: Sat, 1 Jun 2024 17:49:13 +0200 Subject: [PATCH] [clang][test] remove unused `run` overload in `BoundNodesCallback` The overload that did not take the additional `ASTContext *` argument is unnecessary when the context could simply be commented out, as it is always passed to `run` from `VerifyMatcher::run`. This patch removes the single-argument overload in favor of having a single overload. --- clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp | 2 -- clang/unittests/ASTMatchers/ASTMatchersTest.h| 7 +-- clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp | 9 +++-- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 3295ad1e21455..ebf548eb25431 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2030,8 +2030,6 @@ TEST_P(ASTMatchersTest, template class VerifyAncestorHasChildIsEqual : public BoundNodesCallback { public: - bool run(const BoundNodes *Nodes) override { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(""); return verify(*Nodes, *Context, Node); diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h index e981299531574..ad2f5f355621c 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTest.h +++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h @@ -28,7 +28,6 @@ using clang::tooling::runToolOnCodeWithArgs; class BoundNodesCallback { public: virtual ~BoundNodesCallback() {} - virtual bool run(const BoundNodes *BoundNodes) = 0; virtual bool run(const BoundNodes *BoundNodes, ASTContext *Context) = 0; virtual void onEndOfTranslationUnit() {} }; @@ -403,7 +402,7 @@ 
template class VerifyIdIsBoundTo : public BoundNodesCallback { EXPECT_EQ("", Name); } - bool run(const BoundNodes *Nodes) override { + bool run(const BoundNodes *Nodes, ASTContext * /*Context*/) override { const BoundNodes::IDToNodeMap &M = Nodes->getMap(); if (Nodes->getNodeAs(Id)) { ++Count; @@ -426,10 +425,6 @@ template class VerifyIdIsBoundTo : public BoundNodesCallback { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { -return run(Nodes); - } - private: const std::string Id; const int ExpectedCount; diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 3c033638960de..97355d22dd72b 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -5641,7 +5641,6 @@ TEST(HasParent, MatchesAllParents) { TEST(HasParent, NoDuplicateParents) { class HasDuplicateParents : public BoundNodesCallback { public: -bool run(const BoundNodes *Nodes) override { return false; } bool run(const BoundNodes *Nodes, ASTContext *Context) override { const Stmt *Node = Nodes->getNodeAs("node"); std::set Parents; @@ -5850,16 +5849,14 @@ template class VerifyMatchOnNode : public BoundNodesCallback { public: VerifyMatchOnNode(StringRef Id, const internal::Matcher &InnerMatcher, StringRef InnerId) -: Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) { - } - - bool run(const BoundNodes *Nodes) override { return false; } + : Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) {} bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(Id); return selectFirst(InnerId, match(InnerMatcher, *Node, *Context)) != - nullptr; + nullptr; } + private: std::string Id; internal::Matcher InnerMatcher; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][test] remove unused `run` overload in `BoundNodesCallback` (PR #94244)
5chmidti wrote: Rebased this PR on trunk and rebased the rest of the stack. https://github.com/llvm/llvm-project/pull/94244 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][test] add testing for the AST matcher reference (PR #94248)
5chmidti wrote: Rebased this PR on trunk and rebased the rest of the stack. https://github.com/llvm/llvm-project/pull/94248 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][test] remove unused `run` overload in `BoundNodesCallback` (PR #94244)
https://github.com/5chmidti updated https://github.com/llvm/llvm-project/pull/94244 >From 26d5b0377af3df4c551ae16d053684bb8c75e233 Mon Sep 17 00:00:00 2001 From: Julian Schmidt Date: Sat, 1 Jun 2024 17:49:13 +0200 Subject: [PATCH] [clang][test] remove unused `run` overload in `BoundNodesCallback` The overload that did not take the additional `ASTContext *` argument is unnecessary when the context could simply be commented out, as it is always passed to `run` from `VerifyMatcher::run`. This patch removes the single-argument overload in favor of having a single overload. --- .../unittests/ASTMatchers/ASTMatchersNodeTest.cpp | 2 -- clang/unittests/ASTMatchers/ASTMatchersTest.h | 7 +-- .../ASTMatchers/ASTMatchersTraversalTest.cpp | 15 --- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 3295ad1e2145..ebf548eb2543 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2030,8 +2030,6 @@ TEST_P(ASTMatchersTest, template class VerifyAncestorHasChildIsEqual : public BoundNodesCallback { public: - bool run(const BoundNodes *Nodes) override { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(""); return verify(*Nodes, *Context, Node); diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h index e98129953157..ad2f5f355621 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTest.h +++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h @@ -28,7 +28,6 @@ using clang::tooling::runToolOnCodeWithArgs; class BoundNodesCallback { public: virtual ~BoundNodesCallback() {} - virtual bool run(const BoundNodes *BoundNodes) = 0; virtual bool run(const BoundNodes *BoundNodes, ASTContext *Context) = 0; virtual void onEndOfTranslationUnit() {} }; @@ -403,7 +402,7 @@ template class 
VerifyIdIsBoundTo : public BoundNodesCallback { EXPECT_EQ("", Name); } - bool run(const BoundNodes *Nodes) override { + bool run(const BoundNodes *Nodes, ASTContext * /*Context*/) override { const BoundNodes::IDToNodeMap &M = Nodes->getMap(); if (Nodes->getNodeAs(Id)) { ++Count; @@ -426,10 +425,6 @@ template class VerifyIdIsBoundTo : public BoundNodesCallback { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { -return run(Nodes); - } - private: const std::string Id; const int ExpectedCount; diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 3c033638960d..d887c5ba8f05 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -5641,7 +5641,6 @@ TEST(HasParent, MatchesAllParents) { TEST(HasParent, NoDuplicateParents) { class HasDuplicateParents : public BoundNodesCallback { public: -bool run(const BoundNodes *Nodes) override { return false; } bool run(const BoundNodes *Nodes, ASTContext *Context) override { const Stmt *Node = Nodes->getNodeAs("node"); std::set Parents; @@ -5850,16 +5849,14 @@ template class VerifyMatchOnNode : public BoundNodesCallback { public: VerifyMatchOnNode(StringRef Id, const internal::Matcher &InnerMatcher, StringRef InnerId) -: Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) { - } - - bool run(const BoundNodes *Nodes) override { return false; } + : Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) {} bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(Id); return selectFirst(InnerId, match(InnerMatcher, *Node, *Context)) != - nullptr; + nullptr; } + private: std::string Id; internal::Matcher InnerMatcher; @@ -6053,7 +6050,7 @@ namespace { class ForCallablePreservesBindingWithMultipleParentsTestCallback : public BoundNodesCallback { public: - bool run(const BoundNodes *BoundNodes) override { + 
bool run(const BoundNodes *BoundNodes, ASTContext *Context) override { FunctionDecl const *FunDecl = BoundNodes->getNodeAs("funDecl"); // Validate test assumptions. This would be expressed as ASSERT_* in @@ -6090,10 +6087,6 @@ class ForCallablePreservesBindingWithMultipleParentsTestCallback return true; } - bool run(const BoundNodes *BoundNodes, ASTContext *Context) override { -return run(BoundNodes); - } - private: void ExpectCorrectResult(StringRef LogInfo, ArrayRef Results) const { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits