[llvm-branch-commits] [llvm] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records (PR #102789)
@@ -605,13 +616,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( TopLevelFuncs += !Discard; } assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); wlei-llvm wrote: Since this assumes that the vector's reserved size will equal the number of elements actually used, how about adding assertions at the end of the function, like `assert(InlineTreeVec.size() == InlinedCount && ...)`? This would catch any accidental re-allocation/extension of the vector while building the map. https://github.com/llvm/llvm-project/pull/102789 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records (PR #102789)
@@ -1293,9 +1293,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( // and will be inferred by the compiler. for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { -FunctionProfile->addBodySamples(Probe->getIndex(), -Probe->getDiscriminator(), 0); + for (auto &Probe : I.first->getProbes()) { wlei-llvm wrote: Seems this can be `const auto` https://github.com/llvm/llvm-project/pull/102789 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records (PR #102789)
@@ -240,12 +235,11 @@ class MCPseudoProbeInlineTreeBase { bool isRoot() const { return Guid == 0; } InlinedProbeTreeMap &getChildren() { return Children; } const InlinedProbeTreeMap &getChildren() const { return Children; } - std::vector &getProbes() { return Probes; } - const std::vector &getProbes() const { return Probes; } - void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + ProbesType &getProbes() { return Probes; } wlei-llvm wrote: Do you know where we require this function (i.e., anywhere it can't be replaced with the `const` version)? I thought we don't change the probes after they are decoded. Is it possible to keep only one `getProbes` and one `getChildren` (the `const` versions)? https://github.com/llvm/llvm-project/pull/102789 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
@@ -45,61 +45,190 @@ cl::OptionCategory GICombinerOptionCategory( ); } // end namespace llvm -/// This class acts as the glue the joins the CombinerHelper to the overall +/// This class acts as the glue that joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the /// modifications it makes to the MIR to the GISelChangeObserver and the -/// observer subclass will act on these events. In this case, instruction -/// erasure will cancel any future visits to the erased instruction and -/// instruction creation will schedule that instruction for a future visit. -/// Other Combiner implementations may require more complex behaviour from -/// their GISelChangeObserver subclass. +/// observer subclass will act on these events. class Combiner::WorkListMaintainer : public GISelChangeObserver { - using WorkListTy = GISelWorkList<512>; - WorkListTy &WorkList; +protected: +#ifndef NDEBUG arsenm wrote: Does this need that ABIBreakingChecks macro? https://github.com/llvm/llvm-project/pull/102163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/102163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100216 >From 4ebc1792d1fe3ffbc97b5c93989f3897c31139b9 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 23 Jul 2024 14:41:57 -0500 Subject: [PATCH 1/5] [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) Summary: This was not forwarded properly as it would try to pass it to `nvlink`. Fixes https://github.com/llvm/llvm-project/issues/100168 (cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43) --- clang/lib/Driver/ToolChains/Cuda.cpp | 4 clang/test/Driver/linker-wrapper-passes.c | 10 +++--- clang/test/Driver/nvlink-wrapper.c | 7 +++ clang/tools/clang-nvlink-wrapper/NVLinkOpts.td | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 59453c484ae4f4..61d12b10dfb62b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); + if (Args.hasArg(options::OPT_cuda_path_EQ)) +CmdArgs.push_back(Args.MakeArgString( +"--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); + // Add paths specified in LIBRARY_PATH environment variable as -L options. addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c index aadcf472e9b636..8c337ff906d173 100644 --- a/clang/test/Driver/linker-wrapper-passes.c +++ b/clang/test/Driver/linker-wrapper-passes.c @@ -1,9 +1,5 @@ // Check various clang-linker-wrapper pass options after -offload-opt. -// REQUIRES: llvm-plugins, llvm-examples -// REQUIRES: x86-registered-target -// REQUIRES: amdgpu-registered-target - // Setup. 
// RUN: mkdir -p %t // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \ @@ -23,14 +19,14 @@ // RUN: %t/host-x86_64-unknown-linux-gnu.s // Check plugin, -passes, and no remarks. -// RUN: clang-linker-wrapper -o a.out --embed-bitcode \ +// RUN: clang-linker-wrapper -o a.out --embed-bitcode --dry-run \ // RUN: --linker-path=/usr/bin/true %t/host-x86_64-unknown-linux-gnu.o \ // RUN: %offload-opt-loadbye --offload-opt=-wave-goodbye \ // RUN: --offload-opt=-passes="function(goodbye),module(inline)" 2>&1 | \ // RUN: FileCheck -match-full-lines -check-prefixes=OUT %s // Check plugin, -p, and remarks. -// RUN: clang-linker-wrapper -o a.out --embed-bitcode \ +// RUN: clang-linker-wrapper -o a.out --embed-bitcode --dry-run \ // RUN: --linker-path=/usr/bin/true %t/host-x86_64-unknown-linux-gnu.o \ // RUN: %offload-opt-loadbye --offload-opt=-wave-goodbye \ // RUN: --offload-opt=-p="function(goodbye),module(inline)" \ @@ -43,7 +39,7 @@ // RUN: -check-prefixes=YML %s // Check handling of bad plugin. -// RUN: not clang-linker-wrapper \ +// RUN: not clang-linker-wrapper --dry-run \ // RUN: --offload-opt=-load-pass-plugin=%t/nonexistent.so 2>&1 | \ // RUN: FileCheck -match-full-lines -check-prefixes=BAD-PLUGIN %s diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index fdda93f1f9cdc1..318315ddaca340 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -63,3 +63,10 @@ int baz() { return y + x; } // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin {{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin + +// +// Check that we don't forward some arguments. 
+// +// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \ +// RUN: -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s --check-prefix=PATH +// PATH-NOT: --cuda-path=/opt/cuda diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index e84b530f2787d3..8c80a51b12a44e 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose information">; def version : Flag<["--"], "version">, HelpText<"Display the version number and exit">; -def cuda_path_EQ : Joined<["--"], "cuda-path=">, +def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the system CUDA path">; -def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, +def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the 'ptxas' path">; def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">, >From 63fef488bd653f0c02b40024a7d4af86e3f3c
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101506 >From c925b6b58e07d2a15ee0af10de32248750b1b985 Mon Sep 17 00:00:00 2001 From: Yeting Kuo <46629943+yeti...@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:37:42 +0800 Subject: [PATCH] [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891) --- llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 19 + .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 80 +-- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5abc..be2e880ecd3a98 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) -VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { -unsigned VMVOp = STy->isFloatingPointTy() ? 
Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; -Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { -Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index b8c7037580c46b..849f98c26f4593 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-OPT-NEXT:vlse8.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-NO-OPT: # %bb.0: ; CHECK-NO-OPT-NEXT:lbu a0, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NO-OPT-NEXT:vmv.v.x v8, a0 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3) @@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-OPT-NEXT:vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-NO-OPT: # %bb.0: 
; CHECK-NO-OPT-NEXT:flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NO-OPT-NEXT:vfmv.v.f v8, fa5 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0010f64a93fd62..14976f21b7dbba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+z
[llvm-branch-commits] [clang] Cherry pick: [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit spec… (PR #102514)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102514 >From dec94b04c02fe18296e29a10f2db7e8da054be0f Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Tue, 6 Aug 2024 12:40:44 -0400 Subject: [PATCH] [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit specializations instantiation dependent (#100392) A class member named by an expression in a member function that may instantiate to a static _or_ non-static member is represented by a `UnresolvedLookupExpr` in order to defer the implicit transformation to a class member access expression until instantiation. Since `ASTContext::getDecltypeType` only creates a `DecltypeType` that has a `DependentDecltypeType` as its canonical type when the operand is instantiation dependent, and since we do not transform types unless they are instantiation dependent, we need to mark the `UnresolvedLookupExpr` as instantiation dependent in order to correctly build a `DecltypeType` using the expression as its operand with a `DependentDecltypeType` canonical type. Fixes #99873. 
(cherry picked from commit 55ea36002bd364518c20b3ce282640c920697bf7) --- clang/include/clang/AST/ExprCXX.h | 7 --- clang/lib/AST/ASTImporter.cpp | 6 -- clang/lib/AST/ExprCXX.cpp | 19 +++ clang/lib/Sema/SemaCoroutine.cpp | 3 ++- clang/lib/Sema/SemaDecl.cpp | 2 +- clang/lib/Sema/SemaDeclCXX.cpp| 2 +- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/lib/Sema/SemaExprMember.cpp | 3 ++- clang/lib/Sema/SemaOpenMP.cpp | 6 -- clang/lib/Sema/SemaOverload.cpp | 6 +++--- clang/lib/Sema/SemaTemplate.cpp | 3 ++- clang/lib/Sema/TreeTransform.h| 8 clang/test/SemaCXX/decltype.cpp | 25 + 13 files changed, 64 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index c2feac525c1ea6..45cfd7bfb7f92c 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -3229,7 +3229,7 @@ class UnresolvedLookupExpr final const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); UnresolvedLookupExpr(EmptyShell Empty, unsigned NumResults, bool HasTemplateKWAndArgsInfo); @@ -3248,7 +3248,7 @@ class UnresolvedLookupExpr final NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); // After canonicalization, there may be dependent template arguments in // CanonicalConverted But none of Args is dependent. 
When any of @@ -3258,7 +3258,8 @@ class UnresolvedLookupExpr final NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End, bool KnownDependent); + UnresolvedSetIterator End, bool KnownDependent, + bool KnownInstantiationDependent); static UnresolvedLookupExpr *CreateEmpty(const ASTContext &Context, unsigned NumResults, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 08ef09d353afc9..e95992b99f7e9d 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8578,13 +8578,15 @@ ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, *ToTemplateKeywordLocOrErr, ToNameInfo, E->requiresADL(), &ToTAInfo, -ToDecls.begin(), ToDecls.end(), KnownDependent); +ToDecls.begin(), ToDecls.end(), KnownDependent, +/*KnownInstantiationDependent=*/E->isInstantiationDependent()); } return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, ToNameInfo, E->requiresADL(), ToDecls.begin(), ToDecls.end(), - /*KnownDependent=*/E->isTypeDependent()); + /*KnownDependent=*/E->isTypeDependent(), + /*KnownInstantiationDependent=*/E->isInstantiationDependent()); } ExpectedStmt diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 8d2a1b5611ccc6..45e2badf2ddd4a 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -402,10 +402,11 @@ UnresolvedLookupExpr::UnresolvedLookupExpr( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const Decla
[llvm-branch-commits] [clang] dec94b0 - [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit specializations instantiation dependent (#100392)
Author: Krystian Stasiowski Date: 2024-08-15T09:34:14+02:00 New Revision: dec94b04c02fe18296e29a10f2db7e8da054be0f URL: https://github.com/llvm/llvm-project/commit/dec94b04c02fe18296e29a10f2db7e8da054be0f DIFF: https://github.com/llvm/llvm-project/commit/dec94b04c02fe18296e29a10f2db7e8da054be0f.diff LOG: [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit specializations instantiation dependent (#100392) A class member named by an expression in a member function that may instantiate to a static _or_ non-static member is represented by a `UnresolvedLookupExpr` in order to defer the implicit transformation to a class member access expression until instantiation. Since `ASTContext::getDecltypeType` only creates a `DecltypeType` that has a `DependentDecltypeType` as its canonical type when the operand is instantiation dependent, and since we do not transform types unless they are instantiation dependent, we need to mark the `UnresolvedLookupExpr` as instantiation dependent in order to correctly build a `DecltypeType` using the expression as its operand with a `DependentDecltypeType` canonical type. Fixes #99873. 
(cherry picked from commit 55ea36002bd364518c20b3ce282640c920697bf7) Added: Modified: clang/include/clang/AST/ExprCXX.h clang/lib/AST/ASTImporter.cpp clang/lib/AST/ExprCXX.cpp clang/lib/Sema/SemaCoroutine.cpp clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaExpr.cpp clang/lib/Sema/SemaExprMember.cpp clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/SemaOverload.cpp clang/lib/Sema/SemaTemplate.cpp clang/lib/Sema/TreeTransform.h clang/test/SemaCXX/decltype.cpp Removed: diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index c2feac525c1ea6..45cfd7bfb7f92c 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -3229,7 +3229,7 @@ class UnresolvedLookupExpr final const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); UnresolvedLookupExpr(EmptyShell Empty, unsigned NumResults, bool HasTemplateKWAndArgsInfo); @@ -3248,7 +3248,7 @@ class UnresolvedLookupExpr final NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, UnresolvedSetIterator Begin, UnresolvedSetIterator End, - bool KnownDependent); + bool KnownDependent, bool KnownInstantiationDependent); // After canonicalization, there may be dependent template arguments in // CanonicalConverted But none of Args is dependent. 
When any of @@ -3258,7 +3258,8 @@ class UnresolvedLookupExpr final NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo, bool RequiresADL, const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin, - UnresolvedSetIterator End, bool KnownDependent); + UnresolvedSetIterator End, bool KnownDependent, + bool KnownInstantiationDependent); static UnresolvedLookupExpr *CreateEmpty(const ASTContext &Context, unsigned NumResults, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 08ef09d353afc9..e95992b99f7e9d 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -8578,13 +8578,15 @@ ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) { return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, *ToTemplateKeywordLocOrErr, ToNameInfo, E->requiresADL(), &ToTAInfo, -ToDecls.begin(), ToDecls.end(), KnownDependent); +ToDecls.begin(), ToDecls.end(), KnownDependent, +/*KnownInstantiationDependent=*/E->isInstantiationDependent()); } return UnresolvedLookupExpr::Create( Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr, ToNameInfo, E->requiresADL(), ToDecls.begin(), ToDecls.end(), - /*KnownDependent=*/E->isTypeDependent()); + /*KnownDependent=*/E->isTypeDependent(), + /*KnownInstantiationDependent=*/E->isInstantiationDependent()); } ExpectedStmt diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 8d2a1b5611ccc6..45e2badf2ddd4a 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -402,10 +402,11 @@ UnresolvedLookupExpr::UnresolvedLookupExpr( NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, const DeclarationNameInfo &NameInfo
[llvm-branch-commits] [clang] Cherry pick: [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit spec… (PR #102514)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Use a different smart ptr type alias (#102089) (PR #103003)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103003 >From a3b18fcd241a6ce23653f69df37b59166ea0484d Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Mon, 12 Aug 2024 20:07:08 -0500 Subject: [PATCH] [libc++] Use a different smart ptr type alias (#102089) The `_SP` type is used by some C libraries and this alias could conflict with it. (cherry picked from commit 7951673d408ee64744d0b924a49db78e8243d876) --- libcxx/include/__memory/inout_ptr.h| 10 +- libcxx/include/__memory/out_ptr.h | 8 .../test/libcxx/system_reserved_names.gen.py | 18 -- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__memory/inout_ptr.h b/libcxx/include/__memory/inout_ptr.h index 72e1a21ad68671..e5f3ac5d027e8e 100644 --- a/libcxx/include/__memory/inout_ptr.h +++ b/libcxx/include/__memory/inout_ptr.h @@ -63,17 +63,17 @@ class _LIBCPP_TEMPLATE_VIS inout_ptr_t { } } -using _SP = __pointer_of_or_t<_Smart, _Pointer>; +using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (is_pointer_v<_Smart>) { - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/include/__memory/out_ptr.h b/libcxx/include/__memory/out_ptr.h index 95aa2029c92314..fd99110790cc89 100644 --- a/libcxx/include/__memory/out_ptr.h +++ b/libcxx/include/__memory/out_ptr.h @@ -58,14 +58,14 @@ class _LIBCPP_TEMPLATE_VIS out_ptr_t { return; } -using _SP = __pointer_of_or_t<_Smart, _Pointer>; +using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py index 0d935a18addeee..956a8d1abe3c3c 100644 --- a/libcxx/test/libcxx/system_reserved_names.gen.py +++ b/libcxx/test/libcxx/system_reserved_names.gen.py @@ -17,7 +17,8 @@ from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: - print(f"""\ +print( +f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} @@ -162,6 +163,18 @@ #define erase SYSTEM_RESERVED_NAME #define refresh SYSTEM_RESERVED_NAME +// Dinkumware libc ctype.h uses these definitions +#define _XA SYSTEM_RESERVED_NAME +#define _XS SYSTEM_RESERVED_NAME +#define _BB SYSTEM_RESERVED_NAME +#define _CN SYSTEM_RESERVED_NAME +#define _DI SYSTEM_RESERVED_NAME +#define _LO SYSTEM_RESERVED_NAME +#define _PU SYSTEM_RESERVED_NAME +#define _SP SYSTEM_RESERVED_NAME +#define _UP SYSTEM_RESERVED_NAME +#define _XD SYSTEM_RESERVED_NAME + #include <{header}> // Make sure we don't swallow the definition of the macros we push/pop @@ -172,4 +185,5 @@ static_assert(__builtin_strcmp(STRINGIFY(move), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, ""); static_assert(__builtin_strcmp(STRINGIFY(erase), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, ""); static_assert(__builtin_strcmp(STRING
[llvm-branch-commits] [libcxx] a3b18fc - [libc++] Use a different smart ptr type alias (#102089)
Author: Brian Cain Date: 2024-08-15T09:34:45+02:00 New Revision: a3b18fcd241a6ce23653f69df37b59166ea0484d URL: https://github.com/llvm/llvm-project/commit/a3b18fcd241a6ce23653f69df37b59166ea0484d DIFF: https://github.com/llvm/llvm-project/commit/a3b18fcd241a6ce23653f69df37b59166ea0484d.diff LOG: [libc++] Use a different smart ptr type alias (#102089) The `_SP` type is used by some C libraries and this alias could conflict with it. (cherry picked from commit 7951673d408ee64744d0b924a49db78e8243d876) Added: Modified: libcxx/include/__memory/inout_ptr.h libcxx/include/__memory/out_ptr.h libcxx/test/libcxx/system_reserved_names.gen.py Removed: diff --git a/libcxx/include/__memory/inout_ptr.h b/libcxx/include/__memory/inout_ptr.h index 72e1a21ad68671..e5f3ac5d027e8e 100644 --- a/libcxx/include/__memory/inout_ptr.h +++ b/libcxx/include/__memory/inout_ptr.h @@ -63,17 +63,17 @@ class _LIBCPP_TEMPLATE_VIS inout_ptr_t { } } -using _SP = __pointer_of_or_t<_Smart, _Pointer>; +using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (is_pointer_v<_Smart>) { - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/include/__memory/out_ptr.h b/libcxx/include/__memory/out_ptr.h index 95aa2029c92314..fd99110790cc89 100644 --- a/libcxx/include/__memory/out_ptr.h +++ b/libcxx/include/__memory/out_ptr.h @@ -58,14 +58,14 @@ class _LIBCPP_TEMPLATE_VIS out_ptr_t { return; } -using _SP = __pointer_of_or_t<_Smart, _Pointer>; +using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>; if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) { - std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } else { - static_assert(is_constructible_v<_Smart, _SP, _Args...>, + static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>, "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args..."); - std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); }, + std::apply([&](auto&&... 
__args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); }, std::move(__a_)); } } diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py index 0d935a18addeee..956a8d1abe3c3c 100644 --- a/libcxx/test/libcxx/system_reserved_names.gen.py +++ b/libcxx/test/libcxx/system_reserved_names.gen.py @@ -17,7 +17,8 @@ from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: - print(f"""\ +print( +f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} @@ -162,6 +163,18 @@ #define erase SYSTEM_RESERVED_NAME #define refresh SYSTEM_RESERVED_NAME +// Dinkumware libc ctype.h uses these definitions +#define _XA SYSTEM_RESERVED_NAME +#define _XS SYSTEM_RESERVED_NAME +#define _BB SYSTEM_RESERVED_NAME +#define _CN SYSTEM_RESERVED_NAME +#define _DI SYSTEM_RESERVED_NAME +#define _LO SYSTEM_RESERVED_NAME +#define _PU SYSTEM_RESERVED_NAME +#define _SP SYSTEM_RESERVED_NAME +#define _UP SYSTEM_RESERVED_NAME +#define _XD SYSTEM_RESERVED_NAME + #include <{header}> // Make sure we don't swallow the definition of the macros we push/pop @@ -172,4 +185,5 @@ static_assert(__builtin_strcmp(STRINGIFY(move), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, ""); static_assert(__builtin_strcmp(STRINGIFY(erase), ST
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Use a different smart ptr type alias (#102089) (PR #103003)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103003 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Cherry pick: [Clang][Sema] Make UnresolvedLookupExprs in class scope explicit spec… (PR #102514)
github-actions[bot] wrote: @nga888 (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/102514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898) (PR #103048)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103048 >From 028cef89ecd71787f704d70cc99689b75425659c Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Tue, 13 Aug 2024 13:39:42 +0200 Subject: [PATCH] [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898) This is needed for MSVC link.exe to generate redirection metadata for hybrid patchable thunks. (cherry picked from commit d550ada5ab6cd6e49de71ac4c9aa27ced4c11de0) --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 7 ++ .../AArch64/arm64ec-hybrid-patchable.ll | 24 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 3c9b07ad45bf24..c64454cc253c35 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M, StringRef ExpStr = cast(Node->getOperand(0))->getString(); MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr); MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName()); + + OutStreamer->beginCOFFSymbolDef(ExpSym); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(Sym); OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll index 64fb5b36b2c623..1ed6a273338abb 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll @@ -240,6 +240,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .section.drectve,"yni" ; CHECK-NEXT: .ascii " 
/EXPORT:exp" +; CHECK-NEXT: .def"EXP+#func"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .deffunc; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -252,6 +256,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#func", "#func$hybpatch_thunk"{{$}} +; CHECK-NEXT: .def"EXP+#has_varargs"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defhas_varargs; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -264,6 +272,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_varargs", "#has_varargs$hybpatch_thunk" +; CHECK-NEXT: .def"EXP+#has_sret"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defhas_sret; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -276,6 +288,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_sret", "#has_sret$hybpatch_thunk" +; CHECK-NEXT: .def"EXP+#exp"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defexp; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -295,18 +311,18 @@ define dso_local void @caller() nounwind { ; SYM: [78](sec 20)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x #exp$hybpatch_thunk ; SYM: [110](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x func ; SYM-NEXT: AUX indx 112 srch 3 -; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#func +; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#func ; SYM: [116](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x #func ; SYM-NEXT: AUX indx 53 srch 3 ; SYM: [122](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x has_varargs ; SYM-NEXT: AUX indx 124 srch 3 -; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#has_varargs +; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#has_varargs ; SYM-NEXT: [125](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x has_sret ; 
SYM-NEXT: AUX indx 127 srch 3 -; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#has_sret +; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#has_sret ; SYM-NEXT: [128](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x exp ; SYM-NEXT: AUX indx 130 srch 3 -; SYM-NEXT: [130](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#exp +; SYM-NEXT: [130](sec 0)(fl 0x00)
[llvm-branch-commits] [llvm] 028cef8 - [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898)
Author: Jacek Caban Date: 2024-08-15T09:35:14+02:00 New Revision: 028cef89ecd71787f704d70cc99689b75425659c URL: https://github.com/llvm/llvm-project/commit/028cef89ecd71787f704d70cc99689b75425659c DIFF: https://github.com/llvm/llvm-project/commit/028cef89ecd71787f704d70cc99689b75425659c.diff LOG: [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898) This is needed for MSVC link.exe to generate redirection metadata for hybrid patchable thunks. (cherry picked from commit d550ada5ab6cd6e49de71ac4c9aa27ced4c11de0) Added: Modified: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll Removed: diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 3c9b07ad45bf24..c64454cc253c35 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M, StringRef ExpStr = cast(Node->getOperand(0))->getString(); MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr); MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName()); + + OutStreamer->beginCOFFSymbolDef(ExpSym); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(Sym); OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll index 64fb5b36b2c623..1ed6a273338abb 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll @@ -240,6 +240,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: 
.section.drectve,"yni" ; CHECK-NEXT: .ascii " /EXPORT:exp" +; CHECK-NEXT: .def"EXP+#func"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .deffunc; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -252,6 +256,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#func", "#func$hybpatch_thunk"{{$}} +; CHECK-NEXT: .def"EXP+#has_varargs"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defhas_varargs; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -264,6 +272,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_varargs", "#has_varargs$hybpatch_thunk" +; CHECK-NEXT: .def"EXP+#has_sret"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defhas_sret; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -276,6 +288,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_sret", "#has_sret$hybpatch_thunk" +; CHECK-NEXT: .def"EXP+#exp"; +; CHECK-NEXT: .scl2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .defexp; ; CHECK-NEXT: .scl2; ; CHECK-NEXT: .type 32; @@ -295,18 +311,18 @@ define dso_local void @caller() nounwind { ; SYM: [78](sec 20)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x #exp$hybpatch_thunk ; SYM: [110](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x func ; SYM-NEXT: AUX indx 112 srch 3 -; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#func +; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#func ; SYM: [116](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x #func ; SYM-NEXT: AUX indx 53 srch 3 ; SYM: [122](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x has_varargs ; SYM-NEXT: AUX indx 124 srch 3 -; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#has_varargs +; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#has_varargs ; SYM-NEXT: [125](sec 0)(fl 
0x00)(ty 0)(scl 69) (nx 1) 0x has_sret ; SYM-NEXT: AUX indx 127 srch 3 -; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x EXP+#has_sret +; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x EXP+#has_sret ; SYM-NEXT: [128](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x exp ; SYM-NEXT: AUX indx 13
[llvm-branch-commits] [llvm] release/19.x: [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898) (PR #103048)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Use a different smart ptr type alias (#102089) (PR #103003)
github-actions[bot] wrote: @androm3da (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103003 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [PPC][AIX] Save/restore r31 when using base pointer (#100182) (PR #103301)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103301 >From 9c3d4f344685f7695bb8e05677d8684fa3aa1f72 Mon Sep 17 00:00:00 2001 From: Zaara Syeda Date: Wed, 7 Aug 2024 09:59:45 -0400 Subject: [PATCH] [PPC][AIX] Save/restore r31 when using base pointer (#100182) When the base pointer r30 is used to hold the stack pointer, r30 is spilled in the prologue. On AIX registers are saved from highest to lowest, so r31 also needs to be saved. Fixes https://github.com/llvm/llvm-project/issues/96411 (cherry picked from commit d07f106e512c08455b76cc1889ee48318e73c810) --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 14 -- llvm/test/CodeGen/PowerPC/aix-base-pointer.ll | 5 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 1963582ce68631..a57ed33bda9c77 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // R0 cannot be used as a base register, but it can be used as an // index in a store-indexed. int LastOffset = 0; -if (HasFP) { +if (HasFP) { // R0 += (FPOffset-LastOffset). // Need addic, since addi treats R0 as 0. BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) @@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // code. Same goes for the base pointer and the PIC base register. if (needsFP(MF)) SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); - if (RegInfo->hasBasePointer(MF)) + if (RegInfo->hasBasePointer(MF)) { SavedRegs.reset(RegInfo->getBaseRegister(MF)); +// On AIX, when BaseRegister(R30) is used, need to spill r31 too to match +// AIX trackback table requirement. +if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) && +Subtarget.isAIXABI()) { + assert( + (RegInfo->getBaseRegister(MF) == (isPPC64 ? 
PPC::X30 : PPC::R30)) && + "Invalid base register on AIX!"); + SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31); +} + } if (FI->usesPICBase()) SavedRegs.reset(PPC::R30); diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll index ab222d770360ce..5e66e5ec276389 100644 --- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll @@ -6,6 +6,7 @@ ; Use an overaligned buffer to force base-pointer usage. Test verifies: ; - base pointer register (r30) is saved/defined/restored. +; - frame pointer register (r31) is saved/defined/restored. ; - stack frame is allocated with correct alignment. ; - Address of %AlignedBuffer is calculated based off offset from the stack ; pointer. @@ -25,7 +26,9 @@ declare void @callee(ptr) ; 32BIT: subfic 0, 0, -224 ; 32BIT: stwux 1, 1, 0 ; 32BIT: addi 3, 1, 64 +; 32BIT: stw 31, -12(30) ; 32BIT: bl .callee +; 32BIT: lwz 31, -12(30) ; 32BIT: mr 1, 30 ; 32BIT: lwz 30, -16(1) @@ -36,6 +39,8 @@ declare void @callee(ptr) ; 64BIT: subfic 0, 0, -288 ; 64BIT: stdux 1, 1, 0 ; 64BIT: addi 3, 1, 128 +; 64BIT: std 31, -16(30) ; 64BIT: bl .callee +; 64BIT: ld 31, -16(30) ; 64BIT: mr 1, 30 ; 64BIT: ld 30, -24(1) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 9c3d4f3 - [PPC][AIX] Save/restore r31 when using base pointer (#100182)
Author: Zaara Syeda Date: 2024-08-15T09:35:44+02:00 New Revision: 9c3d4f344685f7695bb8e05677d8684fa3aa1f72 URL: https://github.com/llvm/llvm-project/commit/9c3d4f344685f7695bb8e05677d8684fa3aa1f72 DIFF: https://github.com/llvm/llvm-project/commit/9c3d4f344685f7695bb8e05677d8684fa3aa1f72.diff LOG: [PPC][AIX] Save/restore r31 when using base pointer (#100182) When the base pointer r30 is used to hold the stack pointer, r30 is spilled in the prologue. On AIX registers are saved from highest to lowest, so r31 also needs to be saved. Fixes https://github.com/llvm/llvm-project/issues/96411 (cherry picked from commit d07f106e512c08455b76cc1889ee48318e73c810) Added: Modified: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm/test/CodeGen/PowerPC/aix-base-pointer.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 1963582ce68631..a57ed33bda9c77 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // R0 cannot be used as a base register, but it can be used as an // index in a store-indexed. int LastOffset = 0; -if (HasFP) { +if (HasFP) { // R0 += (FPOffset-LastOffset). // Need addic, since addi treats R0 as 0. BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) @@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // code. Same goes for the base pointer and the PIC base register. if (needsFP(MF)) SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); - if (RegInfo->hasBasePointer(MF)) + if (RegInfo->hasBasePointer(MF)) { SavedRegs.reset(RegInfo->getBaseRegister(MF)); +// On AIX, when BaseRegister(R30) is used, need to spill r31 too to match +// AIX trackback table requirement. +if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) && +Subtarget.isAIXABI()) { + assert( + (RegInfo->getBaseRegister(MF) == (isPPC64 ? 
PPC::X30 : PPC::R30)) && + "Invalid base register on AIX!"); + SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31); +} + } if (FI->usesPICBase()) SavedRegs.reset(PPC::R30); diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll index ab222d770360ce..5e66e5ec276389 100644 --- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll +++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll @@ -6,6 +6,7 @@ ; Use an overaligned buffer to force base-pointer usage. Test verifies: ; - base pointer register (r30) is saved/defined/restored. +; - frame pointer register (r31) is saved/defined/restored. ; - stack frame is allocated with correct alignment. ; - Address of %AlignedBuffer is calculated based off offset from the stack ; pointer. @@ -25,7 +26,9 @@ declare void @callee(ptr) ; 32BIT: subfic 0, 0, -224 ; 32BIT: stwux 1, 1, 0 ; 32BIT: addi 3, 1, 64 +; 32BIT: stw 31, -12(30) ; 32BIT: bl .callee +; 32BIT: lwz 31, -12(30) ; 32BIT: mr 1, 30 ; 32BIT: lwz 30, -16(1) @@ -36,6 +39,8 @@ declare void @callee(ptr) ; 64BIT: subfic 0, 0, -288 ; 64BIT: stdux 1, 1, 0 ; 64BIT: addi 3, 1, 128 +; 64BIT: std 31, -16(30) ; 64BIT: bl .callee +; 64BIT: ld 31, -16(30) ; 64BIT: mr 1, 30 ; 64BIT: ld 30, -24(1) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [PPC][AIX] Save/restore r31 when using base pointer (#100182) (PR #103301)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103301 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk symbol as a function. (#102898) (PR #103048)
github-actions[bot] wrote: @cjacek (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103048 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Fix annotation of braces enclosing stringification (#102998) (PR #103403)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103403 >From ca3f3f63275a683c170251be30430a05428113a9 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 13 Aug 2024 12:39:33 -0700 Subject: [PATCH] [clang-format] Fix annotation of braces enclosing stringification (#102998) Fixes #102937. (cherry picked from commit ee2359968fa307ef45254c816e14df33374168cd) --- clang/lib/Format/UnwrappedLineParser.cpp | 3 +++ clang/unittests/Format/TokenAnnotatorTest.cpp | 11 +++ 2 files changed, 14 insertions(+) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d406a531a5c0c2..688c7c5b1e977f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -507,6 +507,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { if (!Line->InMacroBody && !Style.isTableGen()) { // Skip PPDirective lines and comments. while (NextTok->is(tok::hash)) { +NextTok = Tokens->getNextToken(); +if (NextTok->is(tok::pp_not_keyword)) + break; do { NextTok = Tokens->getNextToken(); } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 0b5475ea95989c..c20b50d14b80b1 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3178,6 +3178,17 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_BRACE_KIND(Tokens[17], BK_Block); EXPECT_BRACE_KIND(Tokens[22], BK_Block); EXPECT_BRACE_KIND(Tokens[26], BK_Block); + + Tokens = annotate("{\n" +"#define M(x) \\\n" +" return {#x};\n" +"}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[0], BK_Block); + EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[13], BK_Block); } TEST_F(TokenAnnotatorTest, 
UnderstandsElaboratedTypeSpecifier) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] ca3f3f6 - [clang-format] Fix annotation of braces enclosing stringification (#102998)
Author: Owen Pan Date: 2024-08-15T09:36:13+02:00 New Revision: ca3f3f63275a683c170251be30430a05428113a9 URL: https://github.com/llvm/llvm-project/commit/ca3f3f63275a683c170251be30430a05428113a9 DIFF: https://github.com/llvm/llvm-project/commit/ca3f3f63275a683c170251be30430a05428113a9.diff LOG: [clang-format] Fix annotation of braces enclosing stringification (#102998) Fixes #102937. (cherry picked from commit ee2359968fa307ef45254c816e14df33374168cd) Added: Modified: clang/lib/Format/UnwrappedLineParser.cpp clang/unittests/Format/TokenAnnotatorTest.cpp Removed: diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d406a531a5c0c2..688c7c5b1e977f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -507,6 +507,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { if (!Line->InMacroBody && !Style.isTableGen()) { // Skip PPDirective lines and comments. while (NextTok->is(tok::hash)) { +NextTok = Tokens->getNextToken(); +if (NextTok->is(tok::pp_not_keyword)) + break; do { NextTok = Tokens->getNextToken(); } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 0b5475ea95989c..c20b50d14b80b1 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3178,6 +3178,17 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_BRACE_KIND(Tokens[17], BK_Block); EXPECT_BRACE_KIND(Tokens[22], BK_Block); EXPECT_BRACE_KIND(Tokens[26], BK_Block); + + Tokens = annotate("{\n" +"#define M(x) \\\n" +" return {#x};\n" +"}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_brace, TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[0], BK_Block); + EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[13], 
BK_Block); } TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Fix annotation of braces enclosing stringification (#102998) (PR #103403)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103403 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) (PR #104027)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104027 >From 67b06b42973c0a207a44314524376551f4d19a4a Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Wed, 14 Aug 2024 07:38:14 -0700 Subject: [PATCH] [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) ... whereever we have the Decl for it, and even when we don't keep the SourceLocation of it aimed at the call site. Fixes: #102983 (cherry picked from commit 019ef522756886caa258daf68d877f84abc1b878) --- clang/lib/CodeGen/Targets/AArch64.cpp | 14 -- clang/test/CodeGen/aarch64-soft-float-abi-errors.c | 10 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 1dec3cd40ebd21..97381f673c2849 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -840,12 +840,13 @@ static bool isStreamingCompatible(const FunctionDecl *F) { static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags, const StringRef ABIName, const AArch64ABIInfo &ABIInfo, - const QualType &Ty, const NamedDecl *D) { + const QualType &Ty, const NamedDecl *D, + SourceLocation loc) { const Type *HABase = nullptr; uint64_t HAMembers = 0; if (Ty->isFloatingType() || Ty->isVectorType() || ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) { -Diags.Report(D->getLocation(), diag::err_target_unsupported_type_for_abi) +Diags.Report(loc, diag::err_target_unsupported_type_for_abi) << D->getDeclName() << Ty << ABIName; } } @@ -860,10 +861,11 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, - FuncDecl->getReturnType(), FuncDecl); + FuncDecl->getReturnType(), FuncDecl, + FuncDecl->getLocation()); for (ParmVarDecl *PVD : FuncDecl->parameters()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(), - PVD); + PVD, FuncDecl->getLocation()); } } } @@ 
-908,11 +910,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat( return; diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType, - Caller); + Callee ? Callee : Caller, CallLoc); for (const CallArg &Arg : Args) diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(), - Caller); + Callee ? Callee : Caller, CallLoc); } void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, diff --git a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c index 95b7668aca1b0e..6961ee4b6f 100644 --- a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c +++ b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c @@ -69,7 +69,7 @@ inline void test_float_arg_inline(float a) {} inline void test_float_arg_inline_used(float a) {} // nofp-hard-opt-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}} void use_inline() { test_float_arg_inline_used(1.0f); } -// nofp-hard-error@-1 {{'use_inline' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'test_float_arg_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}} // The always_inline attribute causes an inline function to always be // code-genned, even at -O0, so we always emit the error. @@ -77,7 +77,7 @@ __attribute((always_inline)) inline void test_float_arg_always_inline_used(float a) {} // nofp-hard-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}} void use_always_inline() { test_float_arg_always_inline_used(1.0f); } -// nofp-hard-error@-1 {{'use_always_inline' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'test_float_arg_always_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}} // Floating-point expressions, global variables and local variables do not // affect the ABI, so are allowed. 
GCC does reject some uses of floating point @@ -103,9 +103,9 @@ int test_var_double(int a) { extern void extern_float_arg(float); extern float extern_float_ret(void); void call_extern_float_arg() { extern_float_arg(1.0f); } -// nofp-hard-error@-1 {{'call_extern_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'extern_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}} void call_extern_float_ret() {
[llvm-branch-commits] [llvm] release/19.x: [PPC][AIX] Save/restore r31 when using base pointer (#100182) (PR #103301)
github-actions[bot] wrote: @syzaara (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103301 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 67b06b4 - [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392)
Author: Jon Roelofs Date: 2024-08-15T09:36:31+02:00 New Revision: 67b06b42973c0a207a44314524376551f4d19a4a URL: https://github.com/llvm/llvm-project/commit/67b06b42973c0a207a44314524376551f4d19a4a DIFF: https://github.com/llvm/llvm-project/commit/67b06b42973c0a207a44314524376551f4d19a4a.diff LOG: [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) ... whereever we have the Decl for it, and even when we don't keep the SourceLocation of it aimed at the call site. Fixes: #102983 (cherry picked from commit 019ef522756886caa258daf68d877f84abc1b878) Added: Modified: clang/lib/CodeGen/Targets/AArch64.cpp clang/test/CodeGen/aarch64-soft-float-abi-errors.c Removed: diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 1dec3cd40ebd21..97381f673c2849 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -840,12 +840,13 @@ static bool isStreamingCompatible(const FunctionDecl *F) { static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags, const StringRef ABIName, const AArch64ABIInfo &ABIInfo, - const QualType &Ty, const NamedDecl *D) { + const QualType &Ty, const NamedDecl *D, + SourceLocation loc) { const Type *HABase = nullptr; uint64_t HAMembers = 0; if (Ty->isFloatingType() || Ty->isVectorType() || ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) { -Diags.Report(D->getLocation(), diag::err_target_unsupported_type_for_abi) +Diags.Report(loc, diag::err_target_unsupported_type_for_abi) << D->getDeclName() << Ty << ABIName; } } @@ -860,10 +861,11 @@ void AArch64TargetCodeGenInfo::checkFunctionABI( if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, - FuncDecl->getReturnType(), FuncDecl); + FuncDecl->getReturnType(), FuncDecl, + FuncDecl->getLocation()); for (ParmVarDecl *PVD : FuncDecl->parameters()) { diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(), - PVD); + PVD, 
FuncDecl->getLocation()); } } } @@ -908,11 +910,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat( return; diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType, - Caller); + Callee ? Callee : Caller, CallLoc); for (const CallArg &Arg : Args) diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(), - Caller); + Callee ? Callee : Caller, CallLoc); } void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM, diff --git a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c index 95b7668aca1b0e..6961ee4b6f 100644 --- a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c +++ b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c @@ -69,7 +69,7 @@ inline void test_float_arg_inline(float a) {} inline void test_float_arg_inline_used(float a) {} // nofp-hard-opt-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}} void use_inline() { test_float_arg_inline_used(1.0f); } -// nofp-hard-error@-1 {{'use_inline' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'test_float_arg_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}} // The always_inline attribute causes an inline function to always be // code-genned, even at -O0, so we always emit the error. @@ -77,7 +77,7 @@ __attribute((always_inline)) inline void test_float_arg_always_inline_used(float a) {} // nofp-hard-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}} void use_always_inline() { test_float_arg_always_inline_used(1.0f); } -// nofp-hard-error@-1 {{'use_always_inline' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'test_float_arg_always_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}} // Floating-point expressions, global variables and local variables do not // affect the ABI, so are allowed. 
GCC does reject some uses of floating point @@ -103,9 +103,9 @@ int test_var_double(int a) { extern void extern_float_arg(float); extern float extern_float_ret(void); void call_extern_float_arg() { extern_float_arg(1.0f); } -// nofp-hard-error@-1 {{'call_extern_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}} +// nofp-hard-error@-1 {{'extern_float_arg'
[llvm-branch-commits] [clang] release/19.x: [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) (PR #104027)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang-format] Fix annotation of braces enclosing stringification (#102998) (PR #103403)
github-actions[bot] wrote: @owenca (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103403 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) (PR #104027)
github-actions[bot] wrote: @jroelofs (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/104027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)
tru wrote: So we should remove this tool from the 19.x release? Can someone confirm? https://github.com/llvm/llvm-project/pull/103886 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) (PR #104117)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104117 >From 4d4a4100f68dfc50bd3b67de40101761be8ffdb7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 14 Aug 2024 14:04:22 -0400 Subject: [PATCH] [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) Fixes #101960 (cherry picked from commit 4d08bb11eea5907fa9cdfe4c7bc9d5c91e79c6a7) --- libcxx/include/complex| 9 +++-- libcxx/include/optional | 9 +++-- .../gh_101960_ambiguous_ctor.pass.cpp | 38 +++ .../gh_101960_internal_ctor.compile.pass.cpp | 28 ++ 4 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp diff --git a/libcxx/include/complex b/libcxx/include/complex index 22271acaf7358d..e6534025de57e5 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -421,7 +421,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(float __re = 0.0f, float __im = 0.0f) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex float __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex float __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex& __c); @@ -517,7 +518,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(double __re = 0.0, double __im = 0.0) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex double __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex& __c); @@ -617,7 +619,8 @@ public: 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(long double __re = 0.0L, long double __im = 0.0L) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex long double __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex long double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex& __c); diff --git a/libcxx/include/optional b/libcxx/include/optional index f9cbcbfa595d1a..41d7515a2b6892 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -301,7 +301,7 @@ struct __optional_destruct_base<_Tp, false> { # if _LIBCPP_STD_VER >= 23 template - _LIBCPP_HIDE_FROM_ABI constexpr __optional_destruct_base( + _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_destruct_base( __optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) : __val_(std::invoke(std::forward<_Fp>(__f), std::forward<_Args>(__args)...)), __engaged_(true) {} # endif @@ -707,8 +707,11 @@ public: } # if _LIBCPP_STD_VER >= 23 - template - _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(__optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Tag, _Fp&& __f, _Args&&... __args) : __base(__optional_construct_from_invoke_tag{}, std::forward<_Fp>(__f), std::forward<_Args>(__args)...) {} # endif diff --git a/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp new file mode 100644 index 00..bffe8764386a75 --- /dev/null +++ b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp @@ -0,0 +1,38 @@ +//===--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +// + +// Regression test for https://github.com/llvm/llvm-project/issues/101960 where we used to +// trigger an ambiguous constructor. + +#include +#include + +struct NastyConvertible { + template + operator T() const { +return T(0); + } +}; + +template +void test() { + NastyConvertible nasty; + std::complex x(nasty, nasty); + assert(x.real() == T(0)); + assert(x.imag() == T(0)); +} + +int main(int, char**) { + test(); + test(); + test(); + + return 0; +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.c
[llvm-branch-commits] [libcxx] 4d4a410 - [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409)
Author: Louis Dionne Date: 2024-08-15T09:40:37+02:00 New Revision: 4d4a4100f68dfc50bd3b67de40101761be8ffdb7 URL: https://github.com/llvm/llvm-project/commit/4d4a4100f68dfc50bd3b67de40101761be8ffdb7 DIFF: https://github.com/llvm/llvm-project/commit/4d4a4100f68dfc50bd3b67de40101761be8ffdb7.diff LOG: [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) Fixes #101960 (cherry picked from commit 4d08bb11eea5907fa9cdfe4c7bc9d5c91e79c6a7) Added: libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp Modified: libcxx/include/complex libcxx/include/optional Removed: diff --git a/libcxx/include/complex b/libcxx/include/complex index 22271acaf7358d..e6534025de57e5 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -421,7 +421,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(float __re = 0.0f, float __im = 0.0f) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex float __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex float __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex& __c); @@ -517,7 +518,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(double __re = 0.0, double __im = 0.0) : __re_(__re), __im_(__im) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex double __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex& __c); @@ -617,7 +619,8 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(long double __re = 0.0L, long double __im = 0.0L) : __re_(__re), __im_(__im) {} - 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex long double __v) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex long double __v) : __re_(__real__ __v), __im_(__imag__ __v) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex& __c); diff --git a/libcxx/include/optional b/libcxx/include/optional index f9cbcbfa595d1a..41d7515a2b6892 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -301,7 +301,7 @@ struct __optional_destruct_base<_Tp, false> { # if _LIBCPP_STD_VER >= 23 template - _LIBCPP_HIDE_FROM_ABI constexpr __optional_destruct_base( + _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_destruct_base( __optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) : __val_(std::invoke(std::forward<_Fp>(__f), std::forward<_Args>(__args)...)), __engaged_(true) {} # endif @@ -707,8 +707,11 @@ public: } # if _LIBCPP_STD_VER >= 23 - template - _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(__optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args) + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Tag, _Fp&& __f, _Args&&... __args) : __base(__optional_construct_from_invoke_tag{}, std::forward<_Fp>(__f), std::forward<_Args>(__args)...) {} # endif diff --git a/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp new file mode 100644 index 00..bffe8764386a75 --- /dev/null +++ b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp @@ -0,0 +1,38 @@ +//===--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +// + +// Regression test for https://github.com/llvm/llvm-project/issues/101960 where we used to +// trigger an ambiguous constructor. + +#include +#include + +struct NastyConvertible { + template + operator T() const { +return T(0); + } +}; + +template +void test() { + NastyConvertible nasty; + std::complex x(nasty, nasty); + assert(x.real() == T(0)); + assert(x.imag() == T(0)); +} + +int main(int, char**) { + test(); + test(); + test(); + + return 0; +} diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp b/libcxx/test/std/utilities/optional/op
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) (PR #104117)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104117 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) (PR #104117)
github-actions[bot] wrote: @ldionne (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/104117 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
sharadhr wrote: @ChuanqiXu9, is there anything else that needs to be done here? There's a merge conflict; I could resolve that. https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Use a more consistent pass name for DXILTranslateMetadata (PR #104249)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104249 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Use a more consistent pass name for DXILTranslateMetadata (PR #104249)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104249 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
https://github.com/ChuanqiXu9 milestoned https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
ChuanqiXu9 wrote: > @ChuanqiXu9, is there anything else that needs to be done here? There's a > merge conflict; I could resolve that. If there is a merge conflict, we need to resolve it. As for the merge request, we need to wait for the release manager to have time to look at this. Maybe this is because we forgot to mention that this belongs to 19.x. CC @tru manually. https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.handle.fromBinding` to DXIL ops (PR #104251)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104251 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.handle.fromBinding` to DXIL ops (PR #104251)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104251 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.handle.fromBinding` to DXIL ops (PR #104251)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/104251 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.handle.fromBinding` to DXIL ops (PR #104251)
@@ -0,0 +1,63 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +define void @test_bindings() { + ; RWBuffer Buf : register(u5, space3) + %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( + i32 3, i32 5, i32 1, i32 4, i1 false) + ; CHECK: [[BUF0:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 4, i1 false) + ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 }) + + ; RWBuffer Buf : register(u7, space2) + %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_0t( + i32 2, i32 7, i32 1, i32 6, i1 false) + ; CHECK: [[BUF1:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 6, i1 false) + ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 }) + + ; Buffer Buf[24] : register(t3, space5) + %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) bogner wrote: It was technically `Buf[1]` as written, as the index is from the beginning of the binding space (matching DXC) and the test case didn't match the comment. I've updated the docs slightly to make that clearer and adjusted the test so the comment and test case agree. https://github.com/llvm/llvm-project/pull/104251 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
https://github.com/sharadhr updated https://github.com/llvm/llvm-project/pull/102438 >From 5248f86c4ef2aa5ce796b6b985ab1c7bbb65d763 Mon Sep 17 00:00:00 2001 From: Sharadh Rajaraman Date: Sun, 14 Jul 2024 11:05:57 +0100 Subject: [PATCH 1/3] `TY_ModuleFile` should be a 'CXX' file type --- clang/lib/Driver/Types.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index a7b6b9000e1d2b..c6a03f4491dd79 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -242,7 +242,7 @@ bool types::isCXX(ID Id) { case TY_CXXHUHeader: case TY_PP_CXXHeaderUnit: case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: - case TY_CXXModule: case TY_PP_CXXModule: + case TY_CXXModule: case TY_PP_CXXModule: case TY_ModuleFile: case TY_PP_CLCXX: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: case TY_HIP: >From 1807c2031d3603872e07e70508abb2f2396655a6 Mon Sep 17 00:00:00 2001 From: Sharadh Rajaraman <3754080+shara...@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:10:12 +0100 Subject: [PATCH 2/3] Resolve `clang-format` issues --- clang/lib/Driver/Types.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index c6a03f4491dd79..2b9b391c19c9fd 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -242,7 +242,9 @@ bool types::isCXX(ID Id) { case TY_CXXHUHeader: case TY_PP_CXXHeaderUnit: case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: - case TY_CXXModule: case TY_PP_CXXModule: case TY_ModuleFile: + case TY_CXXModule: + case TY_PP_CXXModule: + case TY_ModuleFile: case TY_PP_CLCXX: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: case TY_HIP: >From 6bdb828957cbe9910dabe1589e6c33684cb7aa89 Mon Sep 17 00:00:00 2001 From: Sharadh Rajaraman Date: Wed, 7 Aug 2024 20:41:39 +0100 Subject: [PATCH 3/3] Add a test to check that warning is gone --- clang/test/Driver/cl-cxx20-modules.cppm | 16 1 file changed, 16 
insertions(+) create mode 100644 clang/test/Driver/cl-cxx20-modules.cppm diff --git a/clang/test/Driver/cl-cxx20-modules.cppm b/clang/test/Driver/cl-cxx20-modules.cppm new file mode 100644 index 00..43dbf517485a05 --- /dev/null +++ b/clang/test/Driver/cl-cxx20-modules.cppm @@ -0,0 +1,16 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang_cl /std:c++20 --precompile -### -- %s 2>&1 | FileCheck --check-prefix=PRECOMPILE %s +// PRECOMPILE: -emit-module-interface + +// RUN: %clang_cl /std:c++20 --fmodule-file=Foo=Foo.pcm -### -- %s 2>&1 | FileCheck --check-prefix=FMODULEFILE %s +// FMODULEFILE: -fmodule-file=Foo=Foo.pcm + +// RUN: %clang_cl /std:c++20 --fprebuilt-module-path=. -### -- %s 2>&1 | FileCheck --check-prefix=FPREBUILT %s +// FPREBUILT: -fprebuilt-module-path=. + +// RUN: %clang_cl %t/test.pcm /std:c++20 -### 2>&1 | FileCheck --check-prefix=CPP20WARNING %t/test.pcm + +//--- test.pcm +// CPP20WARNING-NOT: clang-cl: warning: argument unused during compilation: '/std:c++20' [-Wunused-command-line-argument] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104253 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/104253 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/104253 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)
@@ -289,6 +289,43 @@ class OpLowerer { }); } + void lowerTypedBufferStore(Function &F) { +IRBuilder<> &IRB = OpBuilder.getIRB(); +Type *Int8Ty = IRB.getInt8Ty(); +Type *Int32Ty = IRB.getInt32Ty(); + +replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Index0 = CI->getArgOperand(1); + Value *Index1 = UndefValue::get(Int32Ty); + // For typed stores, the mask must always cover all four elements. + Constant *Mask = ConstantInt::get(Int8Ty, 0xF); + + Value *Data = CI->getArgOperand(2); bogner wrote: The argument to `typedBufferStore` must be a vector of 4 elements, as we must store 16 bytes exactly. The only exception to this is a vector of 2 doubles, which isn't supported yet (I've filed #104423 for that). That said, it'd probably be nice to get decent errors here if we get bad textual IR or a frontend other than clang messes up, so I've added better errors in the latest. https://github.com/llvm/llvm-project/pull/104253 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
@@ -45,61 +45,190 @@ cl::OptionCategory GICombinerOptionCategory( ); } // end namespace llvm -/// This class acts as the glue the joins the CombinerHelper to the overall +/// This class acts as the glue that joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the /// modifications it makes to the MIR to the GISelChangeObserver and the -/// observer subclass will act on these events. In this case, instruction -/// erasure will cancel any future visits to the erased instruction and -/// instruction creation will schedule that instruction for a future visit. -/// Other Combiner implementations may require more complex behaviour from -/// their GISelChangeObserver subclass. +/// observer subclass will act on these events. class Combiner::WorkListMaintainer : public GISelChangeObserver { - using WorkListTy = GISelWorkList<512>; - WorkListTy &WorkList; +protected: +#ifndef NDEBUG tobias-stadler wrote: Good point, but I don't think it's needed in this case, because you can't get the complete type of WorkListMaintainer outside of Combiner.cpp. Combiner.h only contains the declaration. Thoughts? https://github.com/llvm/llvm-project/pull/102163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
https://github.com/tobias-stadler edited https://github.com/llvm/llvm-project/pull/102163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
@@ -45,61 +45,190 @@ cl::OptionCategory GICombinerOptionCategory( ); } // end namespace llvm -/// This class acts as the glue the joins the CombinerHelper to the overall +/// This class acts as the glue that joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the /// modifications it makes to the MIR to the GISelChangeObserver and the -/// observer subclass will act on these events. In this case, instruction -/// erasure will cancel any future visits to the erased instruction and -/// instruction creation will schedule that instruction for a future visit. -/// Other Combiner implementations may require more complex behaviour from -/// their GISelChangeObserver subclass. +/// observer subclass will act on these events. class Combiner::WorkListMaintainer : public GISelChangeObserver { - using WorkListTy = GISelWorkList<512>; - WorkListTy &WorkList; +protected: +#ifndef NDEBUG arsenm wrote: Ok, I thought this was in the header https://github.com/llvm/llvm-project/pull/102163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)
@@ -289,6 +289,43 @@ class OpLowerer { }); } + void lowerTypedBufferStore(Function &F) { +IRBuilder<> &IRB = OpBuilder.getIRB(); +Type *Int8Ty = IRB.getInt8Ty(); +Type *Int32Ty = IRB.getInt32Ty(); + +replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Index0 = CI->getArgOperand(1); + Value *Index1 = UndefValue::get(Int32Ty); + // For typed stores, the mask must always cover all four elements. + Constant *Mask = ConstantInt::get(Int8Ty, 0xF); + + Value *Data = CI->getArgOperand(2); python3kgae wrote: Do we generate typedBufferStore for SPIR-V, or will it be a different intrinsic that supports different vector sizes? https://github.com/llvm/llvm-project/pull/104253 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AArch64: Use consistent atomicrmw expansion for FP operations (PR #103702)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/103702 >From b8fbc44dbbfe3f0bcfc6c72c4beaf279bc7a99de Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Aug 2024 00:43:03 +0400 Subject: [PATCH 1/2] AArch64: Use consistent atomicrmw expansion for FP operations Use LLSC or cmpxchg in the same cases as for the unsupported integer operations. This required some fixups to the LLSC implementation to deal with the fp128 case. --- .../Target/AArch64/AArch64ISelLowering.cpp| 30 +- llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll | 392 llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll | 418 +- llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll | 418 +- llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll | 392 5 files changed, 420 insertions(+), 1230 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 97fb2c5f552731..f059e79b9024a6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27067,9 +27067,6 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes."); - if (AI->isFloatingPointOperation()) -return AtomicExpansionKind::CmpXChg; - bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 && (AI->getOperation() == AtomicRMWInst::Xchg || AI->getOperation() == AtomicRMWInst::Or || @@ -27079,7 +27076,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { // Nand is not supported in LSE. // Leave 128 bits to LLSC or CmpXChg. 
- if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 && + !AI->isFloatingPointOperation()) { if (Subtarget->hasLSE()) return AtomicExpansionKind::None; if (Subtarget->outlineAtomics()) { @@ -27152,10 +27150,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder, Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); -Lo = Builder.CreateZExt(Lo, ValueTy, "lo64"); -Hi = Builder.CreateZExt(Hi, ValueTy, "hi64"); -return Builder.CreateOr( -Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64"); + +auto *Int128Ty = Type::getInt128Ty(Builder.getContext()); +Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64"); +Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64"); + +Value *Or = Builder.CreateOr( +Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64"); +return Builder.CreateBitCast(Or, ValueTy); } Type *Tys[] = { Addr->getType() }; @@ -27166,8 +27168,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder, const DataLayout &DL = M->getDataLayout(); IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy)); CallInst *CI = Builder.CreateCall(Ldxr, Addr); - CI->addParamAttr( - 0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy)); + CI->addParamAttr(0, Attribute::get(Builder.getContext(), + Attribute::ElementType, IntEltTy)); Value *Trunc = Builder.CreateTrunc(CI, IntEltTy); return Builder.CreateBitCast(Trunc, ValueTy); @@ -27193,9 +27195,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder, IsRelease ? 
Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp; Function *Stxr = Intrinsic::getDeclaration(M, Int); Type *Int64Ty = Type::getInt64Ty(M->getContext()); +Type *Int128Ty = Type::getInt128Ty(M->getContext()); + +Value *CastVal = Builder.CreateBitCast(Val, Int128Ty); -Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo"); -Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi"); +Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo"); +Value *Hi = +Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi"); return Builder.CreateCall(Stxr, {Lo, Hi, Addr}); } diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll index f95caf325b197c..2c6461097f7d9b 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll @@ -6,33 +6,17 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 { ; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2: ; NOLSE: // %bb.0: ; NOLSE-NEXT:fcvt s1, h0 -; NOLSE-NEXT:ldr h0, [x0] -; NOLSE-NEXT:b .LBB0_2 ; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start -; NOLSE-NEXT:// in Loop: Header=BB0_2 Depth=1 -; NOLSE-NEXT:fmov s0, w10 -; NOLSE-NEXT:cmp w10, w9, uxth -; NOLSE-NEXT:
[llvm-branch-commits] [llvm] AArch64: Use consistent atomicrmw expansion for FP operations (PR #103702)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/103702 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
https://github.com/bogner created https://github.com/llvm/llvm-project/pull/104447 Generate metadata from target extension type based resources. Part of #91366 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata (PR #104446)
https://github.com/bogner created https://github.com/llvm/llvm-project/pull/104446 Move the module level logic for resources into the pretty printer and translate metadata passes rather than embedding them in the DXILResource helper. This will make it easier to migrate towards the target extension type based approach to resources. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
https://github.com/bogner created https://github.com/llvm/llvm-project/pull/104448 Handle target extension type resources when printing resources to textual IR. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata (PR #104446)
llvmbot wrote: @llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) Changes Move the module level logic for resources into the pretty printer and translate metadata passes rather than embedding them in the DXILResource helper. This will make it easier to migrate towards the target extension type based approach to resources. --- Full diff: https://github.com/llvm/llvm-project/pull/104446.diff 4 Files Affected: - (modified) llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp (+16-2) - (modified) llvm/lib/Target/DirectX/DXILResource.cpp (+8-31) - (modified) llvm/lib/Target/DirectX/DXILResource.h (+7-2) - (modified) llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp (+25-1) ``diff diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index 7d2abb7078b8a..c57631cc4c8b6 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -12,13 +12,27 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; static void prettyPrintResources(raw_ostream &OS, const dxil::Resources &MDResources) { - MDResources.print(OS); + // Column widths are arbitrary but match the widths DXC uses. 
+ OS << ";\n; Resource Bindings:\n;\n"; + OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,16}\n", +"Name", "Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); + OS << formatv( + "; {0,-+30} {1,-+10} {2,-+7} {3,-+11} {4,-+7} {5,-+14} {6,-+16}\n", "", + "", "", "", "", "", ""); + + if (MDResources.hasCBuffers()) +MDResources.printCBuffers(OS); + if (MDResources.hasUAVs()) +MDResources.printUAVs(OS); + + OS << ";\n"; } PreservedAnalyses DXILPrettyPrinterPass::run(Module &M, @@ -63,7 +77,7 @@ INITIALIZE_PASS_END(DXILPrettyPrinterLegacy, "dxil-pretty-printer", bool DXILPrettyPrinterLegacy::runOnModule(Module &M) { dxil::Resources &Res = getAnalysis().getDXILResource(); - Res.print(OS); + prettyPrintResources(OS, Res); return false; } diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp b/llvm/lib/Target/DirectX/DXILResource.cpp index 8e5b9867e6661..f027283b70521 100644 --- a/llvm/lib/Target/DirectX/DXILResource.cpp +++ b/llvm/lib/Target/DirectX/DXILResource.cpp @@ -333,37 +333,14 @@ template MDNode *ResourceTable::write(Module &M) const { return MDNode::get(M.getContext(), MDs); } -void Resources::write(Module &M) const { - Metadata *ResourceMDs[4] = {nullptr, nullptr, nullptr, nullptr}; - - ResourceMDs[1] = UAVs.write(M); - - ResourceMDs[2] = CBuffers.write(M); - - bool HasResource = ResourceMDs[0] != nullptr || ResourceMDs[1] != nullptr || - ResourceMDs[2] != nullptr || ResourceMDs[3] != nullptr; - - if (HasResource) { -NamedMDNode *DXResMD = M.getOrInsertNamedMetadata("dx.resources"); -DXResMD->addOperand(MDNode::get(M.getContext(), ResourceMDs)); - } - - NamedMDNode *Entry = M.getNamedMetadata("hlsl.uavs"); - if (Entry) -Entry->eraseFromParent(); +Metadata *Resources::writeUAVs(Module &M) const { return UAVs.write(M); } +void Resources::printUAVs(raw_ostream &OS) const { UAVs.print(OS); } +Metadata *Resources::writeCBuffers(Module &M) const { + return CBuffers.write(M); } +void Resources::printCBuffers(raw_ostream &OS) const { CBuffers.print(OS); } 
-void Resources::print(raw_ostream &O) const { - O << ";\n" -<< "; Resource Bindings:\n" -<< ";\n" -<< "; Name Type Format Dim " - "ID HLSL Bind Count\n" -<< "; -- -- --- --- " - "--- -- --\n"; - - CBuffers.print(O); - UAVs.print(O); +void Resources::dump() const { + printCBuffers(dbgs()); + printUAVs(dbgs()); } - -void Resources::dump() const { print(dbgs()); } diff --git a/llvm/lib/Target/DirectX/DXILResource.h b/llvm/lib/Target/DirectX/DXILResource.h index 06902fe2b87b0..812729bc4dc57 100644 --- a/llvm/lib/Target/DirectX/DXILResource.h +++ b/llvm/lib/Target/DirectX/DXILResource.h @@ -103,6 +103,7 @@ template class ResourceTable { public: ResourceTable(StringRef Name) : MDName(Name) {} void collect(Module &M); + bool empty() const { return Data.empty(); } MDNode *write(Module &M) const; void print(raw_ostream &O) const; }; @@ -117,8 +118,12 @@ class Resources { public: void collect(Module &M); - void write(Module &M) const; - void print(raw_ostream &O) const; + bool hasUAVs() const { return !UAVs.empty(); } + Metadata *writeUAVs(Module &M) const; + void printUAVs(raw_ostream &OS) const; + bool hasCBuffers() const { return !CBuffers.empty(); } + Metadata *writeCBuffers(Module &M) const; + void printCBuffers(raw_ostream &OS) const; LLVM_DUMP_METHOD void dump() const; };
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Justin Bogner (bogner) Changes Handle target extension type resources when printing resources to textual IR. --- Full diff: https://github.com/llvm/llvm-project/pull/104448.diff 5 Files Affected: - (modified) llvm/include/llvm/Analysis/DXILResource.h (+15-1) - (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+2-2) - (modified) llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp (+222-5) - (modified) llvm/test/CodeGen/DirectX/CreateHandle.ll (+9) - (modified) llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll (+9) ``diff diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index 2ed508b28a908..faee9f5dac1b4 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -142,12 +142,17 @@ class ResourceInfo { Binding.LowerBound = LowerBound; Binding.Size = Size; } + const ResourceBinding &getBinding() const { return Binding; } void setUAV(bool GloballyCoherent, bool HasCounter, bool IsROV) { assert(isUAV() && "Not a UAV"); UAVFlags.GloballyCoherent = GloballyCoherent; UAVFlags.HasCounter = HasCounter; UAVFlags.IsROV = IsROV; } + const UAVInfo &getUAV() const { +assert(isUAV() && "Not a UAV"); +return UAVFlags; + } void setCBuffer(uint32_t Size) { assert(isCBuffer() && "Not a CBuffer"); CBufferSize = Size; @@ -163,6 +168,10 @@ class ResourceInfo { Typed.ElementTy = ElementTy; Typed.ElementCount = ElementCount; } + const TypedInfo &getTyped() const { +assert(isTyped() && "Not typed"); +return Typed; + } void setFeedback(dxil::SamplerFeedbackType Type) { assert(isFeedback() && "Not Feedback"); Feedback.Type = Type; @@ -171,8 +180,14 @@ class ResourceInfo { assert(isMultiSample() && "Not MultiSampled"); MultiSample.Count = Count; } + const MSInfo &getMultiSample() const { +assert(isMultiSample() && "Not MultiSampled"); +return MultiSample; + } + StringRef getName() const { return Name; } dxil::ResourceClass 
getResourceClass() const { return RC; } + dxil::ResourceKind getResourceKind() const { return Kind; } bool operator==(const ResourceInfo &RHS) const; @@ -222,7 +237,6 @@ class ResourceInfo { MDTuple *getAsMetadata(LLVMContext &Ctx) const; - ResourceBinding getBinding() const { return Binding; } std::pair getAnnotateProps() const; void print(raw_ostream &OS) const; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f34302cc95065..e7c36ead1cc34 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -175,7 +175,7 @@ class OpLowerer { IRB.SetInsertPoint(CI); dxil::ResourceInfo &RI = DRM[CI]; - dxil::ResourceInfo::ResourceBinding Binding = RI.getBinding(); + const auto &Binding = RI.getBinding(); std::array Args{ ConstantInt::get(Int8Ty, llvm::to_underlying(RI.getResourceClass())), @@ -201,7 +201,7 @@ class OpLowerer { IRB.SetInsertPoint(CI); dxil::ResourceInfo &RI = DRM[CI]; - dxil::ResourceInfo::ResourceBinding Binding = RI.getBinding(); + const auto &Binding = RI.getBinding(); std::pair Props = RI.getAnnotateProps(); Constant *ResBind = OpBuilder.getResBind( diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index c57631cc4c8b6..76a40dbfc5845 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -10,23 +10,235 @@ #include "DXILResourceAnalysis.h" #include "DirectX.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static void prettyPrintResources(raw_ostream &OS, +static constexpr StringRef getRCName(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: +return "SRV"; + 
case dxil::ResourceClass::UAV: +return "UAV"; + case dxil::ResourceClass::CBuffer: +return "cbuffer"; + case dxil::ResourceClass::Sampler: +return "sampler"; + } + llvm_unreachable("covered switch"); +} + +static constexpr StringRef getRCPrefix(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: +return "t"; + case dxil::ResourceClass::UAV: +return "u"; + case dxil::ResourceClass::CBuffer: +return "cb"; + case dxil::ResourceClass::Sampler: +return "s"; + } +} + +static constexpr StringRef getFormatName(const dxil::ResourceInfo &RI) { + if (RI.isTyped()) { +switch (RI.getTyped().ElementTy) { +case dxil::ElementType::I1: + return "i1"; +case
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
llvmbot wrote: @llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) Changes Generate metadata from target extension type based resources. Part of #91366 --- Full diff: https://github.com/llvm/llvm-project/pull/104447.diff 3 Files Affected: - (modified) llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp (+39-8) - (modified) llvm/test/CodeGen/DirectX/CreateHandle.ll (+9-1) - (modified) llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll (+9-1) ``diff diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 007af0b46b9f3..f8621eea23448 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -13,27 +13,52 @@ #include "DXILShaderFlags.h" #include "DirectX.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; using namespace llvm::dxil; -static void emitResourceMetadata(Module &M, +static void emitResourceMetadata(Module &M, const DXILResourceMap &DRM, const dxil::Resources &MDResources) { - Metadata *SRVMD = nullptr, *UAVMD = nullptr, *CBufMD = nullptr, - *SmpMD = nullptr; - bool HasResources = false; + LLVMContext &Context = M.getContext(); + + SmallVector SRVs, UAVs, CBufs, Smps; + for (auto [_, RI] : DRM) { +switch (RI.getResourceClass()) { +case dxil::ResourceClass::SRV: + SRVs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::UAV: + UAVs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::CBuffer: + CBufs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::Sampler: + Smps.push_back(RI.getAsMetadata(Context)); + break; +} + } + Metadata *SRVMD = SRVs.empty() ? nullptr : MDNode::get(Context, SRVs); + Metadata *UAVMD = UAVs.empty() ? 
nullptr : MDNode::get(Context, UAVs); + Metadata *CBufMD = CBufs.empty() ? nullptr : MDNode::get(Context, CBufs); + Metadata *SmpMD = Smps.empty() ? nullptr : MDNode::get(Context, Smps); + bool HasResources = !DRM.empty(); if (MDResources.hasUAVs()) { +assert(!UAVMD && "Old and new UAV representations can't coexist"); UAVMD = MDResources.writeUAVs(M); HasResources = true; } if (MDResources.hasCBuffers()) { +assert(!CBufMD && "Old and new cbuffer representations can't coexist"); CBufMD = MDResources.writeCBuffers(M); HasResources = true; } @@ -46,7 +71,8 @@ static void emitResourceMetadata(Module &M, MDNode::get(M.getContext(), {SRVMD, UAVMD, CBufMD, SmpMD})); } -static void translateMetadata(Module &M, const dxil::Resources &MDResources, +static void translateMetadata(Module &M, const DXILResourceMap &DRM, + const dxil::Resources &MDResources, const ComputedShaderFlags &ShaderFlags) { dxil::ValidatorVersionMD ValVerMD(M); if (ValVerMD.isEmpty()) @@ -54,18 +80,19 @@ static void translateMetadata(Module &M, const dxil::Resources &MDResources, dxil::createShaderModelMD(M); dxil::createDXILVersionMD(M); - emitResourceMetadata(M, MDResources); + emitResourceMetadata(M, DRM, MDResources); dxil::createEntryMD(M, static_cast(ShaderFlags)); } PreservedAnalyses DXILTranslateMetadata::run(Module &M, ModuleAnalysisManager &MAM) { + const DXILResourceMap &DRM = MAM.getResult(M); const dxil::Resources &MDResources = MAM.getResult(M); const ComputedShaderFlags &ShaderFlags = MAM.getResult(M); - translateMetadata(M, MDResources, ShaderFlags); + translateMetadata(M, DRM, MDResources, ShaderFlags); return PreservedAnalyses::all(); } @@ -80,17 +107,20 @@ class DXILTranslateMetadataLegacy : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); +AU.addRequired(); AU.addRequired(); AU.addRequired(); } bool runOnModule(Module &M) override { +const DXILResourceMap &DRM = +getAnalysis().getResourceMap(); const dxil::Resources &MDResources = 
getAnalysis().getDXILResource(); const ComputedShaderFlags &ShaderFlags = getAnalysis().getShaderFlags(); -translateMetadata(M, MDResources, ShaderFlags); +translateMetadata(M, DRM, MDResources, ShaderFlags); return true; } }; @@ -105,6 +135,7 @@ ModulePass *llvm::createDXILTranslateMetadataLegacyPass() { INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) +INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DXILRe
[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata. NFC (PR #104446)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/104446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
bogner wrote: Depends on #104446 https://github.com/llvm/llvm-project/pull/104447 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
bogner wrote: Depends on #104446 https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 09b57d9c203322d415c41eeaec6e8842409de620 67a1cc9be3446f61e6b6c3814690cc38f0d53dbd --extensions cpp,h -- llvm/include/llvm/Analysis/DXILResource.h llvm/lib/Target/DirectX/DXILOpLowering.cpp llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index 76a40dbfc5..171b437c0b 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -149,7 +149,8 @@ struct FormatResourceDimension OS << getTextureDimName(RK); if (Item.isMultiSample()) OS << Item.getMultiSample().Count; - break;; + break; + ; } case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: @@ -214,11 +215,11 @@ static void prettyPrintResources(raw_ostream &OS, const DXILResourceMap &DRM, const dxil::Resources &MDResources) { // Column widths are arbitrary but match the widths DXC uses. OS << ";\n; Resource Bindings:\n;\n"; - OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", -"Name", "Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); + OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", "Name", +"Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); OS << formatv( - "; {0,-+30} {1,-+10} {2,-+7} {3,-+11} {4,-+7} {5,-+14} {6,-+9}\n", "", - "", "", "", "", "", ""); + "; {0,-+30} {1,-+10} {2,-+7} {3,-+11} {4,-+7} {5,-+14} {6,-+9}\n", "", "", + "", "", "", "", ""); // TODO: Do we want to sort these by binding or something like that? 
for (auto [_, RI] : DRM) { @@ -235,8 +236,8 @@ static void prettyPrintResources(raw_ostream &OS, const DXILResourceMap &DRM, FormatBindingID ID(RI); FormatBindingLocation Bind(RI); FormatBindingSize Count(RI); -OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", - Name, Type, Format, Dim, ID, Bind, Count); +OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", Name, + Type, Format, Dim, ID, Bind, Count); } if (MDResources.hasCBuffers()) `` https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata. NFC (PR #104446)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 33a3ace7dc16dd730589f69c333b21dcf6f3a318 f09a87ce0c5947569006ec44ab8423beff0dcc98 --extensions h,cpp -- llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp llvm/lib/Target/DirectX/DXILResource.cpp llvm/lib/Target/DirectX/DXILResource.h llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index c57631cc4c..7185771792 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -21,8 +21,8 @@ static void prettyPrintResources(raw_ostream &OS, const dxil::Resources &MDResources) { // Column widths are arbitrary but match the widths DXC uses. OS << ";\n; Resource Bindings:\n;\n"; - OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,16}\n", -"Name", "Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); + OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,16}\n", "Name", +"Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); OS << formatv( "; {0,-+30} {1,-+10} {2,-+7} {3,-+11} {4,-+7} {5,-+14} {6,-+16}\n", "", "", "", "", "", "", ""); `` https://github.com/llvm/llvm-project/pull/104446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
@@ -13,27 +13,52 @@ #include "DXILShaderFlags.h" #include "DirectX.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; using namespace llvm::dxil; -static void emitResourceMetadata(Module &M, +static void emitResourceMetadata(Module &M, const DXILResourceMap &DRM, const dxil::Resources &MDResources) { - Metadata *SRVMD = nullptr, *UAVMD = nullptr, *CBufMD = nullptr, - *SmpMD = nullptr; - bool HasResources = false; + LLVMContext &Context = M.getContext(); + + SmallVector SRVs, UAVs, CBufs, Smps; + for (auto [_, RI] : DRM) { +switch (RI.getResourceClass()) { +case dxil::ResourceClass::SRV: + SRVs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::UAV: + UAVs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::CBuffer: + CBufs.push_back(RI.getAsMetadata(Context)); + break; +case dxil::ResourceClass::Sampler: + Smps.push_back(RI.getAsMetadata(Context)); + break; +} + } + Metadata *SRVMD = SRVs.empty() ? nullptr : MDNode::get(Context, SRVs); + Metadata *UAVMD = UAVs.empty() ? nullptr : MDNode::get(Context, UAVs); + Metadata *CBufMD = CBufs.empty() ? nullptr : MDNode::get(Context, CBufs); + Metadata *SmpMD = Smps.empty() ? nullptr : MDNode::get(Context, Smps); + bool HasResources = !DRM.empty(); if (MDResources.hasUAVs()) { +assert(!UAVMD && "Old and new UAV representations can't coexist"); python3kgae wrote: Will we remove the old UAV representations? https://github.com/llvm/llvm-project/pull/104447 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang][AArch64] Point the nofp ABI check diagnostics at the callee (#103392) (PR #104027)
jroelofs wrote: > @jroelofs (or anyone else). If you would like to add a note about this fix in > the release notes (completely optional). Please reply to this comment with a > one or two sentence description of the fix. When you are done, please add the > release:note label to this PR. Clang now diagnoses cases where a hard-float ABI would require passing arguments and/or return values in floating point registers on targets that do not have FP registers, e.g. via ``-mgeneral-regs-only`` or ``-march=...+nofp``. https://github.com/llvm/llvm-project/pull/104027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
https://github.com/coopp approved this pull request. Looks good to me. This is the first time I have seen a switch 'default:' come before the 'case:' entries. (void format(llvm::raw_ostream &OS, StringRef Style) override) https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
https://github.com/coopp approved this pull request. Looks good. https://github.com/llvm/llvm-project/pull/104447 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (PR #96873)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96873 >From 0a01c3aa950322fae803d31812affbd358d368b9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:12:59 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 20 ++- .../builtins-fp-atomics-gfx12.cl | 9 ++--- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../builtins-fp-atomics-gfx940.cl | 3 ++- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f424ddaa175400..77c652573cae42 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18920,22 +18920,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_global_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; @@ -18955,11 +18948,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 
IID = Intrinsic::amdgcn_flat_atomic_fadd; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19360,7 +19348,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19378,6 +19368,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 6b8a6d14575db8..07e63a8711c7fe 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -48,7 +48,8 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_flat_add_2f16 // GFX12: flat_atomic_pk_add_f16 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) { @@ -64,7 +65,8 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { } // CHECK-LABEL: test_global_add_half2 -// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}}) +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX12-LABEL: test_global_add_half2 // GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2,
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From 4d880b9b40b85d0ed2d19da2d89880cefd4ae661 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 77c652573cae42..0b6e4f55502655 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18922,10 +18922,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18935,19 +18933,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19350,7 +19341,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19370,6 +19363,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079c..02e289427238fb 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d5..bd9b8c7268e061 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef h
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96875 >From 75cbc81c7f6c7b63a9d6cc33ce194e77b4c2b119 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:34:43 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 26 ++- .../builtins-fp-atomics-gfx12.cl | 24 - .../builtins-fp-atomics-gfx90a.cl | 6 ++--- .../builtins-fp-atomics-gfx940.cl | 14 +++--- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0b6e4f55502655..18efc0de2b90d6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18946,22 +18946,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { -Intrinsic::ID IID; -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19343,7 +19327,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case 
AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19365,6 +19351,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19409,7 +19397,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. - if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 07e63a8711c7fe..e8b6eb57c38d7a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 +// 
CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL: test_flat_ad
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96876 >From 2c9916f78076a6885ba7b9c847433fec8e413103 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 23:18:32 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +-- .../builtins-fp-atomics-gfx90a.cl | 18 ++ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 18efc0de2b90d6..b2d9a34d27e558 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18920,32 +18920,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { -Intrinsic::ID IID; -llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - IID = Intrinsic::amdgcn_global_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - IID = Intrinsic::amdgcn_global_atomic_fmax; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmax; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = -CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19329,7 +19303,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19356,8 +19334,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: BinOp = llvm::AtomicRMWInst::FMin; break; +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = llvm::AtomicRMWInst::FMax; break; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 9381ce951df3e3..556e553903d1a5 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) { } // CHECK-LABEL: test_global_global_min_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: 
test_global_global_min_f64$local // GFX90A: global_atomic_min_f64 void test_global_global_min_f64(__global double *addr, double x){ @@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){ } // CHECK-LABEL: test_global_max_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_global_max_f64$local // GFX90A: global_atomic_max_f64 void test_global_max_f64(__global double *addr, double x){ @@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr,
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/97050 >From f0843b296d9858d8e6b6a2b4e1cea0702c200b6b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Jun 2024 16:32:48 +0200 Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics These are now fully covered by atomicrmw. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 14 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - llvm/lib/Target/AMDGPU/FLATInstructions.td| 2 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/test/Bitcode/amdgcn-atomic.ll| 22 ++ .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 106 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 218 -- llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 11 files changed, 33 insertions(+), 538 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8c25467cc5e4b1..e24571d8b184c8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2990,10 +2990,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e24d119b781628..c6963edf5288ae 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48f..db8b44149cf47e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 17067ddd93ff08..00878da1bfc68a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4907,8 +4907,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fmax: case I
[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)
@@ -213,6 +208,25 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; +// Address to pseudo probes map. +class AddressProbesMap +: public std::vector> { + auto getIt(uint64_t Addr) const { +auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) { + return Probe.getAddress() < Addr; +}; +return llvm::lower_bound(*this, Addr, CompareProbe); wlei-llvm wrote: How does this guarantee it's equivalent to `std::map`? Specifically, if the address doesn't exist in the map, `std::map` returns `end()`, but with `lower_bound` it will point to the next element. https://github.com/llvm/llvm-project/pull/102904 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)
https://github.com/hekota approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/104447 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AArch64: Use consistent atomicrmw expansion for FP operations (PR #103702)
efriedma-quic wrote: Just thought of this, but... we can't do this in the case where we do a libcall. Any load or store between the load exclusive and the store exclusive could break the reservation. (It normally won't, but it can in weird cases where the atomic variable is on the stack.) So we have to use cmpxchg lowering in those cases (and then expand the cmpxchg to ll/sc). The cases we can do inline should be fine, though. https://github.com/llvm/llvm-project/pull/103702 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
https://github.com/hekota edited https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
https://github.com/hekota approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
@@ -10,23 +10,235 @@ #include "DXILResourceAnalysis.h" #include "DirectX.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static void prettyPrintResources(raw_ostream &OS, +static constexpr StringRef getRCName(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: +return "SRV"; + case dxil::ResourceClass::UAV: +return "UAV"; + case dxil::ResourceClass::CBuffer: +return "cbuffer"; + case dxil::ResourceClass::Sampler: +return "sampler"; + } + llvm_unreachable("covered switch"); +} + +static constexpr StringRef getRCPrefix(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: +return "t"; + case dxil::ResourceClass::UAV: +return "u"; + case dxil::ResourceClass::CBuffer: +return "cb"; + case dxil::ResourceClass::Sampler: +return "s"; + } +} + +static constexpr StringRef getFormatName(const dxil::ResourceInfo &RI) { + if (RI.isTyped()) { +switch (RI.getTyped().ElementTy) { +case dxil::ElementType::I1: + return "i1"; +case dxil::ElementType::I16: + return "i16"; +case dxil::ElementType::U16: + return "u16"; +case dxil::ElementType::I32: + return "i32"; +case dxil::ElementType::U32: + return "u32"; +case dxil::ElementType::I64: + return "i64"; +case dxil::ElementType::U64: + return "u64"; +case dxil::ElementType::F16: + return "f16"; +case dxil::ElementType::F32: + return "f32"; +case dxil::ElementType::F64: + return "f64"; +case dxil::ElementType::SNormF16: + return "snorm_f16"; +case dxil::ElementType::UNormF16: + return "unorm_f16"; +case dxil::ElementType::SNormF32: + return "snorm_f32"; +case dxil::ElementType::UNormF32: + return "unorm_f32"; +case dxil::ElementType::SNormF64: + return "snorm_f64"; +case dxil::ElementType::UNormF64: + return "unorm_f64"; +case 
dxil::ElementType::PackedS8x32: + return "p32i8"; +case dxil::ElementType::PackedU8x32: + return "p32u8"; +case dxil::ElementType::Invalid: + llvm_unreachable("Invalid ElementType"); +} +llvm_unreachable("Unhandled ElementType"); + } else if (RI.isStruct()) +return "struct"; + else if (RI.isCBuffer() || RI.isSampler()) +return "NA"; + return "byte"; +} + +static constexpr StringRef getTextureDimName(dxil::ResourceKind RK) { + switch (RK) { + case dxil::ResourceKind::Texture1D: +return "1d"; + case dxil::ResourceKind::Texture2D: +return "2d"; + case dxil::ResourceKind::Texture3D: +return "3d"; + case dxil::ResourceKind::TextureCube: +return "cube"; + case dxil::ResourceKind::Texture1DArray: +return "1darray"; + case dxil::ResourceKind::Texture2DArray: +return "2darray"; + case dxil::ResourceKind::TextureCubeArray: +return "cubearray"; + case dxil::ResourceKind::TBuffer: +return "tbuffer"; + case dxil::ResourceKind::FeedbackTexture2D: +return "fbtex2d"; + case dxil::ResourceKind::FeedbackTexture2DArray: +return "fbtex2darray"; + case dxil::ResourceKind::Texture2DMS: +return "2dMS"; + case dxil::ResourceKind::Texture2DMSArray: +return "2darrayMS"; + case dxil::ResourceKind::Invalid: + case dxil::ResourceKind::NumEntries: + case dxil::ResourceKind::CBuffer: + case dxil::ResourceKind::RawBuffer: + case dxil::ResourceKind::Sampler: + case dxil::ResourceKind::StructuredBuffer: + case dxil::ResourceKind::TypedBuffer: + case dxil::ResourceKind::RTAccelerationStructure: +llvm_unreachable("Invalid ResourceKind for texture"); + } + llvm_unreachable("Unhandled ResourceKind"); +} + +namespace { +struct FormatResourceDimension +: public llvm::FormatAdapter { + explicit FormatResourceDimension(const dxil::ResourceInfo &RI) + : llvm::FormatAdapter(RI) {} + + void format(llvm::raw_ostream &OS, StringRef Style) override { +dxil::ResourceKind RK = Item.getResourceKind(); +switch (RK) { +default: { + OS << getTextureDimName(RK); + if (Item.isMultiSample()) +OS << 
Item.getMultiSample().Count; + break;; +} +case dxil::ResourceKind::RawBuffer: +case dxil::ResourceKind::StructuredBuffer: + if (!Item.isUAV()) +OS << "r/o"; + else if (Item.getUAV().HasCounter) +OS << "r/w+cnt"; + else +OS << "r/w"; + break; +case dxil::ResourceKind::TypedBuffer: + OS << "buf"; + break; +case dxil::ResourceKind::RTAccelerationStructure: + // TODO: dxc would print "ras" here. Can/should this happen? + llvm_unreachable("RTAccelerationStructure printing is not implemented"); +} + } +}; + +struct FormatBindingID +: public llvm
[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)
@@ -1,4 +1,13 @@ ; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s +; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s + +; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count +; CHECK-PRETTY: -- --- --- --- -- - +; CHECK-PRETTY:UAV f32 buf U0 u5,space3 1 +; CHECK-PRETTY:UAV i32 buf U1 u7,space2 1 +; CHECK-PRETTY:SRV u32 buf T0 t3,space524 +; CHECK-PRETTY:SRV struct r/o T1 t2,space4 1 +; CHECK-PRETTY:SRVbyte r/o T2 t8,space1 1 hekota wrote: Change one of the cases to use the default `space0`? https://github.com/llvm/llvm-project/pull/104448 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records (PR #102789)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/102789 >From ddcbb593f72ca47acaa82f9c14a7fd2c4e30903b Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 13 Aug 2024 03:51:31 -0700 Subject: [PATCH 1/3] Pass CurChildIndex by value Created using spr 1.3.4 --- llvm/include/llvm/MC/MCPseudoProbe.h | 6 -- llvm/lib/MC/MCPseudoProbe.cpp| 26 +++--- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index a46188e565c7e8..32d7a4e9129eca 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -474,11 +474,13 @@ class MCPseudoProbeDecoder { } private: + // Recursively parse an inlining tree encoded in pseudo_probe section. Returns + // whether the the top-level node should be skipped. template - void buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs, - uint32_t &CurChild); + const uint32_t CurChildIndex); }; } // end namespace llvm diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index c4c2dfcec40564..e6f6e797b4ee71 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -420,17 +420,17 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, } template -void MCPseudoProbeDecoder::buildAddress2ProbeMap( +bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, -uint32_t &CurChild) { +const uint32_t CurChildIndex) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in MCPseudoProbe.h uint32_t Index = 0; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. 
-Index = CurChild; +Index = CurChildIndex; } else { // Read inline site for inlinees Index = cantFail(errorOrToExpected(readUnsignedNumber())); @@ -446,19 +446,14 @@ void MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) -Cur->Children[CurChild] = MCDecodedPseudoProbeInlineTree(Guid, Index, Cur); -Cur = &Cur->Children[CurChild]; +Cur->Children[CurChildIndex] = +MCDecodedPseudoProbeInlineTree(Guid, Index, Cur); +Cur = &Cur->Children[CurChildIndex]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; } } - // Advance CurChild for non-skipped top-level functions and unconditionally - // for inlined functions. - if (IsTopLevelFunc) -CurChild += !!Cur; - else -++CurChild; // Read number of probes in the current node. uint32_t NodeCount = @@ -519,9 +514,10 @@ void MCPseudoProbeDecoder::buildAddress2ProbeMap( InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); Cur->Children = MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); } - for (uint32_t I = 0; I < ChildrenToProcess;) { + for (uint32_t I = 0; I < ChildrenToProcess; I++) { buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); } + return Cur; } template @@ -630,10 +626,10 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( Data = Start; End = Data + Size; uint64_t LastAddr = 0; - uint32_t Child = 0; + uint32_t CurChildIndex = 0; while (Data < End) -buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, -FuncStartAddrs, Child); +CurChildIndex += buildAddress2ProbeMap( +&DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex); assert(Data == End && "Have unprocessed data in pseudo_probe section"); return true; } >From 73d808abad1e66b6d7f5a9a52f9617b5267ee4c0 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 14 Aug 2024 07:50:31 -0700 Subject: [PATCH 2/3] 
s/ChildrenType/InlinedProbeTreeMap Created using spr 1.3.4 --- llvm/include/llvm/MC/MCPseudoProbe.h | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 0f21d89971f7ab..c21aff7b277aa6 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -214,11 +214,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { }; template + typename InlinedProbeTreeMap> class MCPseudoProbeInlineTreeBase { protected: // Track children (e.g. inlinees) of current context - ChildrenType Children; + InlinedProbeTreeMap Children
[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)
amy-kwan wrote: > So we should remove this tool from the 19.x release? Can someone confirm? @kyulee-com @thevinster Are you two able to help confirm this? https://github.com/llvm/llvm-project/pull/103886 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata. NFC (PR #104446)
https://github.com/hekota approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/104446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Revert specialized createOp methods part of #101250 (PR #104245)
https://github.com/hekota approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/104245 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DirectX] Encapsulate DXILOpLowering's state into a class. NFC (PR #104248)
https://github.com/farzonl approved this pull request. https://github.com/llvm/llvm-project/pull/104248 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Register a few DXIL passes with the new PM (PR #104250)
https://github.com/hekota approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/104250 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Register a few DXIL passes with the new PM (PR #104250)
https://github.com/farzonl approved this pull request. https://github.com/llvm/llvm-project/pull/104250 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96874 >From 681262015dbc0b6dcfdeb781dcc8db8fb9053649 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:15:26 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++--- .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl | 6 -- .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 77c652573cae42..0b6e4f55502655 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18922,10 +18922,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -18935,19 +18933,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_flat_atomic_fadd; - break; } llvm::Value *Addr = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -19350,7 +19341,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19370,6 +19363,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index cd10777dbe079c..02e289427238fb 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){ } // CHECK-LABEL: test_flat_add_local_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}} + // GFX90A-LABEL: test_flat_add_local_f64$local // GFX90A: ds_add_rtn_f64 void test_flat_add_local_f64(__local double *addr, double x){ @@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){ } // CHECK-LABEL: 
test_flat_global_add_f64 -// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_flat_global_add_f64$local // GFX90A: global_atomic_add_f64 void test_flat_global_add_f64(__global double *addr, double x){ diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl index 589dcd406630d5..bd9b8c7268e061 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl @@ -10,7 +10,8 @@ typedef h
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96875 >From c45510746e382ec4c7e2111037b1ae5e715ddf25 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 19:34:43 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 26 ++- .../builtins-fp-atomics-gfx12.cl | 24 - .../builtins-fp-atomics-gfx90a.cl | 6 ++--- .../builtins-fp-atomics-gfx940.cl | 14 +++--- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0b6e4f55502655..18efc0de2b90d6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18946,22 +18946,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { -Intrinsic::ID IID; -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: - IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19343,7 +19327,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: - case 
AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19365,6 +19351,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: @@ -19409,7 +19397,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. - if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index 07e63a8711c7fe..e8b6eb57c38d7a 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; // CHECK-LABEL: test_local_add_2bf16 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat> -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 +// 
CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16> // GFX12-LABEL: test_local_add_2bf16 @@ -48,7 +48,7 @@ void test_local_add_2f16_noret(__local half2 *addr, half2 x) { } // CHECK-LABEL: test_flat_add_2f16 -// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} +// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL: test_flat_ad
[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96876 >From c0e5b88ab0d952dea59ca313a197cf1b495ffd62 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2024 23:18:32 +0200 Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins --- clang/lib/CodeGen/CGBuiltin.cpp | 36 +-- .../builtins-fp-atomics-gfx90a.cl | 18 ++ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 18efc0de2b90d6..b2d9a34d27e558 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18920,32 +18920,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { -Intrinsic::ID IID; -llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); -switch (BuiltinID) { -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: - IID = Intrinsic::amdgcn_global_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: - IID = Intrinsic::amdgcn_global_atomic_fmax; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmin; - break; -case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: - IID = Intrinsic::amdgcn_flat_atomic_fmax; - break; -} -llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); -llvm::Value *Val = EmitScalarExpr(E->getArg(1)); -llvm::Function *F = -CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); -return Builder.CreateCall(F, {Addr, Val}); - } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: @@ -19329,7 +19303,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -19356,8 +19334,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: BinOp = llvm::AtomicRMWInst::FMin; break; +case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: +case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = llvm::AtomicRMWInst::FMax; break; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 9381ce951df3e3..556e553903d1a5 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) { } // CHECK-LABEL: test_global_global_min_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: 
test_global_global_min_f64$local // GFX90A: global_atomic_min_f64 void test_global_global_min_f64(__global double *addr, double x){ @@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){ } // CHECK-LABEL: test_global_max_f64 -// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}}) +// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // GFX90A-LABEL: test_global_max_f64$local // GFX90A: global_atomic_max_f64 void test_global_max_f64(__global double *addr, double x){ @@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr,
[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/97050 >From 0f8ef026d8b6e7e26093294eee95a86b6c7cad50 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Jun 2024 16:32:48 +0200 Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics These are now fully covered by atomicrmw. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 14 +- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 - .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - llvm/lib/Target/AMDGPU/FLATInstructions.td| 2 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/test/Bitcode/amdgcn-atomic.ll| 22 ++ .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 106 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 218 -- llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 11 files changed, 33 insertions(+), 538 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8c25467cc5e4b1..e24571d8b184c8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2990,10 +2990,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic { def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic; } -// bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; -def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUAtomicRtn; - defset list AMDGPUMFMAIntrinsics940 = { def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e24d119b781628..c6963edf5288ae 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax")) { + Name.starts_with("ds.fmax") || + Name.starts_with("global.atomic.fadd.v2bf16") || + Name.starts_with("flat.atomic.fadd.v2bf16")) { // Replaced with atomicrmw fadd/fmin/fmax, so there's no new // declaration. NewFn = nullptr; @@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("ds.fmin", AtomicRMWInst::FMin) .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) - .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); + .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) + .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. 
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); if (PtrTy->getAddressSpace() != 3) { -RMW->setMetadata("amdgpu.no.fine.grained.memory", - MDNode::get(F->getContext(), {})); +MDNode *EmptyMD = MDNode::get(F->getContext(), {}); +RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); +if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) + RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } if (IsVolatile) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c6dbc58395e48f..db8b44149cf47e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op { defm int_amdgcn_flat_atomic_fadd : noret_op; defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op; -defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_flat_atomic_fmin : noret_op; defm int_amdgcn_flat_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; -defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 17067ddd93ff08..00878da1bfc68a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4907,8 +4907,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fmax: case I
[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)
@@ -213,6 +208,25 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; +// Address to pseudo probes map. +class AddressProbesMap +: public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> { + auto getIt(uint64_t Addr) const { +auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) { + return Probe.getAddress() < Addr; +}; +return llvm::lower_bound(*this, Addr, CompareProbe); aaupov wrote: Good catch. Initially I used equal_range in find(Addr) case, and lower_bound in find(From, To) case. Need to revert to equal_range. https://github.com/llvm/llvm-project/pull/102904 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Add analysis utility to fetch ID of a callsite (PR #104491)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/104491 None >From e0cb0c4b74d0f5fb695d80973b366399ed6dda2b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 15 Aug 2024 10:28:04 -0700 Subject: [PATCH] [ctx_prof] Add analysis utility to fetch ID of a callsite --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 4 + llvm/lib/Analysis/CtxProfAnalysis.cpp | 7 + llvm/unittests/Analysis/CMakeLists.txt| 1 + .../Analysis/CtxProfAnalysisTest.cpp | 145 ++ 4 files changed, 157 insertions(+) create mode 100644 llvm/unittests/Analysis/CtxProfAnalysisTest.cpp diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index f0e2aeb0f92f74..483a6e557126e0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -11,6 +11,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" @@ -84,6 +86,8 @@ class CtxProfAnalysis : public AnalysisInfoMixin { using Result = PGOContextualProfile; PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM); + + static InstrProfCallsite *getCallsiteInstrumentation(CallBase &CB); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 7b4666b29a1936..3e4c1f8c3df3c0 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -183,3 +183,10 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, OS << "\n"; return PreservedAnalyses::all(); } + +InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { + while (auto *Prev = CB.getPrevNode()) +if (auto *IPC = dyn_cast(Prev)) + return IPC; + return nullptr; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index 
3cba630867a83b..958d8f9a72fd66 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -22,6 +22,7 @@ set(ANALYSIS_TEST_SOURCES CFGTest.cpp CGSCCPassManagerTest.cpp ConstraintSystemTest.cpp + CtxProfAnalysisTest.cpp DDGTest.cpp DomTreeUpdaterTest.cpp DXILResourceTest.cpp diff --git a/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp b/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp new file mode 100644 index 00..8f57b828bf5a15 --- /dev/null +++ b/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp @@ -0,0 +1,145 @@ +//===--- CtxProfAnalysisTest.cpp --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "llvm/Analysis/CtxProfAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +class CtxProfAnalysisTest : public testing::Test { + static constexpr auto *IR = R"IR( +declare void @bar() + +define private void @foo(i32 %a, ptr %fct) #0 !guid !0 { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} + +define void @an_entrypoint(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + call void @foo(i32 1, ptr null) + ret void +no: + ret void +} + 
+define void @another_entrypoint_no_callees(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + ret void +no: + ret void +} + +attributes #0 = { noinline } +!0 = !{ i64 11872291593386833696 } +)IR"; + +protected: + LLVMContext C; + PassBuilder PB; + ModuleAnalysisManager MAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + LoopAnalysisManager LAM; + std::unique_ptr M; + + void SetUp() override { +SMDiagnostic Err; +M = parseAssemblyString(IR, Err, C); +if (!M) + Err.print("CtxProfAnalysisTest", errs()); + } + +public: + CtxProfAnalysisTest() { +PB.registerModuleAnalyses(MAM); +PB.registerCGSCCAnalyses(CGAM); +PB.registerFunctionAnalyses(FAM); +PB.registerL
[llvm-branch-commits] [llvm] [ctx_prof] Add analysis utility to fetch ID of a callsite (PR #104491)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite: <https://app.graphite.dev/github/pr/llvm/llvm-project/104491?utm_source=stack-comment-downstack-mergeability-warning> > Learn more: <https://graphite.dev/docs/merge-pull-requests> * **#104491** <https://app.graphite.dev/github/pr/llvm/llvm-project/104491?utm_source=stack-comment-icon> 👈 * **#104490** <https://app.graphite.dev/github/pr/llvm/llvm-project/104490?utm_source=stack-comment-icon> * `main` This stack of pull requests is managed by Graphite. Learn more about stacking: <https://stacking.dev/?utm_source=stack-comment> Join @mtrofin and the rest of your teammates on Graphite: <https://graphite.dev?utm-source=stack-comment> https://github.com/llvm/llvm-project/pull/104491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Add analysis utility to fetch ID of a callsite (PR #104491)
https://github.com/mtrofin ready_for_review https://github.com/llvm/llvm-project/pull/104491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ctx_prof] Add analysis utility to fetch ID of a callsite (PR #104491)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Mircea Trofin (mtrofin) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/104491.diff 4 Files Affected: - (modified) llvm/include/llvm/Analysis/CtxProfAnalysis.h (+4) - (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+7) - (modified) llvm/unittests/Analysis/CMakeLists.txt (+1) - (added) llvm/unittests/Analysis/CtxProfAnalysisTest.cpp (+145) ``diff diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index f0e2aeb0f92f74..483a6e557126e0 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -11,6 +11,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" @@ -84,6 +86,8 @@ class CtxProfAnalysis : public AnalysisInfoMixin { using Result = PGOContextualProfile; PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM); + + static InstrProfCallsite *getCallsiteInstrumentation(CallBase &CB); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 7b4666b29a1936..3e4c1f8c3df3c0 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -183,3 +183,10 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, OS << "\n"; return PreservedAnalyses::all(); } + +InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { + while (auto *Prev = CB.getPrevNode()) +if (auto *IPC = dyn_cast(Prev)) + return IPC; + return nullptr; +} diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index 3cba630867a83b..958d8f9a72fd66 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -22,6 +22,7 @@ set(ANALYSIS_TEST_SOURCES 
CFGTest.cpp CGSCCPassManagerTest.cpp ConstraintSystemTest.cpp + CtxProfAnalysisTest.cpp DDGTest.cpp DomTreeUpdaterTest.cpp DXILResourceTest.cpp diff --git a/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp b/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp new file mode 100644 index 00..8f57b828bf5a15 --- /dev/null +++ b/llvm/unittests/Analysis/CtxProfAnalysisTest.cpp @@ -0,0 +1,145 @@ +//===--- CtxProfAnalysisTest.cpp --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "llvm/Analysis/CtxProfAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassInstrumentation.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +class CtxProfAnalysisTest : public testing::Test { + static constexpr auto *IR = R"IR( +declare void @bar() + +define private void @foo(i32 %a, ptr %fct) #0 !guid !0 { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} + +define void @an_entrypoint(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + call void @foo(i32 1, ptr null) + ret void +no: + ret void +} + +define void @another_entrypoint_no_callees(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + ret void +no: + ret void +} + +attributes #0 = { 
noinline } +!0 = !{ i64 11872291593386833696 } +)IR"; + +protected: + LLVMContext C; + PassBuilder PB; + ModuleAnalysisManager MAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + LoopAnalysisManager LAM; + std::unique_ptr M; + + void SetUp() override { +SMDiagnostic Err; +M = parseAssemblyString(IR, Err, C); +if (!M) + Err.print("CtxProfAnalysisTest", errs()); + } + +public: + CtxProfAnalysisTest() { +PB.registerModuleAnalyses(MAM); +PB.registerCGSCCAnalyses(CGAM); +PB.registerFunctionAnalyses(FAM); +PB.registerLoopAnalyses(LAM); +PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + } +}; + +TEST_F(CtxProfAnalysisTest, GetCallsiteIDTest) { + ASSERT_TRUE(!!M); + ModulePassManager MPM; + MPM.addPass(P
[llvm-branch-commits] [llvm] [ctx_prof] Add analysis utility to fetch ID of a callsite (PR #104491)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/104491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits