[llvm-branch-commits] [llvm] [SPARC][IAS] Add definitions for OSA 2011 instructions (PR #138403)
https://github.com/s-barannikov approved this pull request. https://github.com/llvm/llvm-project/pull/138403 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/141766 >From 2ef30aacee4d80c0e4a925aa5ba9416423d10b1b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 27 May 2025 07:55:04 -0500 Subject: [PATCH 1/7] [utils][TableGen] Handle versions on clause/directive spellings In "getDirectiveName(Kind, Version)", return the spelling that corresponds to Version, and in "getDirectiveKindAndVersions(Name)" return the pair {Kind, VersionRange}, where VersionRange contains the minimum and the maximum versions that allow "Name" as a spelling. This applies to clauses as well. In general it applies to classes that have spellings (defined via TableGen class "Spelling"). Given a Kind and a Version, getting the corresponding spelling requires a runtime search (which can fail in a general case). To avoid generating the search function inline, a small additional component of llvm/Frontend was added: LLVMFrontendDirective. The corresponding header file also defines C++ classes "Spelling" and "VersionRange", which are used in TableGen/DirectiveEmitter as well. 
For background information see https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 --- .../llvm/Frontend/Directive/Spelling.h| 39 + llvm/include/llvm/TableGen/DirectiveEmitter.h | 25 +-- llvm/lib/Frontend/CMakeLists.txt | 1 + llvm/lib/Frontend/Directive/CMakeLists.txt| 6 + llvm/lib/Frontend/Directive/Spelling.cpp | 31 llvm/lib/Frontend/OpenACC/CMakeLists.txt | 2 +- llvm/lib/Frontend/OpenMP/CMakeLists.txt | 1 + llvm/test/TableGen/directive1.td | 34 ++-- llvm/test/TableGen/directive2.td | 24 +-- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 146 +++--- 10 files changed, 212 insertions(+), 97 deletions(-) create mode 100644 llvm/include/llvm/Frontend/Directive/Spelling.h create mode 100644 llvm/lib/Frontend/Directive/CMakeLists.txt create mode 100644 llvm/lib/Frontend/Directive/Spelling.cpp diff --git a/llvm/include/llvm/Frontend/Directive/Spelling.h b/llvm/include/llvm/Frontend/Directive/Spelling.h new file mode 100644 index 0..3ba0ae2296535 --- /dev/null +++ b/llvm/include/llvm/Frontend/Directive/Spelling.h @@ -0,0 +1,39 @@ +//===-- Spelling.h C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef LLVM_FRONTEND_DIRECTIVE_SPELLING_H +#define LLVM_FRONTEND_DIRECTIVE_SPELLING_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace llvm::directive { + +struct VersionRange { + static constexpr int MaxValue = std::numeric_limits::max(); + int Min = 1; + int Max = MaxValue; +}; + +inline bool operator<(const VersionRange &A, const VersionRange &B) { + if (A.Min != B.Min) +return A.Min < B.Min; + return A.Max < B.Max; +} + +struct Spelling { + StringRef Name; + VersionRange Versions; +}; + +StringRef FindName(llvm::iterator_range, unsigned Version); + +} // namespace llvm::directive + +#endif // LLVM_FRONTEND_DIRECTIVE_SPELLING_H diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index 1235b7638e761..c7d7460087723 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/Directive/Spelling.h" #include "llvm/Support/MathExtras.h" #include "llvm/TableGen/Record.h" #include @@ -113,29 +114,19 @@ class Versioned { constexpr static int IntWidth = 8 * sizeof(int); }; -// Range of specification versions: [Min, Max] -// Default value: all possible versions. -// This is the same structure as the one emitted into the generated sources. 
-#define STRUCT_VERSION_RANGE \ - struct VersionRange { \ -int Min = 1; \ -int Max = INT_MAX; \ - } - -STRUCT_VERSION_RANGE; - class Spelling : public Versioned { public: - using Value = std::pair; + using Value = llvm::directive::Spelling; Spelling(const Record *Def) : Def(Def) {} StringRef getText() const { return Def->getValueAsString("spelling"); } - VersionRange getVersions() const { -return VersionRange{getMinVersion(Def), getMaxVersion(Def)}; + llvm::directive::VersionRange getVersions() const { +return llvm::directive::VersionRange{getMinVersion(Def), +
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
@@ -77,6 +77,19 @@ static std::string getIdentifierName(const Record *Rec, StringRef Prefix) { return Prefix.str() + BaseRecord(Rec).getFormattedName(); } +using RecordWithSpelling = std::pair; + +static std::vector +getSpellings(ArrayRef Records) { tblah wrote: Is it always safe to construct a `Clause` from a record which is a directive? https://github.com/llvm/llvm-project/pull/141766 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 21ac3ae - Revert "[Clang][LoongArch] Support target attribute for function (#140700)"
Author: Paul Kirth Date: 2025-05-29T10:49:51-07:00 New Revision: 21ac3ae0e4be720474a1eceb287eada6bd07c1f4 URL: https://github.com/llvm/llvm-project/commit/21ac3ae0e4be720474a1eceb287eada6bd07c1f4 DIFF: https://github.com/llvm/llvm-project/commit/21ac3ae0e4be720474a1eceb287eada6bd07c1f4.diff LOG: Revert "[Clang][LoongArch] Support target attribute for function (#140700)" This reverts commit b359422eebbc61f0e0fb03c27ec1a93c818701ee. Added: Modified: clang/lib/Basic/Targets/LoongArch.cpp clang/lib/Basic/Targets/LoongArch.h clang/lib/Sema/SemaDeclAttr.cpp llvm/include/llvm/TargetParser/LoongArchTargetParser.h llvm/lib/TargetParser/LoongArchTargetParser.cpp Removed: clang/test/CodeGen/LoongArch/targetattr.c clang/test/Sema/attr-target-loongarch.c diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index 6e5e5a6d1a3e6..f4bcb54bd470d 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -393,73 +393,6 @@ bool LoongArchTargetInfo::handleTargetFeatures( return true; } -enum class AttrFeatureKind { Arch, Tune, NoFeature, Feature }; - -static std::pair -getAttrFeatureTypeAndValue(llvm::StringRef AttrFeature) { - if (auto Split = AttrFeature.split("="); !Split.second.empty()) { -if (Split.first.trim() == "arch") - return {AttrFeatureKind::Arch, Split.second.trim()}; -if (Split.first.trim() == "tune") - return {AttrFeatureKind::Tune, Split.second.trim()}; - } - if (AttrFeature.starts_with("no-")) -return {AttrFeatureKind::NoFeature, AttrFeature.drop_front(3)}; - return {AttrFeatureKind::Feature, AttrFeature}; -} - -ParsedTargetAttr -LoongArchTargetInfo::parseTargetAttr(StringRef Features) const { - ParsedTargetAttr Ret; - if (Features == "default") -return Ret; - SmallVector AttrFeatures; - Features.split(AttrFeatures, ","); - - for (auto &Feature : AttrFeatures) { -auto [Kind, Value] = getAttrFeatureTypeAndValue(Feature.trim()); - -switch (Kind) { -case AttrFeatureKind::Arch: { - if 
(llvm::LoongArch::isValidArchName(Value) || Value == "la64v1.0" || - Value == "la64v1.1") { -std::vector ArchFeatures; -if (llvm::LoongArch::getArchFeatures(Value, ArchFeatures)) { - Ret.Features.insert(Ret.Features.end(), ArchFeatures.begin(), - ArchFeatures.end()); -} - -if (!Ret.CPU.empty()) - Ret.Duplicate = "arch="; -else if (Value == "la64v1.0" || Value == "la64v1.1") - Ret.CPU = "loongarch64"; -else - Ret.CPU = Value; - } else { -Ret.Features.push_back("!arch=" + Value.str()); - } - break; -} - -case AttrFeatureKind::Tune: - if (!Ret.Tune.empty()) -Ret.Duplicate = "tune="; - else -Ret.Tune = Value; - break; - -case AttrFeatureKind::NoFeature: - Ret.Features.push_back("-" + Value.str()); - break; - -case AttrFeatureKind::Feature: - Ret.Features.push_back("+" + Value.str()); - break; -} - } - return Ret; -} - bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { return llvm::LoongArch::isValidCPUName(Name); } @@ -468,7 +401,3 @@ void LoongArchTargetInfo::fillValidCPUList( SmallVectorImpl &Values) const { llvm::LoongArch::fillValidCPUList(Values); } - -bool LoongArchTargetInfo::isValidFeatureName(StringRef Name) const { - return llvm::LoongArch::isValidFeatureName(Name); -} diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index a83bb925bc310..4c7b53abfef9b 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -101,9 +101,6 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; - ParsedTargetAttr parseTargetAttr(StringRef Str) const override; - bool supportsTargetAttributeTune() const override { return true; } - bool initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, @@ -113,7 +110,6 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void 
fillValidCPUList(SmallVectorImpl &Values) const override; - bool isValidFeatureName(StringRef Name) const override; }; class LLVM_LIBRARY_VISIBILITY LoongArch32TargetInfo diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5bc358ca5fca0..119ba8486b09f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3195,17 +3195,6 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { } } - if (Context.getTargetInfo().getTriple().isLoongArch())
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran edited https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add missing fract test (PR #141985)
arsenm wrote: ### Merge activity * **May 29, 7:50 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141985). https://github.com/llvm/llvm-project/pull/141985 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle other fmin flavors in fract combine (PR #141987)
arsenm wrote: ### Merge activity * **May 29, 7:50 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141987). https://github.com/llvm/llvm-project/pull/141987 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for fract combine with other fmin types (PR #141986)
arsenm wrote: ### Merge activity * **May 29, 7:50 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141986). https://github.com/llvm/llvm-project/pull/141986 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle other fmin flavors in fract combine (PR #141987)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/141987 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point rampitec wrote: Maybe report 1 for packed tid? https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
https://github.com/bogner approved this pull request. LGTM other than one comment about consistently using the `ASSERT_*` vs `EXPECT_*` macros in the tests. https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
@@ -1157,3 +1157,49 @@ TEST(RootSignature, ParseDescriptorTable) { ASSERT_EQ(Range.OffsetInDescriptorsFromTableStart, -1); } } + +TEST(RootSignature, ParseStaticSamplers) { + { +uint8_t Buffer[] = { +0x44, 0x58, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x90, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x52, 0x54, 0x53, 0x30, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, +0xa4, 0x70, 0x9d, 0x3f, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x85, 0xeb, 0x91, 0x40, 0x66, 0x66, 0x0e, 0x41, +0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00}; +DXContainer C = +llvm::cantFail(DXContainer::create(getMemoryBuffer<133>(Buffer))); + +auto MaybeRS = C.getRootSignature(); +ASSERT_TRUE(MaybeRS.has_value()); +const auto &RS = MaybeRS.value(); +ASSERT_EQ(RS.getVersion(), 2u); +ASSERT_EQ(RS.getNumParameters(), 0u); +ASSERT_EQ(RS.getRootParametersOffset(), 0u); +ASSERT_EQ(RS.getNumStaticSamplers(), 1u); +ASSERT_EQ(RS.getStaticSamplersOffset(), 24u); +ASSERT_EQ(RS.getFlags(), 17u); + +auto Sampler = *RS.samplers().begin(); + +ASSERT_EQ(Sampler.Filter, 10u); +ASSERT_EQ(Sampler.AddressU, 1u); +ASSERT_EQ(Sampler.AddressV, 2u); +ASSERT_EQ(Sampler.AddressW, 5u); +EXPECT_FLOAT_EQ(Sampler.MipLODBias, 1.23); +ASSERT_EQ(Sampler.MaxAnisotropy, 20u); +ASSERT_EQ(Sampler.ComparisonFunc, 4u); +ASSERT_EQ(Sampler.BorderColor, 0u); +EXPECT_FLOAT_EQ(Sampler.MinLOD, 4.56); bogner wrote: Shouldn't these be `ASSERT_FLOAT_EQ`? 
IIUC `ASSERT_*` stops immediately if the condition isn't met and `EXPECT_*` allows us to diagnose multiple issues. I don't think it makes sense to use one for the floating-point fields and the other for the rest of the fields. https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samplers in yaml2obj/obj2yaml (PR #139963)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logic. (PR #142027)
https://github.com/qinkunbao created https://github.com/llvm/llvm-project/pull/142027 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point arsenm wrote: For some reason the calling instruction isn't set in the context where this gets called from, so I can't figure out if it's a kernel or not so I left this for later https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point arsenm wrote: Similarly also can't check if the other work items are disabled https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logic. (PR #142027)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Qinkun Bao (qinkunbao) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/142027.diff 2 Files Affected: - (modified) clang/include/clang/Basic/NoSanitizeList.h (+2-1) - (modified) clang/lib/Basic/NoSanitizeList.cpp (+11-10) ``diff diff --git a/clang/include/clang/Basic/NoSanitizeList.h b/clang/include/clang/Basic/NoSanitizeList.h index 43415859fcd54..4a546351bb66d 100644 --- a/clang/include/clang/Basic/NoSanitizeList.h +++ b/clang/include/clang/Basic/NoSanitizeList.h @@ -29,7 +29,8 @@ class SanitizerSpecialCaseList; class NoSanitizeList { std::unique_ptr SSCL; SourceManager &SM; - + bool containsPrefix(SanitizerMask Mask,StringRef Prefix, StringRef Name, + StringRef Category = StringRef()) const; public: NoSanitizeList(const std::vector &NoSanitizeListPaths, SourceManager &SM); diff --git a/clang/lib/Basic/NoSanitizeList.cpp b/clang/lib/Basic/NoSanitizeList.cpp index 9f0f1c64995cb..671430dfe6293 100644 --- a/clang/lib/Basic/NoSanitizeList.cpp +++ b/clang/lib/Basic/NoSanitizeList.cpp @@ -27,6 +27,15 @@ NoSanitizeList::NoSanitizeList(const std::vector &NoSanitizePaths, NoSanitizeList::~NoSanitizeList() = default; +bool NoSanitizeList::containsPrefix(SanitizerMask Mask, StringRef Prefix, +StringRef Name, StringRef Category) const { + auto NoSan = SSCL->inSectionBlame(Mask, Prefix, Name, Category); + if (NoSan == llvm::SpecialCaseList::NotFound) +return false; + auto San = SSCL->inSectionBlame(Mask, Prefix, Name, "sanitize"); + return San == llvm::SpecialCaseList::NotFound || NoSan > San; +} + bool NoSanitizeList::containsGlobal(SanitizerMask Mask, StringRef GlobalName, StringRef Category) const { return SSCL->inSection(Mask, "global", GlobalName, Category); @@ -34,11 +43,7 @@ bool NoSanitizeList::containsGlobal(SanitizerMask Mask, StringRef GlobalName, bool NoSanitizeList::containsType(SanitizerMask Mask, StringRef MangledTypeName, StringRef Category) const { - auto NoSan = SSCL->inSectionBlame(Mask, 
"type", MangledTypeName, Category); - if (NoSan == llvm::SpecialCaseList::NotFound) -return false; - auto San = SSCL->inSectionBlame(Mask, "type", MangledTypeName, "sanitize"); - return San == llvm::SpecialCaseList::NotFound || NoSan > San; + return containsPrefix(Mask, "type", MangledTypeName, Category); } bool NoSanitizeList::containsFunction(SanitizerMask Mask, @@ -48,11 +53,7 @@ bool NoSanitizeList::containsFunction(SanitizerMask Mask, bool NoSanitizeList::containsFile(SanitizerMask Mask, StringRef FileName, StringRef Category) const { - auto NoSan = SSCL->inSectionBlame(Mask, "src", FileName, Category); - if (NoSan == llvm::SpecialCaseList::NotFound) -return false; - auto San = SSCL->inSectionBlame(Mask, "src", FileName, "sanitize"); - return San == llvm::SpecialCaseList::NotFound || NoSan > San; + return containsPrefix(Mask, "src", FileName, Category); } bool NoSanitizeList::containsMainFile(SanitizerMask Mask, StringRef FileName, `` https://github.com/llvm/llvm-project/pull/142027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logic. (PR #142027)
https://github.com/qinkunbao updated https://github.com/llvm/llvm-project/pull/142027 >From b4871ccd92c7006fa771d61dfbadeaeeaa2c170d Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Thu, 29 May 2025 19:59:38 + Subject: [PATCH] format. Created using spr 1.3.6 --- clang/include/clang/Basic/NoSanitizeList.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/NoSanitizeList.h b/clang/include/clang/Basic/NoSanitizeList.h index 4a546351bb66d..266dfc0d217cb 100644 --- a/clang/include/clang/Basic/NoSanitizeList.h +++ b/clang/include/clang/Basic/NoSanitizeList.h @@ -29,8 +29,9 @@ class SanitizerSpecialCaseList; class NoSanitizeList { std::unique_ptr SSCL; SourceManager &SM; - bool containsPrefix(SanitizerMask Mask,StringRef Prefix, StringRef Name, + bool containsPrefix(SanitizerMask Mask, StringRef Prefix, StringRef Name, StringRef Category = StringRef()) const; + public: NoSanitizeList(const std::vector &NoSanitizeListPaths, SourceManager &SM); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point rampitec wrote: Ok for now. https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/141947 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logic. (PR #142027)
https://github.com/qinkunbao edited https://github.com/llvm/llvm-project/pull/142027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix broken XFAILed test for fat pointer null initializers (PR #142015)
https://github.com/krzysz00 approved this pull request. https://github.com/llvm/llvm-project/pull/142015 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/141766 >From 2ef30aacee4d80c0e4a925aa5ba9416423d10b1b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 27 May 2025 07:55:04 -0500 Subject: [PATCH 1/6] [utils][TableGen] Handle versions on clause/directive spellings In "getDirectiveName(Kind, Version)", return the spelling that corresponds to Version, and in "getDirectiveKindAndVersions(Name)" return the pair {Kind, VersionRange}, where VersionRange contains the minimum and the maximum versions that allow "Name" as a spelling. This applies to clauses as well. In general it applies to classes that have spellings (defined via TableGen class "Spelling"). Given a Kind and a Version, getting the corresponding spelling requires a runtime search (which can fail in a general case). To avoid generating the search function inline, a small additional component of llvm/Frontend was added: LLVMFrontendDirective. The corresponding header file also defines C++ classes "Spelling" and "VersionRange", which are used in TableGen/DirectiveEmitter as well. 
For background information see https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 --- .../llvm/Frontend/Directive/Spelling.h| 39 + llvm/include/llvm/TableGen/DirectiveEmitter.h | 25 +-- llvm/lib/Frontend/CMakeLists.txt | 1 + llvm/lib/Frontend/Directive/CMakeLists.txt| 6 + llvm/lib/Frontend/Directive/Spelling.cpp | 31 llvm/lib/Frontend/OpenACC/CMakeLists.txt | 2 +- llvm/lib/Frontend/OpenMP/CMakeLists.txt | 1 + llvm/test/TableGen/directive1.td | 34 ++-- llvm/test/TableGen/directive2.td | 24 +-- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 146 +++--- 10 files changed, 212 insertions(+), 97 deletions(-) create mode 100644 llvm/include/llvm/Frontend/Directive/Spelling.h create mode 100644 llvm/lib/Frontend/Directive/CMakeLists.txt create mode 100644 llvm/lib/Frontend/Directive/Spelling.cpp diff --git a/llvm/include/llvm/Frontend/Directive/Spelling.h b/llvm/include/llvm/Frontend/Directive/Spelling.h new file mode 100644 index 0..3ba0ae2296535 --- /dev/null +++ b/llvm/include/llvm/Frontend/Directive/Spelling.h @@ -0,0 +1,39 @@ +//===-- Spelling.h C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef LLVM_FRONTEND_DIRECTIVE_SPELLING_H +#define LLVM_FRONTEND_DIRECTIVE_SPELLING_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace llvm::directive { + +struct VersionRange { + static constexpr int MaxValue = std::numeric_limits::max(); + int Min = 1; + int Max = MaxValue; +}; + +inline bool operator<(const VersionRange &A, const VersionRange &B) { + if (A.Min != B.Min) +return A.Min < B.Min; + return A.Max < B.Max; +} + +struct Spelling { + StringRef Name; + VersionRange Versions; +}; + +StringRef FindName(llvm::iterator_range, unsigned Version); + +} // namespace llvm::directive + +#endif // LLVM_FRONTEND_DIRECTIVE_SPELLING_H diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index 1235b7638e761..c7d7460087723 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/Directive/Spelling.h" #include "llvm/Support/MathExtras.h" #include "llvm/TableGen/Record.h" #include @@ -113,29 +114,19 @@ class Versioned { constexpr static int IntWidth = 8 * sizeof(int); }; -// Range of specification versions: [Min, Max] -// Default value: all possible versions. -// This is the same structure as the one emitted into the generated sources. 
-#define STRUCT_VERSION_RANGE \ - struct VersionRange { \ -int Min = 1; \ -int Max = INT_MAX; \ - } - -STRUCT_VERSION_RANGE; - class Spelling : public Versioned { public: - using Value = std::pair; + using Value = llvm::directive::Spelling; Spelling(const Record *Def) : Def(Def) {} StringRef getText() const { return Def->getValueAsString("spelling"); } - VersionRange getVersions() const { -return VersionRange{getMinVersion(Def), getMaxVersion(Def)}; + llvm::directive::VersionRange getVersions() const { +return llvm::directive::VersionRange{getMinVersion(Def), +
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/141948 None >From f69a1ebaa0ef1988c6ff36ac84e4e44efc4892a0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2025 15:32:59 +0200 Subject: [PATCH] AMDGPU: Report special input intrinsics as free --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 23 +++- .../AMDGPU/special-argument-intrinsics.ll | 56 +-- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 357f8c5cfcd02..f9dbfb18ab7ee 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point +// there may be a bit instruction. +return 0; + case Intrinsic::amdgcn_workgroup_id_x: + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::amdgcn_workgroup_id_z: + case Intrinsic::amdgcn_lds_kernel_id: + case Intrinsic::amdgcn_dispatch_ptr: + case Intrinsic::amdgcn_dispatch_id: + case Intrinsic::amdgcn_implicitarg_ptr: + case Intrinsic::amdgcn_queue_ptr: +// Read from an argument register. 
return 0; + default: +break; + } if (!intrinsicHasPackedVectorBenefit(ICA.getID())) return BaseT::getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll index ea045e04310be..00dbcff0a021f 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll @@ -7,11 +7,11 @@ define i32 @workitem_id_x() { ; ALL-LABEL: 'workitem_id_x' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result ; ; SIZE-LABEL: 'workitem_id_x' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result ; %result = call i32 @llvm.amdgcn.workitem.id.x() @@ -20,12 +20,12 @@ define i32 @workitem_id_x() { define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { ; ALL-LABEL: 'kernel_workitem_id_x' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'kernel_workitem_id_x' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -36,11 +36,11 @@ define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { define i32 @workitem_id_y() { ; ALL-LABEL: 'workitem_id_y' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result ; ; SIZE-LABEL: 'workitem_id_y' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() ; SIZE-NE
[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141947
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141943 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/141944 The real implementation is 1 real instruction plus a constant materialize. Call that a 1, it's not a real f64 operation. >From 19ab42a4fdba866aa40da8e2cc24967a72f6f482 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2025 15:20:50 +0200 Subject: [PATCH] AMDGPU: Reduce cost of f64 copysign The real implementation is 1 real instruction plus a constant materialize. Call that a 1, it's not a real f64 operation. --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 12 --- .../Analysis/CostModel/AMDGPU/copysign.ll | 32 +-- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0dbaf7c548f89..c1ccc8f6798a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; - if (SLT == MVT::f64) -return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, switch (ICA.getID()) { case Intrinsic::fma: case Intrinsic::fmuladd: +if (SLT == MVT::f64) { + InstRate = get64BitInstrCost(CostKind); + break; +} + if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16) InstRate = getFullRateInstrCost(); else { @@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { -assert(SLT != MVT::f64); -InstRate = getFullRateInstrCost(); +InstRate = +SLT == MVT::f64 ? 
get64BitInstrCost(CostKind) : getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll index 334bb341a3c3e..5b042a8a04603 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll @@ -245,25 +245,25 @@ define void @copysign_bf16() { define void @copysign_f64() { ; ALL-LABEL: 'copysign_f64' -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, 
<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/141945 None >From dc0d24948146621e6ba5e39c21ea7f4494a14ed1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2025 11:16:22 +0200 Subject: [PATCH] AMDGPU: Move fpenvIEEEMode into TTI --- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 28 ++- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 17 +++ .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 7 + 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 5f6ab24182d5e..b0774385547c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, return maxnum(Src0, Src1); } -enum class KnownIEEEMode { Unknown, On, Off }; - -/// Return KnownIEEEMode::On if we know if the use context can assume -/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume -/// "amdgpu-ieee"="false". -static KnownIEEEMode fpenvIEEEMode(const Instruction &I, - const GCNSubtarget &ST) { - if (!ST.hasIEEEMode()) // Only mode on gfx12 -return KnownIEEEMode::On; - - const Function *F = I.getFunction(); - if (!F) -return KnownIEEEMode::Unknown; - - Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); - if (IEEEAttr.isValid()) -return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; - - return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off - : KnownIEEEMode::On; -} - // Check if a value can be converted to a 16-bit value without losing // precision. // The value is expected to be either a float (IsFloat = true) or an unsigned @@ -1003,7 +981,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // TODO: Also can fold to 2 operands with infinities. 
if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) || isa(Src0)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc0 && ConstSrc0->isSignaling()) @@ -1018,7 +996,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) || isa(Src1)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc1 && ConstSrc1->isSignaling()) @@ -1034,7 +1012,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) || isa(Src2)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: if (ConstSrc2 && ConstSrc2->isSignaling()) { auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index c1ccc8f6798a6..563c46f57dfa5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first}); LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } + +GCNTTIImpl::KnownIEEEMode +GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const { + if (!ST->hasIEEEMode()) // Only mode on gfx12 +return KnownIEEEMode::On; + + const Function *F = I.getFunction(); + if (!F) +return KnownIEEEMode::Unknown; + + Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); + if (IEEEAttr.isValid()) +return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; + + return AMDGPU::isShader(F->getCallingConv()) ? 
KnownIEEEMode::Off + : KnownIEEEMode::On; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index ec298c7e9631a..0fae301abf532 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -281,6 +281,13 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + + enum class KnownIEEEMode { Unknown, On, Off }; + + /// Return KnownIEEEMode::On if we know if the use context can assume + /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume + /// "amdgpu-ieee"="fal
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141945
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141943
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141944
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/141947 None >From 4cd96adeae2b54b0894ca5ae77b4bcb4f1a19a23 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2025 15:29:57 +0200 Subject: [PATCH] AMDGPU: Add baseline cost model tests for special argument intrinsics --- .../AMDGPU/special-argument-intrinsics.ll | 202 ++ 1 file changed, 202 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll new file mode 100644 index 0..ea045e04310be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,UNPACKEDID %s +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=ALL,PACKEDID %s + +; RUN: opt -passes='print' -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SIZE,SIZE-UNPACKEDID %s +; RUN: opt -passes='print' -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=SIZE,SIZE-PACKEDID %s + +define i32 @workitem_id_x() { +; ALL-LABEL: 'workitem_id_x' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_x' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result 
+; + %result = call i32 @llvm.amdgcn.workitem.id.x() + ret i32 %result +} + +define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { +; ALL-LABEL: 'kernel_workitem_id_x' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SIZE-LABEL: 'kernel_workitem_id_x' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %result = call i32 @llvm.amdgcn.workitem.id.x() + store i32 %result, ptr addrspace(1) %ptr + ret void +} + +define i32 @workitem_id_y() { +; ALL-LABEL: 'workitem_id_y' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_y' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result +; + %result = call i32 @llvm.amdgcn.workitem.id.y() + ret i32 %result +} + +define amdgpu_kernel void @kernel_workitem_id_y(ptr addrspace(1) %ptr) { +; ALL-LABEL: 'kernel_workitem_id_y' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret 
void +; +; SIZE-LABEL: 'kernel_workitem_id_y' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %result = call i32 @llvm.amdgcn.workitem.id.y() + store i32 %result, ptr addrspace(1) %ptr + ret void +} + +define i32 @workitem_id_z() { +; ALL-LABEL: 'workitem_id_z' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_z' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workite
[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141946
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#141948** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141948?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#141947** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141947?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141946** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141946?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141945** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141945?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141944** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141944?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141943** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141943?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141904** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141904?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141903** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141903?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/141948
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Unify converting names to upper-camel case (PR #141762)
@@ -172,26 +197,13 @@ class Directive : public BaseRecord { // Clang uses a different format for names of its directives enum. std::string getClangAccSpelling() const { -std::string Name = Def->getValueAsString("name").str(); +StringRef Name = Def->getValueAsString("name"); // Clang calls the 'unknown' value 'invalid'. if (Name == "unknown") return "Invalid"; -// Clang entries all start with a capital letter, so apply that. -Name[0] = std::toupper(Name[0]); -// Additionally, spaces/underscores are handled by capitalizing the next -// letter of the name and removing the space/underscore. -for (unsigned I = 0; I < Name.size(); ++I) { - if (Name[I] == ' ' || Name[I] == '_') { -Name.erase(I, 1); -assert(Name[I] != ' ' && Name[I] != '_' && - "No double spaces/underscores"); -Name[I] = std::toupper(Name[I]); - } -} - -return Name; +return BaseRecord::getUpperCamelName(Name, " _"); mrkajetanp wrote: Is this extra space in the separator string intentional? https://github.com/llvm/llvm-project/pull/141762 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL][RootSignature] Implement `ResourceRange` as an `IntervalMap` (PR #140957)
@@ -198,6 +199,61 @@ class MetadataBuilder { SmallVector GeneratedMetadata; }; +// RangeInfo holds the information to correctly construct a ResourceRange +// and retains this information to be used for displaying a better diagnostic +struct RangeInfo { + const static uint32_t Unbounded = static_cast(-1); + + uint32_t LowerBound; + uint32_t UpperBound; +}; + +class ResourceRange { +public: + using IMap = llvm::IntervalMap>; bogner wrote: We're not entirely consistent about this, but we often use the `FooT` naming convention for type aliases. I think it would be good to do that here (either `MapT` or `IntervalMapT` I think - the "I" isn't the most obvious/useful abbreviation in this case). https://github.com/llvm/llvm-project/pull/140957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL][RootSignature] Implement `ResourceRange` as an `IntervalMap` (PR #140957)
https://github.com/bogner approved this pull request. https://github.com/llvm/llvm-project/pull/140957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL][RootSignature] Implement `ResourceRange` as an `IntervalMap` (PR #140957)
@@ -198,6 +199,61 @@ class MetadataBuilder { SmallVector GeneratedMetadata; }; +// RangeInfo holds the information to correctly construct a ResourceRange +// and retains this information to be used for displaying a better diagnostic +struct RangeInfo { + const static uint32_t Unbounded = static_cast<uint32_t>(-1); bogner wrote: I think `~0U` (if you want to be concise) or `std::numeric_limits<uint32_t>::max()` are very slightly clearer here. https://github.com/llvm/llvm-project/pull/140957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Simplify `createWriteOrMaskedWrite` (NFC) (PR #141567)
https://github.com/rengolin approved this pull request. Nice clean up, thanks! https://github.com/llvm/llvm-project/pull/141567 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)
@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str()); } +namespace { + +// A resource range overlaps with another resource range if they have: +// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler) +// - equivalent resource space +// - overlapping visbility +class ResourceRanges { +public: + // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum + using KeyT = uint64_t; inbelic wrote: I think I originally had it because I was using it in a switch statement in some prototype. But this should be fine to create a specialization as an `llvm::DenseMap` key https://github.com/llvm/llvm-project/pull/140962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix broken XFAILed test for fat pointer null initializers (PR #142015)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142015?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#142015** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142015?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142015?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#142014** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142014?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/142015 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix broken XFAILed test for fat pointer null initializers (PR #142015)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/142015 This was failing on the buffer fat pointer lowering error in the addrspace(7) case, not the expected asm printer breakage. Also remove the attempt at FileChecking the result, since that is dependent on the actual fix and we want the unexpected pass whenever the assert is fixed. >From 53f7285da881898142c6ccbae9a0f0e009ab3608 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 May 2025 21:10:00 +0200 Subject: [PATCH] AMDGPU: Fix broken XFAILed test for fat pointer null initializers This was failing on the buffer fat pointer lowering error in the addrspace(7) case, not the expected asm printer breakage. Also remove the attempt at FileChecking the result, since that is dependent on the actual fix and we want the unexpected pass whenever the assert is fixed. --- llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll index 6556f07c73504..e039225dba3e7 100644 --- a/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll @@ -1,6 +1,6 @@ ; XFAIL: * -; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s -; RUN: llc < %s -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s +; REQUIRES: asserts +; RUN: llc -mtriple=amdgcn-- < %s ; This is a temporary xfail, as the assembly printer is broken when dealing with ; lowerConstant() trying to return a value of size greater than 8 bytes. @@ -9,10 +9,10 @@ ; The exact form of the GCN output depends on how the printer gets fixed. 
; GCN-NEXT: .zeroes 5 ; R600-NEXT: .long 0 -@nullptr7 = global ptr addrspace(7) addrspacecast (ptr null to ptr addrspace(7)) +; @nullptr7 = global ptr addrspace(7) addrspacecast (ptr null to ptr addrspace(7)) ; CHECK-LABEL: nullptr8: ; The exact form of the GCN output depends on how the printer gets fixed. ; GCN-NEXT: .zeroes 4 ; R600-NEXT: .long 0 -@nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(7)) +@nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(8)) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/139963 >From bc246e6523f726f9a9a6c2563a58eec39888aadc Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Wed, 7 May 2025 00:13:27 + Subject: [PATCH 1/4] adding static samplers --- llvm/include/llvm/BinaryFormat/DXContainer.h | 51 + .../BinaryFormat/DXContainerConstants.def | 73 +++ .../llvm/MC/DXContainerRootSignature.h| 1 + llvm/include/llvm/Object/DXContainer.h| 5 ++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 26 +++ llvm/lib/MC/DXContainerRootSignature.cpp | 20 - llvm/lib/Object/DXContainer.cpp | 4 + llvm/lib/ObjectYAML/DXContainerYAML.cpp | 37 ++ 8 files changed, 216 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 98e30b1b6a8af..c2b55c2c820aa 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -18,6 +18,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/TargetParser/Triple.h" +#include #include namespace llvm { @@ -600,6 +601,25 @@ inline bool isValidShaderVisibility(uint32_t V) { return false; } +#define STATIC_SAMPLER_FILTER(Val, Enum) Enum = Val, +enum class StaticSamplerFilter : uint32_t { +#include "DXContainerConstants.def" +}; + +#define TEXTURE_ADDRESS_MODE(Val, Enum) Enum = Val, +enum class TextureAddressMode : uint32_t { +#include "DXContainerConstants.def" +}; + +#define COMPARISON_FUNCTION(Val, Enum) Enum = Val, +enum class ComparisonFunction : uint32_t { +#include "DXContainerConstants.def" +}; + +#define STATIC_BORDER_COLOR(Val, Enum) Enum = Val, +enum class StaticBorderColor : uint32_t { +#include "DXContainerConstants.def" +}; namespace v0 { struct RootSignatureHeader { @@ -667,6 +687,37 @@ struct DescriptorRange { sys::swapByteOrder(OffsetInDescriptorsFromTableStart); } }; + +struct StaticSampler { + StaticSamplerFilter Filter; + TextureAddressMode AddressU; + TextureAddressMode AddressV; + 
TextureAddressMode AddressW; + float MipLODBias; + uint32_t MaxAnisotropy; + ComparisonFunction ComparisonFunc; + StaticBorderColor BorderColor; + float MinLOD; + float MaxLOD; + uint32_t ShaderRegister; + uint32_t RegisterSpace; + ShaderVisibility ShaderVisibility; + void swapBytes() { +sys::swapByteOrder(Filter); +sys::swapByteOrder(AddressU); +sys::swapByteOrder(AddressV); +sys::swapByteOrder(AddressW); +sys::swapByteOrder(MipLODBias); +sys::swapByteOrder(MaxAnisotropy); +sys::swapByteOrder(ComparisonFunc); +sys::swapByteOrder(BorderColor); +sys::swapByteOrder(MinLOD); +sys::swapByteOrder(MaxLOD); +sys::swapByteOrder(ShaderRegister); +sys::swapByteOrder(RegisterSpace); +sys::swapByteOrder(ShaderVisibility); + }; +}; } // namespace v0 namespace v1 { diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 5fe7e7c321a33..f25c464d5f9cc 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -129,6 +129,79 @@ SHADER_VISIBILITY(7, Mesh) #undef SHADER_VISIBILITY #endif // SHADER_VISIBILITY +#ifdef STATIC_SAMPLER_FILTER + +STATIC_SAMPLER_FILTER(0, MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x1, MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x4, MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x5, MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x10, MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x11, MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x14, MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x15, MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x55, ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x80, COMPARISON_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x81, COMPARISON_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x84, COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x85, COMPARISON_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x90, COMPARISON_MIN_LINEAR_MAG_MIP_POINT) 
+STATIC_SAMPLER_FILTER(0x91, COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x94, COMPARISON_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x95, COMPARISON_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0xd5, COMPARISON_ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x100, MINIMUM_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x101, MINIMUM_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x104, MINIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x105, MINIMUM_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x110, MINIMUM_MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x111, MINIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x114, MINIMUM_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x115, MINIMUM_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x155, MINIMUM_A
[llvm-branch-commits] [llvm] AMDGPU: Fix broken XFAILed test for fat pointer null initializers (PR #142015)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was failing on the buffer fat pointer lowering error in the addrspace(7) case, not the expected asm printer breakage. Also remove the attempt at FileChecking the result, since that is dependent on the actual fix and we want the unexpected pass whenever the assert is fixed. --- Full diff: https://github.com/llvm/llvm-project/pull/142015.diff 1 Files Affected: - (modified) llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll (+4-4) ``diff diff --git a/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll index 6556f07c73504..e039225dba3e7 100644 --- a/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr-long-address-spaces.ll @@ -1,6 +1,6 @@ ; XFAIL: * -; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s -; RUN: llc < %s -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s +; REQUIRES: asserts +; RUN: llc -mtriple=amdgcn-- < %s ; This is a temporary xfail, as the assembly printer is broken when dealing with ; lowerConstant() trying to return a value of size greater than 8 bytes. @@ -9,10 +9,10 @@ ; The exact form of the GCN output depends on how the printer gets fixed. ; GCN-NEXT: .zeroes 5 ; R600-NEXT: .long 0 -@nullptr7 = global ptr addrspace(7) addrspacecast (ptr null to ptr addrspace(7)) +; @nullptr7 = global ptr addrspace(7) addrspacecast (ptr null to ptr addrspace(7)) ; CHECK-LABEL: nullptr8: ; The exact form of the GCN output depends on how the printer gets fixed. 
; GCN-NEXT: .zeroes 4 ; R600-NEXT: .long 0 -@nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(7)) +@nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(8)) `` https://github.com/llvm/llvm-project/pull/142015 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix broken XFAILed test for fat pointer null initializers (PR #142015)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/142015 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/139963 >From bc246e6523f726f9a9a6c2563a58eec39888aadc Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Wed, 7 May 2025 00:13:27 + Subject: [PATCH 1/5] adding static samplers --- llvm/include/llvm/BinaryFormat/DXContainer.h | 51 + .../BinaryFormat/DXContainerConstants.def | 73 +++ .../llvm/MC/DXContainerRootSignature.h| 1 + llvm/include/llvm/Object/DXContainer.h| 5 ++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 26 +++ llvm/lib/MC/DXContainerRootSignature.cpp | 20 - llvm/lib/Object/DXContainer.cpp | 4 + llvm/lib/ObjectYAML/DXContainerYAML.cpp | 37 ++ 8 files changed, 216 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 98e30b1b6a8af..c2b55c2c820aa 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -18,6 +18,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/TargetParser/Triple.h" +#include #include namespace llvm { @@ -600,6 +601,25 @@ inline bool isValidShaderVisibility(uint32_t V) { return false; } +#define STATIC_SAMPLER_FILTER(Val, Enum) Enum = Val, +enum class StaticSamplerFilter : uint32_t { +#include "DXContainerConstants.def" +}; + +#define TEXTURE_ADDRESS_MODE(Val, Enum) Enum = Val, +enum class TextureAddressMode : uint32_t { +#include "DXContainerConstants.def" +}; + +#define COMPARISON_FUNCTION(Val, Enum) Enum = Val, +enum class ComparisonFunction : uint32_t { +#include "DXContainerConstants.def" +}; + +#define STATIC_BORDER_COLOR(Val, Enum) Enum = Val, +enum class StaticBorderColor : uint32_t { +#include "DXContainerConstants.def" +}; namespace v0 { struct RootSignatureHeader { @@ -667,6 +687,37 @@ struct DescriptorRange { sys::swapByteOrder(OffsetInDescriptorsFromTableStart); } }; + +struct StaticSampler { + StaticSamplerFilter Filter; + TextureAddressMode AddressU; + TextureAddressMode AddressV; + 
TextureAddressMode AddressW; + float MipLODBias; + uint32_t MaxAnisotropy; + ComparisonFunction ComparisonFunc; + StaticBorderColor BorderColor; + float MinLOD; + float MaxLOD; + uint32_t ShaderRegister; + uint32_t RegisterSpace; + ShaderVisibility ShaderVisibility; + void swapBytes() { +sys::swapByteOrder(Filter); +sys::swapByteOrder(AddressU); +sys::swapByteOrder(AddressV); +sys::swapByteOrder(AddressW); +sys::swapByteOrder(MipLODBias); +sys::swapByteOrder(MaxAnisotropy); +sys::swapByteOrder(ComparisonFunc); +sys::swapByteOrder(BorderColor); +sys::swapByteOrder(MinLOD); +sys::swapByteOrder(MaxLOD); +sys::swapByteOrder(ShaderRegister); +sys::swapByteOrder(RegisterSpace); +sys::swapByteOrder(ShaderVisibility); + }; +}; } // namespace v0 namespace v1 { diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 5fe7e7c321a33..f25c464d5f9cc 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -129,6 +129,79 @@ SHADER_VISIBILITY(7, Mesh) #undef SHADER_VISIBILITY #endif // SHADER_VISIBILITY +#ifdef STATIC_SAMPLER_FILTER + +STATIC_SAMPLER_FILTER(0, MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x1, MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x4, MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x5, MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x10, MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x11, MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x14, MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x15, MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x55, ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x80, COMPARISON_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x81, COMPARISON_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x84, COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x85, COMPARISON_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x90, COMPARISON_MIN_LINEAR_MAG_MIP_POINT) 
+STATIC_SAMPLER_FILTER(0x91, COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x94, COMPARISON_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x95, COMPARISON_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0xd5, COMPARISON_ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x100, MINIMUM_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x101, MINIMUM_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x104, MINIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x105, MINIMUM_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x110, MINIMUM_MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x111, MINIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x114, MINIMUM_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x115, MINIMUM_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x155, MINIMUM_A
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/139963 >From bc246e6523f726f9a9a6c2563a58eec39888aadc Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Wed, 7 May 2025 00:13:27 + Subject: [PATCH 1/6] adding static samplers --- llvm/include/llvm/BinaryFormat/DXContainer.h | 51 + .../BinaryFormat/DXContainerConstants.def | 73 +++ .../llvm/MC/DXContainerRootSignature.h| 1 + llvm/include/llvm/Object/DXContainer.h| 5 ++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 26 +++ llvm/lib/MC/DXContainerRootSignature.cpp | 20 - llvm/lib/Object/DXContainer.cpp | 4 + llvm/lib/ObjectYAML/DXContainerYAML.cpp | 37 ++ 8 files changed, 216 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 98e30b1b6a8af..c2b55c2c820aa 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -18,6 +18,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/TargetParser/Triple.h" +#include #include namespace llvm { @@ -600,6 +601,25 @@ inline bool isValidShaderVisibility(uint32_t V) { return false; } +#define STATIC_SAMPLER_FILTER(Val, Enum) Enum = Val, +enum class StaticSamplerFilter : uint32_t { +#include "DXContainerConstants.def" +}; + +#define TEXTURE_ADDRESS_MODE(Val, Enum) Enum = Val, +enum class TextureAddressMode : uint32_t { +#include "DXContainerConstants.def" +}; + +#define COMPARISON_FUNCTION(Val, Enum) Enum = Val, +enum class ComparisonFunction : uint32_t { +#include "DXContainerConstants.def" +}; + +#define STATIC_BORDER_COLOR(Val, Enum) Enum = Val, +enum class StaticBorderColor : uint32_t { +#include "DXContainerConstants.def" +}; namespace v0 { struct RootSignatureHeader { @@ -667,6 +687,37 @@ struct DescriptorRange { sys::swapByteOrder(OffsetInDescriptorsFromTableStart); } }; + +struct StaticSampler { + StaticSamplerFilter Filter; + TextureAddressMode AddressU; + TextureAddressMode AddressV; + 
TextureAddressMode AddressW; + float MipLODBias; + uint32_t MaxAnisotropy; + ComparisonFunction ComparisonFunc; + StaticBorderColor BorderColor; + float MinLOD; + float MaxLOD; + uint32_t ShaderRegister; + uint32_t RegisterSpace; + ShaderVisibility ShaderVisibility; + void swapBytes() { +sys::swapByteOrder(Filter); +sys::swapByteOrder(AddressU); +sys::swapByteOrder(AddressV); +sys::swapByteOrder(AddressW); +sys::swapByteOrder(MipLODBias); +sys::swapByteOrder(MaxAnisotropy); +sys::swapByteOrder(ComparisonFunc); +sys::swapByteOrder(BorderColor); +sys::swapByteOrder(MinLOD); +sys::swapByteOrder(MaxLOD); +sys::swapByteOrder(ShaderRegister); +sys::swapByteOrder(RegisterSpace); +sys::swapByteOrder(ShaderVisibility); + }; +}; } // namespace v0 namespace v1 { diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 5fe7e7c321a33..f25c464d5f9cc 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -129,6 +129,79 @@ SHADER_VISIBILITY(7, Mesh) #undef SHADER_VISIBILITY #endif // SHADER_VISIBILITY +#ifdef STATIC_SAMPLER_FILTER + +STATIC_SAMPLER_FILTER(0, MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x1, MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x4, MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x5, MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x10, MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x11, MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x14, MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x15, MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x55, ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x80, COMPARISON_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x81, COMPARISON_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x84, COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x85, COMPARISON_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x90, COMPARISON_MIN_LINEAR_MAG_MIP_POINT) 
+STATIC_SAMPLER_FILTER(0x91, COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x94, COMPARISON_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x95, COMPARISON_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0xd5, COMPARISON_ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x100, MINIMUM_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x101, MINIMUM_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x104, MINIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x105, MINIMUM_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x110, MINIMUM_MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x111, MINIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x114, MINIMUM_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x115, MINIMUM_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x155, MINIMUM_A
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran unassigned https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)
@@ -973,6 +1076,8 @@ void SemaHLSL::handleRootSignatureAttr(Decl *D, const ParsedAttr &AL) { if (auto *SignatureDecl = dyn_cast(R.getFoundDecl())) { // Perform validation of constructs here + if (handleRootSignatureDecl(SignatureDecl, AL.getLoc())) +return; inbelic wrote: I would like feedback here. This will actually be invoked for each unique `RootSignatureAttr` and not for each unique `RootSignatureDecl`. So if multiple `RootSignatureAttr`s reference the same decl then it will be run redundantly. Is there a sema "pass" of sorts that will just run on the AST declarations? This would probably be better hooked in there. Otherwise, we could implement a simple set of Decl * to track which ones have already been validated? https://github.com/llvm/llvm-project/pull/140962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)
@@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str()); } +namespace { + +// A resource range overlaps with another resource range if they have: +// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler) +// - equivalent resource space +// - overlapping visbility +class ResourceRanges { +public: + // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum + using KeyT = uint64_t; + + static const unsigned NumVisEnums = inbelic wrote: I guess this could be `size_t`, to best describe that it is an upper bound of a range. Regarding the inconsistencies, they were copied over from DXC. Since they are spread throughout the file, I will create a clean-up PR to audit the uses everywhere in `HLSLRootSignature.h`. https://github.com/llvm/llvm-project/pull/140962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Handle direct loads to LDS in memory model (PR #142018)
https://github.com/kerbowa created https://github.com/llvm/llvm-project/pull/142018 Add additional waitcnt insertion to ensure proper ordering between LDS operations and direct loads from global memory to LDS on pre-GFX10 hardware. Direct LDS loads perform both a global memory load and an LDS store, which can be reordered with respect to other LDS operations without explicit synchronization. This can cause ordering violations even within a single thread. The change conservatively inserts vmcnt(0) waits for all sync scopes when the LDS address space is involved. Future optimizations in SIInsertWaitcnts can relax this to only wait for outstanding direct LDS loads rather than all vmcnt events. This change only affects LDS address space synchronization and preserves existing cross-address space ordering behavior. >From c5c5225accd5dbc32cc62e64ae63bb00f5632a1c Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Thu, 29 May 2025 10:28:16 -0700 Subject: [PATCH] [AMDGPU] Handle direct loads to LDS in memory model Add additional waitcnt insertion to ensure proper ordering between LDS operations and direct loads from global memory to LDS on pre-GFX10 hardware. Direct LDS loads perform both a global memory load and an LDS store, which can be reordered with respect to other LDS operations without explicit synchronization. This can cause ordering violations even within a single thread. The change conservatively inserts vmcnt(0) waits for all sync scopes when the LDS address space is involved. Future optimizations in SIInsertWaitcnts can relax this to only wait for outstanding direct LDS loads rather than all vmcnt events. This change only affects LDS address space synchronization and preserves existing cross-address space ordering behavior. 
--- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 17 .../memory-legalizer-atomic-fence.ll | 80 +++ .../CodeGen/AMDGPU/branch-condition-and.ll| 4 +- .../CodeGen/AMDGPU/indirect-addressing-si.ll | 2 + .../kernel-vgpr-spill-mubuf-with-voffset.ll | 1 + 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 56fec409d11ae..7624bcfe3da0e 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1084,6 +1084,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, bool VMCnt = false; bool LGKMCnt = false; + bool DirectLDSWait = false; if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) != SIAtomicAddrSpace::NONE) { @@ -1104,6 +1105,10 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, } if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) { +// Wait for direct loads to LDS from global memory to ensure that +// LDS operations cannot be reordered with respect to global memory +// operations. +DirectLDSWait = true; switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: @@ -1149,6 +1154,18 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, } } + // Conservatively wait for vmcnt(0) to ensure that LDS operations and direct + // LDS loads from global memory cannot be reordered with respect to each other. + // This waitcnt can be safely optimized to wait for a higher vmcnt based on + // the number of outstanding direct LDS loads. 
+ if (DirectLDSWait) { +unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt( +IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV)); +BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_DIRECT_LDS_LOAD_soft)) +.addImm(WaitCntImmediate); +Changed = true; + } + if (VMCnt || LGKMCnt) { unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt(IV, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 66037615f0ba0..7f197b3580042 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -13,12 +13,14 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; GFX6-LABEL: name: system_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: S_WAITCNT_DIRECT_LDS_LOAD_soft 3952 ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: S_WAITCNT_DIRECT_LDS_LOAD_soft 3952 ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 @@ -62,11 +64,13 @@ entry: define amdgpu_kernel void @system_one_as_release() #0 { ; GFX6-LABEL: name: system_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: S_WAITCNT_DIRECT_LDS_LOAD_soft 3952
[llvm-branch-commits] [llvm] [AMDGPU] Handle direct loads to LDS in memory model (PR #142018)
kerbowa wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/142018?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#142018** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142018?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/142018?utm_source=stack-comment-view-in-graphite) * **#138802** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/138802?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/142018 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Handle direct loads to LDS in memory model (PR #142018)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 7624bcfe3..520112367 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1155,9 +1155,9 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, } // Conservatively wait for vmcnt(0) to ensure that LDS operations and direct - // LDS loads from global memory cannot be reordered with respect to each other. - // This waitcnt can be safely optimized to wait for a higher vmcnt based on - // the number of outstanding direct LDS loads. + // LDS loads from global memory cannot be reordered with respect to each + // other. This waitcnt can be safely optimized to wait for a higher vmcnt + // based on the number of outstanding direct LDS loads. if (DirectLDSWait) { unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt( IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV)); `` https://github.com/llvm/llvm-project/pull/142018 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Serialization: support hashing null template arguments (PR #141957)
https://github.com/zyn0217 approved this pull request. https://github.com/llvm/llvm-project/pull/141957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141948 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141947 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add missing fract test (PR #141985)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/141985 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][EmbedBitcodePass] Prevent modifying the module with ThinLTO (PR #139999)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/139999 >From 5205f4abb2379b11b2a2be075c262dea8d79c889 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 14 May 2025 20:47:26 -0700 Subject: [PATCH] [llvm][EmbedBitcodePass] Prevent modifying the module with ThinLTO Since ThinLTOBitcodeWriterPass handles many things for CFI and WPD, like updating vtable linkage, we need to prevent those changes from persisting in the non-LTO object code we will compile under FatLTO. The only non-invasive way to do that is to clone the module when serializing the module in ThinLTOBitcodeWriterPass. We may be able to avoid cloning in the future with additional infrastructure to restore the IR to its original state. Fixes #139440 --- llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp | 6 +- llvm/test/Transforms/EmbedBitcode/embed-wpd.ll | 7 --- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp index 73f567734a91b..5e8b2a4e3d842 100644 --- a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp +++ b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -33,8 +34,11 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) { std::string Data; raw_string_ostream OS(Data); + // Clone the module with Thin LTO, since ThinLTOBitcodeWriterPass changes + // vtable linkage that would break the non-lto object code for FatLTO. 
if (IsThinLTO) -ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM); +ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr) +.run(*llvm::CloneModule(M), AM); else BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary) .run(M, AM); diff --git a/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll b/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll index f1f7712f54039..54931be42b4eb 100644 --- a/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll +++ b/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll @@ -1,12 +1,13 @@ ; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S | FileCheck %s -; CHECK-NOT: $_ZTV3Foo = comdat any +; CHECK: $_ZTV3Foo = comdat any $_ZTV3Foo = comdat any $_ZTI3Foo = comdat any -; CHECK: @_ZTV3Foo = external hidden unnamed_addr constant { [5 x ptr] }, align 8 -; CHECK: @_ZTI3Foo = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr @_ZTS3Foo, ptr @_ZTISt13runtime_error }, comdat, align 8 +;; ThinLTOBitcodeWriter will remove the vtable for Foo, and make it an external symbol +; CHECK: @_ZTV3Foo = linkonce_odr hidden unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI3Foo, ptr @_ZN3FooD2Ev, ptr @_ZN3FooD0Ev, ptr @_ZNKSt13runtime_error4whatEv] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5 +; CHECK-NOT: @foo = external unnamed_addr constant { [5 x ptr] }, align 8 ; CHECK: @llvm.embedded.object = private constant {{.*}}, section ".llvm.lto", align 1 ; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @llvm.embedded.object], section "llvm.metadata" @_ZTV3Foo = linkonce_odr hidden unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI3Foo, ptr @_ZN3FooD2Ev, ptr @_ZN3FooD0Ev, ptr @_ZNKSt13runtime_error4whatEv] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5 ___ llvm-branch-commits mailing list 
llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logics. (PR #142027)
https://github.com/qinkunbao converted_to_draft https://github.com/llvm/llvm-project/pull/142027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][EmbedBitcodePass] Prevent modifying the module with ThinLTO (PR #139999)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/139999 >From 7373f7c53b287c9fcc2339bfe055d0eb60b974f4 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 14 May 2025 20:47:26 -0700 Subject: [PATCH] [llvm][EmbedBitcodePass] Prevent modifying the module with ThinLTO Since ThinLTOBitcodeWriterPass handles many things for CFI and WPD, like updating vtable linkage, we need to prevent those changes from persisting in the non-LTO object code we will compile under FatLTO. The only non-invasive way to do that is to clone the module when serializing the module in ThinLTOBitcodeWriterPass. We may be able to avoid cloning in the future with additional infrastructure to restore the IR to its original state. Fixes #139440 --- llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp | 6 +- llvm/test/Transforms/EmbedBitcode/embed-wpd.ll | 7 --- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp index 73f567734a91b..5e8b2a4e3d842 100644 --- a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp +++ b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -33,8 +34,11 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) { std::string Data; raw_string_ostream OS(Data); + // Clone the module with Thin LTO, since ThinLTOBitcodeWriterPass changes + // vtable linkage that would break the non-lto object code for FatLTO. 
if (IsThinLTO) -ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM); +ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr) +.run(*llvm::CloneModule(M), AM); else BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary) .run(M, AM); diff --git a/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll b/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll index f1f7712f54039..54931be42b4eb 100644 --- a/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll +++ b/llvm/test/Transforms/EmbedBitcode/embed-wpd.ll @@ -1,12 +1,13 @@ ; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes="embed-bitcode" -S | FileCheck %s -; CHECK-NOT: $_ZTV3Foo = comdat any +; CHECK: $_ZTV3Foo = comdat any $_ZTV3Foo = comdat any $_ZTI3Foo = comdat any -; CHECK: @_ZTV3Foo = external hidden unnamed_addr constant { [5 x ptr] }, align 8 -; CHECK: @_ZTI3Foo = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr @_ZTS3Foo, ptr @_ZTISt13runtime_error }, comdat, align 8 +;; ThinLTOBitcodeWriter will remove the vtable for Foo, and make it an external symbol +; CHECK: @_ZTV3Foo = linkonce_odr hidden unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI3Foo, ptr @_ZN3FooD2Ev, ptr @_ZN3FooD0Ev, ptr @_ZNKSt13runtime_error4whatEv] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5 +; CHECK-NOT: @foo = external unnamed_addr constant { [5 x ptr] }, align 8 ; CHECK: @llvm.embedded.object = private constant {{.*}}, section ".llvm.lto", align 1 ; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @llvm.embedded.object], section "llvm.metadata" @_ZTV3Foo = linkonce_odr hidden unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI3Foo, ptr @_ZN3FooD2Ev, ptr @_ZN3FooD0Ev, ptr @_ZNKSt13runtime_error4whatEv] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5 ___ llvm-branch-commits mailing list 
llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/141766 >From 2ef30aacee4d80c0e4a925aa5ba9416423d10b1b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 27 May 2025 07:55:04 -0500 Subject: [PATCH 1/5] [utils][TableGen] Handle versions on clause/directive spellings In "getDirectiveName(Kind, Version)", return the spelling that corresponds to Version, and in "getDirectiveKindAndVersions(Name)" return the pair {Kind, VersionRange}, where VersionRange contains the minimum and the maximum versions that allow "Name" as a spelling. This applies to clauses as well. In general it applies to classes that have spellings (defined via TableGen class "Spelling"). Given a Kind and a Version, getting the corresponding spelling requires a runtime search (which can fail in the general case). To avoid generating the search function inline, a small additional component of llvm/Frontend was added: LLVMFrontendDirective. The corresponding header file also defines C++ classes "Spelling" and "VersionRange", which are used in TableGen/DirectiveEmitter as well. 
For background information see https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 --- .../llvm/Frontend/Directive/Spelling.h| 39 + llvm/include/llvm/TableGen/DirectiveEmitter.h | 25 +-- llvm/lib/Frontend/CMakeLists.txt | 1 + llvm/lib/Frontend/Directive/CMakeLists.txt| 6 + llvm/lib/Frontend/Directive/Spelling.cpp | 31 llvm/lib/Frontend/OpenACC/CMakeLists.txt | 2 +- llvm/lib/Frontend/OpenMP/CMakeLists.txt | 1 + llvm/test/TableGen/directive1.td | 34 ++-- llvm/test/TableGen/directive2.td | 24 +-- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 146 +++--- 10 files changed, 212 insertions(+), 97 deletions(-) create mode 100644 llvm/include/llvm/Frontend/Directive/Spelling.h create mode 100644 llvm/lib/Frontend/Directive/CMakeLists.txt create mode 100644 llvm/lib/Frontend/Directive/Spelling.cpp diff --git a/llvm/include/llvm/Frontend/Directive/Spelling.h b/llvm/include/llvm/Frontend/Directive/Spelling.h new file mode 100644 index 0..3ba0ae2296535 --- /dev/null +++ b/llvm/include/llvm/Frontend/Directive/Spelling.h @@ -0,0 +1,39 @@ +//===-- Spelling.h C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef LLVM_FRONTEND_DIRECTIVE_SPELLING_H +#define LLVM_FRONTEND_DIRECTIVE_SPELLING_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" + +#include + +namespace llvm::directive { + +struct VersionRange { + static constexpr int MaxValue = std::numeric_limits::max(); + int Min = 1; + int Max = MaxValue; +}; + +inline bool operator<(const VersionRange &A, const VersionRange &B) { + if (A.Min != B.Min) +return A.Min < B.Min; + return A.Max < B.Max; +} + +struct Spelling { + StringRef Name; + VersionRange Versions; +}; + +StringRef FindName(llvm::iterator_range, unsigned Version); + +} // namespace llvm::directive + +#endif // LLVM_FRONTEND_DIRECTIVE_SPELLING_H diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index 1235b7638e761..c7d7460087723 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/Directive/Spelling.h" #include "llvm/Support/MathExtras.h" #include "llvm/TableGen/Record.h" #include @@ -113,29 +114,19 @@ class Versioned { constexpr static int IntWidth = 8 * sizeof(int); }; -// Range of specification versions: [Min, Max] -// Default value: all possible versions. -// This is the same structure as the one emitted into the generated sources. 
-#define STRUCT_VERSION_RANGE \ - struct VersionRange { \ -int Min = 1; \ -int Max = INT_MAX; \ - } - -STRUCT_VERSION_RANGE; - class Spelling : public Versioned { public: - using Value = std::pair; + using Value = llvm::directive::Spelling; Spelling(const Record *Def) : Def(Def) {} StringRef getText() const { return Def->getValueAsString("spelling"); } - VersionRange getVersions() const { -return VersionRange{getMinVersion(Def), getMaxVersion(Def)}; + llvm::directive::VersionRange getVersions() const { +return llvm::directive::VersionRange{getMinVersion(Def), +
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
https://github.com/tblah approved this pull request. Thanks for the update https://github.com/llvm/llvm-project/pull/141766 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Simplify `createWriteOrMaskedWrite` (NFC) (PR #141567)
https://github.com/Max191 approved this pull request. https://github.com/llvm/llvm-project/pull/141567 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Unify converting names to upper-camel case (PR #141762)
@@ -172,26 +197,13 @@ class Directive : public BaseRecord { // Clang uses a different format for names of its directives enum. std::string getClangAccSpelling() const { -std::string Name = Def->getValueAsString("name").str(); +StringRef Name = Def->getValueAsString("name"); // Clang calls the 'unknown' value 'invalid'. if (Name == "unknown") return "Invalid"; -// Clang entries all start with a capital letter, so apply that. -Name[0] = std::toupper(Name[0]); -// Additionally, spaces/underscores are handled by capitalizing the next -// letter of the name and removing the space/underscore. -for (unsigned I = 0; I < Name.size(); ++I) { - if (Name[I] == ' ' || Name[I] == '_') { -Name.erase(I, 1); -assert(Name[I] != ' ' && Name[I] != '_' && - "No double spaces/underscores"); -Name[I] = std::toupper(Name[I]); - } -} - -return Name; +return BaseRecord::getUpperCamelName(Name, " _"); mrkajetanp wrote: Ah I see, all good then :) https://github.com/llvm/llvm-project/pull/141762 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [NoSanitizeList][NFI] Add containsPrefix to remove duplicated logics. (PR #142027)
https://github.com/JustinStitt commented: LGTM https://github.com/llvm/llvm-project/pull/142027 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
@@ -77,6 +77,19 @@ static std::string getIdentifierName(const Record *Rec, StringRef Prefix) { return Prefix.str() + BaseRecord(Rec).getFormattedName(); } +using RecordWithSpelling = std::pair; + +static std::vector +getSpellings(ArrayRef Records) { kparzysz wrote: It actually works on both now: the `generateGetName` and `generateGetKind` functions use it. https://github.com/llvm/llvm-project/pull/141766 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Unify converting names to upper-camel case (PR #141762)
https://github.com/mrkajetanp approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/141762 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
@@ -1157,3 +1157,49 @@ TEST(RootSignature, ParseDescriptorTable) { ASSERT_EQ(Range.OffsetInDescriptorsFromTableStart, -1); } } + +TEST(RootSignature, ParseStaticSamplers) { + { +uint8_t Buffer[] = { +0x44, 0x58, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x90, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x52, 0x54, 0x53, 0x30, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, +0xa4, 0x70, 0x9d, 0x3f, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x85, 0xeb, 0x91, 0x40, 0x66, 0x66, 0x0e, 0x41, +0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00}; +DXContainer C = +llvm::cantFail(DXContainer::create(getMemoryBuffer<133>(Buffer))); + +auto MaybeRS = C.getRootSignature(); +ASSERT_TRUE(MaybeRS.has_value()); +const auto &RS = MaybeRS.value(); +ASSERT_EQ(RS.getVersion(), 2u); +ASSERT_EQ(RS.getNumParameters(), 0u); +ASSERT_EQ(RS.getRootParametersOffset(), 0u); +ASSERT_EQ(RS.getNumStaticSamplers(), 1u); +ASSERT_EQ(RS.getStaticSamplersOffset(), 24u); +ASSERT_EQ(RS.getFlags(), 17u); + +auto Sampler = *RS.samplers().begin(); + +ASSERT_EQ(Sampler.Filter, 10u); +ASSERT_EQ(Sampler.AddressU, 1u); +ASSERT_EQ(Sampler.AddressV, 2u); +ASSERT_EQ(Sampler.AddressW, 5u); +EXPECT_FLOAT_EQ(Sampler.MipLODBias, 1.23); +ASSERT_EQ(Sampler.MaxAnisotropy, 20u); +ASSERT_EQ(Sampler.ComparisonFunc, 4u); +ASSERT_EQ(Sampler.BorderColor, 0u); +EXPECT_FLOAT_EQ(Sampler.MinLOD, 4.56); joaosaffran wrote: `EXPECT_FLOAT_EQ` accounts for rounding errors, and make sure the floats are 
approximately equal instead of exactly equal: two values compare equal when they are within 4 Units in the Last Place (ULPs) of each other. Here is the documentation about it: https://google.github.io/googletest/reference/assertions.html#EXPECT_FLOAT_EQ https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/139963 >From bc246e6523f726f9a9a6c2563a58eec39888aadc Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Wed, 7 May 2025 00:13:27 + Subject: [PATCH 1/3] adding static samplers --- llvm/include/llvm/BinaryFormat/DXContainer.h | 51 + .../BinaryFormat/DXContainerConstants.def | 73 +++ .../llvm/MC/DXContainerRootSignature.h| 1 + llvm/include/llvm/Object/DXContainer.h| 5 ++ .../include/llvm/ObjectYAML/DXContainerYAML.h | 26 +++ llvm/lib/MC/DXContainerRootSignature.cpp | 20 - llvm/lib/Object/DXContainer.cpp | 4 + llvm/lib/ObjectYAML/DXContainerYAML.cpp | 37 ++ 8 files changed, 216 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 98e30b1b6a8af..c2b55c2c820aa 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -18,6 +18,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/TargetParser/Triple.h" +#include #include namespace llvm { @@ -600,6 +601,25 @@ inline bool isValidShaderVisibility(uint32_t V) { return false; } +#define STATIC_SAMPLER_FILTER(Val, Enum) Enum = Val, +enum class StaticSamplerFilter : uint32_t { +#include "DXContainerConstants.def" +}; + +#define TEXTURE_ADDRESS_MODE(Val, Enum) Enum = Val, +enum class TextureAddressMode : uint32_t { +#include "DXContainerConstants.def" +}; + +#define COMPARISON_FUNCTION(Val, Enum) Enum = Val, +enum class ComparisonFunction : uint32_t { +#include "DXContainerConstants.def" +}; + +#define STATIC_BORDER_COLOR(Val, Enum) Enum = Val, +enum class StaticBorderColor : uint32_t { +#include "DXContainerConstants.def" +}; namespace v0 { struct RootSignatureHeader { @@ -667,6 +687,37 @@ struct DescriptorRange { sys::swapByteOrder(OffsetInDescriptorsFromTableStart); } }; + +struct StaticSampler { + StaticSamplerFilter Filter; + TextureAddressMode AddressU; + TextureAddressMode AddressV; + 
TextureAddressMode AddressW; + float MipLODBias; + uint32_t MaxAnisotropy; + ComparisonFunction ComparisonFunc; + StaticBorderColor BorderColor; + float MinLOD; + float MaxLOD; + uint32_t ShaderRegister; + uint32_t RegisterSpace; + ShaderVisibility ShaderVisibility; + void swapBytes() { +sys::swapByteOrder(Filter); +sys::swapByteOrder(AddressU); +sys::swapByteOrder(AddressV); +sys::swapByteOrder(AddressW); +sys::swapByteOrder(MipLODBias); +sys::swapByteOrder(MaxAnisotropy); +sys::swapByteOrder(ComparisonFunc); +sys::swapByteOrder(BorderColor); +sys::swapByteOrder(MinLOD); +sys::swapByteOrder(MaxLOD); +sys::swapByteOrder(ShaderRegister); +sys::swapByteOrder(RegisterSpace); +sys::swapByteOrder(ShaderVisibility); + }; +}; } // namespace v0 namespace v1 { diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 5fe7e7c321a33..f25c464d5f9cc 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -129,6 +129,79 @@ SHADER_VISIBILITY(7, Mesh) #undef SHADER_VISIBILITY #endif // SHADER_VISIBILITY +#ifdef STATIC_SAMPLER_FILTER + +STATIC_SAMPLER_FILTER(0, MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x1, MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x4, MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x5, MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x10, MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x11, MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x14, MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x15, MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x55, ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x80, COMPARISON_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x81, COMPARISON_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x84, COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x85, COMPARISON_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x90, COMPARISON_MIN_LINEAR_MAG_MIP_POINT) 
+STATIC_SAMPLER_FILTER(0x91, COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x94, COMPARISON_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x95, COMPARISON_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0xd5, COMPARISON_ANISOTROPIC) +STATIC_SAMPLER_FILTER(0x100, MINIMUM_MIN_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x101, MINIMUM_MIN_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x104, MINIMUM_MIN_POINT_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x105, MINIMUM_MIN_POINT_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x110, MINIMUM_MIN_LINEAR_MAG_MIP_POINT) +STATIC_SAMPLER_FILTER(0x111, MINIMUM_MIN_LINEAR_MAG_POINT_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x114, MINIMUM_MIN_MAG_LINEAR_MIP_POINT) +STATIC_SAMPLER_FILTER(0x115, MINIMUM_MIN_MAG_MIP_LINEAR) +STATIC_SAMPLER_FILTER(0x155, MINIMUM_A
[llvm-branch-commits] [llvm] [DirectX] Adding support for static samples is yaml2obj/obj2yaml (PR #139963)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/BinaryFormat/DXContainer.h llvm/include/llvm/MC/DXContainerRootSignature.h llvm/include/llvm/Object/DXContainer.h llvm/include/llvm/ObjectYAML/DXContainerYAML.h llvm/lib/MC/DXContainerRootSignature.cpp llvm/lib/Object/DXContainer.cpp llvm/lib/ObjectYAML/DXContainerEmitter.cpp llvm/lib/ObjectYAML/DXContainerYAML.cpp llvm/unittests/Object/DXContainerTest.cpp llvm/unittests/ObjectYAML/DXContainerYAMLTest.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/MC/DXContainerRootSignature.cpp b/llvm/lib/MC/DXContainerRootSignature.cpp index dd1a5416c..6c71823a5 100644 --- a/llvm/lib/MC/DXContainerRootSignature.cpp +++ b/llvm/lib/MC/DXContainerRootSignature.cpp @@ -71,7 +71,7 @@ void RootSignatureDesc::write(raw_ostream &OS) const { BOS.reserveExtraSpace(getSize()); const uint32_t NumParameters = ParametersContainer.size(); - + support::endian::write(BOS, Version, llvm::endianness::little); support::endian::write(BOS, NumParameters, llvm::endianness::little); support::endian::write(BOS, RootParameterOffset, llvm::endianness::little); `` https://github.com/llvm/llvm-project/pull/139963 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/141945.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+3-25) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+17) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+7) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 5f6ab24182d5e..b0774385547c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, return maxnum(Src0, Src1); } -enum class KnownIEEEMode { Unknown, On, Off }; - -/// Return KnownIEEEMode::On if we know if the use context can assume -/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume -/// "amdgpu-ieee"="false". -static KnownIEEEMode fpenvIEEEMode(const Instruction &I, - const GCNSubtarget &ST) { - if (!ST.hasIEEEMode()) // Only mode on gfx12 -return KnownIEEEMode::On; - - const Function *F = I.getFunction(); - if (!F) -return KnownIEEEMode::Unknown; - - Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); - if (IEEEAttr.isValid()) -return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; - - return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off - : KnownIEEEMode::On; -} - // Check if a value can be converted to a 16-bit value without losing // precision. // The value is expected to be either a float (IsFloat = true) or an unsigned @@ -1003,7 +981,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // TODO: Also can fold to 2 operands with infinities. 
if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) || isa(Src0)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc0 && ConstSrc0->isSignaling()) @@ -1018,7 +996,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) || isa(Src1)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc1 && ConstSrc1->isSignaling()) @@ -1034,7 +1012,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) || isa(Src2)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: if (ConstSrc2 && ConstSrc2->isSignaling()) { auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index c1ccc8f6798a6..563c46f57dfa5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first}); LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } + +GCNTTIImpl::KnownIEEEMode +GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const { + if (!ST->hasIEEEMode()) // Only mode on gfx12 +return KnownIEEEMode::On; + + const Function *F = I.getFunction(); + if (!F) +return KnownIEEEMode::Unknown; + + Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); + if (IEEEAttr.isValid()) +return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; + + return AMDGPU::isShader(F->getCallingConv()) ? 
KnownIEEEMode::Off + : KnownIEEEMode::On; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index ec298c7e9631a..0fae301abf532 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -281,6 +281,13 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + + enum class KnownIEEEMode { Unknown, On, Off }; + + /// Return KnownIEEEMode::On if we know if the use context can assume + /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume + /// "amdgpu-ieee"="false". + KnownIEEEMode fpenvIEEEMode(const Instruction &I) const; }; } // end namespace llvm `
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/141945.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+3-25) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+17) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+7) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 5f6ab24182d5e..b0774385547c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, return maxnum(Src0, Src1); } -enum class KnownIEEEMode { Unknown, On, Off }; - -/// Return KnownIEEEMode::On if we know if the use context can assume -/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume -/// "amdgpu-ieee"="false". -static KnownIEEEMode fpenvIEEEMode(const Instruction &I, - const GCNSubtarget &ST) { - if (!ST.hasIEEEMode()) // Only mode on gfx12 -return KnownIEEEMode::On; - - const Function *F = I.getFunction(); - if (!F) -return KnownIEEEMode::Unknown; - - Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); - if (IEEEAttr.isValid()) -return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; - - return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off - : KnownIEEEMode::On; -} - // Check if a value can be converted to a 16-bit value without losing // precision. // The value is expected to be either a float (IsFloat = true) or an unsigned @@ -1003,7 +981,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // TODO: Also can fold to 2 operands with infinities. 
if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) || isa(Src0)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc0 && ConstSrc0->isSignaling()) @@ -1018,7 +996,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) || isa(Src1)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: // TODO: If Src2 is snan, does it need quieting? if (ConstSrc1 && ConstSrc1->isSignaling()) @@ -1034,7 +1012,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) || isa(Src2)) { - switch (fpenvIEEEMode(II, *ST)) { + switch (fpenvIEEEMode(II)) { case KnownIEEEMode::On: if (ConstSrc2 && ConstSrc2->isSignaling()) { auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index c1ccc8f6798a6..563c46f57dfa5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first}); LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } + +GCNTTIImpl::KnownIEEEMode +GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const { + if (!ST->hasIEEEMode()) // Only mode on gfx12 +return KnownIEEEMode::On; + + const Function *F = I.getFunction(); + if (!F) +return KnownIEEEMode::Unknown; + + Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee"); + if (IEEEAttr.isValid()) +return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off; + + return AMDGPU::isShader(F->getCallingConv()) ? 
KnownIEEEMode::Off + : KnownIEEEMode::On; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index ec298c7e9631a..0fae301abf532 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -281,6 +281,13 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + + enum class KnownIEEEMode { Unknown, On, Off }; + + /// Return KnownIEEEMode::On if we know if the use context can assume + /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume + /// "amdgpu-ieee"="false". + KnownIEEEMode fpenvIEEEMode(const Instruction &I) const; }; } // end namespace llvm
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141944 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes We should only divide the number of pieces to fit the packed instructions if we actually have pk instructions. This increases the cost of copysign, but is closer to the current codegen output. It could be much cheaper than it is now. --- Patch is 137.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141943.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+1-1) - (modified) llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll (+12-12) - (modified) llvm/test/Analysis/CostModel/AMDGPU/copysign.ll (+14-14) - (modified) llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll (+244-136) - (modified) llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll (+244-136) - (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll (+7-5) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 86a6e49fce027..0dbaf7c548f89 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -721,7 +721,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || + if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll index e162edbf611e2..7ac4db3119210 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll @@ -22,12 +22,12 @@ define void @canonicalize_f16() { ; ; GFX8-LABEL: 'canonicalize_f16' ; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %f16 = call half @llvm.canonicalize.f16(half undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef) ; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-LABEL: 'canonicalize_f16' @@ -62,12 +62,12 @@ define void @canonicalize_f16() { ; ; GFX8-SIZE-LABEL: 'canonicalize_f16' ; GFX8-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.canonicalize.f16(half undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 48 f
[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes We should only divide the number of pieces to fit the packed instructions if we actually have pk instructions. This increases the cost of copysign, but is closer to the current codegen output. It could be much cheaper than it is now. --- Patch is 137.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141943.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+1-1) - (modified) llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll (+12-12) - (modified) llvm/test/Analysis/CostModel/AMDGPU/copysign.ll (+14-14) - (modified) llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll (+244-136) - (modified) llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll (+244-136) - (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll (+7-5) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 86a6e49fce027..0dbaf7c548f89 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -721,7 +721,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || + if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll index e162edbf611e2..7ac4db3119210 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll @@ -22,12 +22,12 @@ define void @canonicalize_f16() { ; ; GFX8-LABEL: 'canonicalize_f16' ; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %f16 = call half @llvm.canonicalize.f16(half undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) -; GFX8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) +; GFX8-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef) ; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-LABEL: 'canonicalize_f16' @@ -62,12 +62,12 @@ define void @canonicalize_f16() { ; ; GFX8-SIZE-LABEL: 'canonicalize_f16' ; GFX8-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.canonicalize.f16(half undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef) -; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 48 fo
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes The real implementation is 1 real instruction plus a constant materialize. Call that a 1, it's not a real f64 operation. --- Full diff: https://github.com/llvm/llvm-project/pull/141944.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+7-5) - (modified) llvm/test/Analysis/CostModel/AMDGPU/copysign.ll (+16-16) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0dbaf7c548f89..c1ccc8f6798a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; - if (SLT == MVT::f64) -return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, switch (ICA.getID()) { case Intrinsic::fma: case Intrinsic::fmuladd: +if (SLT == MVT::f64) { + InstRate = get64BitInstrCost(CostKind); + break; +} + if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16) InstRate = getFullRateInstrCost(); else { @@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { -assert(SLT != MVT::f64); -InstRate = getFullRateInstrCost(); +InstRate = +SLT == MVT::f64 ? 
get64BitInstrCost(CostKind) : getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll index 334bb341a3c3e..5b042a8a04603 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll @@ -245,25 +245,25 @@ define void @copysign_bf16() { define void @copysign_f64() { ; ALL-LABEL: 'copysign_f64' -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef) -; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, 
<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef) ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: '
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Handle versions on clause/directive spellings (PR #141766)
@@ -77,6 +77,19 @@ static std::string getIdentifierName(const Record *Rec, StringRef Prefix) { return Prefix.str() + BaseRecord(Rec).getFormattedName(); } +using RecordWithSpelling = std::pair; + +static std::vector +getSpellings(ArrayRef Records) { kparzysz wrote: No. I see what you mean. https://github.com/llvm/llvm-project/pull/141766 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/141948.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+22-1) - (modified) llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll (+28-28) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 357f8c5cfcd02..f9dbfb18ab7ee 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { InstructionCost GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const { - if (ICA.getID() == Intrinsic::fabs) + switch (ICA.getID()) { + case Intrinsic::fabs: +// Free source modifier in the common case. +return 0; + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: +// TODO: If hasPackedTID, or if the calling context is not an entry point +// there may be a bit instruction. +return 0; + case Intrinsic::amdgcn_workgroup_id_x: + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::amdgcn_workgroup_id_z: + case Intrinsic::amdgcn_lds_kernel_id: + case Intrinsic::amdgcn_dispatch_ptr: + case Intrinsic::amdgcn_dispatch_id: + case Intrinsic::amdgcn_implicitarg_ptr: + case Intrinsic::amdgcn_queue_ptr: +// Read from an argument register. 
return 0; + default: +break; + } if (!intrinsicHasPackedVectorBenefit(ICA.getID())) return BaseT::getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll index ea045e04310be..00dbcff0a021f 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll @@ -7,11 +7,11 @@ define i32 @workitem_id_x() { ; ALL-LABEL: 'workitem_id_x' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result ; ; SIZE-LABEL: 'workitem_id_x' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result ; %result = call i32 @llvm.amdgcn.workitem.id.x() @@ -20,12 +20,12 @@ define i32 @workitem_id_x() { define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { ; ALL-LABEL: 'kernel_workitem_id_x' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'kernel_workitem_id_x' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -36,11 +36,11 @@ define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { define i32 @workitem_id_y() { ; ALL-LABEL: 'workitem_id_y' -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result ; ; SIZE-LABEL: 'workitem_id_y' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result ; %result = call i32
[llvm-branch-commits] [SPARC][IAS] Add definitions for cryptographic instructions (PR #139451)
koachan wrote: Ping? https://github.com/llvm/llvm-project/pull/139451 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Unify converting names to upper-camel case (PR #141762)
@@ -172,26 +197,13 @@ class Directive : public BaseRecord { // Clang uses a different format for names of its directives enum. std::string getClangAccSpelling() const { -std::string Name = Def->getValueAsString("name").str(); +StringRef Name = Def->getValueAsString("name"); // Clang calls the 'unknown' value 'invalid'. if (Name == "unknown") return "Invalid"; -// Clang entries all start with a capital letter, so apply that. -Name[0] = std::toupper(Name[0]); -// Additionally, spaces/underscores are handled by capitalizing the next -// letter of the name and removing the space/underscore. -for (unsigned I = 0; I < Name.size(); ++I) { - if (Name[I] == ' ' || Name[I] == '_') { -Name.erase(I, 1); -assert(Name[I] != ' ' && Name[I] != '_' && - "No double spaces/underscores"); -Name[I] = std::toupper(Name[I]); - } -} - -return Name; +return BaseRecord::getUpperCamelName(Name, " _"); kparzysz wrote: Yes, the previous code removed both a space and an underscore. https://github.com/llvm/llvm-project/pull/141762 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle other fmin flavors in fract combine (PR #141987)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Since the input is either known not-nan, or we have explicit use code checking if the input is a nan, any of the 3 is valid to match. --- Patch is 42.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141987.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+14-5) - (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+216-273) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 52177a2523bcb..a3f668e6d65ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -327,7 +327,7 @@ class AMDGPUCodeGenPrepareImpl bool visitIntrinsicInst(IntrinsicInst &I); bool visitBitreverseIntrinsicInst(IntrinsicInst &I); - bool visitMinNum(IntrinsicInst &I); + bool visitFMinLike(IntrinsicInst &I); bool visitSqrt(IntrinsicInst &I); bool run(); }; @@ -2197,7 +2197,9 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { case Intrinsic::bitreverse: return visitBitreverseIntrinsicInst(I); case Intrinsic::minnum: -return visitMinNum(I); + case Intrinsic::minimumnum: + case Intrinsic::minimum: +return visitFMinLike(I); case Intrinsic::sqrt: return visitSqrt(I); default: @@ -2216,7 +2218,9 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { } /// Match non-nan fract pattern. -/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0) +/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) +/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) +/// minimum(fsub(x, floor(x)), nextafter(1.0, -1.0)) /// /// If fract is a useful instruction for the subtarget. Does not account for the /// nan handling; the instruction has a nan check on the input value. 
@@ -2224,7 +2228,12 @@ Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) { if (ST.hasFractBug()) return nullptr; - if (I.getIntrinsicID() != Intrinsic::minnum) + Intrinsic::ID IID = I.getIntrinsicID(); + + // The value is only used in contexts where we know the input isn't a nan, so + // any of the fmin variants are fine. + if (IID != Intrinsic::minnum && + IID != Intrinsic::minimumnum && IID != Intrinsic::minimum) return nullptr; Type *Ty = I.getType(); @@ -2270,7 +2279,7 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder, return insertValues(Builder, FractArg->getType(), ResultVals); } -bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) { +bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) { Value *FractArg = matchFractPat(I); if (!FractArg) return false; diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 9d98a8dab0501..4ee48716439bd 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -2996,19 +2996,30 @@ entry: } define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly captures(none) %ip) { -; IR-LABEL: define float @safe_math_fract_f32_minimum( -; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { -; IR-NEXT: [[ENTRY:.*:]] -; IR-NEXT:[[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) -; IR-NEXT:[[SUB:%.*]] = fsub float [[X]], [[FLOOR]] -; IR-NEXT:[[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFE000) -; IR-NEXT:[[UNO:%.*]] = fcmp uno float [[X]], 0.00e+00 -; IR-NEXT:[[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] -; IR-NEXT:[[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) -; IR-NEXT:[[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0 -; IR-NEXT:[[COND6:%.*]] = select i1 [[CMPINF]], float 0.00e+00, float [[COND]] -; IR-NEXT:store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 -; 
IR-NEXT:ret float [[COND6]] +; GFX6-IR-LABEL: define float @safe_math_fract_f32_minimum( +; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; GFX6-IR-NEXT: [[ENTRY:.*:]] +; GFX6-IR-NEXT:[[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; GFX6-IR-NEXT:[[SUB:%.*]] = fsub float [[X]], [[FLOOR]] +; GFX6-IR-NEXT:[[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFE000) +; GFX6-IR-NEXT:[[UNO:%.*]] = fcmp uno float [[X]], 0.00e+00 +; GFX6-IR-NEXT:[[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] +; GFX6-IR-NEXT:[[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; GFX6-IR-NEXT:[[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0 +; GFX6-IR-NEXT:[[COND6:%.*]] = select i1 [[CMPINF]], float 0.00e+00, float [[COND]] +; GFX6-IR-NEXT:
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for fract combine with other fmin types (PR #141986)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 38.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141986.diff 1 Files Affected: - (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+872) ``diff diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 5766bc0ae2898..9d98a8dab0501 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -2995,6 +2995,878 @@ entry: ret <2 x double> %cond6 } +define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly captures(none) %ip) { +; IR-LABEL: define float @safe_math_fract_f32_minimum( +; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; IR-NEXT: [[ENTRY:.*:]] +; IR-NEXT:[[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; IR-NEXT:[[SUB:%.*]] = fsub float [[X]], [[FLOOR]] +; IR-NEXT:[[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFE000) +; IR-NEXT:[[UNO:%.*]] = fcmp uno float [[X]], 0.00e+00 +; IR-NEXT:[[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] +; IR-NEXT:[[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; IR-NEXT:[[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0 +; IR-NEXT:[[COND6:%.*]] = select i1 [[CMPINF]], float 0.00e+00, float [[COND]] +; IR-NEXT:store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; IR-NEXT:ret float [[COND6]] +; +; GFX6-LABEL: safe_math_fract_f32_minimum: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT:v_floor_f32_e32 v3, v0 +; GFX6-NEXT:v_sub_f32_e32 v4, v0, v3 +; GFX6-NEXT:v_min_f32_e32 v5, 0x3f7f, v4 +; GFX6-NEXT:v_mov_b32_e32 v6, 0x7fc0 +; GFX6-NEXT:v_cmp_o_f32_e32 vcc, v4, v4 +; GFX6-NEXT:v_cndmask_b32_e32 v4, v6, v5, vcc +; GFX6-NEXT:v_cmp_u_f32_e32 vcc, v0, v0 +; GFX6-NEXT:s_mov_b32 s8, 0x7f80 +; GFX6-NEXT:s_mov_b32 s6, 0 +; 
GFX6-NEXT:v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX6-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX6-NEXT:s_mov_b32 s7, 0xf000 +; GFX6-NEXT:s_mov_b32 s4, s6 +; GFX6-NEXT:s_mov_b32 s5, s6 +; GFX6-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-NEXT:buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 +; GFX6-NEXT:s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT:s_setpc_b64 s[30:31] +; +; GFX7-LABEL: safe_math_fract_f32_minimum: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:v_floor_f32_e32 v3, v0 +; GFX7-NEXT:v_sub_f32_e32 v4, v0, v3 +; GFX7-NEXT:v_min_f32_e32 v5, 0x3f7f, v4 +; GFX7-NEXT:v_mov_b32_e32 v6, 0x7fc0 +; GFX7-NEXT:v_cmp_o_f32_e32 vcc, v4, v4 +; GFX7-NEXT:v_cndmask_b32_e32 v4, v6, v5, vcc +; GFX7-NEXT:v_cmp_u_f32_e32 vcc, v0, v0 +; GFX7-NEXT:s_mov_b32 s8, 0x7f80 +; GFX7-NEXT:s_mov_b32 s6, 0 +; GFX7-NEXT:v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX7-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT:s_mov_b32 s7, 0xf000 +; GFX7-NEXT:s_mov_b32 s4, s6 +; GFX7-NEXT:s_mov_b32 s5, s6 +; GFX7-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT:buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 +; GFX7-NEXT:s_waitcnt vmcnt(0) +; GFX7-NEXT:s_setpc_b64 s[30:31] +; +; GFX8-LABEL: safe_math_fract_f32_minimum: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:v_floor_f32_e32 v3, v0 +; GFX8-NEXT:v_sub_f32_e32 v4, v0, v3 +; GFX8-NEXT:v_min_f32_e32 v5, 0x3f7f, v4 +; GFX8-NEXT:v_mov_b32_e32 v6, 0x7fc0 +; GFX8-NEXT:v_cmp_o_f32_e32 vcc, v4, v4 +; GFX8-NEXT:v_cndmask_b32_e32 v4, v6, v5, vcc +; GFX8-NEXT:v_cmp_u_f32_e32 vcc, v0, v0 +; GFX8-NEXT:s_mov_b32 s4, 0x7f80 +; GFX8-NEXT:v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX8-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT:global_store_dword v[1:2], v3, off +; GFX8-NEXT:s_waitcnt vmcnt(0) +; GFX8-NEXT:s_setpc_b64 s[30:31] +; +; GFX11-LABEL: safe_math_fract_f32_minimum: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT:s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:v_floor_f32_e32 v3, v0 +; GFX11-NEXT:s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX11-NEXT:v_sub_f32_e32 v4, v0, v3 +; GFX11-NEXT:global_store_b32 v[1:2], v3, off +; GFX11-NEXT:v_min_f32_e32 v5, 0x3f7f, v4 +; GFX11-NEXT:v_cmp_o_f32_e32 vcc_lo, v4, v4 +; GFX11-NEXT:v_cndmask_b32_e32 v4, 0x7fc0, v5, vcc_lo +; GFX11-NEXT:v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-NEXT:s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT:v_cndmask_b32_e32 v4, v4, v0, vcc_lo +; GFX11-NEXT
[llvm-branch-commits] [llvm] AMDGPU: Handle other fmin flavors in fract combine (PR #141987)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141987 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add missing fract test (PR #141985)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141985 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for fract combine with other fmin types (PR #141986)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141986 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add missing fract test (PR #141985)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was missing the case where the fcmp condition and select were inverted. --- Full diff: https://github.com/llvm/llvm-project/pull/141985.diff 1 Files Affected: - (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+111) ``diff diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 4307b79bf2c3b..5766bc0ae2898 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -134,6 +134,117 @@ entry: ret float %cond6 } +define float @safe_math_fract_f32_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) { +; GFX6-IR-LABEL: define float @safe_math_fract_f32_swap( +; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; GFX6-IR-NEXT: [[ENTRY:.*:]] +; GFX6-IR-NEXT:[[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; GFX6-IR-NEXT:[[SUB:%.*]] = fsub float [[X]], [[FLOOR]] +; GFX6-IR-NEXT:[[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFE000) +; GFX6-IR-NEXT:[[UNO:%.*]] = fcmp ord float [[X]], 0.00e+00 +; GFX6-IR-NEXT:[[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]] +; GFX6-IR-NEXT:[[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; GFX6-IR-NEXT:[[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0 +; GFX6-IR-NEXT:[[COND6:%.*]] = select i1 [[CMPINF]], float 0.00e+00, float [[COND]] +; GFX6-IR-NEXT:store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; GFX6-IR-NEXT:ret float [[COND6]] +; +; IR-FRACT-LABEL: define float @safe_math_fract_f32_swap( +; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; IR-FRACT-NEXT: [[ENTRY:.*:]] +; IR-FRACT-NEXT:[[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; IR-FRACT-NEXT:[[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) +; IR-FRACT-NEXT:[[FABS:%.*]] = tail call float 
@llvm.fabs.f32(float [[X]]) +; IR-FRACT-NEXT:[[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0 +; IR-FRACT-NEXT:[[COND6:%.*]] = select i1 [[CMPINF]], float 0.00e+00, float [[COND]] +; IR-FRACT-NEXT:store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; IR-FRACT-NEXT:ret float [[COND6]] +; +; GFX6-LABEL: safe_math_fract_f32_swap: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT:v_floor_f32_e32 v3, v0 +; GFX6-NEXT:v_sub_f32_e32 v4, v0, v3 +; GFX6-NEXT:v_min_f32_e32 v4, 0x3f7f, v4 +; GFX6-NEXT:v_cmp_o_f32_e32 vcc, v0, v0 +; GFX6-NEXT:s_mov_b32 s8, 0x7f80 +; GFX6-NEXT:s_mov_b32 s6, 0 +; GFX6-NEXT:v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX6-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX6-NEXT:s_mov_b32 s7, 0xf000 +; GFX6-NEXT:s_mov_b32 s4, s6 +; GFX6-NEXT:s_mov_b32 s5, s6 +; GFX6-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-NEXT:buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 +; GFX6-NEXT:s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT:s_setpc_b64 s[30:31] +; +; GFX7-LABEL: safe_math_fract_f32_swap: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:s_mov_b32 s8, 0x7f80 +; GFX7-NEXT:s_mov_b32 s6, 0 +; GFX7-NEXT:v_fract_f32_e32 v4, v0 +; GFX7-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT:s_mov_b32 s7, 0xf000 +; GFX7-NEXT:s_mov_b32 s4, s6 +; GFX7-NEXT:s_mov_b32 s5, s6 +; GFX7-NEXT:v_floor_f32_e32 v3, v0 +; GFX7-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT:buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 +; GFX7-NEXT:s_waitcnt vmcnt(0) +; GFX7-NEXT:s_setpc_b64 s[30:31] +; +; GFX8-LABEL: safe_math_fract_f32_swap: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:s_mov_b32 s4, 0x7f80 +; GFX8-NEXT:v_fract_f32_e32 v4, v0 +; GFX8-NEXT:v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT:v_floor_f32_e32 v3, v0 +; GFX8-NEXT:v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT:global_store_dword v[1:2], v3, off +; GFX8-NEXT:s_waitcnt vmcnt(0) +; GFX8-NEXT:s_setpc_b64 
s[30:31] +; +; GFX11-LABEL: safe_math_fract_f32_swap: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:v_fract_f32_e32 v3, v0 +; GFX11-NEXT:v_cmp_neq_f32_e64 vcc_lo, 0x7f80, |v0| +; GFX11-NEXT:v_floor_f32_e32 v4, v0 +; GFX11-NEXT:s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT:v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX11-NEXT:global_store_b32 v[1:2], v4, off +; GFX11-NEXT:s_setpc_b64 s[30:31] +; +; GFX12-LABEL: safe_math_fract_f32_swap: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT:s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:s_wait_expcnt 0x0 +; GFX12-NEXT:s_wait_sa
[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 183.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141946.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+19) - (modified) llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll (+318-264) - (modified) llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll (+318-264) - (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll (+40) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 563c46f57dfa5..357f8c5cfcd02 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -685,6 +685,8 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::fma: case Intrinsic::fmuladd: case Intrinsic::copysign: + case Intrinsic::minimumnum: + case Intrinsic::maximumnum: case Intrinsic::canonicalize: // There's a small benefit to using vector ops in the legalized code. case Intrinsic::round: @@ -742,6 +744,23 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, break; case Intrinsic::copysign: return NElts * getFullRateInstrCost(); + case Intrinsic::minimumnum: + case Intrinsic::maximumnum: { +// Instruction + 2 canonicalizes. For cases that need type promotion, the +// promotion takes the place of the canonicalize. +unsigned NumOps = 3; +if (const IntrinsicInst *II = ICA.getInst()) { + // Directly legal with ieee=0 + // TODO: Not directly legal with strictfp + if (fpenvIEEEMode(*II) == KnownIEEEMode::Off) +NumOps = 1; +} + +unsigned BaseRate = +SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost(); +InstRate = BaseRate * NumOps; +break; + } case Intrinsic::canonicalize: { InstRate = SLT == MVT::f64 ? 
get64BitInstrCost(CostKind) : getFullRateInstrCost(); diff --git a/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll b/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll index 5b158e3d8d674..a81cb63f0c51f 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll @@ -11,75 +11,75 @@ define void @maximumnum_f16() { ; GFX7-LABEL: 'maximumnum_f16' -; GFX7-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.maximumnum.f16(half poison, half poison) -; GFX7-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) -; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> poison, <3 x half> poison) -; GFX7-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) -; GFX7-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) -; GFX7-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; GFX7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16 = call half @llvm.maximumnum.f16(half poison, half poison) +; GFX7-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) +; GFX7-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f16 = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> poison, <3 x half> poison) +; GFX7-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; GFX7-NEXT: Cost Model: Found an estimated 
cost of 24 for instruction: %v8f16 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; GFX7-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) ; GFX7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX8-LABEL: 'maximumnum_f16' -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.maximumnum.f16(half poison, half poison) -; GFX8-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f16 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) -; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximumnum.v3f16(<3 x half> poison, <3 x half> poison) -; GFX8-NEXT: Cost Model: Found an estimated
[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/141947.diff 1 Files Affected: - (added) llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll (+202) ``diff diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll new file mode 100644 index 0..ea045e04310be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,UNPACKEDID %s +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=ALL,PACKEDID %s + +; RUN: opt -passes='print' -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SIZE,SIZE-UNPACKEDID %s +; RUN: opt -passes='print' -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=SIZE,SIZE-PACKEDID %s + +define i32 @workitem_id_x() { +; ALL-LABEL: 'workitem_id_x' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_x' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result +; + %result = call i32 @llvm.amdgcn.workitem.id.x() + ret i32 %result +} + +define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) { +; ALL-LABEL: 'kernel_workitem_id_x' +; ALL-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SIZE-LABEL: 'kernel_workitem_id_x' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.x() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %result = call i32 @llvm.amdgcn.workitem.id.x() + store i32 %result, ptr addrspace(1) %ptr + ret void +} + +define i32 @workitem_id_y() { +; ALL-LABEL: 'workitem_id_y' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_y' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result +; + %result = call i32 @llvm.amdgcn.workitem.id.y() + ret i32 %result +} + +define amdgpu_kernel void @kernel_workitem_id_y(ptr addrspace(1) %ptr) { +; ALL-LABEL: 'kernel_workitem_id_y' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SIZE-LABEL: 'kernel_workitem_id_y' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.y() +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: store i32 %result, ptr addrspace(1) %ptr, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %result = call i32 @llvm.amdgcn.workitem.id.y() + store i32 %result, ptr addrspace(1) %ptr + ret void +} + +define i32 @workitem_id_z() { +; ALL-LABEL: 'workitem_id_z' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.z() +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 %result +; +; SIZE-LABEL: 'workitem_id_z' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %result = call i32 @llvm.amdgcn.workitem.id.z() +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %result +; + %result = call i32 @llvm.amdgcn.workitem.id.z() + ret i32 %result +} + +define amdgpu_kernel void @kernel_workit
[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/141946 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Serialization: support hashing null template arguments (PR #141957)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Matheus Izvekov (mizvekov) Changes When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations with null template arguments, and this works fine, except that when lazily loading serialized templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. Fixes https://github.com/llvm/llvm-project/issues/139019 --- Full diff: https://github.com/llvm/llvm-project/pull/141957.diff 3 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+1) - (modified) clang/lib/Serialization/TemplateArgumentHasher.cpp (+3-1) - (added) clang/test/CodeCompletion/GH139019.cpp (+26) ``diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b84210fddab3..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1118,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. 
(#GH139019) OpenACC Specific Changes diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: -llvm_unreachable("Expected valid TemplateArgument"); +// These can occur in incomplete substitutions performed with code +// completion (see PartialOverloading). +break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: +template +void emit(T *data) +{} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ +provider_t *focus; +void *data; +focus->emit(&data); +} `` https://github.com/llvm/llvm-project/pull/141957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Serialization: support hashing null template arguments (PR #141957)
https://github.com/mizvekov created https://github.com/llvm/llvm-project/pull/141957 When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations with null template arguments, and this works fine, except that when lazily loading serialized templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. Fixes https://github.com/llvm/llvm-project/issues/139019 >From 7759bb57c24390797ee34fa58a5e1234f5aa9369 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Thu, 29 May 2025 00:28:21 -0300 Subject: [PATCH] [clang] Serialization: support hashing null template arguments When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations with null template arguments, and this works fine, except that when lazily loading serialized templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. 
Fixes https://github.com/llvm/llvm-project/issues/139019 --- clang/docs/ReleaseNotes.rst | 1 + .../Serialization/TemplateArgumentHasher.cpp | 4 ++- clang/test/CodeCompletion/GH139019.cpp| 26 +++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeCompletion/GH139019.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b84210fddab3..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1118,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. (#GH139019) OpenACC Specific Changes diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: -llvm_unreachable("Expected valid TemplateArgument"); +// These can occur in incomplete substitutions performed with code +// completion (see PartialOverloading). 
+break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: +template +void emit(T *data) +{} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ +provider_t *focus; +void *data; +focus->emit(&data); +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Backport: [clang] Serialization: support hashing null template arguments (PR #141957)
llvmbot wrote: @llvm/pr-subscribers-clang-modules Author: Matheus Izvekov (mizvekov) Changes When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations with null template arguments, and this works fine, except that when lazily loading serialized templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. Fixes https://github.com/llvm/llvm-project/issues/139019 --- Full diff: https://github.com/llvm/llvm-project/pull/141957.diff 3 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+1) - (modified) clang/lib/Serialization/TemplateArgumentHasher.cpp (+3-1) - (added) clang/test/CodeCompletion/GH139019.cpp (+26) ``diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b84210fddab3..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1118,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. 
(#GH139019) OpenACC Specific Changes diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: -llvm_unreachable("Expected valid TemplateArgument"); +// These can occur in incomplete substitutions performed with code +// completion (see PartialOverloading). +break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: +template +void emit(T *data) +{} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ +provider_t *focus; +void *data; +focus->emit(&data); +} `` https://github.com/llvm/llvm-project/pull/141957 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
arsenm wrote: ### Merge activity * **May 29, 5:28 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/140587). https://github.com/llvm/llvm-project/pull/140587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for #139317 (PR #140607)
arsenm wrote: ### Merge activity * **May 29, 5:28 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/140607). https://github.com/llvm/llvm-project/pull/140607 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][EmbedBitcodePass] Prevent modifying the module with ThinLTO (PR #139999)
ilovepi wrote: ### Merge activity * **May 29, 5:24 PM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/139999). https://github.com/llvm/llvm-project/pull/139999 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] fb79103 - Revert "[clang][Sema] Declare builtins used in #pragma intrinsic (#138205)"
Author: Nick Sarnie Date: 2025-05-29T17:33:04Z New Revision: fb79103e3dca8a2a0f5732aefbc9cf71b967d357 URL: https://github.com/llvm/llvm-project/commit/fb79103e3dca8a2a0f5732aefbc9cf71b967d357 DIFF: https://github.com/llvm/llvm-project/commit/fb79103e3dca8a2a0f5732aefbc9cf71b967d357.diff LOG: Revert "[clang][Sema] Declare builtins used in #pragma intrinsic (#138205)" This reverts commit 95bd9eef42679f3d13e3279204c75372e2c062f0. Added: Modified: clang/lib/Parse/ParsePragma.cpp Removed: clang/test/Sema/Inputs/builtin-system-header.h clang/test/Sema/builtin-pragma-intrinsic.c diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 4e67fd033b9aa..77b61af768993 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -301,13 +301,9 @@ struct PragmaMSRuntimeChecksHandler : public EmptyPragmaHandler { }; struct PragmaMSIntrinsicHandler : public PragmaHandler { - PragmaMSIntrinsicHandler(Sema &Actions) - : PragmaHandler("intrinsic"), Actions(Actions) {} + PragmaMSIntrinsicHandler() : PragmaHandler("intrinsic") {} void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &FirstToken) override; - -private: - Sema &Actions; }; // "\#pragma fenv_access (on)". @@ -521,7 +517,7 @@ void Parser::initializePragmaHandlers() { PP.AddPragmaHandler(MSOptimize.get()); MSRuntimeChecks = std::make_unique(); PP.AddPragmaHandler(MSRuntimeChecks.get()); -MSIntrinsic = std::make_unique(Actions); +MSIntrinsic = std::make_unique(); PP.AddPragmaHandler(MSIntrinsic.get()); MSFenvAccess = std::make_unique(); PP.AddPragmaHandler(MSFenvAccess.get()); @@ -3797,15 +3793,7 @@ void PragmaMSIntrinsicHandler::HandlePragma(Preprocessor &PP, if (!II->getBuiltinID()) PP.Diag(Tok.getLocation(), diag::warn_pragma_intrinsic_builtin) << II << SuggestIntrinH; -// If the builtin hasn't already been declared, declare it now. 
-DeclarationNameInfo NameInfo(II, Tok.getLocation()); -LookupResult Previous(Actions, NameInfo, Sema::LookupOrdinaryName, - Actions.forRedeclarationInCurContext()); -Actions.LookupName(Previous, Actions.getCurScope(), - /*CreateBuiltins*/ false); -if (Previous.empty()) - Actions.LazilyCreateBuiltin(II, II->getBuiltinID(), Actions.getCurScope(), - /*ForRedeclaration*/ true, Tok.getLocation()); + PP.Lex(Tok); if (Tok.isNot(tok::comma)) break; diff --git a/clang/test/Sema/Inputs/builtin-system-header.h b/clang/test/Sema/Inputs/builtin-system-header.h deleted file mode 100644 index 7eeb8d811fcfa..0 --- a/clang/test/Sema/Inputs/builtin-system-header.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifdef USE_PRAGMA_BEFORE -#pragma intrinsic(_InterlockedOr64) -#endif - -#define MACRO(x,y) _InterlockedOr64(x,y); - -#ifdef USE_PRAGMA_AFTER -#pragma intrinsic(_InterlockedOr64) -#endif diff --git a/clang/test/Sema/builtin-pragma-intrinsic.c b/clang/test/Sema/builtin-pragma-intrinsic.c deleted file mode 100644 index 1e8507bfd37df..0 --- a/clang/test/Sema/builtin-pragma-intrinsic.c +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify -triple arm64-windows -isystem %S/Inputs %s -DUSE_PRAGMA_BEFORE -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify -triple arm64-windows -isystem %S/Inputs %s -DUSE_PRAGMA_AFTER -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify -triple arm64-windows -isystem %S/Inputs %s -DUSE_PRAGMA_AFTER_USE -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify -triple arm64-windows -isystem %S/Inputs %s -DUSE_PRAGMA_SAME_FILE -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify -triple arm64-windows -isystem %S/Inputs %s - -#if defined(USE_PRAGMA_BEFORE) || defined(USE_PRAGMA_AFTER) || defined(USE_PRAGMA_SAME_FILE) -// expected-no-diagnostics -#else -// expected-error@+10 {{call to undeclared library function '_InterlockedOr64'}} -// expected-note@+9 {{include the header or explicitly provide a declaration for 
'_InterlockedOr64'}} -#endif -#include - -#ifdef USE_PRAGMA_SAME_FILE -#pragma intrinsic(_InterlockedOr64) -#endif - -void foo() { - MACRO(0,0); -} - -#ifdef USE_PRAGMA_AFTER_USE -#pragma intrinsic(_InterlockedOr64) -#endif ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits