[llvm-branch-commits] [llvm] [DirectX] Validating Root flags are denying shader stage (PR #153287)
@@ -236,21 +272,33 @@ static void validateRootSignature(Module &M,
BoundRegs.findBoundReg(RC, Binding.Space, Binding.LowerBound,
Binding.LowerBound + Binding.Size - 1);
-if (Reg != nullptr) {
- const auto *ParamInfo =
- static_cast(Reg->Cookie);
+if (!Reg) {
+ reportRegNotBound(M, RC, Binding);
+ continue;
+}
- if (RC != ResourceClass::SRV && RC != ResourceClass::UAV)
-continue;
+const auto *ParamInfo =
+static_cast(Reg->Cookie);
+
+const bool IsRootSRVOrUAV =
+RC == ResourceClass::SRV || RC == ResourceClass::UAV;
+const bool IsDescriptorTable =
+ParamInfo->Type == dxbc::RootParameterType::DescriptorTable;
+const bool IsRawOrStructuredBuffer =
+RK != ResourceKind::RawBuffer && RK != ResourceKind::StructuredBuffer;
+if (IsRootSRVOrUAV && !IsDescriptorTable && IsRawOrStructuredBuffer) {
+ reportInvalidHandleTyError(M, RC, Binding);
+ continue;
+}
- if (ParamInfo->Type == dxbc::RootParameterType::DescriptorTable)
-continue;
+HasBindings = true;
+ }
- if (RK != ResourceKind::RawBuffer && RK !=
ResourceKind::StructuredBuffer)
-reportInvalidHandleTyError(M, RC, Binding);
-} else {
- reportRegNotBound(M, RC, Binding);
-}
+ if (HasBindings && MMI.ShaderProfile != Triple::Compute) {
+const dxbc::RootFlags Flags = dxbc::RootFlags(RSD.Flags);
+const dxbc::RootFlags Mask = getEnvironmentDenyFlagMask(MMI.ShaderProfile);
bogner wrote:
Does this do the right thing for Library shaders? What about the various
raytracing profiles (RayGeneration, Intersection, AnyHit, etc)? Is there some
reason we can't get here for those?
https://github.com/llvm/llvm-project/pull/153287
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Remarks] Restructure bitstream remarks to be fully standalone (PR #156715)
https://github.com/tobias-stadler updated https://github.com/llvm/llvm-project/pull/156715 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Remarks] Restructure bitstream remarks to be fully standalone (PR #156715)
https://github.com/tobias-stadler updated https://github.com/llvm/llvm-project/pull/156715 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
https://github.com/mtrofin commented: overall lgtm, some questions https://github.com/llvm/llvm-project/pull/158376 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove unnecessary operand legalization for WMMAs (PR #159370)
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/159370
The operand constraints already express this constraint, and
InstrEmitter will respect them.
>From 257ef69737087efe560c067660a32182ab827e03 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 17 Sep 2025 23:19:39 +0900
Subject: [PATCH] AMDGPU: Remove unnecessary operand legalization for WMMAs
The operand constraints already express this constraint, and
InstrEmitter will respect them.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 15 ---
1 file changed, 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 37c75fe7f7dfd..a737ad98c1d80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6547,21 +6547,6 @@ void
SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
!RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
legalizeOpWithMove(MI, VOP3Idx[2]);
- if (isWMMA(MI)) {
-// scale_src has a register class restricted to low 256 VGPRs, we may need
-// to insert a copy to the restricted VGPR class.
-int ScaleSrc0Idx =
-AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src0);
-if (ScaleSrc0Idx != -1) {
- int ScaleSrc1Idx =
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src1);
- if (!isOperandLegal(MI, ScaleSrc0Idx))
-legalizeOpWithMove(MI, ScaleSrc0Idx);
- if (!isOperandLegal(MI, ScaleSrc1Idx))
-legalizeOpWithMove(MI, ScaleSrc1Idx);
-}
- }
-
// Fix the register class of packed FP32 instructions on gfx12+. See
// SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies when no runtime (PR #157754)
@@ -1,4 +1,5 @@ ; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck %s +; XFAIL: * jdenny-ornl wrote: I removed the xfails and made the tests pass. Let me know whether it's what you had in mind. https://github.com/llvm/llvm-project/pull/157754 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)
https://github.com/jdenny-ornl updated https://github.com/llvm/llvm-project/pull/159163 >From 5a9959313c0aebc1c707d19e30055cb925be7760 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Tue, 16 Sep 2025 16:03:11 -0400 Subject: [PATCH 1/2] [LoopUnroll] Fix block frequencies for epilogue As another step in issue #135812, this patch fixes block frequencies for partial loop unrolling with an epilogue remainder loop. It does not fully handle the case when the epilogue loop itself is unrolled. That will be handled in the next patch. For the guard and latch of each of the unrolled loop and epilogue loop, this patch sets branch weights derived directly from the original loop latch branch weights. The total frequency of the original loop body, summed across all its occurrences in the unrolled loop and epilogue loop, is the same as in the original loop. This patch also sets `llvm.loop.estimated_trip_count` for the epilogue loop instead of relying on the epilogue's latch branch weights to imply it. This patch removes the XFAIL directives that PR #157754 added to the test suite. 
--- .../include/llvm/Transforms/Utils/LoopUtils.h | 32 .../llvm/Transforms/Utils/UnrollLoop.h| 4 +- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 31 ++-- .../Transforms/Utils/LoopUnrollRuntime.cpp| 94 -- llvm/lib/Transforms/Utils/LoopUtils.cpp | 48 ++ .../branch-weights-freq/unroll-epilog.ll | 160 ++ .../runtime-exit-phi-scev-invalidation.ll | 4 +- .../LoopUnroll/runtime-loop-branchweight.ll | 56 +- .../Transforms/LoopUnroll/runtime-loop.ll | 9 +- .../LoopUnroll/unroll-heuristics-pgo.ll | 64 +-- 10 files changed, 448 insertions(+), 54 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index c5dbb2bdd1dd8..71754b8f62a16 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -365,6 +365,38 @@ LLVM_ABI bool setLoopEstimatedTripCount( Loop *L, unsigned EstimatedTripCount, std::optional EstimatedLoopInvocationWeight = std::nullopt); +/// Based on branch weight metadata, return either: +/// - \c std::nullopt if the implementation is unable to handle the loop form +/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - Else, the estimated probability that, at the end of any iteration, the +/// latch of \p L will start another iteration. The result \c P is such that +/// `0 <= P <= 1`, and `1 - P` is the probability of exiting the loop. +std::optional getLoopProbability(Loop *L); + +/// Set branch weight metadata for the latch of \p L to indicate that, at the +/// end of any iteration, its estimated probability of starting another +/// iteration is \p P. Return false if the implementation is unable to handle +/// the loop form of \p L (e.g., \p L must have a latch block that controls the +/// loop exit). Otherwise, return true. 
+bool setLoopProbability(Loop *L, double P); + +/// Based on branch weight metadata, return either: +/// - \c std::nullopt if the implementation cannot extract the probability +/// (e.g., \p B must have exactly two target labels, so it must be a +/// conditional branch). +/// - The probability \c P that control flows from \p B to its first target +/// label such that `1 - P` is the probability of control flowing to its +/// second target label, or vice-versa if \p ForFirstTarget is false. +std::optional getBranchProbability(BranchInst *B, bool ForFirstTarget); + +/// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are +/// the probabilities of control flowing to its first and second target labels, +/// respectively, or vice-versa if \p ForFirstTarget is false. Return false if +/// the implementation cannot set the probability (e.g., \p B must have exactly +/// two target labels, so it must be a conditional branch). Otherwise, return +/// true. +bool setBranchProbability(BranchInst *B, double P, bool ForFirstTarget); + /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially /// true. diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 871c13d972470..571a0af6fd0db 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -97,7 +97,9 @@ LLVM_ABI bool UnrollRuntimeLoopRemainder( LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, -Loop **ResultLoop = nullptr); +Loop **ResultLoop = nullptr, +std::optional OriginalTripCount = std::nullopt,
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -301,12 +380,16 @@ class Vocabulary {
constexpr static unsigned NumCanonicalEntries =
MaxOpcodes + MaxCanonicalTypeIDs + MaxOperandKinds + MaxPredicateKinds;
- // Base offsets for slot layout to simplify index computation
+ // Base offsets for flat index computation
constexpr static unsigned OperandBaseOffset =
MaxOpcodes + MaxCanonicalTypeIDs;
constexpr static unsigned PredicateBaseOffset =
OperandBaseOffset + MaxOperandKinds;
+ /// Functions for predicate index calculations
mtrofin wrote:
can these move to the implementation (looks like they don't have a user outside
that)?
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove unnecessary operand legalization for WMMAs (PR #159370)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/159370 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][LoongArch] Introduce LASX and LSX conversion intrinsics (PR #157819)
https://github.com/heiher updated
https://github.com/llvm/llvm-project/pull/157819
>From 42b3d4745113019d68198fb6b0a05e88762553cb Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 10 Sep 2025 17:11:10 +0800
Subject: [PATCH] [clang][LoongArch] Introduce LASX and LSX conversion
intrinsics
This patch introduces the LASX and LSX conversion intrinsics:
- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
---
.../clang/Basic/BuiltinsLoongArchLASX.def | 19 +++
clang/lib/Basic/Targets/LoongArch.cpp | 1 +
clang/lib/Headers/lasxintrin.h| 113 +
.../CodeGen/LoongArch/lasx/builtin-alias.c| 153 +
clang/test/CodeGen/LoongArch/lasx/builtin.c | 157 ++
clang/test/Preprocessor/init-loongarch.c | 3 +
6 files changed, 446 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index c4ea46a3bc5b5..b234dedad648e 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc",
"lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128, "V32ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128, "V32ScV16ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V16ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V16ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V32ScV32ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V32ScV32ScV16Sc", "nc", "lasx")
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp
b/clang/lib/Basic/Targets/LoongArch.cpp
index 8e29bb745734b..5863af3f3b920 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const
LangOptions &Opts,
Builder.defineMacro("__loongarch_simd_width", "256");
Builder.defineMacro("__loongarch_sx", Twine(1));
Builder.defineMacro("__loongarch_asx", Twine(1));
+Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
} else if (HasFeatureLSX) {
Builder.defineMacro("__loongarch_simd_width", "128");
Builder.defineMacro("__loongarch_sx", Twine(1));
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
index 85020d82829e2..5177b825f46aa 100644
--- a/clang/lib/Headers/lasxintrin.h
+++ b/clang/lib/Headers/lasxintrin.h
@@ -10,6 +10,8 @@
#ifndef _LOONGSON_ASXINTRIN_H
#define _LOONGSON_ASXINTRIN_H 1
+#include
+
#if defined(__loongarch_asx)
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+#if defined(__loongarch_asx_sx_conv)
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__,
+ __artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
+ return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
+}
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__, __artificial
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -144,6 +145,73 @@ struct Embedding {
using InstEmbeddingsMap = DenseMap;
using BBEmbeddingsMap = DenseMap;
+/// Generic storage class for section-based vocabularies.
+/// VocabStorage provides a generic foundation for storing and accessing
+/// embeddings organized into sections.
+class VocabStorage {
+private:
+ /// Section-based storage
+ std::vector> Sections;
+
+ size_t TotalSize = 0;
+ unsigned Dimension = 0;
+
+public:
+ /// Default constructor creates empty storage (invalid state)
+ VocabStorage() : Sections(), TotalSize(0), Dimension(0) {}
+
+ /// Create a VocabStorage with pre-organized section data
+ VocabStorage(std::vector> &&SectionData);
+
+ VocabStorage(VocabStorage &&) = default;
+ VocabStorage &operator=(VocabStorage &&Other);
svkeerthy wrote:
I deleted `operator=`, but I cannot delete the move constructor: it is used in
the tool and tests.
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: MC: Better handle backslash-escaped symbols (PR #159420)
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Nikita Popov (nikic)
Changes
---
Patch is 36.88 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/159420.diff
15 Files Affected:
- (modified) llvm/include/llvm/MC/MCContext.h (+4)
- (modified) llvm/include/llvm/MC/MCParser/MCAsmParser.h (+3)
- (modified) llvm/lib/MC/MCContext.cpp (+28-21)
- (modified) llvm/lib/MC/MCParser/AsmParser.cpp (+24-41)
- (modified) llvm/lib/MC/MCParser/COFFAsmParser.cpp (+22-44)
- (modified) llvm/lib/MC/MCParser/COFFMasmParser.cpp (+9-8)
- (modified) llvm/lib/MC/MCParser/DarwinAsmParser.cpp (+14-30)
- (modified) llvm/lib/MC/MCParser/ELFAsmParser.cpp (+13-21)
- (modified) llvm/lib/MC/MCParser/MCAsmParser.cpp (+9)
- (modified) llvm/lib/MC/MCParser/MCAsmParserExtension.cpp (+2-2)
- (modified) llvm/lib/MC/MCParser/MasmParser.cpp (+13-19)
- (modified) llvm/lib/MC/MCParser/WasmAsmParser.cpp (+6-9)
- (added) llvm/test/CodeGen/X86/symbol-name.ll (+5)
- (modified) llvm/test/MC/ELF/cgprofile.s (+5-5)
- (modified) llvm/test/MC/ELF/symbol-names.s (+3)
``diff
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 5a8ec17dae1cc..22a493a8df013 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -489,6 +489,10 @@ class MCContext {
/// \param Name - The symbol name, which must be unique across all symbols.
LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name);
+ /// Variant of getOrCreateSymbol that handles backslash-escaped symbols.
+ /// For example, parse "a\"b\\" as a"b\.
+ LLVM_ABI MCSymbol *parseSymbol(const Twine &Name);
+
/// Gets a symbol that will be defined to the final stack offset of a local
/// variable after codegen.
///
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index cb9bd5c600d52..e3f44a08db641 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -279,6 +279,9 @@ class LLVM_ABI MCAsmParser {
/// Res to the identifier contents.
virtual bool parseIdentifier(StringRef &Res) = 0;
+ /// Parse identifier and get or create symbol for it.
+ bool parseSymbol(MCSymbol *&Res);
+
/// Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 890184db1d1ef..1625455e38e06 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -217,27 +217,6 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection
&Sec) {
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
StringRef NameRef = Name.toStringRef(NameSV);
- if (NameRef.contains('\\')) {
-NameSV = NameRef;
-size_t S = 0;
-// Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
-// other characters following \\, which we do not implement due to code
-// structure.
-for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
- char C = NameSV[I];
- if (C == '\\' && I + 1 != E) {
-switch (NameSV[I + 1]) {
-case '"':
-case '\\':
- C = NameSV[++I];
- break;
-}
- }
- NameSV[S++] = C;
-}
-NameSV.resize(S);
-NameRef = NameSV;
- }
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
@@ -258,6 +237,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
return Entry.second.Symbol;
}
+MCSymbol *MCContext::parseSymbol(const Twine &Name) {
+ SmallString<128> SV;
+ StringRef NameRef = Name.toStringRef(SV);
+ if (NameRef.contains('\\')) {
+SV = NameRef;
+size_t S = 0;
+// Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+// other characters following \\, which we do not implement due to code
+// structure.
+for (size_t I = 0, E = SV.size(); I != E; ++I) {
+ char C = SV[I];
+ if (C == '\\' && I + 1 != E) {
+switch (SV[I + 1]) {
+case '"':
+case '\\':
+ C = SV[++I];
+ break;
+}
+ }
+ SV[S++] = C;
+}
+SV.resize(S);
+NameRef = SV;
+ }
+
+ return getOrCreateSymbol(NameRef);
+}
+
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
unsigned Idx) {
return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp
b/llvm/lib/MC/MCParser/AsmParser.cpp
index 9fd6c05a846db..929051c4f0c17 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1222,8 +1222,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res,
SMLoc &EndLoc,
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
if (!Sym)
-
[llvm-branch-commits] [llvm] release/21.x: MC: Better handle backslash-escaped symbols (PR #159420)
https://github.com/nikic created
https://github.com/llvm/llvm-project/pull/159420
None
>From 0dca43897ece415a59345b11dd86854d60a3a640 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Fri, 12 Sep 2025 09:11:08 +0200
Subject: [PATCH 1/2] [MC] Add parseSymbol() helper (NFC) (#158106)
This combines parseIdentifier() + getOrCreateSymbol(). This should make
it a bit easier if we want to change the parseIdentifier() API.
(cherry picked from commit 76aba5d415fbf206e0d9443a5822fcd9244fa33f)
---
llvm/include/llvm/MC/MCParser/MCAsmParser.h | 3 +
llvm/lib/MC/MCParser/AsmParser.cpp | 53 ++---
llvm/lib/MC/MCParser/COFFAsmParser.cpp | 66 +++--
llvm/lib/MC/MCParser/COFFMasmParser.cpp | 13 ++--
llvm/lib/MC/MCParser/DarwinAsmParser.cpp| 44 +-
llvm/lib/MC/MCParser/ELFAsmParser.cpp | 32 --
llvm/lib/MC/MCParser/MCAsmParser.cpp| 9 +++
llvm/lib/MC/MCParser/MasmParser.cpp | 19 +++---
llvm/lib/MC/MCParser/WasmAsmParser.cpp | 10 ++--
9 files changed, 96 insertions(+), 153 deletions(-)
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index cb9bd5c600d52..e3f44a08db641 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -279,6 +279,9 @@ class LLVM_ABI MCAsmParser {
/// Res to the identifier contents.
virtual bool parseIdentifier(StringRef &Res) = 0;
+ /// Parse identifier and get or create symbol for it.
+ bool parseSymbol(MCSymbol *&Res);
+
/// Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp
b/llvm/lib/MC/MCParser/AsmParser.cpp
index 9fd6c05a846db..cea781f9ec36e 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3897,20 +3897,15 @@ bool AsmParser::parseDirectiveCVLoc() {
/// ::= .cv_linetable FunctionId, FnStart, FnEnd
bool AsmParser::parseDirectiveCVLinetable() {
int64_t FunctionId;
- StringRef FnStartName, FnEndName;
+ MCSymbol *FnStartSym, *FnEndSym;
SMLoc Loc = getTok().getLoc();
if (parseCVFunctionId(FunctionId, ".cv_linetable") || parseComma() ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnStartName), Loc,
-"expected identifier in directive") ||
+ check(parseSymbol(FnStartSym), Loc, "expected identifier in directive")
||
parseComma() || parseTokenLoc(Loc) ||
- check(parseIdentifier(FnEndName), Loc,
-"expected identifier in directive"))
+ check(parseSymbol(FnEndSym), Loc, "expected identifier in directive"))
return true;
- MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
- MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
-
getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
return false;
}
@@ -3919,7 +3914,7 @@ bool AsmParser::parseDirectiveCVLinetable() {
/// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
bool AsmParser::parseDirectiveCVInlineLinetable() {
int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
- StringRef FnStartName, FnEndName;
+ MCSymbol *FnStartSym, *FnEndSym;
SMLoc Loc = getTok().getLoc();
if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
parseTokenLoc(Loc) ||
@@ -3929,16 +3924,14 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
parseIntToken(SourceLineNum, "expected SourceLineNum") ||
check(SourceLineNum < 0, Loc, "Line number less than zero") ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnStartName), Loc, "expected identifier") ||
+ check(parseSymbol(FnStartSym), Loc, "expected identifier") ||
parseTokenLoc(Loc) ||
- check(parseIdentifier(FnEndName), Loc, "expected identifier"))
+ check(parseSymbol(FnEndSym), Loc, "expected identifier"))
return true;
if (parseEOL())
return true;
- MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
- MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
SourceLineNum, FnStartSym,
FnEndSym);
@@ -3959,16 +3952,14 @@ bool AsmParser::parseDirectiveCVDefRange() {
std::vector> Ranges;
while (getLexer().is(AsmToken::Identifier)) {
Loc = getLexer().getLoc();
-StringRef GapStartName;
-if (parseIdentifier(GapStartName))
+MCSymbol *GapStartSym;
+if (parseSymbol(GapStartSym))
return Error(Loc, "expected identifier in directive");
-MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
Loc = getLexer().getLoc();
-StringRef GapEndName;
-if (parseIdentifier(GapEndN
[llvm-branch-commits] [llvm] release/21.x: MC: Better handle backslash-escaped symbols (PR #159420)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/159420 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lit] Add support for readfile to external shell (PR #159431)
https://github.com/boomanaiden154 created
https://github.com/llvm/llvm-project/pull/159431
This patch adds support for the new lit %{readfile:<filename>}
substitution to the external shell. The implementation currently just
appends some test commands to ensure the file exists and uses a subshell
with cat. This is intended to enable running tests using the
substitution in the external shell before we fully switch over to the
internal shell.
This code is designed to be temporary with us deleting it once
everything has migrated over to the internal shell and we are able to
remove the external shell code paths.
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Clang] Rewrite tests using subshells to set env variables (PR #158446)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158446 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lit] Add support for readfile to external shell (PR #159431)
llvmbot wrote:
@llvm/pr-subscribers-testing-tools
Author: Aiden Grossman (boomanaiden154)
Changes
This patch adds support for the new lit %{readfile:<filename>}
substitution to the external shell. The implementation currently just
appends some test commands to ensure the file exists and uses a subshell
with cat. This is intended to enable running tests using the
substitution in the external shell before we fully switch over to the
internal shell.
This code is designed to be temporary with us deleting it once
everything has migrated over to the internal shell and we are able to
remove the external shell code paths.
---
Full diff: https://github.com/llvm/llvm-project/pull/159431.diff
4 Files Affected:
- (modified) llvm/utils/lit/lit/TestRunner.py (+18)
- (modified) llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg (+13-1)
- (added) llvm/utils/lit/tests/shtest-readfile-external.py (+21)
- (modified) llvm/utils/lit/tests/shtest-readfile.py (+1-1)
``diff
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 045472429b6e4..53eeb2f85b48b 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -2412,6 +2412,20 @@ def runOnce(
status, output, attempts=i + 1, max_allowed_attempts=attempts
)
+def _expandLateSubstitutionsExternal(commandLine):
+filePaths = []
+def _replaceReadFile(match):
+filePath = match.group(1)
+filePaths.append(filePath)
+return "$(cat %s)" % filePath
+
+commandLine = re.sub(r"%{readfile:([^}]*)}", _replaceReadFile, commandLine)
+# Add test commands after the command to check if the file exists as
+# cat inside a subshell will never return a non-zero exit code outside
+# of the subshell.
+for filePath in filePaths:
+commandLine = "%s && test -e %s" % (commandLine, filePath)
+return commandLine
def executeShTest(
test, litConfig, useExternalSh, extra_substitutions=[],
preamble_commands=[]
@@ -2443,4 +2457,8 @@ def executeShTest(
recursion_limit=test.config.recursiveExpansionLimit,
)
+if useExternalSh:
+for index, command in enumerate(script):
+script[index] = _expandLateSubstitutionsExternal(command)
+
return _runShTest(test, litConfig, useExternalSh, script, tmpBase)
diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
index cf453e1ea786f..ee496674fdb62 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
+++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
@@ -1,7 +1,19 @@
+import os
+
import lit.formats
+import lit.util
config.name = "shtest-readfile"
config.suffixes = [".txt"]
-config.test_format = lit.formats.ShTest(execute_external=False)
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
config.test_source_root = None
config.test_exec_root = None
+
+# If we are testing with the external shell, remove the fake-externals from
+# PATH so that we use mkdir in the tests.
+if not use_lit_shell:
+path_parts = config.environment["PATH"].split(os.path.pathsep)
+path_parts = [path_part for path_part in path_parts if "fake-externals"
not in path_part]
+config.environment["PATH"] = os.path.pathsep.join(path_parts)
diff --git a/llvm/utils/lit/tests/shtest-readfile-external.py
b/llvm/utils/lit/tests/shtest-readfile-external.py
new file mode 100644
index 0000000000000..5825ad674ba05
--- /dev/null
+++ b/llvm/utils/lit/tests/shtest-readfile-external.py
@@ -0,0 +1,21 @@
+## Tests the readfile substitution.
+
+# RUN: env LIT_USE_INTERNAL_SHELL=0 not %{lit} -a -v %{inputs}/shtest-readfile
| FileCheck -match-full-lines -DTEMP_PATH=%S/Inputs/shtest-readfile/Output %s
+
+# CHECK: -- Testing: 4 tests{{.*}}
+
+# CHECK-LABEL: FAIL: shtest-readfile :: absolute-paths.txt ({{[^)]*}})
+# CHECK: echo $(cat [[TEMP_PATH]]/absolute-paths.txt.tmp) && test -e
/home/gha/llvm-project/build/utils/lit/tests/Inputs/shtest-readfile/Output/absolute-paths.txt.tmp
{{.*}}
+# CHECK: + echo hello
+
+# CHECK-LABEL: FAIL: shtest-readfile :: file-does-not-exist.txt ({{[^)]*}})
+# CHECK: echo $(cat /file/does/not/exist) && test -e /file/does/not/exist
{{.*}}
+# CHECK: cat: /file/does/not/exist: No such file or directory
+
+# CHECK-LABEL: FAIL: shtest-readfile :: relative-paths.txt ({{[^)]*}})
+# CHECK: echo $(cat rel_path_test_folder/test_file) && test -e
rel_path_test_folder/test_file {{.*}}
+# CHECK: + echo hello
+
+# CHECK-LABEL: FAIL: shtest-readfile :: two-same-line.txt ({{[^)]*}})
+# CHECK: echo $(cat
/home/gha/llvm-project/build/utils/lit/tests/Inputs/shtest-readfile/Output/two-same-line.txt.tmp.1)
$(cat
/home/gha/llvm-project/build/utils/lit/tests/Inputs/shtest-readfile/Output/two-same-line.txt.tmp.2)
&& test -e
/home/gha/llvm-project/build/utils/lit/tests/
[llvm-branch-commits] [clang] [llvm] [lit] Make builtin cat work with stdin (PR #158447)
https://github.com/boomanaiden154 updated
https://github.com/llvm/llvm-project/pull/158447
>From 5bd8d4f925f3b5f82d85ef693861b6b1067d9f38 Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Sat, 13 Sep 2025 22:54:58 +
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6
---
clang/test/Misc/dev-fd-fs.c| 1 -
llvm/utils/lit/lit/builtin_commands/cat.py | 3 +++
llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt | 4
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/clang/test/Misc/dev-fd-fs.c b/clang/test/Misc/dev-fd-fs.c
index ea94d950b0716..b989ab8a439cf 100644
--- a/clang/test/Misc/dev-fd-fs.c
+++ b/clang/test/Misc/dev-fd-fs.c
@@ -1,6 +1,5 @@
// Check that we can operate on files from /dev/fd.
// REQUIRES: dev-fd-fs
-// REQUIRES: shell
// Check reading from named pipes. We cat the input here instead of redirecting
// it to ensure that /dev/fd/0 is a named pipe, not just a redirected file.
diff --git a/llvm/utils/lit/lit/builtin_commands/cat.py
b/llvm/utils/lit/lit/builtin_commands/cat.py
index ddab555662045..2797e0cbb4154 100644
--- a/llvm/utils/lit/lit/builtin_commands/cat.py
+++ b/llvm/utils/lit/lit/builtin_commands/cat.py
@@ -49,6 +49,9 @@ def main(argv):
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+if len(filenames) == 0:
+sys.stdout.write(sys.stdin.read())
+sys.exit(0)
for filename in filenames:
try:
contents = None
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
index 4014b0fca1f24..c5b5d247c2f95 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
@@ -70,3 +70,7 @@
#
NP-CAT-OUTPUT-NEXT:M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
#
NP-CAT-OUTPUT-NEXT:M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
# NP-CAT-OUTPUT-NEXT:M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
+
+## Test that cat will pipe stdin to stdout if no other files are specified.
+# RUN: echo test | cat | FileCheck --check-prefix=CAT-STDIN %s
+# CAT-STDIN: test
>From 572975066e843b76e51020bcf6abc7822d3dfb75 Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Sat, 13 Sep 2025 23:14:52 +
Subject: [PATCH 2/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6
[skip ci]
---
clang/test/ClangScanDeps/pr61006.cppm | 3 ++-
clang/test/ClangScanDeps/resource_directory.c | 9 -
clang/test/Driver/env.c | 5 +++--
clang/test/Driver/program-path-priority.c | 16 +++
clang/test/Modules/relative-resource-dir.m| 6 +++---
llvm/docs/CommandGuide/lit.rst| 1 +
llvm/test/tools/llvm-cgdata/empty.test| 1 +
llvm/utils/lit/lit/TestRunner.py | 20 +++
.../Inputs/shtest-readfile/absolute-paths.txt | 6 ++
.../lit/tests/Inputs/shtest-readfile/lit.cfg | 8
.../Inputs/shtest-readfile/relative-paths.txt | 7 +++
.../Inputs/shtest-readfile/two-same-line.txt | 8
llvm/utils/lit/tests/shtest-readfile.py | 17
13 files changed, 88 insertions(+), 19 deletions(-)
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/absolute-paths.txt
create mode 100644 llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/relative-paths.txt
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/two-same-line.txt
create mode 100644 llvm/utils/lit/tests/shtest-readfile.py
diff --git a/clang/test/ClangScanDeps/pr61006.cppm
b/clang/test/ClangScanDeps/pr61006.cppm
index f75edd38c81ba..f10bc1e673987 100644
--- a/clang/test/ClangScanDeps/pr61006.cppm
+++ b/clang/test/ClangScanDeps/pr61006.cppm
@@ -6,7 +6,8 @@
// RUN: mkdir -p %t
// RUN: split-file %s %t
//
-// RUN: EXPECTED_RESOURCE_DIR=`%clang -print-resource-dir` && \
+// RUN: %clang -print-resource-dir | tr -d '\n' > %t/resource-dir
+// RUN: env EXPECTED_RESOURCE_DIR=%{readfile:%t/resource-dir} && \
// RUN: ln -s %clang++ %t/clang++ && \
// RUN: sed "s|EXPECTED_RESOURCE_DIR|$EXPECTED_RESOURCE_DIR|g; s|DIR|%/t|g"
%t/P1689.json.in > %t/P1689.json && \
// RUN: clang-scan-deps -compilation-database %t/P1689.json -format=p1689 |
FileCheck %t/a.cpp -DPREFIX=%/t && \
diff --git a/clang/test/ClangScanDeps/resource_directory.c
b/clang/test/ClangScanDeps/resource_directory.c
index 55d5d90bbcdea..6183e8aefacfa 100644
--- a/clang/test/ClangScanDeps/resource_directory.c
+++ b/clang/test/ClangScanDeps/resource_directory.c
@@ -12,14 +12,14 @@
//
[llvm-branch-commits] [clang] [llvm] [lit] Make builtin cat work with stdin (PR #158447)
https://github.com/boomanaiden154 updated
https://github.com/llvm/llvm-project/pull/158447
>From 5bd8d4f925f3b5f82d85ef693861b6b1067d9f38 Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Sat, 13 Sep 2025 22:54:58 +
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6
---
clang/test/Misc/dev-fd-fs.c| 1 -
llvm/utils/lit/lit/builtin_commands/cat.py | 3 +++
llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt | 4
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/clang/test/Misc/dev-fd-fs.c b/clang/test/Misc/dev-fd-fs.c
index ea94d950b0716..b989ab8a439cf 100644
--- a/clang/test/Misc/dev-fd-fs.c
+++ b/clang/test/Misc/dev-fd-fs.c
@@ -1,6 +1,5 @@
// Check that we can operate on files from /dev/fd.
// REQUIRES: dev-fd-fs
-// REQUIRES: shell
// Check reading from named pipes. We cat the input here instead of redirecting
// it to ensure that /dev/fd/0 is a named pipe, not just a redirected file.
diff --git a/llvm/utils/lit/lit/builtin_commands/cat.py
b/llvm/utils/lit/lit/builtin_commands/cat.py
index ddab555662045..2797e0cbb4154 100644
--- a/llvm/utils/lit/lit/builtin_commands/cat.py
+++ b/llvm/utils/lit/lit/builtin_commands/cat.py
@@ -49,6 +49,9 @@ def main(argv):
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+if len(filenames) == 0:
+sys.stdout.write(sys.stdin.read())
+sys.exit(0)
for filename in filenames:
try:
contents = None
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
index 4014b0fca1f24..c5b5d247c2f95 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
@@ -70,3 +70,7 @@
#
NP-CAT-OUTPUT-NEXT:M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
#
NP-CAT-OUTPUT-NEXT:M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
# NP-CAT-OUTPUT-NEXT:M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
+
+## Test that cat will pipe stdin to stdout if no other files are specified.
+# RUN: echo test | cat | FileCheck --check-prefix=CAT-STDIN %s
+# CAT-STDIN: test
>From 572975066e843b76e51020bcf6abc7822d3dfb75 Mon Sep 17 00:00:00 2001
From: Aiden Grossman
Date: Sat, 13 Sep 2025 23:14:52 +
Subject: [PATCH 2/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6
[skip ci]
---
clang/test/ClangScanDeps/pr61006.cppm | 3 ++-
clang/test/ClangScanDeps/resource_directory.c | 9 -
clang/test/Driver/env.c | 5 +++--
clang/test/Driver/program-path-priority.c | 16 +++
clang/test/Modules/relative-resource-dir.m| 6 +++---
llvm/docs/CommandGuide/lit.rst| 1 +
llvm/test/tools/llvm-cgdata/empty.test| 1 +
llvm/utils/lit/lit/TestRunner.py | 20 +++
.../Inputs/shtest-readfile/absolute-paths.txt | 6 ++
.../lit/tests/Inputs/shtest-readfile/lit.cfg | 8
.../Inputs/shtest-readfile/relative-paths.txt | 7 +++
.../Inputs/shtest-readfile/two-same-line.txt | 8
llvm/utils/lit/tests/shtest-readfile.py | 17
13 files changed, 88 insertions(+), 19 deletions(-)
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/absolute-paths.txt
create mode 100644 llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/relative-paths.txt
create mode 100644
llvm/utils/lit/tests/Inputs/shtest-readfile/two-same-line.txt
create mode 100644 llvm/utils/lit/tests/shtest-readfile.py
diff --git a/clang/test/ClangScanDeps/pr61006.cppm
b/clang/test/ClangScanDeps/pr61006.cppm
index f75edd38c81ba..f10bc1e673987 100644
--- a/clang/test/ClangScanDeps/pr61006.cppm
+++ b/clang/test/ClangScanDeps/pr61006.cppm
@@ -6,7 +6,8 @@
// RUN: mkdir -p %t
// RUN: split-file %s %t
//
-// RUN: EXPECTED_RESOURCE_DIR=`%clang -print-resource-dir` && \
+// RUN: %clang -print-resource-dir | tr -d '\n' > %t/resource-dir
+// RUN: env EXPECTED_RESOURCE_DIR=%{readfile:%t/resource-dir} && \
// RUN: ln -s %clang++ %t/clang++ && \
// RUN: sed "s|EXPECTED_RESOURCE_DIR|$EXPECTED_RESOURCE_DIR|g; s|DIR|%/t|g"
%t/P1689.json.in > %t/P1689.json && \
// RUN: clang-scan-deps -compilation-database %t/P1689.json -format=p1689 |
FileCheck %t/a.cpp -DPREFIX=%/t && \
diff --git a/clang/test/ClangScanDeps/resource_directory.c
b/clang/test/ClangScanDeps/resource_directory.c
index 55d5d90bbcdea..6183e8aefacfa 100644
--- a/clang/test/ClangScanDeps/resource_directory.c
+++ b/clang/test/ClangScanDeps/resource_directory.c
@@ -12,14 +12,14 @@
//
[llvm-branch-commits] [Clang] Enable lit internal shell by default (PR #158465)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158465 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lit] Add support for deleting symlinks to directories without -r (PR #158464)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158464 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lit] Add support for deleting symlinks to directories without -r (PR #158464)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158464 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Clang] Make rewrite-includes-bom.c work with internal shell (PR #158463)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158463 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Clang] Make rewrite-includes-bom.c work with internal shell (PR #158463)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158463 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lit] Add support for readfile to external shell (PR #159431)
github-actions[bot] wrote: :warning: Python code formatter, darker found issues in your code. :warning: You can test this locally with the following command: ``bash darker --check --diff -r origin/main...HEAD llvm/utils/lit/tests/shtest-readfile-external.py llvm/utils/lit/lit/TestRunner.py llvm/utils/lit/tests/shtest-readfile.py `` :warning: The reproduction instructions above might return results for more than one PR in a stack if you are using a stacked PR workflow. You can limit the results by changing `origin/main` to the base branch/commit you want to compare against. :warning: View the diff from darker here. ``diff --- lit/TestRunner.py 2025-09-17 19:18:54.00 + +++ lit/TestRunner.py 2025-09-17 19:21:02.959668 + @@ -2410,12 +2410,14 @@ return lit.Test.Result( status, output, attempts=i + 1, max_allowed_attempts=attempts ) + def _expandLateSubstitutionsExternal(commandLine): filePaths = [] + def _replaceReadFile(match): filePath = match.group(1) filePaths.append(filePath) return "$(cat %s)" % filePath `` https://github.com/llvm/llvm-project/pull/159431 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][Standalone] test Standalone against install distributions (PR #157944)
https://github.com/makslevental updated
https://github.com/llvm/llvm-project/pull/157944
>From f9dbe48eaa691ca9f8161e9dc3a800bb613d5bc9 Mon Sep 17 00:00:00 2001
From: makslevental
Date: Wed, 10 Sep 2025 12:57:54 -0700
Subject: [PATCH 01/14] [MLIR][Standalone] test Standalone against install
distributions
---
mlir/test/Examples/standalone/lit.local.cfg | 2 ++
.../Examples/standalone/test.toy.install-dir | 16
mlir/test/lit.cfg.py | 3 +++
mlir/test/lit.site.cfg.py.in | 1 +
4 files changed, 22 insertions(+)
create mode 100644 mlir/test/Examples/standalone/test.toy.install-dir
diff --git a/mlir/test/Examples/standalone/lit.local.cfg
b/mlir/test/Examples/standalone/lit.local.cfg
index fe8397c6b9a10..bc9928decf527 100644
--- a/mlir/test/Examples/standalone/lit.local.cfg
+++ b/mlir/test/Examples/standalone/lit.local.cfg
@@ -10,3 +10,5 @@ config.substitutions.append(("%host_cc", config.host_cc))
config.substitutions.append(("%enable_libcxx", config.enable_libcxx))
config.substitutions.append(("%mlir_cmake_dir", config.mlir_cmake_dir))
config.substitutions.append(("%llvm_use_linker", config.llvm_use_linker))
+config.substitutions.append(("%llvm_obj_root", config.llvm_obj_root))
+config.substitutions.append(("%host_cmake_install_prefix",
config.host_cmake_install_prefix))
diff --git a/mlir/test/Examples/standalone/test.toy.install-dir
b/mlir/test/Examples/standalone/test.toy.install-dir
new file mode 100644
index 0..5c33a70491ae1
--- /dev/null
+++ b/mlir/test/Examples/standalone/test.toy.install-dir
@@ -0,0 +1,16 @@
+# REQUIRES: github-actions
+# RUN: "%cmake_exe" --build %llvm_obj_root --target install
+# RUN: "%cmake_exe" "%mlir_src_root/examples/standalone" -G "%cmake_generator"
\
+# RUN: -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc \
+# RUN: -DLLVM_ENABLE_LIBCXX=%enable_libcxx
-DMLIR_DIR=%host_cmake_install_prefix \
+# RUN: -DLLVM_USE_LINKER=%llvm_use_linker \
+# RUN: -DPython3_EXECUTABLE=%python \
+# RUN: -DPython_EXECUTABLE=%python
+# RUN: "%cmake_exe" --build . --target check-standalone | tee %t
+# RUN: FileCheck --input-file=%t %s
+
+# Note: The number of checked tests is not important. The command will fail
+# if any fail.
+# CHECK: Passed
+# CHECK-NOT: Failed
+# UNSUPPORTED: target={{.*(windows|android).*}}
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index f99c24d6e299a..08c7947c1e9a6 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -383,3 +383,6 @@ def have_host_jit_feature_support(feature_name):
if sys.version_info >= (3, 11):
config.available_features.add("python-ge-311")
+
+if "GITHUB_ACTIONS" in os.environ:
+config.available_features.add("github-actions")
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 8a742a227847b..7e22ebf23c773 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -18,6 +18,7 @@ config.host_cxx = "@HOST_CXX@"
config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@"
config.host_cmake = "@CMAKE_COMMAND@"
config.host_cmake_generator = "@CMAKE_GENERATOR@"
+config.host_cmake_install_prefix = "@CMAKE_INSTALL_PREFIX@"
config.llvm_use_linker = "@LLVM_USE_LINKER@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.host_arch = "@HOST_ARCH@"
>From fc56d49c9481aed16b79f391368f4c3824a90695 Mon Sep 17 00:00:00 2001
From: Maksim Levental
Date: Wed, 10 Sep 2025 13:23:07 -0700
Subject: [PATCH 02/14] Update lit.site.cfg.py.in
---
.../standalone/{test.toy.install-dir => test.install-dir.toy}| 0
mlir/test/lit.site.cfg.py.in | 1 +
2 files changed, 1 insertion(+)
rename mlir/test/Examples/standalone/{test.toy.install-dir =>
test.install-dir.toy} (100%)
diff --git a/mlir/test/Examples/standalone/test.toy.install-dir
b/mlir/test/Examples/standalone/test.install-dir.toy
similarity index 100%
rename from mlir/test/Examples/standalone/test.toy.install-dir
rename to mlir/test/Examples/standalone/test.install-dir.toy
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 7e22ebf23c773..eadfd047d15f7 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -3,6 +3,7 @@
import sys
config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
>From 67ce5b688657d38b6792b05712d1de6f56e726fe Mon Sep 17 00:00:00 2001
From: makslevental
Date: Wed, 10 Sep 2025 15:54:54 -0700
Subject: [PATCH 03/14] add test.install-distribution-dir.toy
---
mlir/test/Examples/standalone/lit.local.cfg | 1 +
.../Examples/standalone/test.install-dir.toy| 4 ++--
.../test.install-distribution-dir.toy | 17 +
3 files changed, 20 insertions(+), 2 deletions(-)
create mode 100644
mlir/test/Examples/stand
[llvm-branch-commits] [llvm] [llvm][mustache] Add support for Triple Mustache (PR #159183)
https://github.com/evelez7 approved this pull request. https://github.com/llvm/llvm-project/pull/159183 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/21.x: [lldb][Instrumentation] Set selected frame to outside sanitizer libraries (PR #157568)
https://github.com/Michael137 edited https://github.com/llvm/llvm-project/pull/157568 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Clang] Rewrite tests using subshells to set env variables (PR #158446)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/158446 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/21.x: [RISCV] Reduce RISCV code generation build time (PR #158164)
Steelskin wrote: > This just needs to be updated in CMakeLists.txt Thanks, I think I updated the right thing. https://github.com/llvm/llvm-project/pull/158164 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][Standalone] test Standalone against install distributions (PR #157944)
https://github.com/makslevental updated
https://github.com/llvm/llvm-project/pull/157944
>From f9dbe48eaa691ca9f8161e9dc3a800bb613d5bc9 Mon Sep 17 00:00:00 2001
From: makslevental
Date: Wed, 10 Sep 2025 12:57:54 -0700
Subject: [PATCH 01/13] [MLIR][Standalone] test Standalone against install
distributions
---
mlir/test/Examples/standalone/lit.local.cfg | 2 ++
.../Examples/standalone/test.toy.install-dir | 16
mlir/test/lit.cfg.py | 3 +++
mlir/test/lit.site.cfg.py.in | 1 +
4 files changed, 22 insertions(+)
create mode 100644 mlir/test/Examples/standalone/test.toy.install-dir
diff --git a/mlir/test/Examples/standalone/lit.local.cfg
b/mlir/test/Examples/standalone/lit.local.cfg
index fe8397c6b9a10..bc9928decf527 100644
--- a/mlir/test/Examples/standalone/lit.local.cfg
+++ b/mlir/test/Examples/standalone/lit.local.cfg
@@ -10,3 +10,5 @@ config.substitutions.append(("%host_cc", config.host_cc))
config.substitutions.append(("%enable_libcxx", config.enable_libcxx))
config.substitutions.append(("%mlir_cmake_dir", config.mlir_cmake_dir))
config.substitutions.append(("%llvm_use_linker", config.llvm_use_linker))
+config.substitutions.append(("%llvm_obj_root", config.llvm_obj_root))
+config.substitutions.append(("%host_cmake_install_prefix",
config.host_cmake_install_prefix))
diff --git a/mlir/test/Examples/standalone/test.toy.install-dir
b/mlir/test/Examples/standalone/test.toy.install-dir
new file mode 100644
index 0..5c33a70491ae1
--- /dev/null
+++ b/mlir/test/Examples/standalone/test.toy.install-dir
@@ -0,0 +1,16 @@
+# REQUIRES: github-actions
+# RUN: "%cmake_exe" --build %llvm_obj_root --target install
+# RUN: "%cmake_exe" "%mlir_src_root/examples/standalone" -G "%cmake_generator"
\
+# RUN: -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc \
+# RUN: -DLLVM_ENABLE_LIBCXX=%enable_libcxx
-DMLIR_DIR=%host_cmake_install_prefix \
+# RUN: -DLLVM_USE_LINKER=%llvm_use_linker \
+# RUN: -DPython3_EXECUTABLE=%python \
+# RUN: -DPython_EXECUTABLE=%python
+# RUN: "%cmake_exe" --build . --target check-standalone | tee %t
+# RUN: FileCheck --input-file=%t %s
+
+# Note: The number of checked tests is not important. The command will fail
+# if any fail.
+# CHECK: Passed
+# CHECK-NOT: Failed
+# UNSUPPORTED: target={{.*(windows|android).*}}
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index f99c24d6e299a..08c7947c1e9a6 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -383,3 +383,6 @@ def have_host_jit_feature_support(feature_name):
if sys.version_info >= (3, 11):
config.available_features.add("python-ge-311")
+
+if "GITHUB_ACTIONS" in os.environ:
+config.available_features.add("github-actions")
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 8a742a227847b..7e22ebf23c773 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -18,6 +18,7 @@ config.host_cxx = "@HOST_CXX@"
config.enable_libcxx = "@LLVM_ENABLE_LIBCXX@"
config.host_cmake = "@CMAKE_COMMAND@"
config.host_cmake_generator = "@CMAKE_GENERATOR@"
+config.host_cmake_install_prefix = "@CMAKE_INSTALL_PREFIX@"
config.llvm_use_linker = "@LLVM_USE_LINKER@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.host_arch = "@HOST_ARCH@"
>From fc56d49c9481aed16b79f391368f4c3824a90695 Mon Sep 17 00:00:00 2001
From: Maksim Levental
Date: Wed, 10 Sep 2025 13:23:07 -0700
Subject: [PATCH 02/13] Update lit.site.cfg.py.in
---
.../standalone/{test.toy.install-dir => test.install-dir.toy}| 0
mlir/test/lit.site.cfg.py.in | 1 +
2 files changed, 1 insertion(+)
rename mlir/test/Examples/standalone/{test.toy.install-dir =>
test.install-dir.toy} (100%)
diff --git a/mlir/test/Examples/standalone/test.toy.install-dir
b/mlir/test/Examples/standalone/test.install-dir.toy
similarity index 100%
rename from mlir/test/Examples/standalone/test.toy.install-dir
rename to mlir/test/Examples/standalone/test.install-dir.toy
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 7e22ebf23c773..eadfd047d15f7 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -3,6 +3,7 @@
import sys
config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
>From 67ce5b688657d38b6792b05712d1de6f56e726fe Mon Sep 17 00:00:00 2001
From: makslevental
Date: Wed, 10 Sep 2025 15:54:54 -0700
Subject: [PATCH 03/13] add test.install-distribution-dir.toy
---
mlir/test/Examples/standalone/lit.local.cfg | 1 +
.../Examples/standalone/test.install-dir.toy| 4 ++--
.../test.install-distribution-dir.toy | 17 +
3 files changed, 20 insertions(+), 2 deletions(-)
create mode 100644
mlir/test/Examples/stand
[llvm-branch-commits] [llvm] AMDGPU: Remove unnecessary operand legalization for WMMAs (PR #159370)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/159370?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#159370** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/159370?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/159370?utm_source=stack-comment-view-in-graphite) * **#159369** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/159369?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/159370 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
https://github.com/mtrofin edited https://github.com/llvm/llvm-project/pull/158376 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -261,55 +262,106 @@ void FlowAwareEmbedder::computeEmbeddings(const
BasicBlock &BB) const {
BBVecMap[&BB] = BBVector;
}
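+//===----------------------------------------------------------------------===//
+// VocabStorage
+//===----------------------------------------------------------------------===//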
+
+VocabStorage::VocabStorage(std::vector> &&SectionData)
+: Sections(std::move(SectionData)), TotalSize([&] {
+assert(!Sections.empty() && "Vocabulary has no sections");
+assert(!Sections[0].empty() && "First section of vocabulary is empty");
+// Compute total size across all sections
+size_t Size = 0;
+for (const auto &Section : Sections)
+ Size += Section.size();
+return Size;
+ }()),
+ Dimension([&] {
+// Get dimension from the first embedding in the first section - all
+// embeddings must have the same dimension
+assert(!Sections.empty() && "Vocabulary has no sections");
+assert(!Sections[0].empty() && "First section of vocabulary is empty");
+return static_cast(Sections[0][0].size());
+ }()) {}
+
+const Embedding &VocabStorage::const_iterator::operator*() const {
+ assert(SectionId < Storage->Sections.size() && "Invalid section ID");
+ assert(LocalIndex < Storage->Sections[SectionId].size() &&
+ "Local index out of range");
+ return Storage->Sections[SectionId][LocalIndex];
+}
+
+VocabStorage::const_iterator &VocabStorage::const_iterator::operator++() {
+ ++LocalIndex;
+ // Check if we need to move to the next section
+ while (SectionId < Storage->getNumSections() &&
+ LocalIndex >= Storage->Sections[SectionId].size()) {
+LocalIndex = 0;
mtrofin wrote:
This is tricky to read: LocalIndex is reset on every iteration of the while
loop. Can you rewrite it? Is it actually a while, or an if? I assume all
Sections have non-zero size - or can a section be empty?
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -144,6 +145,73 @@ struct Embedding {
using InstEmbeddingsMap = DenseMap;
using BBEmbeddingsMap = DenseMap;
+/// Generic storage class for section-based vocabularies.
+/// VocabStorage provides a generic foundation for storing and accessing
+/// embeddings organized into sections.
+class VocabStorage {
+private:
+ /// Section-based storage
+ std::vector> Sections;
+
+ const size_t TotalSize = 0;
mtrofin wrote:
You don't need to initialize at the declaration if it's const; in fact, some
compilers will complain. It will be a compile-time error not to initialize it.
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -144,6 +145,73 @@ struct Embedding {
using InstEmbeddingsMap = DenseMap;
using BBEmbeddingsMap = DenseMap;
+/// Generic storage class for section-based vocabularies.
+/// VocabStorage provides a generic foundation for storing and accessing
+/// embeddings organized into sections.
+class VocabStorage {
+private:
+ /// Section-based storage
+ std::vector> Sections;
+
+ size_t TotalSize = 0;
+ unsigned Dimension = 0;
+
+public:
+ /// Default constructor creates empty storage (invalid state)
+ VocabStorage() : Sections(), TotalSize(0), Dimension(0) {}
+
+ /// Create a VocabStorage with pre-organized section data
+ VocabStorage(std::vector> &&SectionData);
+
+ VocabStorage(VocabStorage &&) = default;
+ VocabStorage &operator=(VocabStorage &&Other);
+
+ VocabStorage(const VocabStorage &) = delete;
+ VocabStorage &operator=(const VocabStorage &) = delete;
+
+ /// Get total number of entries across all sections
+ size_t size() const { return TotalSize; }
+
+ /// Get number of sections
+ unsigned getNumSections() const {
+return static_cast(Sections.size());
+ }
+
+ /// Section-based access: Storage[sectionId][localIndex]
+ const std::vector &operator[](unsigned SectionId) const {
+assert(SectionId < Sections.size() && "Invalid section ID");
+return Sections[SectionId];
+ }
+
+ /// Get vocabulary dimension
+ unsigned getDimension() const { return Dimension; }
+
+ /// Check if vocabulary is valid (has data)
+ bool isValid() const { return TotalSize > 0; }
+
+ /// Iterator support for section-based access
mtrofin wrote:
oh, it's an existing iteration pattern (i.e. no source change) - right?
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] 30f88b9 - Revert "[clang-tidy] support query based custom check (#131804)"
Author: Ingo Müller
Date: 2025-09-17T17:12:29+02:00
New Revision: 30f88b9a24f17f079b708431407dd346a7decf07
URL:
https://github.com/llvm/llvm-project/commit/30f88b9a24f17f079b708431407dd346a7decf07
DIFF:
https://github.com/llvm/llvm-project/commit/30f88b9a24f17f079b708431407dd346a7decf07.diff
LOG: Revert "[clang-tidy] support query based custom check (#131804)"
This reverts commit d05b7f1bb3e16ce37c1d17cfb170440e09244ce1.
Added:
Modified:
clang-tools-extra/CMakeLists.txt
clang-tools-extra/clang-tidy/CMakeLists.txt
clang-tools-extra/clang-tidy/ClangTidy.cpp
clang-tools-extra/clang-tidy/ClangTidy.h
clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
clang-tools-extra/clang-tidy/ClangTidyForceLinker.h
clang-tools-extra/clang-tidy/ClangTidyModule.h
clang-tools-extra/clang-tidy/ClangTidyOptions.cpp
clang-tools-extra/clang-tidy/ClangTidyOptions.h
clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake
clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp
clang-tools-extra/docs/ReleaseNotes.rst
clang-tools-extra/docs/clang-tidy/Contributing.rst
clang-tools-extra/docs/clang-tidy/index.rst
clang-tools-extra/test/clang-tidy/check_clang_tidy.py
clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
clang-tools-extra/unittests/clang-tidy/ClangTidyTest.h
Removed:
clang-tools-extra/clang-tidy/custom/CMakeLists.txt
clang-tools-extra/clang-tidy/custom/CustomTidyModule.cpp
clang-tools-extra/clang-tidy/custom/QueryCheck.cpp
clang-tools-extra/clang-tidy/custom/QueryCheck.h
clang-tools-extra/docs/clang-tidy/QueryBasedCustomChecks.rst
clang-tools-extra/test/clang-tidy/checkers/custom/Inputs/clang-tidy.yml
clang-tools-extra/test/clang-tidy/checkers/custom/Inputs/incorrect-clang-tidy.yml
clang-tools-extra/test/clang-tidy/checkers/custom/query-incorrect-query.cpp
clang-tools-extra/test/clang-tidy/checkers/custom/query-partially-active-check.cpp
clang-tools-extra/test/clang-tidy/checkers/custom/query.cpp
clang-tools-extra/test/clang-tidy/infrastructure/Inputs/custom-query-check/append-clang-tidy.yml
clang-tools-extra/test/clang-tidy/infrastructure/Inputs/custom-query-check/empty-clang-tidy.yml
clang-tools-extra/test/clang-tidy/infrastructure/Inputs/custom-query-check/override-clang-tidy.yml
clang-tools-extra/test/clang-tidy/infrastructure/Inputs/custom-query-check/root-clang-tidy.yml
clang-tools-extra/test/clang-tidy/infrastructure/Inputs/custom-query-check/vfsoverlay.yaml
clang-tools-extra/test/clang-tidy/infrastructure/custom-query-check-not-enable.cpp
clang-tools-extra/test/clang-tidy/infrastructure/custom-query-check.cpp
diff --git a/clang-tools-extra/CMakeLists.txt
b/clang-tools-extra/CMakeLists.txt
index 87050db4e0e75..6b6f2b1ca2276 100644
--- a/clang-tools-extra/CMakeLists.txt
+++ b/clang-tools-extra/CMakeLists.txt
@@ -5,8 +5,6 @@ include(GNUInstallDirs)
option(CLANG_TIDY_ENABLE_STATIC_ANALYZER
"Include static analyzer checks in clang-tidy" ON)
-option(CLANG_TIDY_ENABLE_QUERY_BASED_CUSTOM_CHECKS
- "Enable query-based custom checks in clang-tidy" ON)
if(CLANG_INCLUDE_TESTS)
umbrella_lit_testsuite_begin(check-clang-tools)
diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt
b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 153356245cfd1..93117cf1d6373 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -58,7 +58,6 @@ add_subdirectory(bugprone)
add_subdirectory(cert)
add_subdirectory(concurrency)
add_subdirectory(cppcoreguidelines)
-add_subdirectory(custom)
add_subdirectory(darwin)
add_subdirectory(fuchsia)
add_subdirectory(google)
@@ -102,10 +101,6 @@ set(ALL_CLANG_TIDY_CHECKS
clangTidyReadabilityModule
clangTidyZirconModule
)
-
-if(CLANG_TIDY_ENABLE_QUERY_BASED_CUSTOM_CHECKS)
- list(APPEND ALL_CLANG_TIDY_CHECKS clangTidyCustomModule)
-endif()
if(CLANG_TIDY_ENABLE_STATIC_ANALYZER)
list(APPEND ALL_CLANG_TIDY_CHECKS clangTidyMPIModule)
endif()
diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp
b/clang-tools-extra/clang-tidy/ClangTidy.cpp
index db3b9eac53b8f..4c36bbccf44d9 100644
--- a/clang-tools-extra/clang-tidy/ClangTidy.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp
@@ -53,11 +53,6 @@
LLVM_INSTANTIATE_REGISTRY(clang::tidy::ClangTidyModuleRegistry)
namespace clang::tidy {
-namespace custom {
-extern void registerCustomChecks(const ClangTidyOptions &O,
- ClangTidyCheckFactories &Factories);
-} // namespace custom
-
namespace {
#if CLANG_TIDY_ENABLE_STATIC_ANALYZER
#define ANALYZER_CHECK_NAME_PREFIX "clang-analyzer-"
@@ -347,10 +342,6 @@ ClangTidyASTConsumerFactory::ClangTidyASTConsumerFactory(
IntrusiveRefCntPtr OverlayFS)
: Context(Context), OverlayFS(std:
[llvm-branch-commits] [llvm] release/21.x: [Mips] Fix atomic min/max generate mips4 instructions when compiling for mips2 (#149983) (PR #159391)
github-actions[bot] wrote: ⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo. Please turn off [Keep my email addresses private](https://github.com/settings/emails) setting in your account. See [LLVM Developer Policy](https://llvm.org/docs/DeveloperPolicy.html#email-addresses) and [LLVM Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it) for more information. https://github.com/llvm/llvm-project/pull/159391 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
@@ -261,55 +262,106 @@ void FlowAwareEmbedder::computeEmbeddings(const
BasicBlock &BB) const {
BBVecMap[&BB] = BBVector;
}
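+//===----------------------------------------------------------------------===//
+// VocabStorage
+//===----------------------------------------------------------------------===//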
+
+VocabStorage::VocabStorage(std::vector<std::vector<Embedding>> &&SectionData)
+: Sections(std::move(SectionData)), TotalSize([&] {
+assert(!Sections.empty() && "Vocabulary has no sections");
+assert(!Sections[0].empty() && "First section of vocabulary is empty");
+// Compute total size across all sections
+size_t Size = 0;
+for (const auto &Section : Sections)
+ Size += Section.size();
+return Size;
+ }()),
+ Dimension([&] {
+// Get dimension from the first embedding in the first section - all
+// embeddings must have the same dimension
+assert(!Sections.empty() && "Vocabulary has no sections");
+assert(!Sections[0].empty() && "First section of vocabulary is empty");
+return static_cast<unsigned>(Sections[0][0].size());
+ }()) {}
+
+const Embedding &VocabStorage::const_iterator::operator*() const {
+ assert(SectionId < Storage->Sections.size() && "Invalid section ID");
+ assert(LocalIndex < Storage->Sections[SectionId].size() &&
+ "Local index out of range");
+ return Storage->Sections[SectionId][LocalIndex];
+}
+
+VocabStorage::const_iterator &VocabStorage::const_iterator::operator++() {
+ ++LocalIndex;
+ // Check if we need to move to the next section
+ while (SectionId < Storage->getNumSections() &&
+ LocalIndex >= Storage->Sections[SectionId].size()) {
+LocalIndex = 0;
+++SectionId;
+ }
+ return *this;
+}
+
+bool VocabStorage::const_iterator::operator==(
+const const_iterator &Other) const {
+ return Storage == Other.Storage && SectionId == Other.SectionId &&
+ LocalIndex == Other.LocalIndex;
+}
+
+bool VocabStorage::const_iterator::operator!=(
+const const_iterator &Other) const {
+ return !(*this == Other);
+}
+
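//===----------------------------------------------------------------------===//
// Vocabulary
//===----------------------------------------------------------------------===//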
unsigned Vocabulary::getDimension() const {
mtrofin wrote:
You can move trivial accessors to the header; that gives better performance in
non-[Thin]LTO builds.
https://github.com/llvm/llvm-project/pull/158376
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove unnecessary operand legalization for WMMAs (PR #159370)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes
The operand constraints already express this constraint, and
InstrEmitter will respect them.
---
Full diff: https://github.com/llvm/llvm-project/pull/159370.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (-15)
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 37c75fe7f7dfd..a737ad98c1d80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6547,21 +6547,6 @@ void
SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
!RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
legalizeOpWithMove(MI, VOP3Idx[2]);
- if (isWMMA(MI)) {
-// scale_src has a register class restricted to low 256 VGPRs, we may need
-// to insert a copy to the restricted VGPR class.
-int ScaleSrc0Idx =
-AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src0);
-if (ScaleSrc0Idx != -1) {
- int ScaleSrc1Idx =
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src1);
- if (!isOperandLegal(MI, ScaleSrc0Idx))
-legalizeOpWithMove(MI, ScaleSrc0Idx);
- if (!isOperandLegal(MI, ScaleSrc1Idx))
-legalizeOpWithMove(MI, ScaleSrc1Idx);
-}
- }
-
// Fix the register class of packed FP32 instructions on gfx12+. See
// SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
```
https://github.com/llvm/llvm-project/pull/159370
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove TRI argument from getRegClass (PR #158225)
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Matt Arsenault (arsenm)
Changes
TargetInstrInfo now directly holds a reference to TargetRegisterInfo
and does not need TRI passed in anywhere.
---
Full diff: https://github.com/llvm/llvm-project/pull/158225.diff
45 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetInstrInfo.h (+2-3)
- (modified) llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp (+2-2)
- (modified) llvm/lib/CodeGen/BreakFalseDeps.cpp (+1-1)
- (modified) llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp (+2-2)
- (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+1-1)
- (modified) llvm/lib/CodeGen/InitUndef.cpp (+1-1)
- (modified) llvm/lib/CodeGen/MachineInstr.cpp (+1-1)
- (modified) llvm/lib/CodeGen/MachineLICM.cpp (+1-1)
- (modified) llvm/lib/CodeGen/MachineVerifier.cpp (+4-7)
- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+1-1)
- (modified) llvm/lib/CodeGen/SelectionDAG/FastISel.cpp (+1-1)
- (modified) llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (+4-5)
- (modified) llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (+1-1)
- (modified) llvm/lib/CodeGen/TargetInstrInfo.cpp (+2-3)
- (modified) llvm/lib/CodeGen/TwoAddressInstructionPass.cpp (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp (+4-6)
- (modified) llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
(+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+1-4)
- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+6-6)
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+2-2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+5-6)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2-3)
- (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+2-4)
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp (+2-3)
- (modified) llvm/lib/Target/ARM/ARMFrameLowering.cpp (+1-2)
- (modified) llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (+4-4)
- (modified) llvm/lib/Target/ARM/MLxExpansionPass.cpp (+1-1)
- (modified) llvm/lib/Target/ARM/Thumb2InstrInfo.cpp (+1-1)
- (modified) llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp (+1-1)
- (modified) llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp (+2-2)
- (modified) llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp (+2-2)
- (modified) llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp (+2-2)
- (modified) llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp
(+1-2)
- (modified) llvm/lib/Target/Mips/MipsSEInstrInfo.cpp (+2-2)
- (modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp (+2-2)
- (modified) llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp (+1-2)
- (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+2-2)
- (modified) llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp (+1-2)
- (modified) llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp (+3-3)
- (modified) llvm/lib/Target/X86/X86DomainReassignment.cpp (+2-2)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+12-14)
- (modified) llvm/lib/Target/X86/X86InstrInfo.h (+2-3)
- (modified) llvm/lib/Target/X86/X86OptimizeLEAs.cpp (+1-1)
- (modified) llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp (+1-1)
```diff
Rate limit · GitHub
body {
background-color: #f6f8fa;
color: rgba(0, 0, 0, 0.5);
font-family: -apple-system,BlinkMacSystemFont,Segoe
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
}
.c { margin: 50px auto; max-width: 600px; text-align: center; padding: 0
24px; }
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { color: #24292e; line-height: 60px; font-size: 48px; font-weight:
300; margin: 0px; }
p { margin: 20px 0 40px; }
#s { margin-top: 35px; }
#s a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
}
Access has been restricted
You have triggered a rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
Contact Support (https://support.github.com) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
```
https://github.com/llvm/llvm-project/pull/158225
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Add `AAAMDGPUClusterDims` (PR #158076)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
Changes
---
Patch is 160.84 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/158076.diff
27 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributes.def (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+163-1)
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
(+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+13-13)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9)
- (modified)
llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+13-13)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
(+1-1)
- (modified)
llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+21-21)
- (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
(+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+2-2)
- (modified)
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2)
- (modified)
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
(+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+1-1)
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index 8c1c8219690ba..4c9715e4a1737 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -31,5 +31,8 @@ AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
+AMDGPU_ATTRIBUTE(CLUSTER_ID_X, "amdgpu-no-cluster-id-x")
+AMDGPU_ATTRIBUTE(CLUSTER_ID_Y, "amdgpu-no-cluster-id-y")
+AMDGPU_ATTRIBUTE(CLUSTER_ID_Z, "amdgpu-no-cluster-id-z")
#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f646457f9d76f..49f87513777f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -77,6 +77,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly,
bool &NeedsImplicit,
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return WORKGROUP_ID_Z;
+ case Intrinsic::amdgcn_cluster_id_x:
+NonKernelOnly = true;
+return CLUSTER_ID_X;
+ case Intrinsic::amdgcn_cluster_id_y:
+return CLUSTER_ID_Y;
+ case Intrinsic::amdgcn_cluster_id_z:
+return CLUSTER_ID_Z;
case Intrinsic::amdgcn_lds_kernel_id:
return LDS_KERNEL_ID;
case Intrinsic::amdgcn_dispatch_ptr:
@@ -1296,6 +1303,157 @@ struct AAAMDGPUNoAGPR
const char AAAMDGPUNoAGPR::ID = 0;
+/// An abstract attribute to propagate the function attribute
+/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
+struct AAAMDGPUClusterDims
+: public StateWrapper {
+ using Base = StateWrapper;
+ AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
+Attributor &A);
+
+ /// See AbstractAttribute::getName().
+ StringRef getName() const override { return "AAAMDGPUClusterDims"; }
+
+ /// See AbstractAttribute::getIdAddr().
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDGPUClusterDims.
+ static bool classof(const AbstractAttribute *AA) {
+return (AA->getIdAddr() == &ID);
+ }
+
+ virtual const AMDGPU::ClusterDimsAttr &getClusterDims()
[llvm-branch-commits] [clang] [PAC][clang] Handle pauthtest environment and ABI in Linux-specific code (PR #113151)
https://github.com/kovdan01 updated
https://github.com/llvm/llvm-project/pull/113151
>From 7af8e3d4514626ca2dacde8cbaa33d568b1f4aed Mon Sep 17 00:00:00 2001
From: Daniil Kovalev
Date: Mon, 21 Oct 2024 12:00:19 +0300
Subject: [PATCH 1/2] [PAC][clang] Handle pauthtest environment and ABI in
Linux-specific code
Since pauthtest is a Linux-specific ABI, it should not be handled in
common driver code.
---
clang/lib/Basic/Targets/AArch64.cpp | 9 +-
clang/lib/Basic/Targets/AArch64.h| 11 +++
clang/lib/Basic/Targets/OSTargets.cpp| 1 +
clang/lib/Basic/Targets/OSTargets.h | 6 ++
clang/lib/CodeGen/CodeGenModule.cpp | 2 -
clang/lib/CodeGen/TargetInfo.h | 1 -
clang/lib/Driver/ToolChain.cpp | 1 -
clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 21 -
clang/lib/Driver/ToolChains/Arch/AArch64.h | 2 -
clang/lib/Driver/ToolChains/Clang.cpp| 62 ++---
clang/lib/Driver/ToolChains/Linux.cpp| 96
clang/lib/Driver/ToolChains/Linux.h | 7 ++
clang/test/Driver/aarch64-ptrauth.c | 34 +--
13 files changed, 153 insertions(+), 100 deletions(-)
diff --git a/clang/lib/Basic/Targets/AArch64.cpp
b/clang/lib/Basic/Targets/AArch64.cpp
index 9e03a0846ffba..fef26d3b8f328 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -206,8 +206,7 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple
&Triple,
StringRef AArch64TargetInfo::getABI() const { return ABI; }
bool AArch64TargetInfo::setABI(const std::string &Name) {
- if (Name != "aapcs" && Name != "aapcs-soft" && Name != "darwinpcs" &&
- Name != "pauthtest")
+ if (Name != "aapcs" && Name != "aapcs-soft" && Name != "darwinpcs")
return false;
ABI = Name;
@@ -221,12 +220,6 @@ bool AArch64TargetInfo::validateTarget(DiagnosticsEngine
&Diags) const {
Diags.Report(diag::err_target_unsupported_abi_with_fpu) << ABI;
return false;
}
- if (getTriple().getEnvironment() == llvm::Triple::PAuthTest &&
- getTriple().getOS() != llvm::Triple::Linux) {
-Diags.Report(diag::err_target_unsupported_abi_for_triple)
-<< getTriple().getEnvironmentName() << getTriple().getTriple();
-return false;
- }
return true;
}
diff --git a/clang/lib/Basic/Targets/AArch64.h
b/clang/lib/Basic/Targets/AArch64.h
index dfd89b2f3..dda8b745ff175 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -135,6 +135,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public
TargetInfo {
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
+protected:
std::string ABI;
public:
@@ -277,6 +278,16 @@ class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public
AArch64TargetInfo {
void setDataLayout() override;
};
+template <>
+inline bool
+LinuxTargetInfo<AArch64leTargetInfo>::setABI(const std::string &Name) {
+ if (Name == "pauthtest") {
+ABI = Name;
+return true;
+ }
+ return AArch64leTargetInfo::setABI(Name);
+}
+
class LLVM_LIBRARY_VISIBILITY WindowsARM64TargetInfo
: public WindowsTargetInfo {
const llvm::Triple Triple;
diff --git a/clang/lib/Basic/Targets/OSTargets.cpp
b/clang/lib/Basic/Targets/OSTargets.cpp
index e744e84a5b079..e99bbd159929c 100644
--- a/clang/lib/Basic/Targets/OSTargets.cpp
+++ b/clang/lib/Basic/Targets/OSTargets.cpp
@@ -10,6 +10,7 @@
//===--===//
#include "OSTargets.h"
+#include "AArch64.h"
#include "clang/Basic/MacroBuilder.h"
#include "llvm/ADT/StringRef.h"
diff --git a/clang/lib/Basic/Targets/OSTargets.h
b/clang/lib/Basic/Targets/OSTargets.h
index a733f6e97b3a4..82c87847a31d8 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -396,6 +396,12 @@ class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public
OSTargetInfo {
const char *getStaticInitSectionSpecifier() const override {
return ".text.startup";
}
+
+ // This allows template specializations, see
+ // LinuxTargetInfo::setABI
+ bool setABI(const std::string &Name) override {
+return OSTargetInfo::setABI(Name);
+ }
};
// Managarm Target
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp
b/clang/lib/CodeGen/CodeGenModule.cpp
index c0cfc24f02877..779e16fc18e59 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -145,8 +145,6 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
else if (Target.getABI() == "aapcs-soft")
Kind = AArch64ABIKind::AAPCSSoft;
-else if (Target.getABI() == "pauthtest")
- Kind = AArch64ABIKind::PAuthTest;
return createAArch64TargetCodeGenInfo(CGM, Kind);
}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index d0edae1295094..f63e900669d97 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/
[llvm-branch-commits] [NFC][CFI][CodeGen] Move GeneralizeFunctionType out of CreateMetadataIdentifierGeneralized (PR #158190)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/158190 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] PPC: Replace PointerLikeRegClass with RegClassByHwMode (PR #158777)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158777
>From c386096eeb2b87ccd1e5187666cfe11a2be61d6a Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 5 Sep 2025 18:03:59 +0900
Subject: [PATCH 1/3] PPC: Replace PointerLikeRegClass with RegClassByHwMode
---
.../PowerPC/Disassembler/PPCDisassembler.cpp | 3 --
llvm/lib/Target/PowerPC/PPC.td| 6
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 28 ++-
llvm/lib/Target/PowerPC/PPCRegisterInfo.td| 10 +--
4 files changed, 23 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 47586c417cfe3..70e619cc22b19 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -185,9 +185,6 @@ DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
-#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
-#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
-
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 386d0f65d1ed1..d491e88b66ad8 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -394,6 +394,12 @@ def NotAIX : Predicate<"!Subtarget->isAIXABI()">;
def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
def IsNotISAFuture : Predicate<"!Subtarget->isISAFuture()">;
+//===--===//
+// HwModes
+//===--===//
+
+defvar PPC32 = DefaultMode;
+def PPC64 : HwMode<[In64BitMode]>;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index db066bc4b7bdd..55e38bcf4afc9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2142,33 +2142,23 @@ bool PPCInstrInfo::onlyFoldImmediate(MachineInstr
&UseMI, MachineInstr &DefMI,
assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
assert(UseIdx < UseMCID.getNumOperands() && "No operand description for
Reg");
- const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
-
// We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
// register (which might also be specified as a pointer class kind).
- if (UseInfo->isLookupPtrRegClass()) {
-if (UseInfo->RegClass /* Kind */ != 1)
- return false;
- } else {
-if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
-UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
- return false;
- }
+
+ const MCOperandInfo &UseInfo = UseMCID.operands()[UseIdx];
+ int16_t RegClass = getOpRegClassID(UseInfo);
+ if (UseInfo.RegClass != PPC::GPRC_NOR0RegClassID &&
+ UseInfo.RegClass != PPC::G8RC_NOX0RegClassID)
+return false;
// Make sure this is not tied to an output register (or otherwise
// constrained). This is true for ST?UX registers, for example, which
// are tied to their output registers.
- if (UseInfo->Constraints != 0)
+ if (UseInfo.Constraints != 0)
return false;
- MCRegister ZeroReg;
- if (UseInfo->isLookupPtrRegClass()) {
-bool isPPC64 = Subtarget.isPPC64();
-ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
- } else {
-ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
- PPC::ZERO8 : PPC::ZERO;
- }
+ MCRegister ZeroReg =
+ RegClass == PPC::G8RC_NOX0RegClassID ? PPC::ZERO8 : PPC::ZERO;
LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
LLVM_DEBUG(UseMI.dump());
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8b690b7b833b3..adda91786d19c 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -868,7 +868,11 @@ def crbitm: Operand {
def PPCRegGxRCNoR0Operand : AsmOperandClass {
let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
}
-def ptr_rc_nor0 : Operand, PointerLikeRegClass<1> {
+
+def ptr_rc_nor0 : Operand,
+ RegClassByHwMode<
+[PPC32, PPC64],
+[GPRC_NOR0, G8RC_NOX0]> {
let ParserMatchClass = PPCRegGxRCNoR0Operand;
}
@@ -902,7 +906,9 @@ def memri34_pcrel : Operand { // memri, imm is a
34-bit value.
def PPCRegGxRCOperand : AsmOperandClass {
let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
}
-def ptr_rc_idx : Operand, PointerLikeRegClass<0> {
+def ptr_rc_idx : Operand,
[llvm-branch-commits] [llvm] SPARC: Use RegClassByHwMode instead of PointerLikeRegClass (PR #158271)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158271
>From 0836893a46f7b835ba658271bed698b8e979a6d1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 8 Sep 2025 14:04:59 +0900
Subject: [PATCH] SPARC: Use RegClassByHwMode instead of PointerLikeRegClass
---
.../Sparc/Disassembler/SparcDisassembler.cpp | 8 ---
llvm/lib/Target/Sparc/SparcInstrInfo.td | 21 +--
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index c3d60f3689e1f..e585e5af42d32 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -159,14 +159,6 @@ static DecodeStatus DecodeI64RegsRegisterClass(MCInst
&Inst, unsigned RegNo,
return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
}
-// This is used for the type "ptr_rc", which is either IntRegs or I64Regs
-// depending on SparcRegisterInfo::getPointerRegClass.
-static DecodeStatus DecodePointerLikeRegClass0(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
-}
-
static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td
b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 53972d6c105a4..97e7fd7769edb 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -95,10 +95,27 @@ def HasFSMULD : Predicate<"!Subtarget->hasNoFSMULD()">;
// will pick deprecated instructions.
def UseDeprecatedInsts : Predicate<"Subtarget->useV8DeprecatedInsts()">;
+//===--===//
+// HwModes Pattern Stuff
+//===--===//
+
+defvar SPARC32 = DefaultMode;
+def SPARC64 : HwMode<[Is64Bit]>;
+
//===--===//
// Instruction Pattern Stuff
//===--===//
+def sparc_ptr_rc : RegClassByHwMode<
+ [SPARC32, SPARC64],
+ [IntRegs, I64Regs]>;
+
+// Both cases can use the same decoder method, so avoid the dispatch
+// by hwmode by setting an explicit DecoderMethod
+def ptr_op : RegisterOperand {
+ let DecoderMethod = "DecodeIntRegsRegisterClass";
+}
+
// FIXME these should have AsmOperandClass.
def uimm3 : PatLeaf<(imm), [{ return isUInt<3>(N->getZExtValue()); }]>;
@@ -178,12 +195,12 @@ def simm13Op : Operand {
def MEMrr : Operand {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
+ let MIOperandInfo = (ops ptr_op, ptr_op);
let ParserMatchClass = SparcMEMrrAsmOperand;
}
def MEMri : Operand {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, simm13Op);
+ let MIOperandInfo = (ops ptr_op, simm13Op);
let ParserMatchClass = SparcMEMriAsmOperand;
}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Improve StructurizeCFG pass performance by using SSAUpdaterBulk. (PR #150937)
https://github.com/vpykhtin updated
https://github.com/llvm/llvm-project/pull/150937
>From 654a23baa8c2db69a0dc212ec43c2dd6cdb3c0a5 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin
Date: Thu, 10 Apr 2025 11:58:13 +0000
Subject: [PATCH] amdgpu_use_ssaupdaterbulk_in_structurizecfg
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25 +++
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 2ee91a9b40026..0f3978f56045e 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -47,6 +47,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include
#include
@@ -321,7 +322,7 @@ class StructurizeCFG {
void collectInfos();
- void insertConditions(bool Loops);
+ void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
void simplifyConditions();
@@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() {
}
/// Insert the missing branch conditions
-void StructurizeCFG::insertConditions(bool Loops) {
+void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter)
{
BranchVector &Conds = Loops ? LoopConds : Conditions;
Value *Default = Loops ? BoolTrue : BoolFalse;
- SSAUpdater PhiInserter;
for (BranchInst *Term : Conds) {
assert(Term->isConditional());
@@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
BasicBlock *SuccTrue = Term->getSuccessor(0);
BasicBlock *SuccFalse = Term->getSuccessor(1);
-PhiInserter.Initialize(Boolean, "");
-PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+unsigned Variable = PhiInserter.AddVariable("", Boolean);
+PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
+ Default);
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
@@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
ParentInfo = PI;
break;
}
- PhiInserter.AddAvailableValue(BB, PI.Pred);
+ PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
Dominator.addAndRememberBlock(BB);
}
@@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
} else {
if (!Dominator.resultIsRememberedBlock())
-PhiInserter.AddAvailableValue(Dominator.result(), Default);
+PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
- Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
}
}
}
@@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
orderNodes();
collectInfos();
createFlow();
- insertConditions(false);
- insertConditions(true);
+
+ SSAUpdaterBulk PhiInserter;
+ insertConditions(false, PhiInserter);
+ insertConditions(true, PhiInserter);
+ PhiInserter.RewriteAndOptimizeAllUses(*DT);
+
setPhiValues();
simplifyHoistedPhis();
simplifyConditions();
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Improve StructurizeCFG pass performance by using SSAUpdaterBulk. (PR #150937)
https://github.com/vpykhtin updated
https://github.com/llvm/llvm-project/pull/150937
>From 654a23baa8c2db69a0dc212ec43c2dd6cdb3c0a5 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin
Date: Thu, 10 Apr 2025 11:58:13 +0000
Subject: [PATCH] amdgpu_use_ssaupdaterbulk_in_structurizecfg
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25 +++
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 2ee91a9b40026..0f3978f56045e 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -47,6 +47,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include
#include
@@ -321,7 +322,7 @@ class StructurizeCFG {
void collectInfos();
- void insertConditions(bool Loops);
+ void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
void simplifyConditions();
@@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() {
}
/// Insert the missing branch conditions
-void StructurizeCFG::insertConditions(bool Loops) {
+void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter)
{
BranchVector &Conds = Loops ? LoopConds : Conditions;
Value *Default = Loops ? BoolTrue : BoolFalse;
- SSAUpdater PhiInserter;
for (BranchInst *Term : Conds) {
assert(Term->isConditional());
@@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
BasicBlock *SuccTrue = Term->getSuccessor(0);
BasicBlock *SuccFalse = Term->getSuccessor(1);
-PhiInserter.Initialize(Boolean, "");
-PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+unsigned Variable = PhiInserter.AddVariable("", Boolean);
+PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
+ Default);
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
@@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
ParentInfo = PI;
break;
}
- PhiInserter.AddAvailableValue(BB, PI.Pred);
+ PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
Dominator.addAndRememberBlock(BB);
}
@@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
} else {
if (!Dominator.resultIsRememberedBlock())
-PhiInserter.AddAvailableValue(Dominator.result(), Default);
+PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
- Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
}
}
}
@@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
orderNodes();
collectInfos();
createFlow();
- insertConditions(false);
- insertConditions(true);
+
+ SSAUpdaterBulk PhiInserter;
+ insertConditions(false, PhiInserter);
+ insertConditions(true, PhiInserter);
+ PhiInserter.RewriteAndOptimizeAllUses(*DT);
+
setPhiValues();
simplifyHoistedPhis();
simplifyConditions();
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Mips: Switch to RegClassByHwMode (PR #158273)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158273
>From 6d6e9fde6d4db07dce3bee1085d230ba9bc7b136 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 6 Sep 2025 21:14:45 +0900
Subject: [PATCH] Mips: Switch to RegClassByHwMode
---
.../Target/Mips/AsmParser/MipsAsmParser.cpp | 9 +--
.../Mips/Disassembler/MipsDisassembler.cpp| 24 +++
llvm/lib/Target/Mips/MicroMipsInstrInfo.td| 12 +++---
llvm/lib/Target/Mips/Mips.td | 15
llvm/lib/Target/Mips/MipsInstrInfo.td | 20 +++-
llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 16 ++---
llvm/lib/Target/Mips/MipsRegisterInfo.td | 16 +
7 files changed, 76 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 8a5cb517c94c5..ba70c9e6cb9e8 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -3706,7 +3706,9 @@ void MipsAsmParser::expandMem16Inst(MCInst &Inst, SMLoc
IDLoc, MCStreamer &Out,
MCRegister TmpReg = DstReg;
const MCInstrDesc &Desc = MII.get(OpCode);
- int16_t DstRegClass = Desc.operands()[StartOp].RegClass;
+ int16_t DstRegClass =
+ MII.getOpRegClassID(Desc.operands()[StartOp],
+ STI->getHwMode(MCSubtargetInfo::HwMode_RegInfo));
unsigned DstRegClassID =
getContext().getRegisterInfo()->getRegClass(DstRegClass).getID();
bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) ||
@@ -3834,7 +3836,10 @@ void MipsAsmParser::expandMem9Inst(MCInst &Inst, SMLoc
IDLoc, MCStreamer &Out,
MCRegister TmpReg = DstReg;
const MCInstrDesc &Desc = MII.get(OpCode);
- int16_t DstRegClass = Desc.operands()[StartOp].RegClass;
+ int16_t DstRegClass =
+ MII.getOpRegClassID(Desc.operands()[StartOp],
+ STI->getHwMode(MCSubtargetInfo::HwMode_RegInfo));
+
unsigned DstRegClassID =
getContext().getRegisterInfo()->getRegClass(DstRegClass).getID();
bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) ||
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index c22b8f61b12dc..705695c74803f 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -916,6 +916,30 @@ DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned
RegNo, uint64_t Address,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGP32RegisterClass(MCInst &Inst, unsigned RegNo,
+uint64_t Address,
+const MCDisassembler *Decoder) {
+ llvm_unreachable("this is unused");
+}
+
+static DecodeStatus DecodeGP64RegisterClass(MCInst &Inst, unsigned RegNo,
+uint64_t Address,
+const MCDisassembler *Decoder) {
+ llvm_unreachable("this is unused");
+}
+
+static DecodeStatus DecodeSP32RegisterClass(MCInst &Inst, unsigned RegNo,
+uint64_t Address,
+const MCDisassembler *Decoder) {
+ llvm_unreachable("this is unused");
+}
+
+static DecodeStatus DecodeSP64RegisterClass(MCInst &Inst, unsigned RegNo,
+uint64_t Address,
+const MCDisassembler *Decoder) {
+ llvm_unreachable("this is unused");
+}
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index b3fd8f422f429..b44bf1391b73e 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -57,12 +57,6 @@ def MicroMipsMemGPRMM16AsmOperand : AsmOperandClass {
let PredicateMethod = "isMemWithGRPMM16Base";
}
-// Define the classes of pointers used by microMIPS.
-// The numbers must match those in MipsRegisterInfo::MipsPtrClass.
-def ptr_gpr16mm_rc : PointerLikeRegClass<1>;
-def ptr_sp_rc : PointerLikeRegClass<2>;
-def ptr_gp_rc : PointerLikeRegClass<3>;
-
class mem_mm_4_generic : Operand {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops ptr_gpr16mm_rc, simm4);
@@ -114,7 +108,7 @@ def mem_mm_gp_simm7_lsl2 : Operand {
def mem_mm_9 : Operand {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, simm9);
+ let MIOperandInfo = (ops mips_ptr_rc, simm9);
let EncoderMethod = "getMemEncodingMMImm9";
let ParserMatchClass = MipsMemSimmAsmOperand<9>;
let OperandType = "OPERAND_MEMORY";
@@ -130,7 +124,7 @@ def mem_mm_11
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies when no runtime (PR #157754)
https://github.com/jdenny-ornl updated
https://github.com/llvm/llvm-project/pull/157754
>From 75a8df62df2ef7e8c02d7a76120e57e2dd1a1539 Mon Sep 17 00:00:00 2001
From: "Joel E. Denny"
Date: Tue, 9 Sep 2025 17:33:38 -0400
Subject: [PATCH 1/3] [LoopUnroll] Fix block frequencies when no runtime
This patch implements the LoopUnroll changes discussed in [[RFC] Fix
Loop Transformations to Preserve Block
Frequencies](https://discourse.llvm.org/t/rfc-fix-loop-transformations-to-preserve-block-frequencies/85785)
and is thus another step in addressing issue #135812.
In summary, for the case of partial loop unrolling without a runtime,
this patch changes LoopUnroll to:
- Maintain branch weights consistently with the original loop for the
sake of preserving the total frequency of the original loop body.
- Store the new estimated trip count in the
`llvm.loop.estimated_trip_count` metadata, introduced by PR #148758.
- Correct the new estimated trip count (e.g., 3 instead of 2) when the
original estimated trip count (e.g., 10) divided by the unroll count
(e.g., 4) leaves a remainder (e.g., 2).
There are loop unrolling cases this patch does not fully fix, such as
partial unrolling with a runtime and complete unrolling, and there are
two associated tests this patch marks as XFAIL. They will be
addressed in future patches that should land with this patch.
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 36 --
.../peel.ll} | 0
.../branch-weights-freq/unroll-partial.ll | 68 +++
.../LoopUnroll/runtime-loop-branchweight.ll | 1 +
.../LoopUnroll/unroll-heuristics-pgo.ll | 1 +
5 files changed, 100 insertions(+), 6 deletions(-)
rename llvm/test/Transforms/LoopUnroll/{peel-branch-weights-freq.ll =>
branch-weights-freq/peel.ll} (100%)
create mode 100644
llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 8a6c7789d1372..93c43396c54b6 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -499,9 +499,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo
*LI,
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
- unsigned EstimatedLoopInvocationWeight = 0;
std::optional OriginalTripCount =
- llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
+ llvm::getLoopEstimatedTripCount(L);
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
// and will never be executed.
@@ -1130,10 +1129,35 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO,
LoopInfo *LI,
// We shouldn't try to use `L` anymore.
L = nullptr;
} else if (OriginalTripCount) {
-// Update the trip count. Note that the remainder has already logic
-// computing it in `UnrollRuntimeLoopRemainder`.
-setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
- EstimatedLoopInvocationWeight);
+// Update metadata for the estimated trip count.
+//
+// If ULO.Runtime, UnrollRuntimeLoopRemainder handles branch weights for
the
+// remainder loop it creates, and the unrolled loop's branch weights are
+// adjusted below. Otherwise, if unrolled loop iterations' latches become
+// unconditional, branch weights are adjusted above. Otherwise, the
+// original loop's branch weights are correct for the unrolled loop, so do
+// not adjust them.
+// FIXME: Actually handle such unconditional latches and ULO.Runtime.
+//
+// For example, consider what happens if the unroll count is 4 for a loop
+// with an estimated trip count of 10 when we do not create a remainder
loop
+// and all iterations' latches remain conditional. Each unrolled
+// iteration's latch still has the same probability of exiting the loop as
+// it did when in the original loop, and thus it should still have the same
+// branch weights. Each unrolled iteration's non-zero probability of
+// exiting already appropriately reduces the probability of reaching the
+// remaining iterations just as it did in the original loop. Trying to
also
+// adjust the branch weights of the final unrolled iteration's latch (i.e.,
+// the backedge for the unrolled loop as a whole) to reflect its new trip
+// count of 3 will erroneously further reduce its block frequencies.
+// However, in case an analysis later needs to estimate the trip count of
+// the unrolled loop as a whole without considering the branch weights for
+// each unrolled iteration's latch within it, we store the new trip count
as
+// separate metadata.
+unsigned NewTripCount = *OriginalTripCount / ULO.Count;
+if (!ULO.Runtime && *OriginalTripCount % ULO.Count)
+ NewTripCount += 1;
+setLoopEstima
[llvm-branch-commits] [llvm] [LoongArch] Support vector types for hasAndNot to enable more DAG combines (PR #159056)
https://github.com/zhaoqi5 created
https://github.com/llvm/llvm-project/pull/159056
After this commit, DAGCombiner will have more opportunities to optimize
`and+...+not` patterns on vector types into `andn`.
This enables many combines in DAGCombiner, but the tests in this commit only
show changes from combining `and(add(not))` to `and(not(sub))`.
>From e6f1f61f150808760ea03e16969486b9666254a0 Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Tue, 16 Sep 2025 19:51:34 +0800
Subject: [PATCH] [LoongArch] Support vector types for hasAndNot to enable more
DAG combines
After this commit, DAGCombiner will have more opportunities to
optimize `and+...+not` patterns on vector types into `andn`.
This enables many combines in DAGCombiner, but the tests in this
commit only show changes from combining `and(add(not))` to
`and(not(sub))`.
---
.../LoongArch/LoongArchISelLowering.cpp | 8 +--
.../CodeGen/LoongArch/lasx/and-not-combine.ll | 23 +++
.../CodeGen/LoongArch/lsx/and-not-combine.ll | 23 +++
3 files changed, 22 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..7a4e5b5597f7c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -8249,8 +8249,12 @@ EVT LoongArchTargetLowering::getSetCCResultType(const
DataLayout &DL,
}
bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
- // TODO: Support vectors.
- return Y.getValueType().isScalarInteger() && !isa(Y);
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+return Subtarget.hasExtLSX() && VT.isInteger();
+
+ return VT.isScalarInteger() && !isa(Y);
}
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index 75ac4c99ef7c1..67549599db2f3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -8,9 +8,8 @@ define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1,
ptr %a2) nounwind
; CHECK-NEXT:xvld $xr0, $a2, 0
; CHECK-NEXT:xvld $xr1, $a3, 0
; CHECK-NEXT:xvld $xr2, $a1, 0
-; CHECK-NEXT:xvxori.b $xr0, $xr0, 255
-; CHECK-NEXT:xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT:xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT:xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT:xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT:xvst $xr0, $a0, 0
; CHECK-NEXT:ret
entry:
@@ -30,10 +29,8 @@ define void @and_not_combine_v16i16(ptr %res, ptr %a0, ptr
%a1, ptr %a2) nounwin
; CHECK-NEXT:xvld $xr0, $a2, 0
; CHECK-NEXT:xvld $xr1, $a3, 0
; CHECK-NEXT:xvld $xr2, $a1, 0
-; CHECK-NEXT:xvrepli.b $xr3, -1
-; CHECK-NEXT:xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT:xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT:xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT:xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT:xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT:xvst $xr0, $a0, 0
; CHECK-NEXT:ret
entry:
@@ -53,10 +50,8 @@ define void @and_not_combine_v8i32(ptr %res, ptr %a0, ptr
%a1, ptr %a2) nounwind
; CHECK-NEXT:xvld $xr0, $a2, 0
; CHECK-NEXT:xvld $xr1, $a3, 0
; CHECK-NEXT:xvld $xr2, $a1, 0
-; CHECK-NEXT:xvrepli.b $xr3, -1
-; CHECK-NEXT:xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT:xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT:xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT:xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT:xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT:xvst $xr0, $a0, 0
; CHECK-NEXT:ret
entry:
@@ -76,10 +71,8 @@ define void @and_not_combine_v4i64(ptr %res, ptr %a0, ptr
%a1, ptr %a2) nounwind
; CHECK-NEXT:xvld $xr0, $a2, 0
; CHECK-NEXT:xvld $xr1, $a3, 0
; CHECK-NEXT:xvld $xr2, $a1, 0
-; CHECK-NEXT:xvrepli.b $xr3, -1
-; CHECK-NEXT:xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT:xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT:xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT:xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT:xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT:xvst $xr0, $a0, 0
; CHECK-NEXT:ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
index 39060bfa92c0d..3c6d34505e114 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -8,9 +8,8 @@ define void @and_not_combine_v16i8(ptr %res, ptr %a0, ptr %a1,
ptr %a2) nounwind
; CHECK-NEXT:vld $vr0, $a2, 0
; CHECK-NEXT:vld $vr1, $a3, 0
; CHECK-NEXT:vld $vr2, $a1, 0
-; CHECK-NEXT:vxori.b $vr0, $vr0, 255
-; CHECK-NEXT:vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT:vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT:vsub.b $vr0, $vr0, $vr1
+; CHECK-NEXT:vandn.v $vr0, $vr0, $vr2
; CHECK-NEXT:vst $vr0, $a0, 0
; CHECK-NEXT:ret
entry:
@@ -30,10 +29,8 @@ define void @and_not_combine_v8i16(p
[llvm-branch-commits] [llvm] [IR2Vec] Refactor vocabulary to use section-based storage (PR #158376)
https://github.com/svkeerthy edited https://github.com/llvm/llvm-project/pull/158376 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Offload] Add GenericPluginTy::get_mem_info (PR #157484)
https://github.com/RossBrunton updated
https://github.com/llvm/llvm-project/pull/157484
>From 7bf7fe1df8a873964df2ebc17328d9bef00f1347 Mon Sep 17 00:00:00 2001
From: Ross Brunton
Date: Mon, 8 Sep 2025 10:45:42 +0100
Subject: [PATCH] [Offload] Add GenericPluginTy::get_mem_info
This takes a pointer allocated by the plugin, and returns a struct
containing important information about it. This is now used in
`olMemFree` instead of using a map to track allocation info.
---
offload/include/omptarget.h | 2 +
offload/liboffload/src/OffloadImpl.cpp| 27 +--
.../amdgpu/dynamic_hsa/hsa.cpp| 1 +
.../amdgpu/dynamic_hsa/hsa_ext_amd.h | 3 +
offload/plugins-nextgen/amdgpu/src/rtl.cpp| 31 ++-
.../common/include/PluginInterface.h | 13 ++
offload/plugins-nextgen/cuda/src/rtl.cpp | 216 +++---
offload/plugins-nextgen/host/src/rtl.cpp | 5 +
8 files changed, 193 insertions(+), 105 deletions(-)
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..197cbd3806d91 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -96,6 +96,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
OMP_REGISTER_REQUIRES = 0x10,
};
+// Note: This type should be no larger than 3 bits, as the amdgpu platform uses
+// the lower 3 bits of a pointer to store it
enum TargetAllocTy : int32_t {
TARGET_ALLOC_DEVICE = 0,
TARGET_ALLOC_HOST,
diff --git a/offload/liboffload/src/OffloadImpl.cpp
b/offload/liboffload/src/OffloadImpl.cpp
index fef3a5669e0d5..9620c35ac5c10 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -201,8 +201,6 @@ struct OffloadContext {
bool TracingEnabled = false;
bool ValidationEnabled = true;
- DenseMap AllocInfoMap{};
- std::mutex AllocInfoMapMutex{};
SmallVector Platforms{};
size_t RefCount;
@@ -624,32 +622,15 @@ Error olMemAlloc_impl(ol_device_handle_t Device,
ol_alloc_type_t Type,
return Alloc.takeError();
*AllocationOut = *Alloc;
- {
-std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex);
-OffloadContext::get().AllocInfoMap.insert_or_assign(
-*Alloc, AllocInfo{Device, Type});
- }
return Error::success();
}
Error olMemFree_impl(ol_platform_handle_t Platform, void *Address) {
- ol_device_handle_t Device;
- ol_alloc_type_t Type;
- {
-std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex);
-if (!OffloadContext::get().AllocInfoMap.contains(Address))
- return createOffloadError(ErrorCode::INVALID_ARGUMENT,
-"address is not a known allocation");
-
-auto AllocInfo = OffloadContext::get().AllocInfoMap.at(Address);
-Device = AllocInfo.Device;
-Type = AllocInfo.Type;
-OffloadContext::get().AllocInfoMap.erase(Address);
- }
- assert(Platform == Device->Platform);
+ auto MemInfo = Platform->Plugin->get_memory_info(Address);
+ if (auto Err = MemInfo.takeError())
+return Err;
- if (auto Res =
- Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type)))
+ if (auto Res = MemInfo->Device->dataDelete(Address, MemInfo->Type))
return Res;
return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
index bc92f4a46a5c0..7f0e75cb9b500 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
@@ -68,6 +68,7 @@ DLWRAP(hsa_amd_register_system_event_handler, 2)
DLWRAP(hsa_amd_signal_create, 5)
DLWRAP(hsa_amd_signal_async_handler, 5)
DLWRAP(hsa_amd_pointer_info, 5)
+DLWRAP(hsa_amd_pointer_info_set_userdata, 2)
DLWRAP(hsa_code_object_reader_create_from_memory, 3)
DLWRAP(hsa_code_object_reader_destroy, 1)
DLWRAP(hsa_executable_load_agent_code_object, 5)
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 29cfe78082dbb..5c2fbd127c86d 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -160,6 +160,7 @@ typedef struct hsa_amd_pointer_info_s {
void* agentBaseAddress;
void* hostBaseAddress;
size_t sizeInBytes;
+ void *userData;
} hsa_amd_pointer_info_t;
hsa_status_t hsa_amd_pointer_info(const void* ptr,
@@ -168,6 +169,8 @@ hsa_status_t hsa_amd_pointer_info(const void* ptr,
uint32_t* num_agents_accessible,
hsa_agent_t** accessible);
+hsa_status_t hsa_amd_pointer_info_set_userdata(const void *ptr, void
*userdata);
+
#ifdef __cplusplus
}
#endif
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c26cfe961aa0e..90d9ca9f787e7 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen
[llvm-branch-commits] [llvm] AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD (PR #157845)
https://github.com/petar-avramovic created
https://github.com/llvm/llvm-project/pull/157845
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD.
Flat addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD
should always be divergent.
>From 8573b1705133c5284a283d170643b8e30bfc4a20 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Wed, 10 Sep 2025 13:04:20 +0200
Subject: [PATCH] AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD.
Flat addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD
should always be divergent.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 15 +++---
.../AMDGPU/MIR/loads-gmir.mir | 20 +++
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c958dfe6954f..398c99b3bd127 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10281,7 +10281,7 @@ unsigned SIInstrInfo::getInstrLatency(const
InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
- unsigned opcode = MI.getOpcode();
+ unsigned Opcode = MI.getOpcode();
auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
@@ -10301,7 +10301,7 @@ SIInstrInfo::getGenericInstructionUniformity(const
MachineInstr &MI) const {
// If the target supports globally addressable scratch, the mapping from
// scratch memory to the flat aperture changes therefore an address space
cast
// is no longer uniform.
- if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+ if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
return HandleAddrSpaceCast(MI);
if (auto *GI = dyn_cast(&MI)) {
@@ -10329,7 +10329,8 @@ SIInstrInfo::getGenericInstructionUniformity(const
MachineInstr &MI) const {
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
- if (opcode == AMDGPU::G_LOAD) {
+ if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
+ Opcode == AMDGPU::G_SEXTLOAD) {
if (MI.memoperands_empty())
return InstructionUniformity::NeverUniform; // conservative assumption
@@ -10343,10 +10344,10 @@ SIInstrInfo::getGenericInstructionUniformity(const
MachineInstr &MI) const {
return InstructionUniformity::Default;
}
- if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
- AMDGPU::isGenericAtomic(opcode)) {
+ if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+ AMDGPU::isGenericAtomic(Opcode)) {
return InstructionUniformity::NeverUniform;
}
return InstructionUniformity::Default;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
index cb3c2de5b8753..d799cd2057f47 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
@@ -46,13 +46,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
-; CHECK-NOT: DIVERGENT
-
+; CHECK: DIVERGENT
+; CHECK-SAME: G_ZEXTLOAD
%0:_(s32) = G_ZEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
-; CHECK-NOT: DIVERGENT
-
+; CHECK: DIVERGENT
+; CHECK-SAME: G_ZEXTLOAD
%2:_(s32) = G_ZEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Gloabal load
@@ -60,7 +60,8 @@ body: |
%3:_(s32) = G_ZEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1)
undef`, addrspace 1)
; Private load
-; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT
+; CHECK-SAME: G_ZEXTLOAD
%5:_(s32) = G_ZEXTLOAD %6(p5) :: (volatile load (s16) from `ptr
addrspace(5) undef`, addrspace 5)
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1)
undef`, addrspace 1)
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1)
undef`, addrspace 1)
@@ -80,11 +81,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
-; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT
+; CHECK-SAME: G_SEXTLOAD
%0:_(s32) = G_SEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
-; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT
+; CHECK-SAME: G_SEXTLOAD
%2:_(s32) = G_SEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Gloabal load
@@ -92,7 +95,8 @@ body: |
%3:_(s32) = G_SEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1)
undef`, addrspace 1)
[llvm-branch-commits] [llvm] [Offload] Add GenericPluginTy::get_mem_info (PR #157484)
@@ -624,32 +622,15 @@ Error olMemAlloc_impl(ol_device_handle_t Device,
ol_alloc_type_t Type,
return Alloc.takeError();
*AllocationOut = *Alloc;
- {
-std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex);
-OffloadContext::get().AllocInfoMap.insert_or_assign(
-*Alloc, AllocInfo{Device, Type});
- }
return Error::success();
}
Error olMemFree_impl(ol_platform_handle_t Platform, void *Address) {
- ol_device_handle_t Device;
- ol_alloc_type_t Type;
- {
-std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex);
-if (!OffloadContext::get().AllocInfoMap.contains(Address))
- return createOffloadError(ErrorCode::INVALID_ARGUMENT,
-"address is not a known allocation");
-
-auto AllocInfo = OffloadContext::get().AllocInfoMap.at(Address);
-Device = AllocInfo.Device;
-Type = AllocInfo.Type;
-OffloadContext::get().AllocInfoMap.erase(Address);
- }
- assert(Platform == Device->Platform);
+ auto MemInfo = Platform->Plugin->get_memory_info(Address);
arsenm wrote:
No auto
https://github.com/llvm/llvm-project/pull/157484
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] CodeGen: Emit .prefalign directives based on the prefalign attribute. (PR #155529)
pcc wrote:
> Can you split "implement basic codegen support for prefalign" (the bits which
> don't depend on the .prefalign directive) into a separate patch? It's not
> clear what's causing the test changes here.
Done: #158368
https://github.com/llvm/llvm-project/pull/155529
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Remove TRI argument from getRegClass (PR #158225)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/158225 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Select VGPR MFMAs by default (PR #159493)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes
AGPRs are undesirable since they are only usable by a
handful of instructions like loads, stores and mfmas, and everything
else requires copies to/from VGPRs. Using the AGPR form should be
a measure of last resort if we must use more than 256 VGPRs.
---
Patch is 1.30 MiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/159493.diff
28 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
(+128-172)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx90a.mir (+2-2)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/acc-ldst.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll
(+123-125)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll (+147-5)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll (+464-498)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll (+540-740)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll (+730-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll (+352-534)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll (+46-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (+1006-1115)
- (modified)
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
(+168-1050)
- (modified)
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
(+2436-4283)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll (+50-70)
- (modified)
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll
(+553-552)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+2931-2)
- (modified) llvm/test/CodeGen/AMDGPU/mfma-loop.ll (+950-1156)
- (modified) llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll (+462-525)
- (modified) llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll (+15-15)
- (modified)
llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/spill-agpr.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll (+51-53)
``diff
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 908d856d386f5..0077c6915c520 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -37,7 +37,7 @@ static cl::opt MFMAVGPRForm(
"amdgpu-mfma-vgpr-form", cl::Hidden,
cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
"unspecified, default to compiler heuristics"),
-cl::init(false));
+cl::init(true));
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
const SITargetLowering *TLI = STI->getTargetLowering();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index 5720b882f4e73..2493065806794 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -15,59 +15,42 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr
addrspace(1) %arg) #
; GCN: ; %bb.0: ; %bb
; GCN-NEXT:s_load_dwordx2 s[34:35], s[4:5], 0x24
; GCN-NEXT:s_mov_b64 s[36:37], 1
-; GCN-NEXT:v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
-; GCN-NEXT:s_mov_b32 s38, 2
-; GCN-NEXT:s_mov_b32 s39, s37
+; GCN-NEXT:v_pk_mov_b32 v[32:33], s[36:37], s[36:37] op_sel:[0,1]
+; GCN-NEXT:s_mov_b32 s36, 2
+; GCN-NEXT:v_pk_mov_b32 v[34:35], s[36:37], s[36:37] op_sel:[0,1]
; GCN-NEXT:s_waitcnt lgkmcnt(0)
; GCN-NEXT:s_load_dwordx16 s[0:15], s[34:35], 0x0
; GCN-NEXT:s_load_dwordx16 s[16:31], s[34:35], 0x40
-; GCN-NEXT:v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
; GCN-NEXT:s_waitcnt lgkmcnt(0)
-; GCN-NEXT:v_accvgpr_write_b32 a0, s0
-; GCN-NEXT:v_accvgpr_write_b32 a16, s16
-; GCN-NEXT:v_accvgpr_write_b32 a1, s1
-; GCN-NEXT:v_accvgpr_write_b32 a2, s2
-; GCN-NEXT:v_accvgpr_write_b32 a3, s3
-; GCN-NEXT:v_accvgpr_write_b32 a4, s4
-; GCN-NEXT:v_accvgpr_write_b32 a5, s5
-; GCN-NEXT:v_accvgpr_write_b32 a6, s6
-; GCN-NEXT:v_accvgpr_write_b32 a7, s7
-; GCN-NEXT:v_accvgpr_write_b32 a8, s8
-; GCN-NEXT:v_accvgpr_write_b32 a9, s9
-; GCN-NEXT:v_accvgpr_write_b32 a10, s10
-; G
[llvm-branch-commits] [llvm] release/21.x: [LoongArch] Fix MergeBaseOffset for constant pool index operand (#159336) (PR #159496)
https://github.com/zhaoqi5 approved this pull request. LGTM. Bugfix. https://github.com/llvm/llvm-project/pull/159496 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AllocToken, Clang] Implement TypeHashPointerSplit mode (PR #156840)
https://github.com/melver ready_for_review https://github.com/llvm/llvm-project/pull/156840 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][Standalone] test Standalone against install distributions (PR #157944)
https://github.com/rengolin commented: I don't know much about CMake, so my comments aren't very helpful. https://github.com/llvm/llvm-project/pull/157944 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add test where WeakCrossingSIV misses dependency due to overflow (NFC) (PR #158281)
https://github.com/kasuga-fj updated
https://github.com/llvm/llvm-project/pull/158281
>From a42c8002548c97d6c7755b1db821a5c682881efe Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga
Date: Fri, 12 Sep 2025 11:06:39 +
Subject: [PATCH] [DA] Add test where WeakCrossingSIV misses dependency due to
overflow
---
.../DependenceAnalysis/WeakCrossingSIV.ll | 224 ++
1 file changed, 224 insertions(+)
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index cd044032e34f1..58dded965de27 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by
utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1
\
; RUN: | FileCheck %s
+; RUN: opt < %s -disable-output "-passes=print<da>" -da-run-siv-routines-only
2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-SIV-ONLY
; ModuleID = 'WeakCrossingSIV.bc'
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -26,6 +28,20 @@ define void @weakcrossing0(ptr %A, ptr %B, i64 %n) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32
%0, ptr %B.addr.02, align 4
; CHECK-NEXT:da analyze - none!
;
+; CHECK-SIV-ONLY-LABEL: 'weakcrossing0'
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %conv, ptr %arrayidx, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
%0 = load i32, ptr %arrayidx2, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - flow [0|<]!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - confused!
+; CHECK-SIV-ONLY-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst:
%0 = load i32, ptr %arrayidx2, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - confused!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+;
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
@@ -79,6 +95,21 @@ define void @weakcrossing1(ptr %A, ptr %B, i64 %n) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32
%0, ptr %B.addr.02, align 4
; CHECK-NEXT:da analyze - none!
;
+; CHECK-SIV-ONLY-LABEL: 'weakcrossing1'
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %conv, ptr %arrayidx, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
%0 = load i32, ptr %arrayidx2, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - flow [<>] splitable!
+; CHECK-SIV-ONLY-NEXT:da analyze - split level = 1, iteration = 0!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - confused!
+; CHECK-SIV-ONLY-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst:
%0 = load i32, ptr %arrayidx2, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - confused!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst:
store i32 %0, ptr %B.addr.02, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+;
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
@@ -130,6 +161,20 @@ define void @weakcrossing2(ptr %A, ptr %B, i64 %n)
nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32
%0, ptr %B.addr.01, align 4
; CHECK-NEXT:da analyze - none!
;
+; CHECK-SIV-ONLY-LABEL: 'weakcrossing2'
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %conv, ptr %arrayidx, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
%0 = load i32, ptr %arrayidx1, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - none!
+; CHECK-SIV-ONLY-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst:
store i32 %0, ptr %B.addr.01, align 4
+; CHECK-SIV-ONLY-NEXT:da analyze - confused!
+; CHECK-SIV-ONLY-NEXT: Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst:
%0 = load i32
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
RKSimon wrote: CC @fhahn who wrote the original fold https://github.com/llvm/llvm-project/pull/159286 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][LoongArch] Introduce LASX and LSX conversion intrinsics (PR #157819)
https://github.com/heiher updated
https://github.com/llvm/llvm-project/pull/157819
>From fc722d5bd2a6d53df029df2396d533d7384194a6 Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 10 Sep 2025 17:11:10 +0800
Subject: [PATCH] [clang][LoongArch] Introduce LASX and LSX conversion
intrinsics
This patch introduces the LASX and LSX conversion intrinsics:
- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
---
.../clang/Basic/BuiltinsLoongArchLASX.def | 19 +++
clang/lib/Basic/Targets/LoongArch.cpp | 1 +
clang/lib/Headers/lasxintrin.h| 113 +
.../CodeGen/LoongArch/lasx/builtin-alias.c| 153 +
clang/test/CodeGen/LoongArch/lasx/builtin.c | 157 ++
clang/test/Preprocessor/init-loongarch.c | 3 +
6 files changed, 446 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index c4ea46a3bc5b5..a5eee613d5c9e 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc",
"lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128, "V4LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128, "V4LLiV2LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V2LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V2LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V4LLiV4LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V4LLiV4LLiV2LLi", "nc", "lasx")
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp
b/clang/lib/Basic/Targets/LoongArch.cpp
index 8e29bb745734b..5863af3f3b920 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const
LangOptions &Opts,
Builder.defineMacro("__loongarch_simd_width", "256");
Builder.defineMacro("__loongarch_sx", Twine(1));
Builder.defineMacro("__loongarch_asx", Twine(1));
+Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
} else if (HasFeatureLSX) {
Builder.defineMacro("__loongarch_simd_width", "128");
Builder.defineMacro("__loongarch_sx", Twine(1));
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
index 85020d82829e2..83cc4288a990c 100644
--- a/clang/lib/Headers/lasxintrin.h
+++ b/clang/lib/Headers/lasxintrin.h
@@ -10,6 +10,8 @@
#ifndef _LOONGSON_ASXINTRIN_H
#define _LOONGSON_ASXINTRIN_H 1
+#include <lsxintrin.h>
+
#if defined(__loongarch_asx)
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+#if defined(__loongarch_asx_sx_conv)
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__,
+ __artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
+ return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
+}
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__, __artificial
[llvm-branch-commits] [clang] [clang][LoongArch] Introduce LASX and LSX conversion intrinsics (PR #157819)
https://github.com/heiher updated
https://github.com/llvm/llvm-project/pull/157819
>From fc722d5bd2a6d53df029df2396d533d7384194a6 Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 10 Sep 2025 17:11:10 +0800
Subject: [PATCH] [clang][LoongArch] Introduce LASX and LSX conversion
intrinsics
This patch introduces the LASX and LSX conversion intrinsics:
- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
---
.../clang/Basic/BuiltinsLoongArchLASX.def | 19 +++
clang/lib/Basic/Targets/LoongArch.cpp | 1 +
clang/lib/Headers/lasxintrin.h| 113 +
.../CodeGen/LoongArch/lasx/builtin-alias.c| 153 +
clang/test/CodeGen/LoongArch/lasx/builtin.c | 157 ++
clang/test/Preprocessor/init-loongarch.c | 3 +
6 files changed, 446 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index c4ea46a3bc5b5..a5eee613d5c9e 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc",
"lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128, "V4LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128, "V4LLiV2LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V2LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V2LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V4LLiV4LLiV2LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V4LLiV4LLiV2LLi", "nc", "lasx")
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp
b/clang/lib/Basic/Targets/LoongArch.cpp
index 8e29bb745734b..5863af3f3b920 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const
LangOptions &Opts,
Builder.defineMacro("__loongarch_simd_width", "256");
Builder.defineMacro("__loongarch_sx", Twine(1));
Builder.defineMacro("__loongarch_asx", Twine(1));
+Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
} else if (HasFeatureLSX) {
Builder.defineMacro("__loongarch_simd_width", "128");
Builder.defineMacro("__loongarch_sx", Twine(1));
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
index 85020d82829e2..83cc4288a990c 100644
--- a/clang/lib/Headers/lasxintrin.h
+++ b/clang/lib/Headers/lasxintrin.h
@@ -10,6 +10,8 @@
#ifndef _LOONGSON_ASXINTRIN_H
#define _LOONGSON_ASXINTRIN_H 1
+#include <lsxintrin.h>
+
#if defined(__loongarch_asx)
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+#if defined(__loongarch_asx_sx_conv)
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__,
+ __artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
+ return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
+}
+
+extern __inline
+__attribute__((__gnu_inline__, __always_inline__, __artificial
[llvm-branch-commits] [clang] [llvm] [lit] Make builtin cat work with stdin (PR #158447)
https://github.com/jh7370 approved this pull request. https://github.com/llvm/llvm-project/pull/158447 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
aemerson wrote: > > Final ping. Does anyone have _objections_ to this patch? > > It seems very odd to tackle this in the always inliner, as it adds a new > concern to a fairly scoped pass. Are you sure there are no alternatives, for > instance, having a pre-AlwaysInliner pass to mark off callsites that > shouldn't be inlined, and then re-enabling them afterwards? > > Maybe a RFC would help gather more feedback and consensus? I can see where you're coming from, we'd ideally like to not couple optimizations together. I still don't see an alternative that's any more elegant. The root cause is that we don't have an interleaved inline/optimize pattern like the main inliner. I don't think an RFC is going to yield any more than the reviewers here, but I can re-consider in future if we see more cases of this issue occur. https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Xtensa] Fix lowering FP compare operations. (PR #157520)
llvmbot wrote:
@llvm/pr-subscribers-backend-xtensa
Author: Andrei Safronov (andreisfr)
Changes
Implement lowering of the SETONE/SETOGT/SETOGE/SETUGT/SETUGE operations. This
fixes f32 "copysign" and "ueq" tests.
---
Full diff: https://github.com/llvm/llvm-project/pull/157520.diff
2 Files Affected:
- (modified) llvm/lib/Target/Xtensa/XtensaISelLowering.cpp (+10-6)
- (modified) llvm/test/CodeGen/Xtensa/select-cc-fp.ll (+24-9)
```diff
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index fd42fd2e010ba..f847ddb46af76 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -232,12 +232,6 @@ XtensaTargetLowering::XtensaTargetLowering(const
TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
-
-setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
-setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
-setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
-setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
-setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
} else {
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
@@ -877,6 +871,16 @@ static std::pair
getFPBranchKind(ISD::CondCode Cond) {
return std::make_pair(Xtensa::BF, Xtensa::OLT_S);
case ISD::SETGT:
return std::make_pair(Xtensa::BF, Xtensa::OLE_S);
+ case ISD::SETOGT:
+return std::make_pair(Xtensa::BF, Xtensa::ULE_S);
+ case ISD::SETOGE:
+return std::make_pair(Xtensa::BF, Xtensa::ULT_S);
+ case ISD::SETONE:
+return std::make_pair(Xtensa::BF, Xtensa::UEQ_S);
+ case ISD::SETUGT:
+return std::make_pair(Xtensa::BF, Xtensa::OLE_S);
+ case ISD::SETUGE:
+return std::make_pair(Xtensa::BF, Xtensa::OLT_S);
default:
llvm_unreachable("Invalid condition!");
}
diff --git a/llvm/test/CodeGen/Xtensa/select-cc-fp.ll
b/llvm/test/CodeGen/Xtensa/select-cc-fp.ll
index ee45ef006123c..742770de23f37 100644
--- a/llvm/test/CodeGen/Xtensa/select-cc-fp.ll
+++ b/llvm/test/CodeGen/Xtensa/select-cc-fp.ll
@@ -103,8 +103,8 @@ define float @brcc_olt(float %a, float %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT:wfr f8, a3
; CHECK-NEXT:wfr f9, a2
-; CHECK-NEXT:ule.s b0, f8, f9
-; CHECK-NEXT:bt b0, .LBB3_2
+; CHECK-NEXT:olt.s b0, f9, f8
+; CHECK-NEXT:bf b0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %t1
; CHECK-NEXT:l32r a8, .LCPI3_1
; CHECK-NEXT:wfr f8, a8
@@ -135,8 +135,8 @@ define float @brcc_ole(float %a, float %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT:wfr f8, a3
; CHECK-NEXT:wfr f9, a2
-; CHECK-NEXT:ult.s b0, f8, f9
-; CHECK-NEXT:bt b0, .LBB4_2
+; CHECK-NEXT:ole.s b0, f9, f8
+; CHECK-NEXT:bf b0, .LBB4_2
; CHECK-NEXT: # %bb.1: # %t1
; CHECK-NEXT:l32r a8, .LCPI4_1
; CHECK-NEXT:wfr f8, a8
@@ -232,7 +232,7 @@ define float @brcc_ueq(float %a, float %b) nounwind {
; CHECK-NEXT:wfr f8, a3
; CHECK-NEXT:wfr f9, a2
; CHECK-NEXT:ueq.s b0, f9, f8
-; CHECK-NEXT:bt b0, .LBB7_2
+; CHECK-NEXT:bf b0, .LBB7_2
; CHECK-NEXT: # %bb.1: # %t1
; CHECK-NEXT:l32r a8, .LCPI7_1
; CHECK-NEXT:wfr f8, a8
@@ -327,8 +327,8 @@ define float @brcc_ult(float %a, float %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT:wfr f8, a3
; CHECK-NEXT:wfr f9, a2
-; CHECK-NEXT:ole.s b0, f8, f9
-; CHECK-NEXT:bt b0, .LBB10_2
+; CHECK-NEXT:ult.s b0, f9, f8
+; CHECK-NEXT:bf b0, .LBB10_2
; CHECK-NEXT: # %bb.1: # %t1
; CHECK-NEXT:l32r a8, .LCPI10_1
; CHECK-NEXT:wfr f8, a8
@@ -359,8 +359,8 @@ define float @brcc_ule(float %a, float %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT:wfr f8, a3
; CHECK-NEXT:wfr f9, a2
-; CHECK-NEXT:olt.s b0, f8, f9
-; CHECK-NEXT:bt b0, .LBB11_2
+; CHECK-NEXT:ule.s b0, f9, f8
+; CHECK-NEXT:bf b0, .LBB11_2
; CHECK-NEXT: # %bb.1: # %t1
; CHECK-NEXT:l32r a8, .LCPI11_1
; CHECK-NEXT:wfr f8, a8
@@ -451,6 +451,21 @@ exit:
}
define float @copysign_f32(float %a, float %b) {
+; CHECK-LABEL: copysign_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT:l32r a8, .LCPI14_0
+; CHECK-NEXT:and a8, a3, a8
+; CHECK-NEXT:l32r a9, .LCPI14_1
+; CHECK-NEXT:and a9, a2, a9
+; CHECK-NEXT:wfr f8, a9
+; CHECK-NEXT:movi a9, 0
+; CHECK-NEXT:beq a8, a9, .LBB14_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT:neg.s f8, f8
+; CHECK-NEXT: .LBB14_2: # %entry
+; CHECK-NEXT:rfr a2, f8
+; CHECK-NEXT:ret
entry:
%c = call float @llvm.copysign.f32(float %a, float %b)
ret float %c
```
https://github.com/llvm/llvm-project/pull/157520
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.o
[llvm-branch-commits] [clang] port 5b4819e to release (PR #159209)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: David Blaikie (dwblaikie)
Changes
Applies the fix on top of the backport made in #156664
---
Full diff: https://github.com/llvm/llvm-project/pull/159209.diff
1 Files Affected:
- (modified) clang/test/CodeGenCXX/debug-info-structured-binding.cpp (+1-1)
```diff
diff --git a/clang/test/CodeGenCXX/debug-info-structured-binding.cpp
b/clang/test/CodeGenCXX/debug-info-structured-binding.cpp
index 4a4a4d8bdfaad..8032ce85c9e25 100644
--- a/clang/test/CodeGenCXX/debug-info-structured-binding.cpp
+++ b/clang/test/CodeGenCXX/debug-info-structured-binding.cpp
@@ -10,7 +10,7 @@
// CHECK: getelementptr inbounds nuw %struct.A, ptr {{.*}}, i32 0, i32 1, !dbg
![[Y1_DEBUG_LOC:[0-9]+]]
// CHECK: getelementptr inbounds nuw %struct.A, ptr {{.*}}, i32 0, i32 1, !dbg
![[Y2_DEBUG_LOC:[0-9]+]]
// CHECK: load ptr, ptr %z2, {{.*}}!dbg ![[Z2_DEBUG_LOC:[0-9]+]]
-// CHECK: getelementptr inbounds [2 x i32], ptr {{.*}}, i64 0, i64 1, !dbg
![[A2_DEBUG_LOC:[0-9]+]]
+// CHECK: getelementptr inbounds [2 x i32], ptr {{.*}}, i{{64|32}} 0,
i{{64|32}} 1, !dbg ![[A2_DEBUG_LOC:[0-9]+]]
// CHECK: getelementptr inbounds nuw { i32, i32 }, ptr {{.*}}, i32 0, i32 1,
!dbg ![[C2_DEBUG_LOC:[0-9]+]]
// CHECK: extractelement <2 x i32> {{.*}}, i32 1, !dbg ![[V2_DEBUG_LOC:[0-9]+]]
// CHECK: ![[VAR_0]] = !DILocalVariable(name: "a"
```
https://github.com/llvm/llvm-project/pull/159209
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [MLIR] Add new complex.powi op (PR #158722)
https://github.com/joker-eph edited https://github.com/llvm/llvm-project/pull/158722 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Select VGPR MFMAs by default (PR #159493)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/159493 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Select VGPR MFMAs by default (PR #159493)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/159493?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#159493** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/159493?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/159493?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#159492** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/159492?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/159493 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Remarks] Remove redundant size from StringRefs (NFC) (PR #156357)
https://github.com/tobias-stadler updated
https://github.com/llvm/llvm-project/pull/156357
>From e3951bca5a4a5c169975f13faa679a761455976a Mon Sep 17 00:00:00 2001
From: Tobias Stadler
Date: Mon, 1 Sep 2025 19:02:32 +0100
Subject: [PATCH] fix format
Created using spr 1.3.7-wip
---
llvm/include/llvm/Remarks/BitstreamRemarkContainer.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkContainer.h
b/llvm/include/llvm/Remarks/BitstreamRemarkContainer.h
index 2e378fd755588..48a148a3adc13 100644
--- a/llvm/include/llvm/Remarks/BitstreamRemarkContainer.h
+++ b/llvm/include/llvm/Remarks/BitstreamRemarkContainer.h
@@ -96,7 +96,8 @@ constexpr StringLiteral MetaExternalFileName("External File");
constexpr StringLiteral RemarkHeaderName("Remark header");
constexpr StringLiteral RemarkDebugLocName("Remark debug location");
constexpr StringLiteral RemarkHotnessName("Remark hotness");
-constexpr StringLiteral RemarkArgWithDebugLocName("Argument with debug
location");
+constexpr StringLiteral
+RemarkArgWithDebugLocName("Argument with debug location");
constexpr StringLiteral RemarkArgWithoutDebugLocName("Argument");
} // end namespace remarks
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen: Keep reference to TargetRegisterInfo in TargetInstrInfo (PR #158224)
@@ -1070,8 +1070,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "namespace llvm {\n";
OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
<< " explicit " << ClassName
- << "(const TargetSubtargetInfo &STI, unsigned CFSetupOpcode = ~0u, "
-"unsigned CFDestroyOpcode = ~0u, "
+ << "(const TargetSubtargetInfo &STI, const TargetRegisterInfo &TRI, "
arsenm wrote:
No. Most targets have TRI inside TII. After this PR, they all do. Later we
should probably remove the query for TRI from STI.
https://github.com/llvm/llvm-project/pull/158224
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-mca: Error on MCSubtargetInfo construction failure (PR #159215)
https://github.com/mshockwave approved this pull request. https://github.com/llvm/llvm-project/pull/159215 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] SPARC: Use RegClassByHwMode instead of PointerLikeRegClass (PR #158271)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158271
>From a349f1ad18e539cd9ce1d036faae37710a3e288c Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 8 Sep 2025 14:04:59 +0900
Subject: [PATCH] SPARC: Use RegClassByHwMode instead of PointerLikeRegClass
---
.../Sparc/Disassembler/SparcDisassembler.cpp | 8 ---
llvm/lib/Target/Sparc/SparcInstrInfo.td | 21 +--
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index c3d60f3689e1f..e585e5af42d32 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -159,14 +159,6 @@ static DecodeStatus DecodeI64RegsRegisterClass(MCInst
&Inst, unsigned RegNo,
return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
}
-// This is used for the type "ptr_rc", which is either IntRegs or I64Regs
-// depending on SparcRegisterInfo::getPointerRegClass.
-static DecodeStatus DecodePointerLikeRegClass0(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
-}
-
static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td
b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 53972d6c105a4..97e7fd7769edb 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -95,10 +95,27 @@ def HasFSMULD : Predicate<"!Subtarget->hasNoFSMULD()">;
// will pick deprecated instructions.
def UseDeprecatedInsts : Predicate<"Subtarget->useV8DeprecatedInsts()">;
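+//===----------------------------------------------------------------------===//
+// HwModes Pattern Stuff
+//===----------------------------------------------------------------------===//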
+
+defvar SPARC32 = DefaultMode;
+def SPARC64 : HwMode<[Is64Bit]>;
+
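//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//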
+def sparc_ptr_rc : RegClassByHwMode<
+ [SPARC32, SPARC64],
+ [IntRegs, I64Regs]>;
+
+// Both cases can use the same decoder method, so avoid the dispatch
+// by hwmode by setting an explicit DecoderMethod
+def ptr_op : RegisterOperand<sparc_ptr_rc> {
+ let DecoderMethod = "DecodeIntRegsRegisterClass";
+}
+
// FIXME these should have AsmOperandClass.
def uimm3 : PatLeaf<(imm), [{ return isUInt<3>(N->getZExtValue()); }]>;
@@ -178,12 +195,12 @@ def simm13Op : Operand {
def MEMrr : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
+ let MIOperandInfo = (ops ptr_op, ptr_op);
let ParserMatchClass = SparcMEMrrAsmOperand;
}
def MEMri : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, simm13Op);
+ let MIOperandInfo = (ops ptr_op, simm13Op);
let ParserMatchClass = SparcMEMriAsmOperand;
}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Validating Root flags are denying shader stage (PR #153287)
@@ -0,0 +1,20 @@ +; RUN: not opt -S -passes='dxil-post-optimization-validation' %s 2>&1 | FileCheck %s joaosaffran wrote: `opt` doesn't have preprocessing, so I cannot define macros with it :/ https://github.com/llvm/llvm-project/pull/153287 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Generate [x]vldi instructions with special constant splats (PR #159258)
https://github.com/ylzsx updated
https://github.com/llvm/llvm-project/pull/159258
>From e1a23dd6e31734b05af239bb827a280d403564ee Mon Sep 17 00:00:00 2001
From: yangzhaoxin
Date: Wed, 17 Sep 2025 10:20:46 +0800
Subject: [PATCH 1/2] [LoongArch] Generate [x]vldi instructions with special
constant splats
---
.../LoongArch/LoongArchISelDAGToDAG.cpp | 52 +++
.../LoongArch/LoongArchISelLowering.cpp | 87 ++-
.../Target/LoongArch/LoongArchISelLowering.h | 5 ++
.../CodeGen/LoongArch/lasx/build-vector.ll| 80 +
.../lasx/fdiv-reciprocal-estimate.ll | 87 +++
.../lasx/fsqrt-reciprocal-estimate.ll | 39 +++--
llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll | 3 +-
.../LoongArch/lasx/ir-instruction/fdiv.ll | 3 +-
llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 31 +++
.../CodeGen/LoongArch/lsx/build-vector.ll | 77 +---
.../LoongArch/lsx/fdiv-reciprocal-estimate.ll | 87 +++
.../lsx/fsqrt-reciprocal-estimate.ll | 70 +--
llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll | 3 +-
.../LoongArch/lsx/ir-instruction/fdiv.ll | 3 +-
llvm/test/CodeGen/LoongArch/lsx/vselect.ll| 31 +++
15 files changed, 289 insertions(+), 369 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..fda313e693760 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
-unsigned Op;
+unsigned Op = 0;
EVT ResTy = BVN->getValueType(0);
bool Is128Vec = BVN->getValueType(0).is128BitVector();
bool Is256Vec = BVN->getValueType(0).is256BitVector();
+SDNode *Res;
if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
HasAnyUndefs, 8))
break;
-switch (SplatBitSize) {
-default:
- break;
-case 8:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
- break;
-case 16:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
- break;
-case 32:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
- break;
-case 64:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
- break;
-}
-
-SDNode *Res;
// If we have a signed 10 bit integer, we can splat it directly.
if (SplatValue.isSignedIntN(10)) {
+ switch (SplatBitSize) {
+ default:
+break;
+ case 8:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+break;
+ case 16:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+break;
+ case 32:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+break;
+ case 64:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+break;
+ }
+
EVT EleType = ResTy.getVectorElementType();
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,20 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Res);
return;
}
+
+// Select appropriate [x]vldi instructions for some special constant splats,
+// where the immediate value `imm[12] == 1` for used [x]vldi instructions.
+std::pair<bool, uint64_t> ConvertVLDI =
+LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
+SplatBitSize);
+if (ConvertVLDI.first) {
+ Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+ SDValue Imm = CurDAG->getSignedTargetConstant(
+ SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+ Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+}
break;
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..460e2d7c87af7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2679,9 +2679,10 @@ SDValue
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
// We can only handle 64-bit elements that are within
- // the signed 10-bit range on 32-bit targets.
+ // the signed 10-bit range or match vldi patterns on 32-bit targets.
// See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
-
[llvm-branch-commits] [llvm] [LoongArch] Split 256-bit build_vector to avoid using LASX element insertion (PR #154918)
zhaoqi5 wrote: ping https://github.com/llvm/llvm-project/pull/154918 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][mustache] Precommit test for StandaloneIndentation (PR #159184)
https://github.com/evelez7 approved this pull request. https://github.com/llvm/llvm-project/pull/159184 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies when no runtime (PR #157754)
@@ -1,4 +1,5 @@ ; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck %s +; XFAIL: * jdenny-ornl wrote: See last paragraph of https://github.com/llvm/llvm-project/pull/157754#issue-3400082318. https://github.com/llvm/llvm-project/pull/157754 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies when no runtime (PR #157754)
@@ -1,4 +1,5 @@ ; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck %s +; XFAIL: * jdenny-ornl wrote: > check the actual baseline content Sorry, what do you mean? https://github.com/llvm/llvm-project/pull/157754 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] b5c72af - Revert "[mlir] move if-condition propagation to a standalone pass (#150278)"
Author: Mehdi Amini
Date: 2025-09-17T10:17:15+02:00
New Revision: b5c72affe48e74abdb0898a89aeefa8edfa81065
URL:
https://github.com/llvm/llvm-project/commit/b5c72affe48e74abdb0898a89aeefa8edfa81065
DIFF:
https://github.com/llvm/llvm-project/commit/b5c72affe48e74abdb0898a89aeefa8edfa81065.diff
LOG: Revert "[mlir] move if-condition propagation to a standalone pass
(#150278)"
This reverts commit 9d11accf95db0ed08bd3181c25dd75fc793d089d.
Added:
Modified:
mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
mlir/lib/Dialect/SCF/IR/SCF.cpp
mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
mlir/test/Dialect/SCF/canonicalize.mlir
Removed:
mlir/lib/Dialect/SCF/Transforms/IfConditionPropagation.cpp
mlir/test/Dialect/SCF/if-cond-prop.mlir
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 8b891aa374b58..3ac651f53880c 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -41,12 +41,6 @@ def SCFForLoopSpecialization :
Pass<"scf-for-loop-specialization"> {
let constructor = "mlir::createForLoopSpecializationPass()";
}
-def SCFIfConditionPropagation : Pass<"scf-if-condition-propagation"> {
- let summary = "Replace usages of if condition with true/false constants in "
-"the conditional regions";
- let dependentDialects = ["arith::ArithDialect"];
-}
-
def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> {
let summary = "Fuse adjacent parallel loops";
let constructor = "mlir::createParallelLoopFusionPass()";
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index ae55eaded0554..a9da6c2c8320a 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -2453,6 +2453,65 @@ struct ConvertTrivialIfToSelect : public
OpRewritePattern {
}
};
+/// Allow the true region of an if to assume the condition is true
+/// and vice versa. For example:
+///
+/// scf.if %cmp {
+/// print(%cmp)
+/// }
+///
+/// becomes
+///
+/// scf.if %cmp {
+/// print(true)
+/// }
+///
+struct ConditionPropagation : public OpRewritePattern<IfOp> {
+ using OpRewritePattern<IfOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(IfOp op,
+PatternRewriter &rewriter) const override {
+// Early exit if the condition is constant since replacing a constant
+// in the body with another constant isn't a simplification.
+if (matchPattern(op.getCondition(), m_Constant()))
+ return failure();
+
+bool changed = false;
+mlir::Type i1Ty = rewriter.getI1Type();
+
+// These variables serve to prevent creating duplicate constants
+// and hold constant true or false values.
+Value constantTrue = nullptr;
+Value constantFalse = nullptr;
+
+for (OpOperand &use :
+ llvm::make_early_inc_range(op.getCondition().getUses())) {
+ if (op.getThenRegion().isAncestor(use.getOwner()->getParentRegion())) {
+changed = true;
+
+if (!constantTrue)
+ constantTrue = rewriter.create<arith::ConstantOp>(
+ op.getLoc(), i1Ty, rewriter.getIntegerAttr(i1Ty, 1));
+
+rewriter.modifyOpInPlace(use.getOwner(),
+ [&]() { use.set(constantTrue); });
+ } else if (op.getElseRegion().isAncestor(
+ use.getOwner()->getParentRegion())) {
+changed = true;
+
+if (!constantFalse)
+ constantFalse = rewriter.create<arith::ConstantOp>(
+ op.getLoc(), i1Ty, rewriter.getIntegerAttr(i1Ty, 0));
+
+rewriter.modifyOpInPlace(use.getOwner(),
+ [&]() { use.set(constantFalse); });
+ }
+}
+
+return success(changed);
+ }
+};
+
/// Remove any statements from an if that are equivalent to the condition
/// or its negation. For example:
///
@@ -2835,8 +2894,9 @@ struct CombineNestedIfs : public OpRewritePattern {
void IfOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add(context);
}
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index a07d9d4953d19..a9ffa9dc208a0 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -4,7 +4,6 @@ add_mlir_dialect_library(MLIRSCFTransforms
ForallToFor.cpp
ForallToParallel.cpp
ForToWhile.cpp
- IfConditionPropagation.cpp
LoopCanonicalization.cpp
LoopPipelining.cpp
LoopRangeFolding.cpp
diff --git a/mlir/lib/Dialect/SCF/Transforms/IfConditionPropagation.cpp
b/mlir/lib/Dialect/SCF/Transforms/IfConditionPropagation.cpp
deleted file mode 100644
index bdc51296ef9f2..0
--- a/mlir/lib/Dialect/SCF/Transforms/IfConditionPropagation.cpp
+++ /dev/null
@@
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
https://github.com/fhahn approved this pull request. LGTM, thanks for the fix + backport! https://github.com/llvm/llvm-project/pull/159286 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Generate [x]vldi instructions with special constant splats (PR #159258)
https://github.com/ylzsx ready_for_review https://github.com/llvm/llvm-project/pull/159258 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Generate [x]vldi instructions with special constant splats (PR #159258)
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: Zhaoxin Yang (ylzsx)
Changes
---
Patch is 42.97 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/159258.diff
15 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+33-19)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+85-2)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+5)
- (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+23-57)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
(+29-58)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
(+14-25)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll (+1-2)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll (+1-2)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vselect.ll (+11-20)
- (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+22-55)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll
(+29-58)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
(+23-47)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll (+1-2)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll (+1-2)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vselect.ll (+11-20)
``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..fda313e693760 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
-unsigned Op;
+unsigned Op = 0;
EVT ResTy = BVN->getValueType(0);
bool Is128Vec = BVN->getValueType(0).is128BitVector();
bool Is256Vec = BVN->getValueType(0).is256BitVector();
+SDNode *Res;
if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
HasAnyUndefs, 8))
break;
-switch (SplatBitSize) {
-default:
- break;
-case 8:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
- break;
-case 16:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
- break;
-case 32:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
- break;
-case 64:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
- break;
-}
-
-SDNode *Res;
// If we have a signed 10 bit integer, we can splat it directly.
if (SplatValue.isSignedIntN(10)) {
+ switch (SplatBitSize) {
+ default:
+break;
+ case 8:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+break;
+ case 16:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+break;
+ case 32:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+break;
+ case 64:
+Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+break;
+ }
+
EVT EleType = ResTy.getVectorElementType();
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,20 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Res);
return;
}
+
+// Select appropriate [x]vldi instructions for some special constant splats,
+// where the immediate value `imm[12] == 1` for used [x]vldi instructions.
+std::pair<bool, uint64_t> ConvertVLDI =
+LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
+SplatBitSize);
+if (ConvertVLDI.first) {
+ Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+ SDValue Imm = CurDAG->getSignedTargetConstant(
+ SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+ Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+}
break;
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..460e2d7c87af7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2679,9 +2679,10 @@ SDValue
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
// We can only handle 64-bit elements that are within
- // the signed 10-bit range on 32-bit targets.
+ // the signed 10-bit range or match vldi patterns on 32-bit targets.
//
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/159286 Backport 994a6a39e13dcc335247a127a5da05905d1ac541 Requested by: @RKSimon >From 1cb48878f5e3a7695e7b31ebb883dd449b0a315b Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor <[email protected]> Date: Mon, 15 Sep 2025 06:08:16 -0400 Subject: [PATCH] [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) The scalarizeExtExtract transform assumed little-endian lane ordering, causing miscompiles on big-endian targets such as AIX/PowerPC under -O3 -flto. This patch updates the shift calculation to handle endianness correctly for big-endian targets. No functional change for little-endian targets. Fixes #158197. - Co-authored-by: Simon Pilgrim (cherry picked from commit 994a6a39e13dcc335247a127a5da05905d1ac541) --- .../Transforms/Vectorize/VectorCombine.cpp| 11 -- .../AArch64/scalarize-ext-extract-endian.ll | 36 +++ .../VectorCombine/PowerPC/lit.local.cfg | 2 ++ .../PowerPC/scalarize-ext-extract.ll | 22 4 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 639f8686a271e..ea9cbed0117b9 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1829,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy))); uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType()); uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1; + uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy); + Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits); + Value *Mask = 
ConstantInt::get(PackedTy, EltBitMask); for (User *U : Ext->users()) { auto *Extract = cast(U); uint64_t Idx = cast(Extract->getIndexOperand())->getZExtValue(); -Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits); -Value *And = Builder.CreateAnd(LShr, EltBitMask); +uint64_t ShiftAmt = +DL->isBigEndian() +? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) +: (Idx * SrcEltSizeInBits); +Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt); +Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); } return true; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll new file mode 100644 index 0..9796faf2e6feb --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { +; LE-LABEL: @g( +; LE-NEXT:[[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; LE-NEXT:[[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; LE-NEXT:[[TMP3:%.*]] = lshr i64 [[TMP2]], 56 +; LE-NEXT:[[TMP4:%.*]] = and i64 [[TMP2]], 255 +; LE-NEXT:[[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; LE-NEXT:[[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; LE-NEXT:[[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; LE-NEXT:[[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; LE-NEXT:ret i64 [[SUM]] +; +; BE-LABEL: @g( +; BE-NEXT:[[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT:[[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT:[[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT:[[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT:[[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; 
BE-NEXT:[[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT:[[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 +; BE-NEXT:[[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT:ret i64 [[SUM]] +; + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + + + diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg new file mode 100644 index 0..15af315f104fc --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if 'PowerPC' not in config.root.targets: +config.unsupported =
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/159286 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
llvmbot wrote: @RKSimon What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/159286 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Generate [x]vldi instructions with special constant splats (PR #159258)
https://github.com/ylzsx converted_to_draft https://github.com/llvm/llvm-project/pull/159258 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
llvmbot wrote:
@llvm/pr-subscribers-backend-powerpc
Author: None (llvmbot)
Changes
Backport 994a6a39e13dcc335247a127a5da05905d1ac541
Requested by: @RKSimon
---
Full diff: https://github.com/llvm/llvm-project/pull/159286.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-2)
- (added)
llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
(+36)
- (added) llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg (+2)
- (added) llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
(+22)
``diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 639f8686a271e..ea9cbed0117b9 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1829,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I)
{
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
+ uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
+ Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits);
+ Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
for (User *U : Ext->users()) {
auto *Extract = cast<ExtractElementInst>(U);
uint64_t Idx =
cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
-Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits);
-Value *And = Builder.CreateAnd(LShr, EltBitMask);
+uint64_t ShiftAmt =
+DL->isBigEndian()
+? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
+: (Idx * SrcEltSizeInBits);
+Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt);
+Value *And = Builder.CreateAnd(LShr, Mask);
U->replaceAllUsesWith(And);
}
return true;
diff --git
a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
new file mode 100644
index 0..9796faf2e6feb
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s
-o - | FileCheck %s --check-prefix=LE
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu
%s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; LE-LABEL: @g(
+; LE-NEXT:[[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; LE-NEXT:[[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; LE-NEXT:[[TMP3:%.*]] = lshr i64 [[TMP2]], 56
+; LE-NEXT:[[TMP4:%.*]] = and i64 [[TMP2]], 255
+; LE-NEXT:[[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; LE-NEXT:[[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; LE-NEXT:[[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; LE-NEXT:[[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; LE-NEXT:ret i64 [[SUM]]
+;
+; BE-LABEL: @g(
+; BE-NEXT:[[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT:[[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT:[[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT:[[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT:[[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; BE-NEXT:[[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; BE-NEXT:[[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; BE-NEXT:[[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; BE-NEXT:ret i64 [[SUM]]
+;
+ %z = zext <8 x i8> %v to <8 x i64>
+ %e0 = extractelement <8 x i64> %z, i32 0
+ %e7 = extractelement <8 x i64> %z, i32 7
+ %sum = add i64 %e0, %e7
+ ret i64 %sum
+}
+
+
+
diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
new file mode 100644
index 0..15af315f104fc
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'PowerPC' not in config.root.targets:
+config.unsupported = True
diff --git
a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
new file mode 100644
index 0..a9b719920c341
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff %s
-o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; BE-LABEL: @g(
+; BE-NEXT:[[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT:[[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT:[[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT:[[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT:[[Z:%.*]] = zext <8
[llvm-branch-commits] [llvm] release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) (PR #159286)
github-actions[bot] wrote: ⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo. Please turn off [Keep my email addresses private](https://github.com/settings/emails) setting in your account. See [LLVM Developer Policy](https://llvm.org/docs/DeveloperPolicy.html#email-addresses) and [LLVM Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it) for more information. https://github.com/llvm/llvm-project/pull/159286 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CGData] Rewrite tests to not use subshells (PR #157234)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/157234 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MC] Rewrite stdin.s to use python (PR #157232)
@@ -0,0 +1,25 @@ +# RUN: echo "// comment" > %t.input +# RUN: which llvm-mc | %python %s %t petrhosek wrote: I'd make this even more explicit: ```suggestion # RUN: which llvm-mc | %python %s %t.input %t ``` https://github.com/llvm/llvm-project/pull/157232 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Consolidate copy-in/copy-out determination in evaluate framework (PR #155810)
tru wrote: @sscalpone can you weigh in on this - do you think it's a big enough problem to be merged into the release branch? I am wary about the size of this PR. https://github.com/llvm/llvm-project/pull/155810 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] ELF: CFI jump table relaxation. (PR #147424)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/147424
>From 5bce06b0d8db161a2e09709bcfe15b4623e43d01 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Mon, 7 Jul 2025 16:41:10 -0700
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
lld/ELF/Arch/X86_64.cpp | 95 +
lld/ELF/Relocations.cpp | 2 +-
lld/ELF/Target.h| 1 +
3 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 488f4803b2cb4..04ca79befdc4a 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -318,6 +318,9 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is,
InputFile *file,
}
bool X86_64::relaxOnce(int pass) const {
+ if (pass == 0)
+relaxJumpTables(ctx);
+
uint64_t minVA = UINT64_MAX, maxVA = 0;
for (OutputSection *osec : ctx.outputSections) {
if (!(osec->flags & SHF_ALLOC))
@@ -1231,6 +1234,98 @@ void X86_64::applyBranchToBranchOpt() const {
redirectControlTransferRelocations);
}
+void elf::relaxJumpTables(Ctx &ctx) {
+ // Relax CFI jump tables.
+ // - Split jump table into pieces and place target functions inside the jump
+ // table if small enough.
+ // - Move jump table before last called function and delete last branch
+ // instruction.
+ std::map> sectionReplacements;
+ SmallVector storage;
+ for (OutputSection *osec : ctx.outputSections) {
+if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+for (InputSection *sec : getInputSections(*osec, storage)) {
+ if (!sec->name.starts_with(".text..L.cfi.jumptable"))
+continue;
+ std::vector replacements;
+ replacements.push_back(sec);
+ auto addSectionSlice = [&](size_t begin, size_t end, Relocation *rbegin,
+ Relocation *rend) {
+if (begin == end)
+ return;
+auto *slice = make(
+sec->file, sec->name, sec->type, sec->flags, 1, sec->entsize,
+sec->contentMaybeDecompress().slice(begin, end - begin));
+for (const Relocation &r : ArrayRef(rbegin, rend)) {
+ slice->relocations.push_back(
+ Relocation{r.expr, r.type, r.offset - begin, r.addend, r.sym});
+}
+replacements.push_back(slice);
+ };
+ auto getMovableSection = [&](Relocation &r) -> InputSection * {
+auto *sym = dyn_cast_or_null(r.sym);
+if (!sym || sym->isPreemptible || sym->isGnuIFunc() || sym->value != 0)
+ return nullptr;
+auto *sec = dyn_cast_or_null(sym->section);
+if (!sec || sectionReplacements.count(sec))
+ return nullptr;
+return sec;
+ };
+ size_t begin = 0;
+ Relocation *rbegin = sec->relocs().begin();
+ for (auto &r : sec->relocs().slice(0, sec->relocs().size() - 1)) {
+auto entrySize = (&r + 1)->offset - r.offset;
+InputSection *target = getMovableSection(r);
+if (!target || target->size > entrySize)
+ continue;
+target->addralign = 1;
+addSectionSlice(begin, r.offset - 1, rbegin, &r);
+replacements.push_back(target);
+sectionReplacements[target] = {};
+begin = r.offset - 1 + target->size;
+rbegin = &r + 1;
+ }
+ InputSection *lastSec = getMovableSection(sec->relocs().back());
+ if (lastSec) {
+lastSec->addralign = 1;
+addSectionSlice(begin, sec->relocs().back().offset - 1, rbegin,
+&sec->relocs().back());
+replacements.push_back(lastSec);
+sectionReplacements[sec] = {};
+sectionReplacements[lastSec] = replacements;
+for (auto *s : replacements)
+ s->parent = lastSec->parent;
+ } else {
+addSectionSlice(begin, sec->size, rbegin, sec->relocs().end());
+sectionReplacements[sec] = replacements;
+for (auto *s : replacements)
+ s->parent = sec->parent;
+ }
+ sec->relocations.clear();
+ sec->size = 0;
+}
+ }
+ for (OutputSection *osec : ctx.outputSections) {
+if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+for (SectionCommand *cmd : osec->commands) {
+ auto *isd = dyn_cast(cmd);
+ if (!isd)
+continue;
+ SmallVector newSections;
+ for (auto *sec : isd->sections) {
+auto i = sectionReplacements.find(sec);
+if (i == sectionReplacements.end())
+ newSections.push_back(sec);
+else
+ newSections.append(i->second.begin(), i->second.end());
+ }
+ isd->sections = std::move(newSections);
+}
+ }
+}
+
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// par
[llvm-branch-commits] CodeGen: Introduce MachineFunction::getPreferredAlignment(). (PR #158368)
https://github.com/pcc created https://github.com/llvm/llvm-project/pull/158368 MachineFunction can now be queried for the preferred alignment which comes from the function attributes (optsize, minsize, prefalign) and TargetLowering. Part of this RFC: https://discourse.llvm.org/t/rfc-enhancing-function-alignment-attributes/88019 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Sparc: Remove Is64Bit field from SparcTargetMachine (PR #157400)
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/157400
Directly use the triple instead of having an additional field.
>From 27e0075d67fa33345bfb361fecac6ec7636cd442 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 8 Sep 2025 16:35:42 +0900
Subject: [PATCH] Sparc: Remove Is64Bit field from SparcTargetMachine
Directly use the triple instead of having an additional field.
---
llvm/lib/Target/Sparc/SparcSubtarget.cpp | 8 +++
llvm/lib/Target/Sparc/SparcSubtarget.h | 4 ++--
llvm/lib/Target/Sparc/SparcTargetMachine.cpp | 24 ++--
llvm/lib/Target/Sparc/SparcTargetMachine.h | 3 +--
4 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 5a71e49467b14..f2721ead00697 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -31,7 +31,7 @@ SparcSubtarget
&SparcSubtarget::initializeSubtargetDependencies(
// Determine default and user specified characteristics
std::string CPUName = std::string(CPU);
if (CPUName.empty())
-CPUName = (Is64Bit) ? "v9" : "v8";
+CPUName = getTargetTriple().isSPARC64() ? "v9" : "v8";
if (TuneCPU.empty())
TuneCPU = CPUName;
@@ -47,10 +47,10 @@ SparcSubtarget
&SparcSubtarget::initializeSubtargetDependencies(
}
SparcSubtarget::SparcSubtarget(const StringRef &CPU, const StringRef &TuneCPU,
- const StringRef &FS, const TargetMachine &TM,
- bool is64Bit)
+ const StringRef &FS, const TargetMachine &TM)
: SparcGenSubtargetInfo(TM.getTargetTriple(), CPU, TuneCPU, FS),
- ReserveRegister(TM.getMCRegisterInfo()->getNumRegs()), Is64Bit(is64Bit),
+ ReserveRegister(TM.getMCRegisterInfo()->getNumRegs()),
+ Is64Bit(TM.getTargetTriple().isSPARC64()),
InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
TLInfo(TM, *this), FrameLowering(*this) {
TSInfo = std::make_unique();
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h
b/llvm/lib/Target/Sparc/SparcSubtarget.h
index 502be1e06d41c..f98aef012a867 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -36,7 +36,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
virtual void anchor();
- bool Is64Bit;
+ const bool Is64Bit;
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)
\
bool ATTRIBUTE = DEFAULT;
@@ -49,7 +49,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
public:
SparcSubtarget(const StringRef &CPU, const StringRef &TuneCPU,
- const StringRef &FS, const TargetMachine &TM, bool is64bit);
+ const StringRef &FS, const TargetMachine &TM);
~SparcSubtarget() override;
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 52076a6b4dd22..754c8f63ca4ec 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -38,7 +38,9 @@ static cl::opt
BranchRelaxation("sparc-enable-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
-static std::string computeDataLayout(const Triple &T, bool is64Bit) {
+static std::string computeDataLayout(const Triple &T) {
+ const bool is64Bit = T.isSPARC64();
+
// Sparc is typically big endian, but some are little.
std::string Ret = T.getArch() == Triple::sparcel ? "e" : "E";
Ret += "-m:e";
@@ -107,15 +109,14 @@ SparcTargetMachine::SparcTargetMachine(const Target &T,
const Triple &TT,
const TargetOptions &Options,
std::optional RM,
std::optional CM,
- CodeGenOptLevel OL, bool JIT,
- bool is64bit)
+ CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(
- T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
+ T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
- getEffectiveSparcCodeModel(CM, getEffectiveRelocModel(RM), is64bit,
- JIT),
+ getEffectiveSparcCodeModel(CM, getEffectiveRelocModel(RM),
+ TT.isSPARC64(), JIT),
OL),
- TLOF(std::make_unique()), is64Bit(is64bit) {
+ TLOF(std::make_unique()) {
initAsmInfo();
}
@@ -148,8 +149,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F)
const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
-I = std::make_unique(CPU, TuneCPU, FS, *this,
-
[llvm-branch-commits] [llvm] AMDGPU: Remove getLdStRegisterOperandForSize (PR #157216)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/157216 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Generate canonical additions in AMDGPUPromoteAlloca (PR #157810)
https://github.com/ritter-x2a created
https://github.com/llvm/llvm-project/pull/157810
When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.
>From f6a8f012f387b906f845b7a57c4e88bd7f490bcf Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 10 Sep 2025 04:23:59 -0400
Subject: [PATCH] [AMDGPU] Generate canonical additions in AMDGPUPromoteAlloca
When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.
---
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp| 2 +-
llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll | 8
llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll | 4 ++--
.../CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll| 6 +++---
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bb77cdff778c0..7dbe1235a98b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -478,7 +478,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP,
AllocaInst *Alloca,
ConstantInt *ConstIndex =
ConstantInt::get(OffsetType, IndexQuot.getSExtValue());
- Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
+ Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
if (Instruction *NewInst = dyn_cast(IndexAdd))
NewInsts.push_back(NewInst);
return IndexAdd;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index d72f158763c61..63622e67e7d0b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -312,7 +312,7 @@ define amdgpu_kernel void
@i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
; CHECK-NEXT:[[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32
4
; CHECK-NEXT:[[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32
5
; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT:[[TMP2:%.*]] = add i64 6, [[TMP1]]
+; CHECK-NEXT:[[TMP2:%.*]] = add i64 [[TMP1]], 6
; CHECK-NEXT:[[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64
[[TMP2]]
; CHECK-NEXT:[[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]],
i64 0
; CHECK-NEXT:[[TMP5:%.*]] = add i64 [[TMP2]], 1
@@ -464,7 +464,7 @@ define amdgpu_kernel void @i16_2d_load_store(ptr %out, i32
%sel) {
; CHECK-NEXT:[[TMP4:%.*]] = insertelement <6 x i16> [[TMP3]], i16 3, i32 3
; CHECK-NEXT:[[TMP5:%.*]] = insertelement <6 x i16> [[TMP4]], i16 4, i32 4
; CHECK-NEXT:[[TMP6:%.*]] = insertelement <6 x i16> [[TMP5]], i16 5, i32 5
-; CHECK-NEXT:[[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT:[[TMP2:%.*]] = extractelement <6 x i16> [[TMP6]], i32 [[TMP1]]
; CHECK-NEXT:store i16 [[TMP2]], ptr [[OUT]], align 2
; CHECK-NEXT:ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @float_2d_load_store(ptr %out,
i32 %sel) {
; CHECK-NEXT:[[TMP4:%.*]] = insertelement <6 x float> [[TMP3]], float
3.00e+00, i32 3
; CHECK-NEXT:[[TMP5:%.*]] = insertelement <6 x float> [[TMP4]], float
4.00e+00, i32 4
; CHECK-NEXT:[[TMP6:%.*]] = insertelement <6 x float> [[TMP5]], float
5.00e+00, i32 5
-; CHECK-NEXT:[[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT:[[TMP2:%.*]] = extractelement <6 x float> [[TMP6]], i32
[[TMP1]]
; CHECK-NEXT:store float [[TMP2]], ptr [[OUT]], align 4
; CHECK-NEXT:ret void
@@ -538,7 +538,7 @@ define amdgpu_kernel void @ptr_2d_load_store(ptr %out, i32
%sel) {
; CHECK-NEXT:[[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr
[[PTR_3]], i32 3
; CHECK-NEXT:[[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr
[[PTR_4]], i32 4
; CHECK-NEXT:[[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr
[[PTR_5]], i32 5
-; CHECK-NEXT:[[TMP7:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:[[TMP7:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT:[[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
; CHECK-NEXT:store ptr [[TMP8]], ptr [[OUT]], align 8
; CHECK-NEXT:ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
index 1b6ac0bd93c19..a865bf5058d6a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64
%offset) {
; CHECK-NEXT:[[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
; CHECK-NEXT:[[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
; CHECK-NEX
